#!/usr/bin/perl -w
#
# Script: afind, a version of find for AFS that stays within a volume
# Original author: Francisco Lozano - Cern (20-Feb-96)
#

use strict;
use Getopt::Long;
my ($opt_h, $opt_n, $opt_ni, $opt_e, $opt_s, $opt_t, $opt_m, $opt_d, $opt_null, $opt_md);

use Text::ParseWords qw /shellwords/;

my %comp = ('-' => '<',
	    '+' => '>',
	    ''  => '==');

if (!&GetOptions ('help'=> \$opt_h,
	       'name=s' => \$opt_n,
	       'iname=s' => \$opt_ni,
	       'exec=s' => \$opt_e,
	       'type=s' => \$opt_t,
	       'size=s' => \$opt_s,
	       'maxdepth=i' => \$opt_md,
	       'mtime=s' => \$opt_m,
               '0|null' => \$opt_null,
	       'debug'  => \$opt_d)) {
    &Usage;
}
my $DIR = $ARGV[0];
$DIR ='.' unless $DIR;

$opt_d = 1 if $opt_d;

&Usage if $opt_h;

$opt_n =~ s/([\*\?])/\.$1/g if defined($opt_n);  # what is this - shell pattern support?
$opt_ni =~ s/([\*\?])/\.$1/g if defined($opt_ni);

if($opt_e) {
    &Usage unless $opt_e =~ m/{}/;
}
if ($opt_t) {
    # define a checking function. Never follow symlinks.
    &Usage unless $opt_t =~ m/^\w$/;
    if ($opt_t eq  'l') {
	sub t_test  { my $path = shift; return ( -l $path ); };
    } else {
	no warnings qw(redefine);
	eval 'sub t_test  { my $path = shift; return ( ! -l $path && -'.$opt_t.' $path ); }';
	&Usage if $@;
    }
}

if (defined($opt_s)) {
    # define a size-checking function - this will use the previous stat!
    #
    my %mult =( 'c' => 1,
		'w' => 2,
		'b' => 512,   # WHY make this the defaut in GNU find??
		'k' => 1024,
		'M' => 1048576,
		'G' => 1073741824 );
    if( $opt_s =~ m/^([-+]?)(\d+)([cwbkMG]?)$/ ) {
	my $size = $2;
	if($3) {
	    $size *= $mult{$3};
	} else {
	    $size *= $mult{'b'};
	}
	eval 'sub s_test  { my $s = (stat(_))[7] || 0; return ( $s '.$comp{$1}.$size.' ); }';
	$opt_s =1;
    } else {
	&Usage;
    }
}

if (defined($opt_m)) {
    # define a mtime-checking function - this will use the previous stat!
    my %mult =( 's' => 1,
		'm' => 60,
		'h' => 60*60,
		'd' => 60*60*24,
		'M' => 60*60*24*30 ,
		'Y' => 60*60*24*365 );
    if( $opt_m =~ m/^([-+]?)(\d+)([smhdMY]?)$/ ) {
	my $op = $1;
	my $mtime = $2;
	my $now = time();
	if($3) {
	    $mtime *= $mult{$3};
	} else {
	    $mtime *= $mult{'d'};
	}
	$mtime = $now - $mtime;
	if(!$op) { # 'exact' test actually means a time range
	    my $mtime_before;
	    if($3) {
		$mtime_before = $mtime - $mult{$3};
	    } else {
		$mtime_before = $mtime - $mult{'d'};
	    }
	    eval 'sub m_test  { my $m = (stat(_))[9] || 0; return ( $m < '.$mtime.' and $m > '.$mtime_before.' ); }';
	} else {
	    eval 'sub m_test  { my $m = (stat(_))[9] || 0; return ( '.$mtime.$comp{$op}.' $m ); }';
	}

	$opt_m =1;
    } else {
	&Usage;
    }
}


my $depth=0;
stat($DIR);  ## using cached 'stat' in action()
&action($DIR);
&scan_dir ($DIR);

sub scan_dir {
    my ($path) = @_;
    my (@dir, $entry, $ext_path);

    if ( ! opendir DIR, $path ) {
	print STDERR "cannot open $path:$!\n";
	return;
    }
    @dir = readdir DIR or die "cannot list $path:$!";
    closedir DIR;
    for $entry (@dir) {
        next if ($entry =~ /^\.{1,2}$/);
	$ext_path = "${path}/${entry}";
        if (-d $ext_path && ! -l $ext_path) {
	    my (undef, $inode) = stat ($ext_path);
	    if ($inode % 2 == 0) {
		# It is a mount point
		print STDERR "Skipping mount point $ext_path\n" if $opt_d;
		next;
	    } else {
		if(defined($opt_md)) {
		    if( $depth >= $opt_md) {
			print STDERR "Skipping too-deep $ext_path\n" if $opt_d;
			next;
		    }
		}
		$depth++;
		&action($ext_path);
		&scan_dir ($ext_path);
		$depth--;
	    }
	} else {
	    &action($ext_path);
	}
    }
}

sub action {
    my ($ext_path) = @_;
    my ($cmd,$word);
    my ($tmp);

    if (defined($opt_n) or defined($opt_ni)) {
	# only look at last component
	$ext_path =~ /([^\/]*)$/;
	return if(defined($opt_n) && !($1 =~ /^$opt_n$/));
	return if(defined($opt_ni) && !($1 =~ /^$opt_ni$/i));
    }
    if ($opt_t) {
	return unless &t_test($ext_path);
    }
    # optimisation: reuse stat() from caller, use '_' cached results in tests
    if ($opt_s or $opt_m) {
	if (-f _ && $opt_s) {  # 'size' test makes sense only on files
	    return unless &s_test();
	}
	if($opt_m) {
	    return unless &m_test();
	}
    }
    if ($opt_e) {
	my @cmd = &shellwords($opt_e);
        foreach $word (@cmd) {
            $word =~ s#{}#$ext_path#g;
        }
	$tmp = join (" ", @cmd);
	print STDERR "Command: $tmp\n" if $opt_d;
	system @cmd;
    } elsif ($opt_null) {
	print "$ext_path\000";
    } else {
	print "$ext_path\n";
    }	
}


sub Usage {
    print STDERR <<EOF;
Recursively list content within an AFS volume.

Usage: afind [path] [--debug] [--type <type>] [--size [+-]<size>[]] --mtime [+-]<mtime>[smhdMY] [--[i]name <regexpr>] [--maxdepth <depth>] [-e "<command> {}"] [-0]
       afind --help

 <regexp> evaluated against last part of filename, must match completely
        (anchored regex). --iname ignore capitalization.

 <type> can be the file type checks supported by perl (see "man perlfunc")

 <size> file is less, equal or bigger than this. Supports units
        "c"har = 1 byte
        "w"ord = 2 bytes,
        "b"lock = 512 chars (default)
        "k"ilobyte = 1024 bytes,
        "M"egabyte = 1024*1024 bytes,
        "G"igabyte = 1024*1024*1024 bytes
         (no support for TB,PB,EB. AFS, remember?)

 <depth> is the maximum directory depth to recurse into

 <mtime> file has been changed more recently, exactly or less recently. Units
        "s"econds,
        "m"inutes,
        "h"ours,
        "d"ays,
        "M"onths (30 day),
        "Y"ears (365 days)

 <command>: do not print filename, instead run command on them. '{}' will be
     replaced by the current path name, can occur several times (and needs to
     be there at least once).

 -0 : will print NULL-separated path names, default is to print
      newline-separated path names
EOF
    exit 1;
}
__END__

=head1 NAME

afind - restricted find with AFS modifications

=head1 SYNOPSIS

afind [path] [--debug] [--type <type>] [--size [+-]<size>[cwbkMG]]
                       [--mtime [+-]<mtime>[smhdMY]] [--[i]name <regexpr>]
                       [--maxdepth <depth>] [--exec "<command> {}"] [--0]

afind --help

=head1 DESCRIPTION

C<afind> is a small perl script which tries to emulate the Unix C<find>
(it does not have the full range of options of GNU C<find> , though ..).
C<afind> does not traverse neither AFS mount point nor symbolic links.
It can be used to perform AFS commands recursively in a safe
way. It starts searching from F<path>. In case F<path> is not
specified, the directory '.' is assumed.

=head1 OPTIONS

Short-form options are accepted.

=over

=item B<--[i]name> <regexpr>

C<afind> only considers those paths that match the regular expression
I<regexpr>. The match is done on the last path component only, not
on the full path.  B<--iname> ignores case when matching.

=item B<--type> <type>

only considers those files whose type is I<type>, where I<type> is the
same letter as the one used by Perl in the C<test> file function. The
most used ones:

=over

=item I<f>      regular file

=item I<d>      directory

=item I<r>      readable file

=item I<w>      writable file

=item I<z>      zero size file

=back

=item B<--exec> <command>

executes C<command> (possibly quoted). Any command argument {} is replaced by the current path name.

=item B<--0>

prints the entries separated by NULL (e.g to use with C<xargs -0>).
Default is to print entries separated by newlines.

=item B<--size>[+-]<size>[cwbkMG]

only considers files less than (-), exactly equal, or bigger (+) than
a given I<size> (with an optional unit). Default (as per GNU find) is
to assume 512-byte "blocks"..

=item B<--mtime>[+-]<mtime>[smhdMY]]

only considers files or directories younger than (-), exactly as old
as, or older (+) than the given value (with optional unit). Default is
days.

=item B<--maxdepth><depth>

will only recurse into directories up to the specified I<depth>.

=item B<--help>

prints help on the command.

=back

=head1 EXAMPLES

To delete all files matching *test*:

    afind . -t f -n "*test*" -e "rm -f {}"

Same, but more efficiently:

    afind . -t f -n "*test*" -0 | xargs -0 -- rm -f

=head1 SEE ALSO

L<find(1)>

=head1 AUTHOR

 Francisco Lozano         Tel : +41 22 767 44 36
 CN/DCI/UWS               Fax : +41 22 767 71 55
 CERN                     Mail: flozano@afsmail.cern.ch
 CH-1211 Geneve 23, Switzerland
 April 1996

=cut
