package PubMed::Files; use strict; use warnings; use Carp qw(confess); use Data::Dumper; use Date::Format; use File::Basename; use File::Path; use File::Touch; use File::Copy; use PubMed::Common; use PubMed::Paths; sub prepare { my $file=shift; my $dirname=dirname($file); if(not -d $dirname) { mkpath($dirname); } } sub find_pryr { my $dir=shift; my $pryr=0; my $verbose=&PubMed::Common::isatty(); foreach my $file (glob("$dir/*")) { if($verbose) { print "file $file\n"; } if(not $file=~m|pubmed(\d\d)n|) { if(not basename($file) eq 'README.txt') { warn "I can't deal with $file."; } next; } if($1 > $pryr) { $pryr=$1; } } return $pryr; } sub check_for_pryr { my $file=shift; my $pryr=shift; my $regex="pubmed$pryr".'n\d+'; if($file=~m|$regex|) { return 1; } return 0; } #sub pmid_to_fapi { # my $pmid=shift // confess "I need a pmid here"; # my $pemi=&PubMed::Common::pmid_to_pemi($pmid); # my $file=$PubMed::Paths::fapi_dir."/$pemi/$pmid.xml"; # ## we can save this overhead now # # &PubMed::Files::prepare($file); # return $file; #} #sub fapi_to_pmid { # my $fapi=shift // confess "I need a fapi here"; # my $bana=basename($fapi); # my $pmid=substr($bana,0,length($bana)-4); # return $pmid; #} sub find_newest { my $dir=shift; my $regx=shift // ''; my $verbose=shift // ''; if(not -d $dir) { confess "I need a directory here, you gave me '$dir'"; } my $o={}; my $min_age=time; my $min_file=''; foreach my $file (glob("$dir/*")) { if($regx) { if(not $file=~m|$regx|) { if($verbose) { print "find_newest: $file does not match '$regx'\n"; } next; } } my $age=-M $file; if($age < $min_age) { $min_age=$age; $min_file=$file; } } $o->{'file'}=$min_file; $o->{'age'}=$min_age; return $o; } sub find_oldest { my $dir=shift; my $regx=shift // ''; my $verbose=shift // ''; if(not -d $dir) { confess "I need a directory here, you gave me '$dir'"; } my $o={}; my $max_age=0; my $max_file=''; foreach my $file (glob("$dir/*")) { if($regx) { if(not $file=~m|$regx|) { if($verbose) { print "find_oldest: $file does not match '$regx'\n"; } next; } } my $age=-M $file; if($age > $max_age) { $max_age=$age; $max_file=$file; } } $o->{'file'}=$max_file; $o->{'age'}=$max_age; return $o; } sub file_to_date { my $file=shift // confess "I need a file here."; my $mtime=&mtime($file); my $date=time2str('%Y-%m-%d',$mtime); return $date; } sub sou_in_file { my $file=shift // confess "I need a file here."; if(not -f $file) { confess "I can't open $file"; } my $sou_file="$file.sou"; my $s="export LC_ALL=C; $PubMed::Paths::sort $file | $PubMed::Paths::uniq > $sou_file"; system($s); my $toucher = File::Touch->new(reference => $file, no_create => 1); $toucher->touch($sou_file); move($sou_file,$file); } sub mtime { my $file=shift // confess "I need a file here."; if(not -f $file) { confess "I can't see the file $file"; } my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, $atime,$mtime,$ctime,$blksize,$blocks) = stat($file); return $mtime; } ## a function reference ## sorting for raw files, probably not required every time. ## Fixme:: there is different version of this in PubMed::File our $sort_uniq = sub { my $fufi=shift // confess "I need a file here."; my $tmp = File::Temp->new(UNLINK => 0); my $s="export LC_ALL=C; $PubMed::Paths::sort $fufi | $PubMed::Paths::uniq > $tmp"; system($s); move($tmp,$fufi); }; sub does_file_need_renewal { my $file=shift; my @others=@_; my $verbose=0; if(not -f $file) { if($verbose) { print "file $file is not there, it needs renewing\n."; } return 1; } if($verbose) { print "file $file is there ... "; } if(-z $file) { if($verbose) { print "file $file is empty, it needs renewing\n."; } return 1; } ## -M Script start time minus file modification time, in days. my $target_time=-M $file; if($verbose) { print "target_time is $target_time\n"; } if($verbose) { print Dumper @others; } foreach my $file (@others) { if($verbose) { print "considering $file as renewal target\n"; } if(not $file) { confess "I don't have the file $file"; } if(-d $file) { ## directory case, not treated recursively my $dirname=$file; opendir( my $dir, $dirname ) or die "Error: can't open dir $dirname"; my $file; while ($file = readdir $dir ) { ## skipping "." and ".." if ( ($file eq "." ) or ( $file eq ".." )) { next; } $file="$dirname/$file"; if(-M $file < $target_time) { if($verbose) { print " $file is newer, renewal required\n"; } return 1; } } ## finished with this next; } if(not -f $file) { die "I can not find the file '$file'"; } if($verbose) { print "time on file $file is ", -M $file, "\n"; } if(-M $file < $target_time) { if($verbose) { print " but $file is newer, renewal required\n"; } return 1; } } if($verbose) { print " no reed to renew\n"; } return 0; } 1;