package PubMed::Dain;

## Maintenance helpers for dain files: text files with one record per
## line, each record starting with a numeric pmid.  The live code in this
## module refreshes those records from the index kept by PubMed::Index.

use strict;
use warnings;

use Carp;
use Data::Dumper;
use List::Util qw(shuffle);
use File::Basename;
use File::Compare;
use File::Copy;
use File::Slurp;
use File::Temp;

use PubMed::Common;
use PubMed::Paths;
use PubMed::Index;

my $dain_dir = $PubMed::Paths::dain_dir;
## my $dich_dir=$PubMed::Paths::dich_dir;
my $cut = $PubMed::Paths::cut;

## Gates all progress messages below.
## NOTE(review): presumably true when running on a terminal -- confirm
## against PubMed::Common::isatty.
my $test = PubMed::Common::isatty();

## Just a wrapper for update_line: rewrite every line of $file through
## update_line() and replace the file when at least one line changed.
##
## Parameters:
##   $file - path of the file to update.
##
## Returns nothing when the file does not exist or no line changed.
## Otherwise returns the result of unlinking the temporary file (true on
## success) after the new content has been copied over $file.
##
## Dies (confess) when the file cannot be opened, when the temporary file
## cannot be closed, or when the copy back fails.  In the last two cases
## the temporary file is left on disk so that no data is lost.  Any
## confess raised by update_line() propagates as well.
sub update_file {
    my $file = shift;

    if (not -f $file) {
        if ($test) {
            print "I don't have the file $file\n";
        }
        return;
    }

    ## Three-argument open: the file name is never parsed for a mode.
    open(my $in, '<', $file)
        or confess "I can't open the file $file: $!";

    ## UNLINK => 0: the temporary file is removed explicitly below.
    my $tmp           = File::Temp->new(UNLINK => 0);
    my $tmp_filename  = $tmp->filename();
    my $count_changes = 0;

    while (my $line = <$in>) {
        chomp $line;
        my $check = update_line($line, $file);

        ## no check is a success: a false value means "keep the line"
        if (not $check) {
            print {$tmp} "$line\n";
            next;
        }
        if ($check ne $line) {
            if ($test) {
                print "I replace '$line' by '$check'\n";
            }
            $count_changes++;
        }
        print {$tmp} "$check\n";
    }
    close($in);

    if ($test) {
        print "I counted $count_changes changes.\n";
    }

    if (not $count_changes) {
        if ($test) {
            print "The file $file is not changed.\n";
        }
        unlink $tmp_filename;
        return;
    }

    if ($test) {
        print "I copy $tmp_filename to $file.\n";
    }

    ## Buffered write errors only surface at close, so check it.
    $tmp->close()
        or confess "I can't close the temporary file $tmp_filename: $!";

    ## Only remove the temporary file once the copy has succeeded.
    copy($tmp_filename, $file)
        or confess "I can't copy $tmp_filename to $file: $!";
    return unlink($tmp_filename);
}

## Compute the up-to-date form of one record line.
##
## Parameters:
##   $line - the chomped line; either a bare pmid, or a pmid followed by
##           a space and further data.
##   $file - name of the file the line came from, just for reporting.
##
## Returns the replacement text for the line.  update_file() treats a
## false return value as "leave the line as it is".
##
## Dies (confess) when a bare pmid is unknown to the index, when the
## index answer for a bare pmid does not have the expected format, or
## when the line does not start with a pmid at all.
sub update_line {
    my ($line, $file) = @_;

    ## Case 1: the line holds only a pmid.
    if ($line =~ m|^(\d+)$|) {
        my $pmid = $1;

        ## fapi case, obsolete
        #my $purd=&PubMed::Api::get_purd_by_pmid($pmid);
        #if($purd) {
        #  ## return '' means line is ok
        #  return '';
        #}

        my $loc = PubMed::Index::check_purd_by_pmid($pmid);
        if (not $loc) {
            confess "I have an unknown pmid in the file $file.";
        }
        if (not $loc =~ m|^\d+ (\d+ \d+ \d{2}n\d{4}\s*)+$|) {
            confess "I found a bad loc '$loc'";
        }
        return $loc;
    }

    ## Case 2: the line starts with a pmid followed by a space.  A failed
    ## match returns a defined empty string, so the check has to use
    ## "or"; with "//" the confess could never fire and $1 would be stale.
    $line =~ m|^(\d+) | or confess "I don't like line '$line'";
    my $pmid = $1;

    ## NOTE(review): unlike case 1, an unknown pmid is not rejected here;
    ## whatever the index returns is handed to get_latest_loc -- confirm
    ## that this is intended.
    my $latest_line = PubMed::Index::check_purd_by_pmid($pmid);
    my $loc         = PubMed::Index::get_latest_loc($latest_line);
    return $loc;
}

## ### dich files contain an index what pmid is in what dain file
## #sub refresh_dich_files {
## #  unlink glob("$dich_dir/*");
## #  foreach my $dain_fufi (glob("$dain_dir/*")) {
## #    my $date=basename($dain_fufi);
## #    ## deletes some member of $date_infi
## #    #&deal_with_date($date);
## #    my $s="$cut -d ' ' -f 1 $dain_fufi";
## #    foreach my $pmid (`$s`) {
## #      chomp $pmid;
## #      my $pemi=&PubMed::Common::pmid_to_pemi($pmid);
## #      my $dich_file="$dich_dir/$pemi";
## #      open(F,">> $dich_file");
## #      print F "$pmid $date\n";
## #      close(F);
## #    }
## #  }
## #}
## #sub check_dich {
## #  unlink glob("$dich_dir/????-??-??");
## #  foreach my $dich_fufi (glob("$dich_dir/*")) {
## #    my $dich_bana=basename($dich_fufi);
## #    if(not $dich_bana=~m|^\d+$|) {
## #      next;
## #    }
## #    &check_dich_file($dich_fufi);
## #  }
## #}
## #
## #sub check_dich_file {
## #  my $dich_file=shift;
## #  my $seen={};
## #  open(F,"< $dich_file");
## #  my $line;
## #  while($line=<F>) {
## #    $line=~m|^(\d+) (\d{4}-\d{2}-\d{2})| or confess "This is a bad line '$line'";
## #    my $pmid=$1;
## #    my $date=$2;
## #    my $old_date=$seen->{$pmid} // '';
## #    if($seen->{$pmid}) {
## #      print "pmid $pmid is duplicated in $old_date and $date\n";
## #      open(O,">> $dich_dir/$date");
## #      print O "$pmid\n";
## #      close O;
## #    }
## #    $seen->{$pmid}=$date;
## #  }
## #}
## #
## #sub correct_dain_from_dich {
## #  foreach my $dich_fufi (glob("$dich_dir/*")) {
## #    my $dich_bana=basename($dich_fufi);
## #    if(not $dich_bana=~m|^\d{4}-\d{2}-\d{2}$|) {
## #      next;
## #    }
## #    &delete_from_dain($dich_fufi);
## #  }
## #}
## #
## #sub delete_from_dain {
## #  my $dich_file=shift;
## #  open(F,"< $dich_file");
## #  my $pmid;
## #  my $to_delete={};
## #  my $date=basename($dich_file);
## #  while($pmid=<F>) {
## #    chomp $pmid;
## #    $to_delete->{$pmid}=1;
## #  }
## #  close F;
## #  my $tmp = File::Temp->new(UNLINK => 0);
## #  my $tmp_filename=$tmp->filename();
## #  my $dain_file="$dain_dir/$date";
## #  if(not -f $dain_file) {
## #    confess "I don't see the file '$dain_file'";
## #  }
## #  open(F,"< $dain_file");
## #  my $dain;
## #  while($dain=<F>) {
## #    chomp $dain;
## #    $dain=~m|(\d+)| or confess "I have a bad line.";
## #    my $pmid=$1;
## #    if(defined($to_delete->{$pmid})) {
## #      if($test) {
## #        #print "I remove $pmid from $dain_file\n";
## #      }
## #      next;
## #    }
## #    print $tmp "$dain\n";
## #  }
## #  $tmp->close();
## #  copy($tmp_filename,$dain_file);
## #  unlink $tmp_filename;
## #}

1;