package PubMed::Api;

use strict;
use warnings;

use Carp qw(confess);
use Encode;
use Math::Round;
use File::Slurp;
use File::Touch;
use LWP::Simple;
use XML::LibXML;

use PubMed::Common;
use PubMed::Files;
use PubMed::Paths;

## assemble the esearch URL
my $base='http://eutils.ncbi.nlm.nih.gov/entrez/eutils';
my $general='db=pubmed&retmode=xml';
## efetch query prefix; the caller appends "=<comma separated ids>"
my $get_id="efetch.fcgi?$general&id";
my $dt='datetype=edat&retmax=99999';
## esearch query for term "publisher [sb]"; mindate/maxdate are appended
my $sb="esearch.fcgi?$general&term=publisher+%5Bsb%5D&$dt";

## a search resulting in string. Wait is optional
##
## Arguments:
##   $ids  - array reference of ids, joined with commas into the efetch URL
##   $wait - optional; when true, sleep after a successful fetch
## Returns the fetched content encoded as UTF-8 bytes, or nothing when
## get() yields a false value.
sub get_from_api {
    my $ids  = shift // confess "I need ids arrref here.";
    my $wait = shift // '';

    my $in_ids = join(',', @{$ids});
    my $url    = "$base/$get_id=$in_ids";
    #print "$url\n";
    my $output = get($url) or return;
    my $bytes  = encode_utf8($output);

    if ($wait) {
        ## the pause grows with the logarithm of the number of ids;
        ## the +1 keeps log() away from zero for an empty list
        my $count   = scalar @{$ids} + 1;
        my $seconds = nearest(1, log($count));
        sleep $seconds;
    }
    return $bytes;
}

## a search resulting in a file. Wait is done by caller
##
## Arguments:
##   $date - a date of the form YYYY/MM/DD, used as both mindate and maxdate
##   $file - optional; when given, the result is also written to this file
## Returns the search result encoded as UTF-8 bytes, or nothing when
## get() yields a false value. Confesses on a malformed date.
sub sb_search {
    my $date = shift // confess "I need a date here.";
    ## anchored on both ends: the date is interpolated into the URL, so
    ## nothing but the date itself may get through
    if ($date !~ m|\A\d{4}/\d{2}/\d{2}\z|) {
        confess "This is a bad date: $date";
    }
    my $file = shift // '';

    my $url    = "$base/$sb&mindate=$date&maxdate=$date";
    my $output = get($url) or return;
    my $bytes  = encode_utf8($output);
    if ($file) {
        write_file($file, $bytes);
    }
    return $bytes;
}

## Load an XML file and save every PubmedArticle element in it through
## save_pmart(). Confesses when the argument is missing or is not a file,
## dies when PubMed::Common::load_and_return_xml returns a false value.
sub pmapi_file_to_fapi {
    my $pmapi_file = shift // confess "I need a file argument here.";
    if (not -f $pmapi_file) {
        confess "I can not see the file $pmapi_file";
    }
    my $doc = PubMed::Common::load_and_return_xml($pmapi_file) or die;
    foreach my $pmart ($doc->getElementsByTagName('PubmedArticle')) {
        save_pmart($pmart);
    }
    return;
}

## Parse XML given as a string and save every PubmedArticle element in it
## through save_pmart(). $date and $touch_date are optional and are handed
## on to save_pmart() unchanged. Confesses when the string does not parse
## into an XML::LibXML::Document.
sub pmapi_bytes_to_fapi {
    my $bytes      = shift;
    my $date       = shift // '';
    my $touch_date = shift // '';

    my $doc;
    eval {
        $doc = XML::LibXML->load_xml(string => (\$bytes));
    };
    ## grab the error right away, before anything can overwrite $@
    my $error = $@;
    if (ref $doc ne 'XML::LibXML::Document') {
        confess "I could not parse file '$bytes'\nLibXML says $error\n";
    }
    foreach my $pmart ($doc->getElementsByTagName('PubmedArticle')) {
        save_pmart($pmart, $date, $touch_date);
    }
    return;
}

## Write one PubmedArticle element to the file that
## PubMed::Files::pmid_to_fapi gives for its pmid.
##
## Arguments:
##   $pmart      - an XML::LibXML::Element
##   $date       - optional; names the file in $PubMed::Paths::infa_dir
##                 to which the pmid is appended
##   $touch_date - optional; an object with a touch() method, presumably
##                 a File::Touch instance (to be confirmed with callers)
## The infa file is appended to and touched only when both $date and
## $touch_date are true. Confesses on a wrong argument type and when the
## infa file can not be opened, written or closed.
sub save_pmart {
    my $pmart      = shift;
    my $date       = shift // '';
    my $touch_date = shift // '';

    my $ref_pmart = ref($pmart);
    if (not $ref_pmart eq 'XML::LibXML::Element') {
        confess "You did not give me an element, but a $ref_pmart.";
    }
    my $pmid = PubMed::Common::get_pmid($pmart);
    my $file = PubMed::Files::pmid_to_fapi($pmid);
    write_file($file, {binmode => ':utf8'}, $pmart->toString(1));

    if ($date and $touch_date) {
        ## append to infa file
        my $out_file = $PubMed::Paths::infa_dir.'/'.$date;
        open(my $out_fh, '>>', $out_file)
            or confess "I could not open $out_file for appending: $!";
        print {$out_fh} "$pmid\n"
            or confess "I could not write to $out_file: $!";
        ## buffered write errors only show up at close
        close($out_fh)
            or confess "I could not close $out_file: $!";
        $touch_date->touch($out_file);
    }
    return;
}

## old function that tries to use fapi
#sub get_purd_by_pmid {
#  my $pmid=shift;
#  ## set a date on the file
#  my $date=shift;
#  my $do_i_check=shift // '';
#  my $fapi=&PubMed::Files::pmid_to_fapi($pmid);
#  if(not -f $fapi) {
#    return '';
#  }
#  my @lines=read_file($fapi);
#  my $in_record=0;
#  my $purd='';
#  foreach my $line (@lines) {
#    if($line=~m|^\s+