package PubMed::Record;

use strict;
use warnings;

use Carp;

# Helpers that pull the publication year and the PMID out of one parsed
# MedlineCitation record by evaluating fixed XPath expressions.
#
# NOTE: the subs below call XML::LibXML::XPathContext->new, but this file
# does not load XML::LibXML itself -- the caller has to have loaded it
# before calling get_year() or get_pmid().

# XPath expressions tried, in this order, to locate the publication year.
my $date_paths = [
    '/MedlineCitation/Article/Journal/JournalIssue/PubDate/Year',
    '/MedlineCitation/Article/Journal/JournalIssue/PubDate/MedlineDate',
];

# XPath expression that locates the PMID.
my $pmid_path = '/MedlineCitation/PMID';

## a counter to see how successful the paths are
## (hashref: date path => number of records whose year came from that path)
our $count_paths;
foreach my $date_path (@{$date_paths}) {
    $count_paths->{$date_path} = 0;
}

# get_year($txt, $doc)
#
# Returns the four-digit publication year of a record.
#
#   $txt  - text of the record; only interpolated into diagnostics.
#   $doc  - handed to XML::LibXML::XPathContext->new (presumably the parsed
#           document or record node -- confirm against the caller).
#
# Each entry of $date_paths is tried in order; the first one that yields a
# true value wins and its counter in $count_paths is incremented.
#
# If no year was found and $main::test is true, the record is printed and
# the whole program exits.  Otherwise a value that is not exactly four
# digits makes the sub confess (die with a stack trace).
sub get_year {
    my ($txt, $doc) = @_;

    my $xpc  = XML::LibXML::XPathContext->new($doc);
    my $year = 0;
    foreach my $path (@{$date_paths}) {
        # A false result ('' when the path matches nothing) means: try the
        # next path.
        $year = try_to_find_year($xpc, $path, $txt) or next;
        $count_paths->{$path}++;
        last;
    }

    # $main::test is a flag owned by the calling script.
    if (not $year and $main::test) {
        print "no year in\n$txt\n";
        exit;
    }

    if ($year !~ m|^\d{4}$|) {
        confess "I have a bad year $year in\n$txt\n";
    }

    return $year;
}

# get_pmid($txt, $doc)
#
# Returns the PMID of a record as a string of digits.
#
#   $txt  - text of the record; only interpolated into diagnostics.
#   $doc  - handed to XML::LibXML::XPathContext->new (presumably the parsed
#           document or record node -- confirm against the caller).
#
# Confesses when $pmid_path matches no node, matches more than one node, or
# when the text of the single matching node is not purely numeric.
sub get_pmid {
    my ($txt, $doc) = @_;

    my $xpc   = XML::LibXML::XPathContext->new($doc);
    my @nodes = $xpc->find($pmid_path)->get_nodelist;

    if (not @nodes) {
        confess "I found no pmid in \n$txt.";
    }
    if (@nodes > 1) {
        # This message used to say "several years" (copied from the year
        # lookup), which pointed readers of the trace at the wrong element.
        confess "I found several pmids in \n$txt;";
    }

    my $pmid = $nodes[0]->textContent;
    if ($pmid !~ m|^\d+$|) {
        confess "This '$pmid' is not a pmid";
    }

    return $pmid;
}

# try_to_find_year($xpc, $path, $txt)
#
# Evaluates $path on the XPath context $xpc and returns the first four
# characters of the text of the single matching node, or '' when nothing
# matches.  The result is NOT validated here; get_year() does that.
#
#   $xpc   - object providing find($path)->get_nodelist.
#   $path  - XPath expression to evaluate.
#   $txt   - text of the record; only interpolated into diagnostics.
#
# Confesses when the path matches more than one node.
sub try_to_find_year {
    my ($xpc, $path, $txt) = @_;

    my @nodes = $xpc->find($path)->get_nodelist;
    return '' if not @nodes;

    if (@nodes > 1) {
        confess "I found several years in \n$txt;";
    }

    # Only the leading four characters are kept (presumably so that values
    # such as a MedlineDate range still start with the year -- TODO confirm).
    return substr($nodes[0]->textContent, 0, 4);
}

1;