package PubMed::Search;

use strict;
use warnings;

use Carp qw(confess);
use Data::Dumper;
use Fcntl qw(SEEK_SET O_RDONLY SEEK_CUR);
use IO::File;

## Read one line from $fh and strip its line ending.
## Returns '' when the handle is already at end of file, so callers
## never receive undef.
sub _read_chomped_line {
    my $fh = shift;

    my $line = $fh->getline();
    return '' if not defined $line;

    chomp $line;
    return $line;
}

## Go back to the start of a line and return that line, chomped.
## There does not seem to be a built-in function to do that.
##
##   $fh     - a seekable handle (an IO::File object in this module).
##   $offset - optional absolute byte offset to seek to first; when it
##             is omitted or 0 the current position of $fh is used.
##
## The bytes examined are the ones at position-2, position-3, ...;
## the byte at position-1 is never tested.  The line returned is
## therefore the one containing the byte just BEFORE the position: when
## the position is the first byte of a line, the PREVIOUS line is
## returned.  At position 0 there is no such byte and '' is returned.
##
## Confesses when a byte cannot be read during the backward scan, e.g.
## because the position lies beyond the end of the file.
sub get_line {
    my $fh     = shift;
    my $offset = shift // 0;

    if ($offset) {
        $fh->seek($offset, SEEK_SET);
    }

    ## Nothing precedes offset 0, so there is no line to report.
    return '' if $fh->tell() <= 0;

    while (1) {
        ## Step back over the byte read in the previous round plus one
        ## more.  The seek fails once that would move before the start
        ## of the file, which means we are inside the first line.
        if (not $fh->seek(-2, SEEK_CUR)) {
            $fh->seek(-1, SEEK_CUR);
            return _read_chomped_line($fh);
        }

        my $char = $fh->getc();
        if (not defined $char) {
            my $file_info = Dumper $fh;
            confess "I could not get the char from $file_info.";
        }

        ## A newline ends the preceding line; the handle now sits on
        ## the first byte of the line we are looking for.
        if (ord($char) == 10) {
            return _read_chomped_line($fh);
        }
    }
}

## Return the pmid of a line, i.e. everything before its first blank.
## Confesses when the line contains no blank: index() signals "not
## found" with -1, never with undef, so the result has to be compared
## against 0 rather than tested with the defined-or operator.
sub get_pmid {
    my $in = shift // '';

    my $blank_pos = index($in, ' ');
    if ($blank_pos < 0) {
        confess "I need a blank in '$in'";
    }

    return substr($in, 0, $blank_pos);
}

## This searches for the target pmid, supposed to be followed by
## a blank. The file's lines start with the pmids. Pmids are
## sorted by string order.
##
##   $file   - path of the sorted file.
##   $target - the pmid to look for.
##
## Returns the complete matching line (without its line ending), or ''
## when no line starts with the target pmid.  Confesses when the file
## cannot be opened or stat'ed, or when a probed line has no blank.
##
## The search is a binary search over byte offsets: probing offset $mid
## yields the line that contains byte $mid-1 (see get_line).
sub search {
    my $file   = shift;
    my $target = shift;

    my $fh = IO::File->new($file, O_RDONLY)
        or confess "I could not open the file '$file': $!";

    my $size = ($fh->stat())[7];
    if (not defined $size) {
        confess "I could not stat the file '$file': $!";
    }

    ## Candidate probe offsets are $lo .. $hi-1.
    my ($lo, $hi) = (0, $size);

    while ($lo < $hi) {
        my $mid = int(($lo + $hi) / 2);

        ## Offset 0 has no preceding byte and so never identifies a
        ## line; once it is the only candidate left the pmid is absent.
        last if $mid == 0;

        my $line = get_line($fh, $mid);
        my $pmid = get_pmid($line);

        if ($pmid eq $target) {
            return $line;
        }

        if (($pmid cmp $target) < 0) {
            $lo = $mid + 1;
        }
        else {
            $hi = $mid;
        }
    }

    ## failed to find the pmid
    return '';
}

1;