Slurping annotations
The FASTA formatted sequences can be saved from your browser as text,
or copied and pasted into a text file from your browser window.
The annotations for each sequence are rendered as key=value pairs in the
name line (the information line beginning with ">"), following the
accession numbers.
The following simple Perl script converts such a FASTA file into an
array of hashes (one hash per sequence) that can be processed further.
# output.fas (the name and each annotation on the name line are
# separated by tab characters, which is what parse_out splits on):
# >L22956(499) 'subtype'='C' 'phenotype'='SI'
# atgagagtgagggggatactgaggaattgtcaacaa...
# use like this...
# parse_out returns a list of hash references, one per sequence;
# index the same array the result was assigned to.
my @seqs = parse_out('output.fas');
$seqs[0]->{seq}; # returns 'atgagagtgagg...'
$seqs[0]->{accession}; # returns L22956
$seqs[0]->{lanl_id}; # returns 499
$seqs[0]->{phenotype}; # returns 'SI'
$seqs[0]->{subtype}; # returns 'C'
##
# parse_out($file)
#
# Read a FASTA file whose name lines carry tab-separated 'key'='value'
# annotations after the accession, and return a list with one hash
# reference per record.
#
# Name line layout handled here:
#   >ACCESSION(ID)<TAB>'key'='value'<TAB>'key'='value'...
#
# Each returned hash contains:
#   accession - the text between '>' and '('
#   lanl_id   - the text between the parentheses
#   seq       - the record's sequence, with all of its lines joined
#   <key>     - one entry per annotation, surrounding single quotes removed
#
# A name line that is not followed by any sequence line produces no record.
# Blank lines are ignored.  Dies if the file cannot be opened.
sub parse_out {
    my $f = shift;

    # Three-argument open, so characters in the file name can never be
    # interpreted as an open mode or a pipe.
    open my $fh, '<', $f or die "Cannot open '$f': $!";

    my (@seqs, $h);
    my $has_seq = 0;    # true once the current record has been pushed

    while (my $line = <$fh>) {
        # Strip only the line terminator (LF or CRLF).  A final line that
        # has no terminator keeps its last character.
        $line =~ s/\r?\n\z//;

        if ($line =~ /^>/) {
            # Name line: start a new record.
            $h       = {};
            $has_seq = 0;

            my @fields = split /\t/, $line;
            my $nm     = shift @fields;

            # ">L22956(499)" splits into ('', 'L22956', '499'); the
            # leading empty field is discarded.
            my $dum;
            ($dum, @{$h}{qw( accession lanl_id )}) = split /[>()]/, $nm;

            foreach my $pair (@fields) {
                next unless length $pair;    # ignore empty fields

                # A limit of 2 keeps any '=' inside the value intact.
                my ($k, $v) = split /=/, $pair, 2;
                $k =~ s{^'|'$}{}g;
                $v =~ s{^'|'$}{}g if defined $v;
                $h->{$k} = $v;
            }
            next;
        }

        # Blank lines carry no sequence data.
        next unless $line =~ /\S/;

        # Sequence data seen before any name line still yields a record.
        $h = {} unless defined $h;

        if ($has_seq) {
            # Continuation line of a multi-line sequence.
            $h->{seq} .= $line;
        }
        else {
            # First sequence line: the record is pushed exactly once.
            $h->{seq} = $line;
            push @seqs, $h;
            $has_seq = 1;
        }
    }
    close $fh;

    return @seqs;
}
27 Dec 2008