#!/usr/bin/perl -w -s
##########################################################################
# Report the GC content of a DNA sequence stored in a text or FASTA file.
#
# Usage:   <this script> <sequence-file>
#
# Input:   every line starting with '>' is treated as an annotation line;
#          if there are several, they are concatenated. All remaining
#          lines are concatenated into a single sequence string.
#
# Output:  a warning for every character that is not A, T, G or C, the GC
#          percentage computed over the valid bases only, and the
#          annotation line(s), if any.
#
# Note:    the -s switch is kept from the original shebang line; the script
#          itself does not read any switch variables.
##########################################################################

use strict;
use warnings;

# Run only when executed as a program, so that the subs below can be
# loaded (require/do) from a test without triggering the command line.
main(@ARGV) unless caller;

# main(@args): command-line entry point.
# Dies unless exactly one file name is given, if the file cannot be opened,
# or if the file contains no A/T/G/C base at all.
sub main {
    my @args = @_;

    unless (@args == 1) {
        die "please provide name of the file in the command line!!\n";
    }
    my $filename = $args[0];

    my ($name, $seq) = read_sequence($filename);
    my $stats = analyze_sequence($seq);

    # The reported number is the 0-based index of the offending character
    # in the whitespace-stripped sequence (unchanged from the original).
    for my $odd (@{ $stats->{strange} }) {
        my ($base, $index) = @{$odd};
        print "Warning there is a strange base $base before position $index\n";
    }

    my $gc_content = gc_percentage($stats->{gc}, $stats->{valid});
    unless (defined $gc_content) {
        # Previously this surfaced as a bare "Illegal division by zero".
        die "no A, T, G or C bases found in $filename; "
          . "cannot compute GC content\n";
    }

    print "\nThe GC content of the sequence in the file "
      . '"' . "$filename" . '"'
      . " is $gc_content\%.\n\n";

    if ($name ne '') {
        print "Annotation line(s) in $filename was/were $name\n";
    }
    return;
}

# read_sequence($filename): returns ($name, $seq).
# $name is the concatenation of all annotation ('>') lines, $seq the
# concatenation of all other lines; both without line terminators.
# Dies if the file cannot be opened.
sub read_sequence {
    my ($filename) = @_;

    # Three-argument open: the file name can never be parsed as a mode.
    open(my $in, '<', $filename) or die "cannot open $filename:$!";

    my $name = '';
    my $seq  = '';
    while (defined(my $line = <$in>)) {
        chomp $line;
        $line =~ s/\r\z//;    # drop the CR left behind by CRLF files
        if ($line =~ /^>/) {  # annotation line
            $name .= $line;
        }
        else {
            $seq .= $line;
        }
    }
    close $in;

    return ($name, $seq);
}

# analyze_sequence($seq): classifies every character of the sequence.
# Lower-case a/t/g/c are upper-cased and all whitespace is removed first.
# Returns a hash reference with the keys
#   gc      - number of G and C bases
#   valid   - number of A, T, G and C bases
#   strange - array ref of [character, 0-based index] for everything else
sub analyze_sequence {
    my ($seq) = @_;

    $seq =~ tr/atgc/ATGC/;    # only these four letters are upper-cased
    $seq =~ s/\s//g;
    my @bases = split //, $seq;

    my $num_gc    = 0;
    my $num_valid = 0;
    my @strange;
    for my $i (0 .. $#bases) {
        my $base = $bases[$i];
        if ($base eq 'G' or $base eq 'C') {
            $num_gc++;
            $num_valid++;
        }
        elsif ($base eq 'A' or $base eq 'T') {
            $num_valid++;
        }
        else {
            # Not counted in the denominator of the GC percentage.
            push @strange, [ $base, $i ];
        }
    }

    return { gc => $num_gc, valid => $num_valid, strange => \@strange };
}

# gc_percentage($num_gc, $num_valid): GC share in percent.
# Returns nothing (undef in scalar context) when there are no valid bases.
sub gc_percentage {
    my ($num_gc, $num_valid) = @_;
    return unless $num_valid;
    return ($num_gc / $num_valid) * 100;
}

1;    # true value so the file can also be loaded with require