#!/usr/bin/perl -w

#initialize genome name and base_hash

# Shared state used by the rest of the script:
#   $my_genome - file name of the genome to analyze
#   %base_hash - occurrence count per sequence symbol
$my_genome = "";
%base_hash = ();

# Assign the genome name to $my_genome.
# The genome is the one and only file in the current directory whose name
# ends in ".nfa". glob() is used instead of parsing the output of `ls`:
# it needs no external program, works on every platform, and returns the
# names without a trailing newline.
my @genome_files = glob("*.nfa");

# Exactly one genome is expected; anything else is an error.
if (@genome_files > 1) { die "More then one genome in directory" }

# Stop here with a clear message instead of carrying on with an empty file
# name and failing later with a confusing open error.
if (!@genome_files) { die "No genome file (*.nfa) found in the current directory\n" }

$my_genome = $genome_files[0];

# Another way to walk over the genome files would be something like:
# while (defined($file = glob("*.nfa"))) { ... }
# This shows that in Perl there is more than one way to get things done - though some look nicer than others.

# Remove a trailing newline from the file name, if there is one, before the
# name is printed or opened (chomp is a no-op when there is none).
chomp($my_genome);
print "\n\n$my_genome is the file name of the genome to be analyzed \n"; 

# Open my genome for input.
# The three-argument form of open keeps the mode separate from the file
# name, so a name with leading/trailing blanks or characters such as '>' or
# '|' is opened literally instead of being interpreted as part of the mode.
open(IN, '<', $my_genome) or die "cannot open $my_genome:$!";

# The first line of a FASTA file is the comment (header) line and has to
# start with '>'. For an empty file <IN> returns undef; the defined() test
# treats that as "not FASTA" instead of matching against an undefined value.
$header = <IN>;

if (defined($header) && $header =~ m/^>/) {
    print "\nthe analyzed genome has the following comment line:\n$header \n\n";
}
else {
    print "this is not in FASTA format \n\n";
    close(IN);
    exit 1;    # non-zero status so that a calling script can detect the failure
}

# Read the remaining lines of the genome from IN and count every sequence
# symbol in %base_hash (key: symbol, value: number of occurrences).
while (defined($line = <IN>)) {

    # A file may hold more than one FASTA record. Each further record starts
    # with its own '>' comment line, whose text is a description and not
    # sequence, so it must not be counted.
    next if $line =~ m/^>/;

    # Remove the line terminator and any other whitespace (for example the
    # "\r" of DOS line endings, or stray blanks) so that only sequence
    # symbols are counted.
    $line =~ s/\s+//g;

    # Split the line into single characters and count each of them.
    foreach my $symbol (split(//, $line)) { $base_hash{$symbol} += 1 }
}

close(IN);

##@bases_used = keys(%base_hash);

# Print one line per symbol found and add the counts up to the total number
# of symbols. The symbols are sorted so that the report comes out in the
# same order on every run (plain hash order is arbitrary).
$total_bp = 0;
foreach (sort keys(%base_hash)) {
    print "base symbol \t $_ \t occurred $base_hash{$_}   times\n";
    $total_bp += $base_hash{$_};
}
print "\nthe genome contains $total_bp base pairs\n";

# Without a single counted symbol the percentage below would be a division
# by zero, so stop with an explicit message instead.
if ($total_bp == 0) {
    die "cannot calculate GC-content: no bases were counted\n";
}

#calculate GC content
# G and C are counted in upper and lower case, so a sequence written in
# lower case contributes as well. Symbols that never occurred have no entry
# in %base_hash and are skipped rather than read as undefined values.
my $gc_count = 0;
foreach (qw(G C g c)) {
    $gc_count += $base_hash{$_} if exists $base_hash{$_};
}

$GC_content = 100 * $gc_count / $total_bp;

printf "\nGC-content= %.2f percent \n", $GC_content; 

exit;



