#!/usr/bin/perl -w

#initialize genome name and base_hash
$my_genome = "";
%oligo_hash=(); 
@genome=();

#assign genome name to $my_genome
@dir=`ls`;
foreach (@dir) {
   if (m/\.nfa$/) {if ($my_genome) {die "More then one genome in directory"} else {$my_genome=($_)} 
   }  
}

chomp ($my_genome);
print "\n\n$my_genome is the file name of the genome to be analyzed \n"; 

# open my genome for input 
open (IN, "< $my_genome") or die "cannot open $my_genome:$!";

# open my_table for output
open (OUT, ">my_table" ) or die "cannot open my_table" ;


$header = <IN>;
#reads first line of file, next command test for fasta commentline

if ($header =~m/^>/) {print "\nthe analyzed genome has the following comment line:\n$header \n\n"};

if (!($header =~m/^>/)) {print "this is not in FASTA format \n\n"; 
			exit;}
###			exit - could have died instead; 


### Read genome into array @genome
$number=0;

while (defined ($line=<IN>)){

#initialise @bases within loop
        
        @bases=();
	chomp($line);
	
    @bases=split(//,$line);

	foreach (@bases) { 
	   $number += 1; 
	   $genome[$number]=$_;
	}           

	
}

close(IN);

# print "\n $number \n";

$oligofile='oligos4.seq';

open (INF, "< $oligofile") or die "cannot open $oligofile :$!";

$oligocounter=0;

while (defined ($line=<INF>)){
  $oligocounter+=1;
  chomp ($line);
  @input=split(/\t/,$line);
  $input[1] =~ s/\s// ; 
  $oligo[$oligocounter]=$input[0];
  $oligo_hash{$oligo[$oligocounter]}=0;
  $oligoc[$oligocounter]=$input[1];
  $oligo_hash{$oligoc[$oligocounter]}=0;
  
}

#print header of output table;
    for($k=1;$k<=$oligocounter;$k++){        
           print OUT "$oligo[$k]\t" ; 
    }
    print OUT "\n$number\n";
   
    
    
##calculate and print deltas

#count how often oligos occur along a sequence

for ($i = 1 ; $i<=($number-3) ;$i++) {
$string = $genome[$i].$genome[$i+1].$genome[$i+2].$genome[$i+3];

$oligo_hash{$string} += 1; 

# print results into table ;

   if ($i%10000==0) {
	
       for($k=1;$k<=$oligocounter;$k++){
          $delta=($oligo_hash{$oligo[$k]}-$oligo_hash{$oligoc[$k]});    
          print OUT "$delta\t";  
	 # print "$oligo[$k]\t$delta\t$oligo_hash{$oligo[$k]}\t";
	 # print "$delta\t" ; 
       }
   print OUT "\n";
       
   }

 } 


close (OUT);
close (INF);
exit;


