#!/usr/bin/perl

#########################################################
# How to execute a program for specified list of files
#########################################################

# The program below calculates alignments for all FASTA-formatted files
# in the current directory using the clustalw program.
# ClustalW is a widely used alignment program;
# ClustalX is a graphical interface to ClustalW.
# As input we provide the program with FASTA-formatted sequence files.
# Specification of FASTA format is at http://en.wikipedia.org/wiki/Fasta_format 
# clustalw is installed on the cluster and is freely available
# to download if you use another machine:
# ftp://ftp.ebi.ac.uk/pub/software/ 
##########################################################

#assumption: all files we want to align have ".fa" extension

# Align every *.fa file in the current directory with ClustalW.
# Assigning glob() to a scalar calls it in scalar context, where each call
# yields the next matching name and undef once the names are exhausted.
while (defined(my $file = glob("*.fa"))) {
    # The list form of system() bypasses the shell, so a file name that
    # contains spaces or shell metacharacters reaches clustalw as a single,
    # unmodified argument. The name is still passed via interpolation.
    my $status = system("clustalw", "-align", "-infile=$file", "-type=protein");

    # system() returns -1 if the program could not be started, otherwise
    # the wait status of the child; anything non-zero means failure.
    if ($status == -1) {
        warn "could not run clustalw for $file: $!\n";
    }
    elsif ($status != 0) {
        warn "clustalw failed on $file (wait status $status)\n";
    }
}


# glob() in scalar context (as in the loop condition above) returns the
# next file name on each call, and undef when it runs out of names

# if you type "clustalw" in the command line, the program will go
# into interactive mode
# To avoid this, we used parameters in the command line
# To get list of possible parameters, type  "clustalw -options"
# to get help and a general description type clustalw -help
# 

#Note that we take advantage of interpolation to pass filename
# to the program 

# system() runs the given command as a separate process and waits for it.
# It does not return the output of the command back to the program.
# In this case we do not care, but if we did, we would need to use
# backticks `` instead of system()


################################
# above should result in alignments saved into *.aln files

# Often, the list of files that need to be aligned is updated.
# However, we would not want to re-align every *.fa file
# in the directory.

# Reminder: comment out the code above before running the code below.

# Align only those *.fa files that do not yet have a matching *.aln file,
# then report how many files were aligned in this run.

my @files     = glob("*.fa");   # list context: returns all file names at once
my $num_files = @files;         # an array in scalar context gives its length
my $counter   = 0;              # number of files successfully aligned now

for my $file (@files) {
    # Derive the alignment name by replacing only the trailing ".fa".
    # Splitting on every "." and keeping the first piece would turn
    # "my.seq.fa" into "my.aln", which never matches the real output,
    # so such files would be re-aligned on every run.
    (my $aln_file = $file) =~ s/\.fa\z/.aln/;

    if (-e $aln_file) {   # -e is a file test operator: does the file exist?
        print "$file was already aligned\n";
        next;
    }

    # List form of system() avoids the shell, so unusual file names are
    # passed through intact. -outfile is given explicitly so the file that
    # clustalw writes is exactly the one the -e test above looks for.
    my $status = system(
        "clustalw", "-align",
        "-infile=$file", "-outfile=$aln_file",
        "-type=protein",
    );

    if ($status == 0) {
        $counter++;       # count only runs that actually succeeded
    }
    elsif ($status == -1) {
        warn "could not run clustalw for $file: $!\n";
    }
    else {
        warn "clustalw failed on $file (wait status $status)\n";
    }
}
print "$counter files out of $num_files were aligned\n";



