#!/usr/local/bin/perl
#########################################################
# How to execute a program for a specified list of files
#########################################################
# The program below calculates alignments for all FASTA-formatted files
# in the directory using the clustalw program.
# ClustalW is a widely used alignment program.
# ClustalX is a graphical interface to ClustalW.
# As input we provide the program with FASTA-formatted sequence files.
# Specification of the FASTA format is at
# http://en.wikipedia.org/wiki/Fasta_format
# clustalw is installed on the cluster and it is freely available
# to download if you use another machine:
# ftp://ftp.ebi.ac.uk/pub/software/
##########################################################
use strict;
use warnings;

# Assumption: all files we want to align have the ".fa" extension.
#
# glob() in this (scalar) context returns the next file name on each call
# and undef when it runs out of names.
while (defined(my $file = glob("*.fa"))) {

    # If you type "clustalw" on the command line, the program will go
    # into interactive mode.  To avoid this, we pass parameters on the
    # command line.
    # To get the list of possible parameters, type "clustalw -options";
    # to get help and a general description, type "clustalw -help".
    #
    # Note that we take advantage of interpolation to pass the file name
    # to the program.  The arguments are given to system() as a list so
    # that the shell is bypassed: a file name containing spaces or shell
    # metacharacters is handed to clustalw exactly as it is.
    my $status = system('clustalw', '-align', "-infile=$file", '-type=protein');

    # system() returns the wait status of the command (0 on success,
    # -1 if the program could not be started).  Report a failure but keep
    # going with the remaining files.
    if ($status != 0) {
        warn "clustalw failed for $file (wait status $status)\n";
    }
}

# The system() function executes the command on the system.
# It does not return the output of the command back to the program.
# In this case we do not care, but if we did, we would need to use
# backticks `` instead of system().
################################
# The above should result in alignments saved into *.aln files.
# Often, the list of files that need to be aligned is updated.
# However, we would not want to re-align every *.fa file in the directory.
# Reminder: comment out the alignment loop above before running this part.

# In list context glob() returns all matching file names at once.
my @files     = glob("*.fa");
my $num_files = @files;    # an array in scalar context yields its length
my $counter   = 0;         # number of files aligned during this run

for my $file (@files) {

    # Build the alignment file name by replacing only the LAST extension
    # with ".aln", so "my.protein.fa" maps to "my.protein.aln" (splitting
    # on every dot would wrongly give "my.aln").
    (my $aln_file = $file) =~ s/\.[^.]*\z/.aln/;

    if (-e $aln_file) {
        print "$file was already aligned\n";
    }
    else {
        # The list form of system() bypasses the shell, so unusual file
        # names are passed through untouched.  -outfile pins the output
        # name to the one tested above, so the next run recognises it.
        my $status = system(
            'clustalw', '-align',
            "-infile=$file", "-outfile=$aln_file",
            '-type=protein',
        );

        # Count the file only when clustalw reported success (status 0).
        if ($status == 0) {
            $counter++;
        }
        else {
            warn "clustalw failed for $file (wait status $status)\n";
        }
    }
}

print "$counter files out of $num_files were aligned\n";