#!/usr/local/bin/perl

#########################################################
# How to execute a program for a specified list of files
#########################################################

# The program below makes alignments for all FASTA-formatted files
#  in the directory using the clustalw program.
# ClustalW is a widely used alignment program.
# ClustalX is a graphical interface to ClustalW.
# As input we provide the program with FASTA-formatted sequence files.
# The specification of the FASTA format is on pp. 181-182 of the textbook.
# clustalw is installed on the SP machine and it is freely available
# to download if you use another machine:
# ftp://ftp.ebi.ac.uk/pub/software/unix/clustalw/clustalw1.83.UNIX.tar.gz
##########################################################

#assumption: all files we want to align have ".fa" extension

# Align every FASTA file (*.fa) in the current directory with clustalw.
# In scalar context glob() works as an iterator: each call returns the next
# matching file name, and undef once there are no more names.
while (defined(my $file = glob("*.fa"))) {
    # The list form of system() starts clustalw directly instead of going
    # through the shell, so a file name containing spaces or shell
    # metacharacters is passed to the program unchanged.
    my $status = system("clustalw", "-align", "-infile=$file", "-type=protein");

    # system() returns -1 when the program could not be started at all and
    # the child's wait status otherwise; anything non-zero means trouble.
    if ($status == -1) {
        warn "could not run clustalw for $file: $!\n";
    }
    elsif ($status != 0) {
        warn "clustalw did not finish successfully for $file (wait status $status)\n";
    }
}

#on SP you need to execute system("clustalw /align /infile=$file /type=protein");

#glob() in this context returns the next file name and undef
# when it runs out of names

#if you type "clustalw" on the command line, the program will go
# into interactive mode.
# To avoid this, we passed parameters on the command line.
# To get the list of possible parameters, type  "clustalw --options"

#Note that here we take advantage of interpolation to pass the filename
# to the program

#The system() function runs the given command as an external program.
#It does not return the output of the command back to the program;
# in this case we do not care, but if we did, we would need to use
# backticks `` instead of system()


################################
# above should result in alignments saved into *.aln files

#Often, the list of files that need to be aligned is updated.
#However, we would not want to re-align every *.fa file
# in the directory.

#reminder: comment out the code above

# Strictures are switched on here, at the start of the second example, and
# stay in effect to the end of the file; every variable below is declared
# with "my".
use strict;
use warnings;

# Align only those FASTA files (*.fa) that do not have an alignment (*.aln)
# yet, then report how many files were aligned in this run.
my @files     = glob("*.fa");   # list context: returns all file names at once
my $num_files = @files;         # an array in scalar context gives its length

my $counter = 0;
for my $file (@files) {
    # Build the expected alignment name by replacing only the LAST extension
    # with ".aln" (e.g. "my.seq.fa" -> "my.seq.aln").  Splitting on every dot
    # and keeping the first piece would look for "my.aln" instead.
    (my $aln_file = $file) =~ s/\.[^.]*\z/.aln/;

    if (-e $aln_file) {
        print "$file was already aligned\n";
        next;
    }

    # The list form of system() starts clustalw directly instead of going
    # through the shell, so unusual characters in the file name are safe.
    my $status = system("clustalw", "-align", "-infile=$file", "-type=protein");

    # Count the file only when clustalw reports success (status 0);
    # -1 means the program could not be started at all.
    if ($status == 0) {
        $counter++;
    }
    elsif ($status == -1) {
        warn "could not run clustalw for $file: $!\n";
    }
    else {
        warn "clustalw did not finish successfully for $file (wait status $status)\n";
    }
}
print "$counter files out of $num_files were aligned\n";


#HOMEWORK ASSIGNMENTS
#1. Read Chapter 7 on Input and Output.
#2. Exercises 1 and 3, p. 59-60
#3. challenge: write a program that concatenates all *.fa files
#   in the directory into one big file. 
#   (again there is more than one way to do this)


