New_maxH_v3.3pl


#!perl
####FOR TESTING

#$ARGV[0] = "Lambda InCh disk2:Perl Scripts:New_maxH_v3_:Sequence:3_proteins";
#$ARGV[1] = "Lambda InCh disk2:Perl Scripts:New_maxH_v3_:Sequence:1_protein";

###########
#save as droplet
#file dragged on is ARGV


&setUp();

if (!defined(@ARGV)){
print "\n ARGV is undefined.
This is drag and drop.
Drag a file onto the icon of this one
and drop it.
It can handle several files at once.\n";
}
else {
#init output
$newText = $ARGV[0] . '.maxH';
if ($#ARGV > 0){
$newText = $newText . 's';
}
open(TEXTOUT, ">$newText"); #open for writing
print TEXTOUT ""; #AN EMPTY STRING should delete the file
close(TEXTOUT);
open(TEXTOUT, ">>$newText");#reopen for appending

for ($i_argv=0;$i_argv<=$#ARGV;$i_argv++){
print "ARGV = $ARGV[$i_argv]\n";
$inFile = $ARGV[$i_argv];
open (INPUT, $inFile) or die "What, no file, $InFile ? ";
$aSeq = '';
LINE: while (){
chomp;
if ($_ =~/>/ || $_ =~/;/){ #fasta or strider write text
#print "found name line\n";
if ($aSeq ne ''){
$oldName = $newName;
$newName = $_;
$aSeq = lc ($aSeq);
&do_it_all($aSeq);
$aSeq = '';
}
else{
$newName = $newName . $_;
}
next LINE;
}
elsif ($_ eq '//'){
#print "found end of record\n";
if ($aSeq ne ''){
$oldName = $newName;
$newName = '';
$aSeq = lc ($aSeq);
&do_it_all($aSeq);
$aSeq = '';
}
next LINE;
}
else {
$aSeq = $aSeq . $_;
next LINE;
}
}#while INPUT
if ($aSeq ne ''){
$oldName = $newName;
$newName = '';
$aSeq = lc ($aSeq);
&do_it_all($aSeq);
$aSeq = '';
}
print "that was the last line of $inFile";
close (INPUT);
#&writeFile;
#%sitesAt = ();
}#for @ARGV
print TEXTOUT "\nSorted maxH Values\n\n";
@smaxHList = sort numerically @maxHList;
foreach (@smaxHList) {
print TEXTOUT "$_\n";
}
close (TEXTOUT);
}#else


sub do_it_all{
$seqLen = length($aSeq);
@ret = &get_maxH($aSeq);
$maxH = shift @ret;
$maxH = $maxH/$places;
$probability_of_int = &get_prob($maxH);
push @maxHList, $maxH;
print TEXTOUT "$oldName\n";
print TEXTOUT "maxH = $maxH, at @ret of $seqLen aa, P(int) = $probability_of_int\n";
#print "$cycles cycles took $time seconds for a sequence of $seqLen bases\n";
foreach (@smaxHList) {
print "$_\n";
}
}


sub get_maxH{
local ($i) = 0;
local ($aSeq) = $_[0];
local ($seqLen) = length($aSeq);
local ($seq1) = substr ($aSeq, 0, 19);
local (@s1list) = split //, $seq1;
local ($Hval) = 0;
for (@s1list){ #first 19
$Hval = $Hval + $sh{$_}; #scalehash of it
}
local ($maxH) = $Hval;
local (@maxAt) = (0); #init
local (@seqList) = split //, $aSeq;
for ($i = 0; $i < $seqLen - $wl ; $i++){
$Hval = $Hval + $sh{$seqList[$i + $wl]} - $sh{$seqList[$i]};
if ($Hval > $maxH){
$maxH = $Hval;
@maxAt = ($i + 1);
}
elsif ($Hval == $maxH){
push @maxAt, $i+1;
}
}
return ($maxH, @maxAt)
}

sub get_prob{
local $h = $_[0];
local ($quad, $odds, $prob) = 0,0,0;
$quad = (94.724 * ($h**2)) - ( 420.066 * $h) + 417.68 ;
$odds = exp($quad);
$prob = 1 - ($odds / (1 + $odds));
return $prob;
}

sub numerically{ $a <=> $b; }

sub setUp {
$aSeq = 'acdefghiklmnpqrstvwy';
@aScale = (1.37,1.12,0.17,0.16,1.93,1.03,0.74,2.2,0.19,1.78,1.39,0.43,0.51,0.35,0.3,0.83,0.89,1.81,1.56,1.01);
@seqList = split //, $aSeq;
for ($i = 0 ;$i <= 19; $i++){
$sh{@seqList[$i]} = $aScale[$i];
}
$wl = 19;
$places = 1000000;
foreach $key (sort keys %sh){
$sh{$key} = int(.5+($sh{$key}/$wl)*$places);
}
}
# #