#!/bin/csh
#      protomat.csh <prefix> <return>
#	where prefix = directory and name of .in file, e.g.
#		../tmp/bm/15319/15319 for ../tmp/bm/15319/15319.in
#		which contains the input file made by makeblocks.pl
#	and return = email address, use WWW for web output
#----------------------------------------------------------------------------
#  1/ 8/97  Change clustalw option from -tree to -bootstrap
#  6/ 4/97  -bootstrap=100, or depends on nseq
#  7/ 7/97  Get $IN_WWW flag from processmail, if $IN_WWW reduce cputime,
#	    if > 25 sequences print warning 
#  8/ 7/97  Added block map files
#  8/25/97  Reduced number of bootstraps
# 10/ 4/97  Set #bootstraps=100 because clustalw doesn't normalize
#	    intermediate values making them difficult to interpret
# 10/25/97  Write warnings to the .warn file
#  1/19/98  Stop if > 250 sequences
#  7/15/98  Don't try to run Gibbs if min seq length < 8
#  8/ 5/98  Check for empty tree files & remove them so drawgram doesn't loop
#  1/12/99  Send trees back with searchable PSSMs.
#  9/ 2/99  Changes for clustalw 1.75
# 12/ 9/99  Changes for clustalw 1.80
#  6/ 6/00  Changes for revised motomat; all blocks in one file
#  7/16/01  Require at least 3 seqs
#  1/22/03  2nd arg is return address, mail results here (no longer
#		using processmail.sh)
#  1/23/03  Use quicktree instead of clustalw
#  4/23/03  Write blalign output to a file; don't want to use $r.out any more
#		except for error messages
#  5/ 9/03  Run makelis
#  8/28/03  Add calibration
#----------------------------------------------------------------------------
#	Command-line arguments (both required):
#	  $1 = prefix: directory and name of the .in file
#	  $2 = return: email address, or WWW for web output
#	Without $2, $return is empty and every comparison of it against
#	"WWW" is a csh expression syntax error, so stop early instead.
if ($#argv < 2) then
   echo "Usage: protomat.csh <prefix> <return>"
   exit(-1)
endif
set prefix = $1
set return = $2
#umask 002
#	006 masks read and write permission for "other" on created files
umask 006
#	Drop any rm/mv aliases so the plain commands are run
unalias rm
unalias mv
set bindir = .
#	NSEQ, NRES and frq are handed to pssmdist when blocks are calibrated
#	NOTE(review): presumably the size of the reference database in
#	sequences and residues - confirm against pssmdist
set NSEQ = 80000
set NRES = 29085965
set frq = ./default.amino.frq
#	Limit CPU time more severely for WWW executions
limit coredumpsize 1k
limit datasize 32m
#	Quoted so that an empty or unusual address is still a valid operand
if ("$return" == "WWW") then
   limit cputime 30m
else
   limit cputime 240m
endif
#	Input size limits checked after makelis has run:
#	  minseq/maxseq   = allowed number of sequences
#	  wwwchar/maxchar = size limit for WWW runs / for any run
set minseq = 3
set maxseq = 250
set wwwchar = 20000
set maxchar = 100000

#  Derive the two name stems used by the rest of the script:
#    r = $prefix with any trailing .extension removed (directory kept)
#    t = the last path component of r
#  e.g. $1 = ../tmp/bm/AA/AA gives r = ../tmp/bm/AA/AA and t = AA
#  $t.* files end up in the current directory, because that is where
#  motomat dumps all its files; this will be /home/blocks/bin/
set r = ${prefix:r}
set t = ${r:t}

#  Make sure the error log exists, and remove any old blocks file so the
#  "-e $t.blks" tests further down only see output from this run
touch ${r}.err
rm -f ${t}.blks >& /dev/null

#  makelis reads $prefix.in and writes $prefix.lis, .pros, .seqlen, .err, .warn
./blimps-bin/makelis $prefix $return


#	Find out how many sequences and characters there are
#	Number of sequences = number of lines in $r.pros starting with ">"
set nseq = (`grep -c "^>" $r.pros`)
#	Too few sequences: report and stop
if ( $nseq < $minseq ) then
   echo "ERROR: Block Maker requires at least 3 sequences" >> $r.err
   echo "       Use a pairwise alignment algorithm for 2" >> $r.err
   exit(-1)
endif
#	Too many sequences: report, discard the sequence file and stop
if ( $nseq > $maxseq ) then
   echo -n "ERROR: Your input has $nseq sequences which" >> $r.err
   echo " exceeds Block Maker's limit of $maxseq sequences" >> $r.err
   rm $r.pros
   exit(-1)
endif
#	Large inputs are allowed but may hit the CPU time limit; the
#	threshold is lower for WWW runs, which get less CPU time.
#	Every line of the warning goes to $r.warn, including the hint
#	about the email option (it used to be echoed to stdout instead).
if ( ($nseq > 100) || ($nseq > 25 && "$return" == "WWW") ) then
   echo -n "WARNING: Execution may not complete because of CPU time" >> $r.warn
   echo " limit with $nseq sequences." >> $r.warn
   if ( "$return" == "WWW" ) then
      echo "The CPU time limit is greater using the email option." >> $r.warn
   endif
   echo " " >> $r.warn
endif
#	NOTE(review): wc -l counts the LINES of $r.pros, while the limits
#	and the messages below speak of characters; wc -c may have been
#	intended - confirm before changing, as it alters what is accepted
set nchar = (`wc -l $r.pros | awk '{print $1}'`)
if ($nchar > $maxchar) then
   #	Over the absolute limit, whatever the return route
   echo -n "ERROR: Your input has $nchar characters which" >> $r.err
   echo " exceeds Block Maker's limit of $maxchar characters." >> $r.err
   rm $r.pros
   exit(-1)
else if (("$return" == "WWW") && ($nchar > $wwwchar)) then
   #	Within the absolute limit but too big for a WWW run
   echo -n "ERROR: Your input has $nchar characters which" >> $r.err
   echo " exceeds Block Maker's WWW limit of $wwwchar characters." >> $r.err
   echo " Please supply an email address." >> $r.err
   rm $r.pros
   exit(-1)
endif

#=============================================================================
#	Make blocks from the proteins (motifj executes motomat)
#	NOTE: motifj writes $t.motifj.pros with MINIMUM seq len marked for gibbs
#	For dups:  motifj 4 -$r.pros 0 dups 17 (0 seqs => n/2 to start)
#  Make blocks with MOTIF.  motifj executes motomat itself and also
#  writes $t.motifj.pros, which the Gibbs step reads later.
${bindir}/blimps-bin/motifj 4 -${r}.pros >& /dev/null
#  Second motomat pass so sequences aren't clumped/re-ordered
${bindir}/blimps-bin/motomat ${t}.mot 1 1 -10 >& /dev/null

if ( ! -e ${t}.blks ) then
   echo "ERROR: No blocks produced by MOTIF" >> ${r}.err
else
   #  Calibration, step 1: matrices (.mats) and observed frequency
   #  file (.obsf) made from the raw blocks
   ${bindir}/blimps-bin/blk2pssm ${t}.blks ${t}.mats B 3 >& /dev/null
   ${bindir}/blimps-bin/blk2pssm ${t}.blks ${t}.obsf B 21 >& /dev/null
   #  Step 2: pssmdist always writes to the fixed name pssmdist.dat
   #  (this is bad, pssmdist.c should name the file), so rename it
   ${bindir}/blimps-bin/pssmdist ${t}.mats ${t}.obsf $frq $NSEQ $NRES >& /dev/null
   mv pssmdist.dat ${t}.dist
   #  Step 3: calibrated blocks go to $t.cblks
   ${bindir}/blimps-bin/pssmBL ${t}.dist ${t}.blks ${t}.cblks >& /dev/null
   rm ${t}.dist

   #  Add weights to the calibrated blocks; the result is $r.mblks
   ${bindir}/blimps-bin/blweight ${t}.cblks ${r}.mblks P M >& /dev/null

   #  Produce a "multiple alignment" below a title line
   echo "              **BLOCKS from MOTIF**" > ${r}.maln
   ${bindir}/blimps-bin/blalign ${r}.mblks >> ${r}.maln

   #  Write the cobbler control file, then make the cobbler
   #  sequence = $r.mcob
   echo "TY	2"               > ${r}.cf
   echo "BL	${r}.mblks"     >> ${r}.cf
   echo "DB	${r}.pros"      >> ${r}.cf
   echo "OU	${r}.mcob"      >> ${r}.cf
   echo "SU	default.iij"    >> ${r}.cf
   ${bindir}/blimps-bin/cobbler ${r}.cf >& /dev/null
endif

#  Clean up the MOTIF intermediates; complaints about files that were
#  never created are discarded
rm motomat.err ${t}.mot ${t}.plt ${t}.blks ${t}.cblks >& /dev/null
#
#===========================================================
#	Make blocks using Gibbs now  
#
#	Make blocks with Gibbs.  The MINIMUM sequence length is read from
#	the " MINIMUM" line of $t.motifj.pros (second field).
set minlen = (`grep " MINIMUM" $t.motifj.pros |  awk '{print $2}'`)
if ($#minlen == 0) then
   #	No MINIMUM line (or no $t.motifj.pros at all).  Comparing an empty
   #	value with 8 is a csh expression syntax error, which would end
   #	the script before the tree, search and mail steps are reached.
   echo "ERROR: Could not determine minimum sequence length for GIBBS" >> $r.err
   echo "ERROR: No blocks produced by GIBBS" >> $r.err
else if ($minlen[1] < 8) then
   echo "ERROR: Minimum sequence length is 8 for GIBBS" >> $r.err
   echo "ERROR: No blocks produced by GIBBS" >> $r.err
else
   #	Model heuristic; needs MINIMUM sequence length from $t.motifj.pros
   set model = (`$bindir/model.csh $t.motifj.pros`)
   #	Raw Gibbs output (stdout and stderr) is parked in $r.gblks for now
   $bindir/gibbs $t.motifj.pros $model -f -s3 >& $r.gblks

   #	Convert gibbs output for motomat: the ">" lines of $r.lis
   #	followed by $t.motifj.pros.sn
   grep ">" $r.lis > $t.temp
   cat $t.motifj.pros.sn >> $t.temp
   $bindir/blimps-bin/blk2mot $t.motifj.pros $t.temp $t.mot
   $bindir/blimps-bin/motomat $t.mot 1 1 -15 >& /dev/null
   if ( -e $t.blks ) then
      #  Calibrate blocks
      #       Make matrices and observed frequency files
      $bindir/blimps-bin/blk2pssm $t.blks $t.mats B 3 >& /dev/null
      $bindir/blimps-bin/blk2pssm $t.blks $t.obsf B 21 >& /dev/null
      #               writes pssmdist.dat
      $bindir/blimps-bin/pssmdist $t.mats $t.obsf $frq $NSEQ $NRES >& /dev/null
      #	This is bad, should change pssmdist.c to name the file
      mv pssmdist.dat $t.dist
      $bindir/blimps-bin/pssmBL $t.dist $t.blks $t.cblks >& /dev/null
      rm $t.dist

      #  Add weights to blocks; this replaces the raw Gibbs output in
      #  $r.gblks with the final weighted blocks
      $bindir/blimps-bin/blweight $t.cblks $r.gblks P M >& /dev/null
      #  	Make multiple alignment
      echo "              **BLOCKS from GIBBS**" > $r.galn
      $bindir/blimps-bin/blalign $r.gblks >> $r.galn
      #	Make cobbler sequence = $r.gcob
      echo "TY	2"	     > $r.cf
      echo "BL	$r.gblks"   >> $r.cf
      echo "DB	$r.pros"    >> $r.cf
      echo "OU	$r.gcob"    >> $r.cf
      echo "SU	default.iij" >> $r.cf
      $bindir/blimps-bin/cobbler $r.cf >& /dev/null
   else
      #  $r.gblks contains the Gibbs output now; keep it in the error
      #  log and remove the file so later "-e $r.gblks" tests fail
      cat $r.gblks >> $r.err
      echo "ERROR: No blocks produced by GIBBS" >> $r.err
      rm $r.gblks
   endif
endif
#
#---------------------------------------------------------------------------
#	Format PSSMs for MAST searches & make tree files & map files
#	For the MOTIF blocks, when present: MAST-format PSSMs ($r.mmast),
#	block map file ($r.mblks.mapfile) and tree ($r.mtree)
if (-e $r.mblks) then
   $bindir/blimps-bin/blk2pssm $r.mblks $r.mmast M >& /dev/null
   #	Make block map files
   #	Need lengths of the sequences in the blocks - makelis creates $r.lis
   $bindir/blimps-bin/makeblockmap $r.mblks $r.mblks.mapfile -L$r.lis >& /dev/null
   #	Make tree files from the blalign -h output
   $bindir/blimps-bin/blalign $r.mblks -h > $r.fmblks
   if ($nseq < 3) then
      echo "Need at least 3 sequences to make a tree" >> $r.warn
   else
      #	( cmd > out ) >& err is the csh way to split the streams:
      #	stdout becomes the tree, everything else lands in $r.mout.
      #	Bootstrap only below 100 sequences.
      if ( $nseq < 100) then
         @ nboot = 100
         (./quicktree -in a -out t -kimura -boot $nboot $r.fmblks > $r.mtree) >& $r.mout
      else
         #	Was ">& $r.mtree", which put quicktree's error text into
         #	the tree file, so a failed run was never seen as empty
         (./quicktree -in a -out t -kimura $r.fmblks > $r.mtree) >& $r.mout
      endif
   endif
#	If quicktree failed, get rid of the empty output file
   set nlin = 0
   if (-e $r.mtree) then
      set nlin = (`wc -l $r.mtree | awk '{print $1}'`)
   endif
   if ($nlin == 0) then
      rm $r.mtree >& /dev/null
      #	Pass quicktree's messages on as warnings; $r.mout does not
      #	exist when quicktree was not run
      if (-e $r.mout) then
         cat $r.mout >> $r.warn
      endif
   endif
   rm -f $r.mout
endif
#	Do the same for Gibbs
#	For the Gibbs blocks, when present: MAST-format PSSMs ($r.gmast),
#	block map file ($r.gblks.mapfile) and tree ($r.gtree)
if (-e $r.gblks) then
   $bindir/blimps-bin/blk2pssm $r.gblks $r.gmast M >& /dev/null
   #	Make block map files; sequence lengths come from $r.lis
   $bindir/blimps-bin/makeblockmap $r.gblks $r.gblks.mapfile -L$r.lis >& /dev/null
   #	Make tree files.  The alignment must come from the GIBBS blocks;
   #	this used to read $r.mblks, so the "Gibbs" tree was really a
   #	second MOTIF tree (or failed when MOTIF made no blocks).
   $bindir/blimps-bin/blalign $r.gblks -h > $r.fgblks
   if ($nseq < 3) then
      echo "Need at least 3 sequences to make a tree" >> $r.warn
   else
      #	( cmd > out ) >& err is the csh way to split the streams:
      #	stdout becomes the tree, everything else lands in $r.gout.
      #	Bootstrap only below 100 sequences.
      if ( $nseq < 100) then
         @ nboot = 100
         (./quicktree -in a -out t -kimura -boot $nboot $r.fgblks > $r.gtree) >& $r.gout
      else
         #	Was ">& $r.gtree", which put quicktree's error text into
         #	the tree file, so a failed run was never seen as empty
         (./quicktree -in a -out t -kimura $r.fgblks > $r.gtree) >& $r.gout
      endif
   endif
#	If quicktree failed, get rid of the empty output file
   set nlin = 0
   if (-e $r.gtree) then
      set nlin = (`wc -l $r.gtree | awk '{print $1}'`)
   endif
   if ($nlin == 0) then
      rm $r.gtree >& /dev/null
      #	Pass quicktree's messages on as warnings; $r.gout does not
      #	exist when quicktree was not run
      if (-e $r.gout) then
         cat $r.gout >> $r.warn
      endif
   endif
   rm -f $r.gout
endif

#---------------------------------------------------------------------------
#	Cat the blocks format blocks to the output in case they
#	want to do a search
#  Assemble $r.search.  Each result file that exists is appended as a
#  titled section fenced by two CUT HERE rules.  For the block and PSSM
#  files, carriage returns (octal 015) are replaced by spaces.
set cutline = "======================CUT HERE=============================="

#  Blocks in BLIMPS format, in case they want to do a search
if (-e ${r}.mblks) then
   echo "      **BLIMPS Searchable BLOCKS from MOTIF**" >> ${r}.search
   echo "$cutline" >> ${r}.search
   tr '\015' ' ' < ${r}.mblks >> ${r}.search
   echo "$cutline" >> ${r}.search
endif

if (-e ${r}.gblks) then
   echo "      **BLIMPS Searchable BLOCKS from GIBBS**" >> ${r}.search
   echo "$cutline" >> ${r}.search
   tr '\015' ' ' < ${r}.gblks >> ${r}.search
   echo "$cutline" >> ${r}.search
endif

#  PSSMs in MAST format
if (-e ${r}.mmast) then
   echo "      **MAST Searchable PSSMS from MOTIF**" >> ${r}.search
   echo "$cutline" >> ${r}.search
   tr '\015' ' ' < ${r}.mmast >> ${r}.search
   echo "$cutline" >> ${r}.search
endif

if (-e ${r}.gmast) then
   echo "      **MAST Searchable PSSMS from GIBBS**" >> ${r}.search
   echo "$cutline" >> ${r}.search
   tr '\015' ' ' < ${r}.gmast >> ${r}.search
   echo "$cutline" >> ${r}.search
endif

#  Trees, copied as they are
if (-e ${r}.mtree) then
   echo "      **CLUSTALW tree from MOTIF**" >> ${r}.search
   echo "$cutline" >> ${r}.search
   cat ${r}.mtree >> ${r}.search
   echo "$cutline" >> ${r}.search
endif

if (-e ${r}.gtree) then
   echo "      **CLUSTALW tree from GIBBS**" >> ${r}.search
   echo "$cutline" >> ${r}.search
   cat ${r}.gtree >> ${r}.search
   echo "$cutline" >> ${r}.search
endif


#------------------------------------------------------------------------------
#	Mail back the results if not working from WWW
#  Results are mailed unless the return address is one of the two
#  special values WWW or OLD
if ($return != "OLD" && $return != "WWW") then
   set report = ${r}.blk
   set sender = "blocks@fhcrc.org"

   #  Report body: warnings and errors first, then where the results
   #  can be viewed, then the MOTIF and GIBBS alignments
   cat ${r}.warn ${r}.err > $report
   echo "Results available for 24 hours at" >> $report
   echo "http://blocks.fhcrc.org/blocks-bin/bm_format.pl?$t" >> $report
   echo "" >> $report
   cat ${r}.maln >> $report
   echo "" >> $report
   cat ${r}.galn >> $report

   #  Two messages: the report, then the searchable formats
   /usr/bin/mailx -s "BlockMaker Results" -r "$sender" $return < $report
   /usr/bin/mailx -s "BlockMaker Searchable Formats" -r "$sender" $return < ${r}.search
endif

#
#
#
#   	Clean up; leave $r.mblks and $r.gblks for logos link
#		  leave $r.mblks.mapfile & $r.gblks.mapfile for map link
#		  leave $r.mmast and $r.gmast for mast link
#		  leave $r.mtree and $r.gtree for tree link
#		  may also want to leave .mcob & .gcob ...
#  Remove this run's scratch files ($t.*) from the current directory.
#  Skipped when $t is empty, because the pattern would then be ".*" and
#  match every dot file here.
if ("$t" != "") then
   #  nonomatch: an unmatched pattern is passed through unchanged instead
   #  of stopping the script with "No match" before exit(0) is reached;
   #  -f keeps rm quiet about that leftover pattern
   set nonomatch
   rm -f $t.*
endif

exit(0)
