/********************************************************************************************************
 * QRNA - Comparative analysis of biological sequences 
 *         with pair hidden Markov models, pair stochastic context-free
 *        grammars, and probabilistic evolutionary  models.
 *       
 * Version 2.0.0 (JUN 2003)
 *
 * Copyright (C) 2000-2003 Howard Hughes Medical Institute/Washington University School of Medicine
 * All Rights Reserved
 * 
 *     This source code is distributed under the terms of the
 *     GNU General Public License. See the files COPYING and LICENSE
 *     for details.
 ***********************************************************************************************************/

/* evolrnadpscan.c
 *
 * ER, Mon Aug 16 13:06:24 CDT 2004 [St. Louis at work, Coro at Maribel's]
 * 
 * dynamic programming (viterbi and forward) with the RNAmodel
 *
 * calculates:
 *                       P(seqX,seqY \pi^* | RNAmodel)  [viterbi algorithm; \pi^* = best path ]
 *              \sum_\pi P(seqX,seqY \pi   | RNAmodel)  [forward algorithm ]
 * 
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>


#include "funcs.h"
#include "globals.h"
#include "squid.h"
#include "structs.h"

/* Forward declaration of the file-local traceback routine.
 * It is defined after EvolViterbiRNADiagScanFast(), which calls it once the
 * full window has been scored (l == lmax) and doends or traceback is set.
 */
static void tracebackRNAdiagscanfast(FILE *ofp, SQINFO sqinfoX, int *seqX, SQINFO sqinfoY, int *seqY, 
				     int leg, int win, int st, int stmod, int l, int lmax, 
				     double score,  
				     struct emodel_s *emodel, 
				     struct rnadpscanfast_s *dp, 
				     struct rnamtxscanfast_s *mtx,
				     struct ali_s *ali, 
				     int revstrand, int traceback, 
				     struct end3scan_s *ends);



/* Function: EvolViterbiRNADiagScanFast()
 * Date:     ER, Mon Aug 16 13:12:59 CDT 2004  [St. Louis at work, Coro with maribel]
 *
 * Purpose:  One step of the scanning Viterbi recursion for the RNA model.
 *           For the prefix of length l of the window that starts at st, it
 *           fills the ROB, RNA, ROJ and ROE entries of dp->rna and, when the
 *           prefix covers the whole window (l == lmax), computes the END
 *           score, i.e. the score of the best path through the RNA model.
 *
 * Strategy: The gaps of the given alignment are respected, so there is only
 *           one "length" and the loops in this function are linear in l.
 *           All dp->rna vectors and mtx->wx are indexed modulo win
 *           (circular buffers): position st+k lives at (stmod+k) mod win.
 *           Scores are combined by addition throughout.
 *
 * Args:     ofp              -- output file (passed on to the helpers / traceback)
 *           sqinfoX, sqinfoY -- sequence info, passed on to the helpers
 *           seqX, seqY       -- the two aligned sequences
 *           gss              -- not used by this function
 *           leg              -- passed on to the OTH helpers and the traceback
 *           win              -- size of the circular buffers
 *           st, stmod        -- start of the window, and st reduced modulo win
 *           l                -- length of the prefix being scored in this call
 *           lmax             -- full length of the window
 *           emodel           -- model parameters; emodel->rna is used here
 *           dp               -- DP storage (dp->rna, dp->rob, dp->roe)
 *           mtx              -- mtx->wx[imod][d-1] is read as the score of the RNA
 *                               segment of length d ending at imod
 *                               (NOTE(review): filled elsewhere; confirm)
 *           ali              -- passed on to the traceback
 *           alignment, cyk, logodds, parse
 *                            -- not used by this function
 *           revstrand, traceback
 *                            -- passed on to the traceback
 *           doends           -- if set (or if traceback is set) the traceback is run
 *           ends             -- ends->oth goes to the OTH helpers; the whole
 *                               structure goes to the traceback
 *
 * Returns:  olend + t[TROBROE] + orend   if l == 0 or lmax == 0;
 *           the best END score           if l == lmax;
 *           -BIGFLOAT                    otherwise (window not yet complete).
 */
double
EvolViterbiRNADiagScanFast(FILE *ofp, SQINFO sqinfoX, int *seqX, SQINFO sqinfoY, int *seqY, char *gss, 
			   int leg, int win, int st, int stmod, int l, int lmax, 
			   struct emodel_s *emodel, struct rnadpscanfast_s *dp, struct rnamtxscanfast_s *mtx,
			   struct ali_s *ali, int alignment, int cyk, int logodds, int parse, int revstrand, int traceback, 
			   int doends, struct end3scan_s *ends)
{
  int     d;
  int     i, imod;                 /* last position of the prefix, absolute and modulo win */
  int     jmod;                    /* (imod - d) wrapped into [0, win)                      */
  int     iback, ibackmod;
  int     end, endmod;
  int     kmod, knxt;              /* (stmod + d) and its successor, wrapped into [0, win)  */
  double  olend, orend;            /* scores of the flanking OTH models emitting nothing    */
  double  sc, bestsc; 
  double  score;
  
  olend = ViterbiOTHDiagScanFast(ofp, sqinfoX, seqX, sqinfoY, seqY, leg, win, st, stmod, 0, 0, emodel->rna->ROB, dp->rob, 
				 FALSE, FALSE, FALSE, ends->oth);
  orend = ViterbiOTHDiagScanFast(ofp, sqinfoX, seqX, sqinfoY, seqY, leg, win, st, stmod, 0, 0, emodel->rna->ROE, dp->roe, 
				 FALSE, FALSE, FALSE, ends->oth);

  /* Nothing to emit: go straight from ROB to ROE.
   * The RNA transition vector is indexed with the RNA-model constant
   * TROBROE, the same one used for every other ROB->ROE move below.
   */
  if (l == 0 || lmax == 0) 
    return olend + emodel->rna->t[TROBROE] + orend;   
  
  i    = st + l - 1;
  imod = (stmod+l-1 > win-1)? stmod+l-1-win : stmod+l-1;
  if (imod != i%win) Die("bad coor transformation in EvolViterbiRNADiagScanFast()");  /* paranoia */

  /* state ROB
   */
  dp->rna->rbmx[imod] = ViterbiOTHDiagScanFast(ofp, sqinfoX, seqX, sqinfoY, seqY, leg, win, st, stmod, l, l, emodel->rna->ROB, dp->rob, 
					       FALSE, FALSE, FALSE, ends->oth);
  
  /* state RNA 
   *
   * Either the RNA segment spans the whole prefix (ROB emits nothing),
   * or it covers the last d positions and is entered from ROB or ROJ
   * ending at i-d.
   */
  bestsc = olend + emodel->rna->t[TROBRNA] + mtx->wx[imod][l-1];

  for (d = 1; d < l; d++) {
    jmod = (imod-d < 0)? imod-d+win : imod-d;

    if ((sc = emodel->rna->t[TROBRNA] + dp->rna->rbmx[jmod] + mtx->wx[imod][d-1]) > bestsc) bestsc = sc;
    if ((sc = emodel->rna->t[TROJRNA] + dp->rna->rjmx[jmod] + mtx->wx[imod][d-1]) > bestsc) bestsc = sc;
  }
  dp->rna->rrmx[imod] = bestsc;

  /* state ROJ
   *
   * d == 0 reads the rrmx[imod] value stored just above, so the RNA
   * state has to be filled before this loop.
   */
  bestsc = -BIGFLOAT;
  for (d = 0; d < l; d++) {
    jmod = (imod-d < 0)? imod-d+win : imod-d;

    if ((sc = emodel->rna->t[TRNAROJ] + dp->rna->rrmx[jmod] + dp->rna->ROJ[imod][d]) > bestsc) bestsc = sc;
  }
  dp->rna->rjmx[imod] = bestsc;
  
  /* state ROE [Viterbi Backwards for the ROE othermodel]
   *
   *  from iback = st+lmax-l to end of full window. assign to ibackmod
   */
  iback    = st + lmax - l;
  ibackmod = iback%win;

  end    = st + lmax - 1;
  endmod = end%win;

  dp->rna->roemx[ibackmod] = ViterbiBackwardsOTHDiagScanFast(ofp, sqinfoX, seqX, sqinfoY, seqY, win, end, endmod, l, l, emodel->rna->ROE, dp->roe);

  /* state END
   *
   * Only defined once the whole window has been processed.
   */
  score = -BIGFLOAT;
  if (l == lmax) {
    
    /* special cases that involve no emissions by the flanking OTH models */
    if ((sc = emodel->rna->t[TROBROE] + dp->rna->roemx[stmod] + olend) > score) score = sc;
    if ((sc = emodel->rna->t[TROBROE] + dp->rna->rbmx[imod]   + orend) > score) score = sc;
    if ((sc = emodel->rna->t[TRNAROE] + dp->rna->rrmx[imod]   + orend) > score) score = sc;
    
    /* RNA or ROB ends at st+d, ROE takes over at st+d+1 */
    for (d = 0; d < l-1; d++) {
      
      kmod = (stmod+d > win-1)? stmod+d-win : stmod+d;
      knxt = (kmod+1  > win-1)? kmod+1-win  : kmod+1;
      
      if ((sc = dp->rna->rrmx[kmod] + emodel->rna->t[TRNAROE] + dp->rna->roemx[knxt]) > score) score = sc;
      if ((sc = dp->rna->rbmx[kmod] + emodel->rna->t[TROBROE] + dp->rna->roemx[knxt]) > score) score = sc;
      
    }
    
    if (doends || traceback)
      tracebackRNAdiagscanfast(ofp, sqinfoX, seqX, sqinfoY, seqY, leg, win, st, stmod, l, lmax, score, emodel, dp, mtx, ali, 
			       revstrand, traceback, ends);
  }
  
  return score;
}

/* Function: tracebackRNAdiagscanfast()
 * Date:     ER, Mon Aug 16 13:12:59 CDT 2004 [St. Louis at work, Coro with Maribel]
 *
 * Purpose:  Traceback of the best path found by the Viterbi algorithm for
 *           the RNA model. Starting from state ROE at the last position of
 *           the window, each step recomputes the candidate scores of the
 *           forward recursion and follows the first one that reproduces the
 *           current score to within MARGIN. Every RNA segment met on the way
 *           is recorded in ends->rna->lend[] / rend[].
 *
 * Args:     ofp              -- output file for the traceback text
 *           sqinfoX, sqinfoY -- sequence info, passed on to the OTH helper
 *           seqX, seqY       -- the two aligned sequences
 *           leg              -- used to report coordinates on the reverse strand
 *           win              -- size of the circular DP buffers
 *           st, stmod        -- start of the window, and st reduced modulo win
 *           l                -- length of the window being traced
 *           lmax             -- not used by this function
 *           score            -- score of the best path (the END score)
 *           emodel           -- model parameters; emodel->rna is used here
 *           dp               -- filled DP storage (dp->rna, dp->rob, dp->roe)
 *           mtx              -- mtx->wx, scores of the RNA segments
 *           ali              -- not used by this function
 *           revstrand        -- if set, the header line reports reverse-strand coordinates
 *           traceback        -- if set, the traceback is printed
 *           ends             -- ends->rna receives the RNA segment ends;
 *                               ends->oth is passed on to the OTH helper
 *
 * Returns:  void. Prints the traceback for the Viterbi algorithm (when asked)
 *           and fills ends->rna. Dies if no candidate reproduces a score.
 */
static void   
tracebackRNAdiagscanfast(FILE *ofp, SQINFO sqinfoX, int *seqX, SQINFO sqinfoY, int *seqY, 
			 int leg, int win, int st, int stmod, int l, int lmax, 
			 double score,  
			 struct emodel_s *emodel, 
			 struct rnadpscanfast_s *dp, 
			 struct rnamtxscanfast_s *mtx,
			 struct ali_s *ali, 
			 int revstrand,
			 int traceback, 
			 struct end3scan_s *ends)
{
  struct tracer_s      *tr;      /* the traceback tree under construction  */
  struct tracer_s      *cur_tr;  /* ptr to node of tr we're working on     */
  struct tracerstack_s *dolist;  /* pushdown stack of active tr nodes      */
  int    i, imod, prv_i;         /* position in seqX, seqY                 */
  int    d, end;                 /* segment length; last window position   */
  int    jmod;                   /* (imod - d) wrapped into [0, win)       */
  int    kmod, knxt;             /* (stmod + d) and successor, wrapped     */
  int    cur_x, cur_y;           /* nucleotides at those positions         */
  int    cur_st, prv_st;
  int    len;
  float  sc, cur_sc, prv_sc;     /* do the comparisons as floats (for precision reasons) */
  double olend, orend;
  int    flag;
  int    lc = 0;                 /* index for the counting of local RNA regions */
  int    verbose = FALSE;
  int    x;
  
  if (score <= -BIGFLOAT*BIGFLOAT || score >= BIGFLOAT*BIGFLOAT) 
    Die ("in tracebackRNAdiagscanfast(). Wallace Shawn says: 'Inconceivable score' %f", score);

  /* scores of the flanking OTH models when they emit nothing */
  olend = ViterbiOTHDiagScanFast(ofp, sqinfoX, seqX, sqinfoY, seqY, leg, win, st, stmod, 0, 0, emodel->rna->ROB, dp->rob, 
				 FALSE, FALSE, FALSE, ends->oth);
  orend = ViterbiOTHDiagScanFast(ofp, sqinfoX, seqX, sqinfoY, seqY, leg, win, st, stmod, 0, 0, emodel->rna->ROE, dp->roe, 
				 FALSE, FALSE, FALSE, ends->oth);
  
  /* Initialize
   * Start at end = st + l - 1
   */
  tr     = InitTracer();       /* start a trace tree */
  dolist = InitTracerstack();  /* start a stack for traversing the trace tree */

  end = st + l - 1;
  if (traceback) {
    if (revstrand) fprintf(ofp, "\nRNA traceback [REVSTRAND] (diagonal viterbi) [start= %d, end = %d]\n", leg-1-end, leg-1-st);
    else           fprintf(ofp, "\nRNA traceback (diagonal viterbi) [start= %d, end = %d]\n", st, end);
  }

  cur_tr = AttachTracer(tr, end, stROE); 
  PushTracerstack(dolist, cur_tr);
  prv_sc = score;
  
  while ((cur_tr = PopTracerstack(dolist)) != NULL)
    {
      i    = cur_tr->emit;
      imod = i%win;
      len  = i - st + 1;

      if (i > -1) {
	cur_x = seqX[i];
	cur_y = seqY[i];
      }
      else {
	cur_x = -1;
	cur_y = -1;
      }

      cur_sc = prv_sc;
      cur_st = cur_tr->type;

      /* default: no predecessor, i.e. the traceback stops at this node */
      prv_i  = -1;
      prv_st = -1;

      if (traceback) fprintf(ofp,"tracing %s (%d) [%d %d] %f \n", rstNAME[cur_st], i, cur_x, cur_y, cur_sc);

      switch (cur_st){

      case stROB: 
	/* ROB accounts for everything from st to i: end of the traceback */
	sc = ViterbiOTHDiagScanFast(ofp, sqinfoX, seqX, sqinfoY, seqY, leg, win, st, stmod, len, len, emodel->rna->ROB, dp->rob, 
				    FALSE, FALSE, FALSE, ends->oth);

	if (cur_sc < sc+MARGIN && cur_sc > sc-MARGIN) 
	  {
	    prv_st = -1;

	    if (traceback) fprintf(ofp,"END RNA traceback\n\n");  
	    break;
	  }
	else Die ("invalid traceback %s in ViterbiRNAdiagscanfast() pos: %d (%f, %f)", rstNAME[cur_st], i, cur_sc, sc);
	break;

      case stRNA: 
	flag = FALSE;

	/* RNA segment spans st..i, ROB emitted nothing: end of the traceback */
	sc = emodel->rna->t[TROBRNA] + mtx->wx[imod][len-1] + olend;
	if (i >= st &&
	    cur_sc < sc+MARGIN && cur_sc > sc-MARGIN) 
	  {
	    prv_i  = -1; 
	    prv_st = -1;
	    prv_sc = olend;
	   
	    if (traceback) {
	      fprintf(ofp," %s, %f\n", rstNAME[stROB], prv_sc);  
	      fprintf(ofp,"END RNA traceback\n\n");  
	    }

	    ends->rna->lend[lc] = st;
	    ends->rna->rend[lc] = i;
	    lc ++;
	   
	    flag   = TRUE;
	    break;
	  }
       
	/* RNA segment of length d, entered from ROB or from ROJ at i-d */
	for (d = 1; d < len; d++) {
	  jmod = (imod-d < 0)? imod-d+win : imod-d;

	  sc = emodel->rna->t[TROBRNA] + dp->rna->rbmx[jmod] + mtx->wx[imod][d-1]; 
	  if (cur_sc < sc+MARGIN && cur_sc > sc-MARGIN) 
	    {
	      prv_i  = i-d; 
	      prv_st = stROB;
	      prv_sc = dp->rna->rbmx[jmod];

	      ends->rna->lend[lc] = prv_i+1;
	      ends->rna->rend[lc] = i;
	      lc ++;

	      flag   = TRUE;
	      break;
	    }

	  sc = emodel->rna->t[TROJRNA] + dp->rna->rjmx[jmod] + mtx->wx[imod][d-1]; 
	  if (cur_sc < sc+MARGIN && cur_sc > sc-MARGIN) 
	    {
	      prv_i  = i-d; 
	      prv_st = stROJ;
	      prv_sc = dp->rna->rjmx[jmod];

	      ends->rna->lend[lc] = prv_i+1;
	      ends->rna->rend[lc] = i;
	      lc ++;

	      flag   = TRUE;
	      break;
	    }
	}
	if (!flag) Die ("invalid traceback %s in ViterbiRNAdiagscanfast() pos: %d", rstNAME[cur_st], i);
	break;
       
       
      case stROJ: 
	flag = FALSE;

	/* ROJ covering the last d positions, entered from RNA ending at i-d */
	for (d = 0; d < len; d++) {
	  jmod = (imod-d < 0)? imod-d+win : imod-d;

	  sc = emodel->rna->t[TRNAROJ] + dp->rna->rrmx[jmod] + dp->rna->ROJ[imod][d];
	  if (cur_sc < sc+MARGIN && cur_sc > sc-MARGIN) 
	    {
	      prv_i  = i-d; 
	      prv_st = stRNA;
	      prv_sc = dp->rna->rrmx[jmod];
	      flag   = TRUE;
	      break;
	    }
	}
	if (!flag) Die ("invalid traceback %s in ViterbiRNAdiagscanfast() pos: %d", rstNAME[cur_st], i);
	break;
       
      case stROE: 
	flag = FALSE;
       
	/* ROE covers the whole window, ROB emitted nothing: end of the traceback */
	sc =  emodel->rna->t[TROBROE] + dp->rna->roemx[stmod] + olend;
	if (cur_sc < sc+MARGIN && cur_sc > sc-MARGIN) 
	  {
	    prv_i  = -1; 
	    prv_st = -1;
	    prv_sc = olend;

	    if (traceback) {
	      fprintf(ofp," %s, %f\n", rstNAME[stROB], prv_sc);  
	      fprintf(ofp,"END RNA traceback\n\n");  
	    }
	   
	    break;
	  }

	/* ROE emitted nothing, entered from ROB at i */
	sc = emodel->rna->t[TROBROE] + dp->rna->rbmx[imod] + orend;
	if (cur_sc < sc+MARGIN && cur_sc > sc-MARGIN) 
	  {
	    prv_i  = i; 
	    prv_st = stROB;
	    prv_sc = dp->rna->rbmx[imod];
	    break;
	  }
 
	/* ROE emitted nothing, entered from RNA at i */
	sc = emodel->rna->t[TRNAROE] + dp->rna->rrmx[imod] + orend;
	if (cur_sc < sc+MARGIN && cur_sc > sc-MARGIN) 
	  {
	    prv_i  = i; 
	    prv_st = stRNA;
	    prv_sc = dp->rna->rrmx[imod];
	    break;
	  }
       
	/* RNA or ROB ends at st+d, ROE takes over at st+d+1 */
	for (d = 0; d < len-1; d++) {
	 
	  kmod = (stmod+d > win-1)? stmod+d-win : stmod+d;
	  knxt = (kmod+1  > win-1)? kmod+1-win  : kmod+1;
	 
	  sc = dp->rna->rrmx[kmod] + emodel->rna->t[TRNAROE] + dp->rna->roemx[knxt];
	  if (cur_sc < sc+MARGIN && cur_sc > sc-MARGIN) 
	    {
	      prv_i  = st+d; 
	      prv_st = stRNA;
	      prv_sc = dp->rna->rrmx[kmod];
	      flag   = TRUE;
	      break;
	    }

	  sc = dp->rna->rbmx[kmod] + emodel->rna->t[TROBROE] + dp->rna->roemx[knxt];
	  if (cur_sc < sc+MARGIN && cur_sc > sc-MARGIN) 
	    {
	      prv_i  = st+d; 
	      prv_st = stROB;
	      prv_sc = dp->rna->rbmx[kmod];
	      flag   = TRUE;
	      break;
	    }
	}
       
	if (!flag) Die ("invalid traceback %s in ViterbiRNAdiagscanfast() pos: %d", rstNAME[cur_st], i);
	break;
       
      default:
	Die("invalid state in ViterbiRNAdiagscanfast()");
      }
     
      /* follow the predecessor, if there is one */
      if (prv_st != -1) {
	if (traceback) fprintf(ofp," %s->%s, %f\n", rstNAME[prv_st], rstNAME[cur_st], cur_sc - prv_sc);  
	PushTracerstack(dolist, AttachTracer(cur_tr, prv_i, prv_st));
      }

      if (lc >= MAX_NUM_ENDS) Die(" Too many ends in RNA traceback. Increase parameter MAX_NUM_ENDS");
    }

  if (verbose || traceback) {
    printf("RNA ends [%d %d]\n", st, end);
    for (x = 0; x < MAX_NUM_ENDS; x++)
      if (ends->rna->lend[x] > -1)
	printf("(%d..%d) ", ends->rna->lend[x], ends->rna->rend[x]);
    printf("\n");
  }

  /* cur_tr is NULL here (that is what ended the loop), and every node it
   * pointed to along the way was attached under tr, so freeing tr is enough.
   */
  FreeTracer(tr);
  FreeTracerstack(dolist);       

}       


