#!/usr/bin/perl -w
#
# The logic of this program follows the paper
#       Venkatarajan, M.S., Schein, C. H., Braun W., 2003, 
#       "Identifying physical chemical property based sequence motifs in protein
#       families and superfamilies: Application to DNase I related endonuclease"
#       Bioinformatics 19:1381-1390
#
# In this script, the Bayesian method is implemented (Eq.14). The Eqs. 2-6 are
# implemented in the motifminer, which is called from this script.
#
#


use FindBin;
use lib "$FindBin::Bin";

use Alignments;
use utils;
use color;

# Define some constants
$CUTOFF_SIGNIF = -1;

$params = parse_cmd_line_args();

# The same engine is used for scoring the multiple sequence 
#   alignment and for scoring the database.
#
my $config = "$$params{'config'}.db";
create_motifminer_config($params,$$params{'database'},$config);
run_motifminer($config, $$params{'dbscores'},$$params{'detailed'}) unless $$params{'html'}{'dbg'};

if ( $$params{'detailed'} )
{
    $scores = read_detailed_scores($$params{'dbscores'});
    detailed_scores_create_html_output($params,$scores);
    `cp $$params{'prefix'}.html $$params{'prefix'}.inc`;
}
else
{
    $config = "$$params{'config'}.aln";
    create_motifminer_config($params,$$params{'alignment'},$config);
    run_motifminer($config,$$params{'alnscores'},$$params{'detailed'}) unless $$params{'html'}{'dbg'};
    
    $aln_scores = read_scores_and_do_stats($$params{'alnscores'});
    $db_scores  = read_scores_and_do_stats($$params{'dbscores'});
    
    $total_scores = score_proteins($params, $db_scores, $aln_scores);
    total_scores_create_text_output($params, $total_scores);
    total_scores_create_html_output($params, $total_scores);
    if ( !$$params{'html'}{'head'} )
    {
        # This could be done better, e.g. by printing the header separately
        #   and concatenating. But this is not the most time demanding step, 
        #   may change later...
        #
        `cp $$params{'prefix'}.html $$params{'prefix'}.inc`;
        $$params{'html'}{'head'} = 1;
        total_scores_create_html_output($params, $total_scores);
    }
}

clean_directory($params);

exit;

#--------------------------


sub parse_cmd_line_args
{
    my $params = {};
    
    $params->{'cmd_args'}  = join(' ',@ARGV);
    $params->{'vectorlib'} = 'vectorlib-swissprot-56.0.pm';
    $params->{'cutoff'}    = 0;
    $params->{'signif_threshold'} = 0.21;
    $params->{'nresults'}  = 500; # Print up to 500 results in html
    $params->{'detailed'}  = '';

    # These parameters are command-line editable via the -H option.
    $params->{'html'} = 
    {
        'head'        => 1,     # If unchecked, also the HTML '.inc' file will be created.
        'dbres_ncols' => 60,
        'nresults'    => $$params{'nresults'},  # It makes sense to have lower value for HTML output, because rendering is very slow.
        'aa_cols'     => 1,
    };

    while (my $arg=shift(@ARGV))
    {
        if ( $arg eq '-D' || $arg eq '--detailed' ) { $params->{'detailed'}='-v'; next; } 
        if ( $arg eq '-g' || $arg eq '--global-profile' ) { $params->{'global_prf'}=shift(@ARGV); next; } 
        if ( $arg eq '-s' || $arg eq '--substring-motifs' ) { $params->{'substr_motifs'}=uc(shift(@ARGV)); next; } 
        if ( $arg eq '-p' || $arg eq '--prefix' ) { $params->{'prefix'}=shift(@ARGV); next; } 
        if ( $arg eq '-f' || $arg eq '--score-filter' ) { $params->{'cutoff'}=shift(@ARGV); next; } 
        if ( $arg eq '-a' || $arg eq '--alignment' ) { $params->{'alignment'}=shift(@ARGV); next; }
        if ( $arg eq '-n' || $arg eq '--nresults' ) { $params->{'nresults'}=shift(@ARGV); next; }
        if ( $arg eq '-m' || $arg eq '--motifs-file' ) { $params->{'motifs_file'}=shift(@ARGV); next; }
        if ( $arg eq '-t' || $arg eq '--threshold' ) { $params->{'signif_threshold'}=shift(@ARGV); next; }
        if ( $arg eq '-M' || $arg eq '--motifs' ) 
        { 
            my @items  = split(/\s*,\s*/,shift(@ARGV));
            
            my $motifs = {};
            if ( $items[0] ne '-' ) 
            {   
                # Dash means no motifs 
                
                for my $motif (@items)
                {
                    $motifs->{$motif} = 1;
                }
            }
            $params->{'use_motifs'} = $motifs; 
            next; 
        }
        if ( $arg eq '-k' || $arg eq '--keep-files' ) { $params->{'keep_files'}=1; next; }
        if ( $arg eq '-d' || $arg eq '--db' ) { $params->{'database'}=shift(@ARGV); next; }
        if ( $arg eq '-V' || $arg eq '--vector-lib' ) { $params->{'vectorlib'}=shift(@ARGV); next; }
        if ( $arg eq '-H' || $arg eq '--html-params' ) 
        { 
            my $args  = shift(@ARGV);
            my @items = split(/,/,$args);
            for my $item (@items)
            {
                my ($key,$value) = split(/=/,$item);
                $$params{'html'}->{$key} = $value;
            }
            next; 
        }

        if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { usage(); } 
        usage("Unknown option: \"$arg\". (Use -h option for help.)\n");
    }

    if ( !exists($params->{'database'}) ) { usage("Missing the -d option.\n"); }

    # This is not required with the -s option, when motifs are created from substring.
    #   if ( !exists($params->{'motifs_file'}) ) { usage("Missing the -m option.\n"); }

    $SIG{__DIE__} = sub { clean_directory() };          # Make sure that the directory is always cleaned
    $SIG{'INT'}   = sub { clean_directory() };
    $SIG{'QUIT'}  = sub { clean_directory() };

    $$params{'prefix'} = Utils::init_result_dir($$params{'prefix'});

    open(LOG,">>$$params{prefix}.log") || usage("$$params{prefix}.log: $!\n");
    print LOG "motifsearch args: $$params{cmd_args}\n";
    close(LOG);

    if ( !$$params{'detailed'} )
    {
        if ( !exists($params->{'alignment'}) ) { usage("Missing the -a option.\n"); }

        # Convert the alignment to what I call PIR format
        my $aln = Alignments::parse_alignment($$params{'alignment'},{'substitute'=>'-:'});  # The substitute parameter tells the routine to eat gaps
        $$params{'alignment'} = "$$params{prefix}.aln.$$";                  # Create a unique temporary name - $$ stands for the current PID
        Alignments::write_pir_aln($aln,$$params{'alignment'});
        push @to_be_removed, $$params{'alignment'};
    }

    $params->{'config'}      = "$$params{prefix}_motifminer.conf";
    $params->{'dbscores'}    = "$$params{prefix}_db.scores";
    $params->{'alnscores'}   = "$$params{prefix}_aln.scores";
    $params->{'totals_html'} = "$$params{prefix}.html";
    $params->{'totals_text'} = "$$params{prefix}.txt";

    require $params->{'vectorlib'};

    # The motifs can be created also from either a substring of the reference sequence,
    #   or an arbitrary string. To create a motif from an arbitrary string, the global
    #   profile must not be given.
    #
    if ( exists($$params{'substr_motifs'}) )
    {
        if ( !exists($$params{'global_prf'}) ) 
        { 
            create_new_motifs_file($params, "$$params{prefix}_Modified.PCPprf");
        }
        else
        {
            modify_motifs_file($params, "$$params{prefix}_Modified.PCPprf");
        }
        $$params{'motifs_file'} = "$$params{prefix}_Modified.PCPprf";
    }

    my $motifs = Utils::read_prf_file_motifs($$params{'motifs_file'});
    my $motifs_used = {};
    if ( exists($$params{'use_motifs'}) )
    {
        $motifs_used = $$params{'use_motifs'};
    }
    else
    {
        for my $imotif (keys %$motifs) { $motifs_used->{$imotif} = 1; }
    }
    for my $motif (keys %$motifs)
    {
        if ( ! exists($$motifs_used{$motif}) ) { delete($$motifs{$motif}) }
    }
    $$params{'motifs_used'} = $motifs;

    return $params;
}


sub clean_directory
{
    if ( $$params{'keep_files'} ) { return }

    for my $file (@to_be_removed)
    {
        if ( -e $file ) { `rm -rf $file`; }
    }
}


sub usage
{
    my (@msg) = @_;

    clean_directory();

    if ( scalar @msg )
    {
        print STDERR join('',@msg);
        exit -1;
    }

    print STDERR 
        "Options:\n",
        "   -a,  --alignment <file>             Multiple sequence alignment.\n",
        "   -D,  --detailed                     Produce detailed output, where scores for each position of the sequence is shown.\n",
        "   -d,  --db <file>                    Search for motifs in the given database (fasta format). (Note: no check for gaps is done.)\n",
        "   -f,  --score-filter <float>         -1 to ignore non-significant, value from 0 to 1 for a fixed cutoff.\n",
        "   -g,  --global-profile <file>        The PCPMer .gprf file. (See also the -s option.)\n",
        "   -H,  --html-params <string>         HTML parameters, for example 'dbres_ncols=50,head=0', etc. See the \$\$params{html} variable for more.\n",
        "   -k,  --keep-files                   Do not remove temporary files on exit.\n",
        "   -m,  --motifs-file <file>           PCPprf file with motfs.\n",
        "   -M,  --motifs <list>                A comma-separated list of motifs to be included in the search, indexed from 0.\n",
        "                                           If missing, all motifs will be searched for. To explicitly select none, use with dash\n",
        "                                           \"-M -\" (e.g. with the -s option).\n",
        "   -n,  --nresults <int>               How many results should be returned.\n",
        "   -p,  --prefix <string>              Prefix of the output files.\n",
        "   -s,  --substring-motifs <string>    Search for profile motifs defined as substring of the reference sequence. (May contain dots.)\n",
        "                                           When the -g option is not given, arbitrary substring (without profile) is searched for.\n",
        "   -t,  --threshold <float>            Threshold to recognise significant properties.\n",
        "   -V,  --vector-lib <file>            Use the specified vectorlib.pm (see also create-vectorlib)\n",
        "\n";
    exit -1;
}


# Create a new .prf file from a string with faked deviations, entropy, etc. values.
sub create_new_motifs_file
{
    my ($params,$out_file) = @_;

    my @substr_motifs = split(/\s*,\s*/, $$params{'substr_motifs'});
    if ( !scalar @substr_motifs ) { usage("Could not parse the motif substrings \"$$params{'substr_motifs'}\".") }

    my $idx;
    my @gprf = ();
    my $nmotifs = 0;
    for my $smotif (@substr_motifs)
    {
        my @motif = ();
        
        for (my $i=0; $i<length($smotif); $i++)
        {
            $idx++;
            
            my $aa = substr($smotif,$i,1);
            if ( $aa eq '.' ) 
            { 
                push @gprf, {};
                next;
            }

            my $item;
            $$item{'aln_column'} = $idx;
            $$item{'nseqs'}      = 1; 
            $$item{'prf_line'}   = $idx;
            $$item{'reference'}  = $aa;
            for (my $iprop=1; $iprop<=$Vectorlib::nprops; $iprop++)
            {
                if ( !exists($$Vectorlib::vectors{1}{$aa}) ) 
                { 
                    Exit::exit_nicely(
                            "Unknown descriptor value E1 for amino acid \"$aa\"\n",
                            "Your substring contains a non-standard aminoacid, not covered by the vectorlib used.\n",
                            ); 
                }

                push @{$$item{'avgs'}}, $$Vectorlib::vectors{$iprop}{$aa};
                push @{$$item{'devs'}}, 0;
                push @{$$item{'entrps'}}, 1;
            }
            $$item{'total_entrp'} = 1;
            $$item{'similarity'}  = 1;
            $$item{'PCdist'}      = 0;

            push @gprf, $item;
            push @motif, $idx;
        }
        
        push @motifs,{ 'motif'=>\@motif, 'R'=>0.00 };
        $$params{'use_motifs'}{$nmotifs} = 1;
        $nmotifs++;
    }
    Utils::print_profile($out_file,"#",\@motifs,\@gprf);
}


# Create a new .prf file out of an existing one and append new motifs, which
#   will be created from .gprf file and given substrings.
#
sub modify_motifs_file
{
    my ($params,$out_file) = @_;
    
    my $prf_motifs = Utils::read_prf_file_motifs($$params{'motifs_file'});
    my $gprf       = Utils::read_prf_file($$params{'global_prf'});

    # Convert the prf_motifs to a simple list of residues. This format will be required
    #   by the print_profile subroutine. Because a new (temporary) .prf file will
    #   be created, we must also renumber the motifs in $$params{'use_motifs'}.
    #
    my $nmotifs = 0;
    my $imotif  = 0;
    my @motifs  = ();
    while ( exists($$prf_motifs{$imotif}) )
    {
        if ( exists($$params{'use_motifs'}) && !exists($$params{'use_motifs'}{$imotif}) ) 
        {
            $imotif++;
            next;
        }
        
        my @motif = ();
        for my $resi (@{$$prf_motifs{$imotif}{'residues'}})
        {
            push @motif, $$resi{'prf_line'};
        }
        push @motifs, { 'motif'=>\@motif, 'R'=>0.00 };

        $imotif++;
        $nmotifs++;
    }

    # Renumber the motifs requested by the user to reflect the order in the temporary .prf file.
    $$params{'use_motifs'} = {};
    for (my $imotif=0; $imotif<$nmotifs; $imotif++)
    {
        $$params{'use_motifs'}{$imotif} = 1;
    }

    # Determine the sequence from the global profile. The seq_nogaps will be
    #   used only for sanity check, when the substring given by the user does not
    #   match: if the profile contains gaps (was run with the --include-gaps
    #   option), check that the user knows that his substring must contain the
    #   gaps as well.
    #
    my @idxs = ();
    my $seq_nogaps  = '';
    my $seq = '';
    for (my $icol=0; $icol<scalar @$gprf; $icol++)
    {
        my $col = $$gprf[$icol];

        $seq .= $$col{'reference'};
        push @idxs, $icol+1;

        if ( $$col{'reference'} eq '-' ) { next }
        $seq_nogaps .= $$col{'reference'};
    }


    # Now parse the list of motifs from the substring
    my @substr_motifs = split(/\s*,\s*/, $$params{'substr_motifs'});
    if ( !scalar @substr_motifs ) { usage("Could not parse the motif substrings \"$$params{'substr_motifs'}\".") }

    
    # And see if the substring is present in the profile.
    for my $smotif (@substr_motifs)
    {
        # The substring may contain dots - we cannot use index().
        if ( !($seq =~ /$smotif/) ) 
        {
            if ( $seq_nogaps =~ /$smotif/ )
            {
                usage("The substring \"$smotif\" is not present in the sequence\n" 
                    . Utils::wrap_line($seq) 
                    . "\nbut it is present in the sequence when gaps are removed\n"
                    . Utils::wrap_line($seq_nogaps)
                    . "\nEither include the gaps (as dots) in the substring, or run \n"
                    . "MotifMaker without the \"include gaps\" option.");
            }
            usage("The substring \"$smotif\" is not present in the sequence\n" . Utils::wrap_line($seq));
        }
        
        # Make sure that the match is unique: This restriction could be easily treated,
        #   but in real life it probably does not make sense very often. If the motif
        #   appears multiple times in the same sequence, it is likely to be very short.
        #   Leave this for later.
        my $match = $&;
        if ( $' =~ /$smotif/ ) { usage("The substring \"$smotif\" is not unique.") }
        
        my @motif = ();
        my $position = index($seq, $match);
        for (my $i=0; $i<length($smotif); $i++)
        {
            if ( substr($smotif,$i,1) eq '.' ) { next }
            push @motif, $idxs[$i+$position];
        }
        push @motifs, { 'motif'=>\@motif, 'R'=>0.0 };
        $$params{'use_motifs'}{$nmotifs} = 1;
        $nmotifs++;
    }
    Utils::print_profile($out_file,"#",\@motifs,$gprf);
}


sub create_motifminer_config
{
    my ($params,$database,$config) = @_;
    open(CONF,">$config") || usage("$config: $!\n");

    my @order = sort keys %{$Vectorlib::vectors->{1}};

    my $motifs_list = '';
    if ( exists($$params{'use_motifs'}) )
    {
        $motifs_list = "motifs_list = " . join(',', sort {$a<=>$b} keys %{$$params{'use_motifs'}}) . "\n";
    }

    print CONF 
        "# This file was generated by 'motifsearch $$params{cmd_args}'.\n\n",
        "database    = $database\n",
        "motifs      = $$params{motifs_file}\n",
        $motifs_list,
        "signif_threshold = $$params{signif_threshold}\n",
        "\n",
        "nprops      = $Vectorlib::nprops\n",
        "order       = ", join(',',@order), "\n";

    for my $iprop (1..$Vectorlib::nprops)
    {
        my $vector = $Vectorlib::vectors->{$iprop};
        my @tmp = ();
        for my $aa (@order) { push @tmp,$vector->{$aa} }
        print CONF "$iprop  = ", join(',',@tmp), "\n";
    }

    close(CONF);
}

sub run_motifminer
{
    my ($config,$outfile,$options) = @_;
    CMD("$FindBin::Bin/motifminer -c $config -o $outfile $options");
}


sub total_scores_create_text_output
{
    my ($params, $total_scores) = @_;

    my $db = Alignments::parse_alignment($$params{'database'}, {'sanity_check'=>'none'});

    open(FILE,">$$params{totals_text}") || usage("$$params{totals_text}: $!\n");
    # negi print sequence name and its score
    #print "$$params{totals_text}";
    for (my $i=0; $i<@$total_scores; $i++)
    {
        if ( exists($$params{'nresults'}) && $$params{'nresults'} < $i ) { last }

        my $record = $$total_scores[$i];
        my $score  = $$record{'score'};
        my $db_idx = $$record{'db_idx'};
    
        printf FILE "%.2f\t$$db[$db_idx]->{id}\n", $score;
    }
    close(FILE);
}


sub detailed_scores_create_html_output
{
    my ($params, $detailed_scores) = @_;

    my $ncols = $$params{'html'}{'dbres_ncols'};

    my $db = Alignments::parse_alignment($$params{'database'}, {'sanity_check'=>'none'});
    my $motifs = $$params{'motifs_used'}; 
    
    open(HTML,">$$params{totals_html}") || usage("$$params{totals_html}: $!\n");

    if ( $$params{'html'}{'head'} )
    {
        print HTML html_header();
    }

    # Print what values were actually used for determining motifs.
    #   This will be eventually done differently.
    #
    print HTML "<div class='msearch_block'><a name='motifs'></a><b>Searched Motifs:</b><P>\n";
    print HTML Utils::format_html_motif_tables($motifs,{ signif_threshold=>$$params{'signif_threshold'} });
    print HTML "</div>\n";

    print HTML "<div class='msearch_block'><a name='summary'></a><b>Best matching positions:</b><P>\n";
    for my $iseq (sort {$a<=>$b} keys %$detailed_scores)
    {
        print HTML "<i>$$db[$iseq]{id}</i><br>\n";
        print HTML "<div style='padding-left:2em;' class='aln'>\n";
        
        # First get the best match in the sequence for each motif
        #   and create the hash required by align_motifs().
        #
        my $scored_motifs = $$detailed_scores{$iseq};
        my $tmp_motifs = {};
        my $sequence = '';
        for my $imotif (sort {$a<=>$b} keys %$scored_motifs)
        {
            my $values = $$scored_motifs{$imotif};
            my $best_offset  = 0;
            my $best_score   = 0;
            my $tmp_sequence = '';         # Uh, not very nice.
            for my $value (@$values)
            {
                if ( !$sequence )
                {
                    $tmp_sequence .= $$value{'aa'};
                }
                if ( $best_score < $$value{'score'} )
                {
                    $best_score  = $$value{'score'};
                    $best_offset = $$value{'offset'};
                }
            }
            if ( !$sequence )
            {
                $sequence = $tmp_sequence;
            }
            $$tmp_motifs{$imotif} = { 'offset'=>$best_offset, 'score'=>$best_score };
        }
        my ($aligned_motifs) = align_motifs($tmp_motifs, $sequence, $motifs, $$params{'html'}{'dbres_ncols'});
        print HTML $aligned_motifs;
        print HTML "</div>";
    }
    print HTML "</div>\n";

 
    print HTML "<div class='msearch_block'><a name='detailed'></a><b>Detailed output:</b><P>\n";
    my $iseq = 0;
    while ( exists($$detailed_scores{$iseq}) )
    {
        print HTML "<i>$$db[$iseq]{id}</i><br>\n";
        print HTML "<div style='padding-left: 2em;'>\n";
        
        my $motif_scores = $$detailed_scores{$iseq};
        for my $imotif ( sort { $a<=>$b } keys %$motif_scores )
        {
            my $positions = $$motif_scores{$imotif};
            print HTML " $$motifs{$imotif}->{'seq'} ";
            print HTML "<table><tr><td>";
            print HTML "<div class='aln'>";
            for (my $ipos=0; $ipos<scalar @$positions; $ipos++)
            {
                my $pos   = $$positions[$ipos];
                my $color = Color::get_color_string("ffffff","ffffff","5555ff",$$pos{'score'},0.4,0.4,1);
                printf HTML "<span style='background-color:$color;' title='%.3f (%d)'>$$pos{'aa'}</span>",$$pos{score},$ipos+1;
                if ( ($ipos%$ncols)==$ncols-1 ) { print HTML "<span style='margin-left:1em;font-size:x-small'>",$ipos+1,"</span><br>\n"; }
            }
            print HTML "</div>\n";
            print HTML "<tr><td style='padding-left:2em;padding-bottom:2em;' class='aln'><table>";
            my @pos = sort { $$b{'score'}<=>$$a{'score'} } @$positions;
            my $motiflen = length($$motifs{$imotif}->{'seq'});

            print HTML "<tr><td><td><td><td class='aln'>";
            for (my $j=0; $j<$motiflen; $j++)
            {
                my $aa  = substr($$motifs{$imotif}->{'seq'},$j,1);
                print HTML "<span class='aa_$aa' style='font-weight:bold;'>$aa</span>";
            }
            print HTML "<td>";
            for (my $i=0; $i<5; $i++)
            {
                my $color = Color::get_color_string("ffffff","ffffff","5555ff",$pos[$i]->{'score'},0.4,0.4,1);
                printf HTML "<tr><td>%d)<td style='background-color:$color;'>%.2f", $i+1,$pos[$i]->{'score'};
                print HTML  "<td style='font-family:monospace;font-size:x-small;text-align:right;padding-left:1em;'>",$pos[$i]->{offset}+1;
                print HTML  "<td class='aln'>";
                for (my $j=0; $j<$motiflen; $j++)
                {
                    my $idx = $pos[$i]->{offset} + $j;

                    # This happens at the end of sequences: motifminer (correctly) does not print
                    #   the scores for positions where the motif goes beyond the end of the sequence
                    #
                    my $aa;
                    if ( $idx>=scalar @$positions ) 
                    { 
                        $aa = '-';
                    }
                    else
                    {
                        $aa  = $$positions[$idx]->{'aa'};
                    }
                    print HTML "<span class='aa_$aa'>$aa</span>";
                }
                print HTML  "<td style='font-family:monospace;font-size:x-small;text-align:left;'>",$pos[$i]->{offset}+$motiflen;
            }
            print HTML "<tr><td>...<td><td><td>...</table>";
            print HTML "</table>";
            $imotif++;
        }
        print HTML "</div>\n";
        $iseq++;
    }
    print HTML "</div>\n";
    close(HTML);
}


sub html_header
{
        return << "EOT";
<HTML>
<head>
    <style>
        * { font-size: 10pt; font-family: Arial, Helvetica, sans-serif; }
        div.msearch_block { padding-top: 1em; padding-bottom:1em; border-bottom: solid black 1px; }
        .ms_db_results th 
        { 
            font-weight: normal;
            font-style: italic; 
            white-space:nowrap; 
            font-size: smaller;
        }
        table.motif_table
        {
            padding: 0;
            margin: 0;
            margin-right: 1em;
            border-collapse: collapse;
            display: inline; 
        }
        .motif_table td 
        { 
            padding: 0px;  
            padding-left:1px; 
            padding-right:1px;
            margin: 0px;
            text-align:center;
            font-family: monospace; 
            font-size: 8pt;
            border: solid 1px #bbbbbb;
        }
        table.aln 
        { 
            padding: 0px; 
            margin: 0px; 
            border-collapse: collapse; 
            margin-bottom: 2em;
        }
        table.dense
        {
            padding: 0px;
            margin: 0px;
            border-collapse: collapse;
        }
        .dense td
        {
            padding-right: 1em;
        }
        .aln td 
        { 
            padding: 0px; 
            padding-left:1px; 
            padding-right:1px; 
            margin: 0px; 
            text-align:center; 
            font-family: monospace; 
            font-size: 8pt; 
        }
        div.aln
        {
            padding-left: 2em;
            padding-top: 0.1em;
            padding-bottom: 0.5em;
            font-family: monospace;
            font-size: 8pt;
        }
        .aln span
        {
            padding: 1px;
            font-family: monospace;
            font-size: 8pt;
        }
        .aa_A { background-color: #c8c251; }
        .aa_C { background-color: #73c851; }
        .aa_D { background-color: #e96f6c; }
        .aa_E { background-color: #e96f6c; }
        .aa_F { background-color: #c8c251; }
        .aa_G { background-color: #73c851; }
        .aa_H { background-color: #9ca7de; }
        .aa_I { background-color: #c8c251; }
        .aa_K { background-color: #9ca7de; }
        .aa_L { background-color: #c8c251; }
        .aa_M { background-color: #c8c251; }
        .aa_N { background-color: #73c851; }
        .aa_P { background-color: #c8c251; }
        .aa_Q { background-color: #73c851; }
        .aa_R { background-color: #9ca7de; }
        .aa_S { background-color: #73c851; }
        .aa_T { background-color: #73c851; }
        .aa_V { background-color: #c8c251; }
        .aa_W { background-color: #c8c251; }
        .aa_Y { background-color: #73c851; }
        .ms_db_results td.motif { border-bottom: black solid 1px;}
    </style>
</head>
<body>
EOT
}

sub html_javascript
{
    return << "EOT";
        <script type='text/javascript'>
        var mouse_position = null;
        document.onmousemove = mouse_moved;
        function mouse_moved(event)
        {
            if ( ! event )
                event = window.event;
            mouse_position = mouse_coordinates(event);
        }
        function mouse_coordinates(event)
        {
            xpos = 0;
            ypos = 0;
        
            if ( event.pageX || event.pageY )
            {
                xpos = event.pageX;
                ypos = event.pageY;
            }
            else if ( event.clientX || event.clientY )
            {
                xpos = event.clientX + document.body.scrollLeft - document.body.clientLeft;
                ypos = event.clientY + document.body.scrollTop  - document.body.clientTop;
            }
            return { x:xpos, y:ypos };
        }
        function get_element(name)
        {
            var element;
            if ( document.getElementById )
                element = document.getElementById(name);
        
            else if ( document.all)
                element = document.all[name];
        
            else if ( document[name] )
                element = document[name];
        
            if ( element )
                return element;
        }
        function get_element_style(name)
        {
            var element = get_element(name)
            if ( element.style )
                return element.style;
        }
        function show_element_here(name)
        {
            var style  = get_element_style(name);
            style.top  = mouse_position.y+1;
            style.left = mouse_position.x+1;
            style.visibility = style.visibility=='visible' ? 'hidden' : 'visible';
        }
        function show_seq(sequence)
        {
            var element = get_element('seq');
            element.innerHTML = "<span onclick=\\"show_element_here('seq')\\" style='cursor:pointer; text-decoration:underline;font-size:smaller;'>(Close)</span>"
                + "<P style='font-family:monospace;'>" 
                + sequence;
            show_element_here('seq');
        }
        </script>
        <div id="seq" style="position:absolute;visibility:hidden;background-color:white;padding:1em;border:solid 1px black;"></div>
EOT
}


sub total_scores_create_html_output
{
    my ($params, $total_scores) = @_;

    open(HTML,">$$params{totals_html}") || usage("$$params{totals_html}: $!\n");
    my $db     = Alignments::parse_alignment($$params{'database'}, {'sanity_check'=>'none'});
    my $motifs = $$params{'motifs_used'}; 
    
    if ( $$params{'html'}{'head'} )
    {
        print HTML html_header();
    }

    print HTML html_javascript();

    # Print what values were actually used for determining motifs.
    #   This will be eventually done differently.
    #
    print HTML "<div class='msearch_block'><a name='motifs'></a><b>Searched Motifs:</b><P>\n";
    print HTML Utils::format_html_motif_tables($motifs,{ signif_threshold=>$$params{'signif_threshold'} });
    print HTML "</div>\n";


    print HTML "<div class='msearch_block'><a name=''></a>\n";
    print HTML "<table class='ms_db_results'>\n";
    print HTML "<tr><th>Score <th colspan='3' style='padding-left:1em;'>Description <th style='padding-left:1em;'>Motif scores\n";
    for (my $i=0; $i<@$total_scores; $i++)
    {
        if ( exists($$params{'html'}{'nresults'}) && $$params{'html'}{'nresults'} < $i ) { last }

        my $record = $$total_scores[$i];
        my $score  = $$record{'score'};
        my $db_idx = $$record{'db_idx'};

        if ( !($$db[$db_idx]->{id} =~ /^\s*(\S+)\s+(\S+)\s+/) ) 
        { 
            usage(
                "Could not parse the database entry \"($$db[$db_idx]->{id}\".\n",
                "Note: The database may be OK. This is a FIXME call, so far only Astral database has been used.");
        }
        my $pdb    = "<A href='http://astral.berkeley.edu/pdbstyle.cgi?id=$1&amp;output=text'>$1</A>";
        my $family = $2;
        my $descr  = $';

        my $seq = uc($$db[$db_idx]->{seq});
        my ($aligned_motifs) = align_motifs($$record{'motifs'}, $seq, $motifs, $$params{'html'}{'dbres_ncols'});

        my @motif_stats = ();
        push @motif_stats, "<tr><th>Seq<th>DB<th>Aln<td>";
        for my $imotif (sort {$a<=>$b} keys %{$$record{'motifs'}})
        {
            if ( exists($$record{'motifs'}{$imotif}{'ignore'}) ) { next }
            
            my $db_avg = $$record{'motifs'}{$imotif}->{'db_avg_score'};
            my $db_dev = $$record{'motifs'}{$imotif}->{'db_dev_score'};
            my $score  = $$record{'motifs'}{$imotif}->{'seq_score'};

            my $color = '';
            if ( $$record{'motifs'}{$imotif}{'signif'} == 0 ) { $color = 'style="color:red;"'; }
            elsif ( $$record{'motifs'}{$imotif}{'signif'} == 2 ) { $color = 'style="color:green;"'; }
            
            push @motif_stats, sprintf("<tr><td $color>%.2f<td>%.2f&plusmn;%.2f<td>%.2f<td $color>%s",
                    $score,
                    $db_avg, 
                    $db_dev,
                    $$record{'motifs'}{$imotif}->{'aln_avg_score'},
                    $$motifs{$imotif}->{'seq'},
                    );
        }

        my $wrapped_seq = Utils::wrap_line($seq,{separator=>'<br>'});
        printf HTML "<tr><td>%d.&nbsp;%.2f<td style='padding-left:1em;'>$pdb   <td>$family <td>$descr                       <td>\n", $i+1, $score;
        print HTML "<tr><td>     <td style='padding-left:1em;'><img onclick='show_seq(\"$wrapped_seq\")' alt='Show sequence' src='Images/disp-seq.jpg' style='cursor:pointer'>";
        print HTML "    <td colspan='2'> $aligned_motifs  <td style='padding-left:1em;'><table class='dense'>", 
            join("\n", @motif_stats),"</table>\n";
    }
    print HTML "</table>\n";
    print HTML "</div>\n";

    if ( $$params{'html'}{'head'} )
    {
        print HTML "</body></html>\n";
    }

    close(HTML);
}


sub lane_free
{
    my ($lane,$offset,$len) = @_;
    for (my $i=0; $i<$len; $i++)
    {
        if ( exists($$lane[$i+$offset]) ) { return 0 }
    }
    return 1;
}




# An intelligent printout of all motifs identified in this sequence: zip the motif lines so that
#   xx---------                 xx------xxx
#   --xxx------     becomes     --xxx------
#   -------xxx
#
# The data structure used:
#
#       $seq                                .. the sequence
#
#       $all_motifs = $$params{'motifs_used'}
#
#       $$rec_motifs{$imotif}->{'offset'}   .. position in the sequence
#       $$rec_motifs{$imotif}->{'score'}   
#       $$rec_motifs{$imotif}->{'ignore'}   .. if set, the motif will not be listed
#
sub align_motifs
{
    my ($rec_motifs, $seq, $all_motifs, $ncolumns) = @_;

    my $len = length($seq);

    my @motif_lanes = ();
    
    for my $imotif (sort { $$rec_motifs{$a}->{offset}<=>$$rec_motifs{$b}->{offset} } keys %$rec_motifs)   
    {
        # If score is not defined, this motif was filtered out.
        if ( exists($$rec_motifs{$imotif}->{'ignore'}) ) { next; }
        
        my $offset       = $$rec_motifs{$imotif}->{'offset'};
        my $motif_string = $$all_motifs{$imotif}->{'seq'};
        my $len          = length($motif_string);

        my $free_ilane = -1;
        for (my $ilane=0; $ilane<scalar @motif_lanes; $ilane++)
        {
            if ( lane_free($motif_lanes[$ilane],$offset-1,$len+2) ) # The -1 and +2 prevent merging of motifs (xxyy)
            { 
                $free_ilane = $ilane; 
                last;
            }
        }
        if ( $free_ilane==-1 ) 
        { 
            $free_ilane = scalar @motif_lanes; 
            if ( !exists($motif_lanes[$free_ilane]) ) 
            { 
                @{$motif_lanes[$free_ilane]} = (); 
            }
        }
        
        my $tmp_array = \@{$motif_lanes[$free_ilane]};
        for (my $i=0; $i<$len; $i++)
        {
            $$tmp_array[$i+$offset] = substr($motif_string,$i,1);
        }
    }

    # Wrap the lines $seq and (possibly) multiple motif lines.
    #
    my $iaa = 0;
    my $out = '<table class="aln">';
    while ($seq)
    {
        my $tmp = substr($seq,0,$ncolumns,'');
        my $len = length($tmp);
        my $i;

        $out .= '<tr><td style="font-size:x-small">' . ($iaa+1);
        for ($i=0; $i<$len; $i++)
        {
            $iaa++;
            my $letter = substr($tmp,$i,1);
            my $class  = $$params{'html'}{'aa_cols'} ? " class='aa_$letter'" : '';
            $letter = $letter eq '-' ? "<td title='$iaa'>$letter\n" : "<td$class title='$iaa'>$letter\n";
            $out .= $letter;
        }
        while ($i++<$ncolumns) { $out .= '<td>'; }
        $out .= "<td style='font-size:x-small'>$iaa";

        my $was_printed = 0;
        for (my $ilane=0; $ilane<scalar @motif_lanes; $ilane++)
        {
            my $len = scalar @{$motif_lanes[$ilane]};
            my @tmp = splice(@{$motif_lanes[$ilane]},0,$ncolumns<$len ? $ncolumns : $len);
            
            my $is_empty = 1;
            my $tmp_str  = '';
            for ($i=0; $i<scalar @tmp; $i++)
            {
                if ( defined($tmp[$i]) )
                {
                    $is_empty = 0;
                    my $class = ( $$params{'html'}{'aa_cols'} && $tmp[$i] ne '.' ) ? "class='motif aa_$tmp[$i]'" : "class='motif'";
                    $tmp_str .= "<td $class>$tmp[$i]";
                }
                else
                {
                    $tmp_str .= '<td>';
                }
            }
            if ( $is_empty ) { next }

            while ($i++<$ncolumns) { $tmp_str .= '<td>'; }
        
            $out .= "<tr><td>$tmp_str<td> \n";  # Two td's for column indices
            $was_printed = 1;
        }
        
        # It was requested that the wrapped sequence lines are separated by an empty line
        #   if ( $was_printed ) { $out .= "<tr><td colspan='$ncolumns'>&nbsp;"; }
        # 
        $out .= "<tr><td colspan='$ncolumns'>&nbsp;";
    }
    $out .= "</table>\n";

    return ($out);
}



# Returns list of total scores calculated from individual DB scores and avg+std_dev of
#   alignment scores. The records will be sorted descendently on output. The DB sequence
#   with the highest score is accessible as:
#
#       my $scores = score_proteins($params,$db_scores,$aln_scores);
#       my $db = Alignments::parse_alignment($my_db_file);
#       my $highest_score = $$scores[0]->{score};   # If score is undefined, the record was filtered out (see e.g. $CUTOFF_SIGNIF)
#       my $highest_db_id = $$scores[0]->{db_idx};
#       print "$highest_score .. $$db[$highest_db_id]->{id}\n";
#
sub score_proteins
{
    my ($params,$db_scores,$aln_scores) = @_;

    my ($first_idx) = sort { $a<=>$b } keys %$db_scores;   # Not all motifs had to be used in the search
    my $nseqs  = $$db_scores{$first_idx}->{n};
    my $cutoff = $$params{cutoff};
    my @out_scores = ();

    for (my $i=0; $i<$nseqs; $i++)
    {
        my $total_score = 0;
        my $contains_motifs = ();
        for my $imotif (sort {$a<=>$b} keys %$db_scores)
        {
            # Sanity check
            if ( ${$$db_scores{$first_idx}->{data}}[$i]->{seqid}  != ${$$db_scores{$imotif}->{data}}[$i]->{seqid} )
            {
                usage("FIXME: Sanity check failed .. ${$$db_scores{$first_idx}->{data}}[$i]->{seqid} vs. ${$$db_scores{$imotif}->{data}}[$i]->{seqid}\n");
            }
    
            my $aln_avg   = $$aln_scores{$imotif}->{avg};
            my $aln_dev   = $$aln_scores{$imotif}->{dev};
            my $db_avg    = $$db_scores{$imotif}->{avg};
            my $db_dev    = $$db_scores{$imotif}->{dev};
            my $seq_score = ${$$db_scores{$imotif}->{data}}[$i]->{score};

            if ( !$aln_avg ) { $aln_avg=0; }
            if ( !$aln_dev ) { $aln_dev=0; }

            $contains_motifs->{$imotif}->{'offset'}        = ${$$db_scores{$imotif}->{data}}[$i]->{offset};
            $contains_motifs->{$imotif}->{'db_avg_score'}  = $db_avg;
            $contains_motifs->{$imotif}->{'db_dev_score'}  = $db_dev;
            $contains_motifs->{$imotif}->{'aln_avg_score'} = $aln_avg;
            $contains_motifs->{$imotif}->{'seq_score'}     = $seq_score;

            # The term 1e-10 is to prevent dividing by zero for weak motifs,
            #   which have score values equal to 0 for all sequences.
            #
            my $denominator = ($aln_dev + $db_dev)**2 + 1e-10;  
            my $score = abs($aln_avg - $db_avg) * $seq_score / $denominator;
            $contains_motifs->{$imotif}->{'score'} = $score;

            # Determine what are significant hits, what lies in the gray area, and what is
            #   a random noise.
            #
            if ( $seq_score <= $db_avg + 2*$db_dev ) { $$contains_motifs{$imotif}{'signif'} = 0; }
            elsif ( $seq_score <= ($db_avg + 2*$db_dev+1.0)/2 ) { $$contains_motifs{$imotif}{'signif'} = 1; }
            else { $$contains_motifs{$imotif}{'signif'} = 2; }

            # Should this motif be ignored on the output and from the total score?
            if ( $cutoff==$CUTOFF_SIGNIF && $$contains_motifs{$imotif}{'signif'}==0 ) 
            { 
                $$contains_motifs{$imotif}{'ignore'} = 1;
                next;
            }
            if ( $cutoff!=0 && $cutoff > $seq_score ) 
            { 
                $$contains_motifs{$imotif}{'ignore'} = 1;
                next;
            }

            $total_score += $score;
        }
        if ( $total_score != 0 )
        {
            push @out_scores, { 
                score       => $total_score, 
                db_idx      => ${$$db_scores{$first_idx}->{data}}[$i]->{seqid}, 
                scores_idx  => $i,
                motifs      => $contains_motifs
            };
        }
    }

    my @sorted = sort { $$b{score} <=> $$a{score} } @out_scores;
    return \@sorted;
}


# The resulting scores are stored in a hash:
#   $$scores{$imotif}                               $imotif=0..nmotifs-1
#   $$scores{$imotif}->{n}                          Number of records (should be same for all motifs)
#   $$scores{$imotif}->{avg}                        Average score for all sequences
#   $$scores{$imotif}->{dev}                        Standard deviation
#   {$$scores{$imotif}->{data}}[$i]                 Detailed data for the sequence $i
#   {$$scores{$imotif}->{data}}[$i]->{score}        The highest score of $imotif in the $i-th sequence 
#   {$$scores{$imotif}->{data}}[$i]->{seqid}        The sequence id of $i-th sequence. Should exactly
#                                                       correspond to $i, unless motifminer is messed up.
#   {$$scores{$imotif}->{data}}[$i]->{offset}       The offset of the motif in the sequence.
#
# Everything is indexed from 0.
#
sub read_scores_and_do_stats
{
    my ($fname) = @_;

    open(FILE,"<$fname") || usage("read_scores_and_do_stats: $fname: $!\n");
    my $scores = {};

    while (my $line=<FILE>)
    {
        # 0       0       105     6.215852e-01
        # 0       1       6       6.491221e-01
        # $1 .. sequence number, $2 .. motif number, $3 .. offset in the sequence, $4 .. score
	#print "hello1 $line \n";
        if ( !($line=~/^(\d+)\t(\d+)\t(\d+)\t(\S+)$/) ) { usage("$fname: Could not parse: $line\n") }

	#print "hello2 $line \n";
        my $score = { seqid=>$1, offset=>$3, score=>$4};
        $scores->{$2}->{avg} += $4;
        $scores->{$2}->{n}++;
        push @{$scores->{$2}->{data}}, $score;
    }
    close(FILE);

    my ($first_idx) = sort { $a<=>$b } keys %$scores;   # Not all motifs had to be used in the search
    for my $motif (keys %$scores)
    {
        # Sanity check
        if ( $scores->{$motif}->{n} != $scores->{$first_idx}->{n} ) { usage("Sanity check failed: $scores->{$motif}->{n} != $scores->{$first_idx}->{n}\n"); }

        $scores->{$motif}->{avg} /= $scores->{$motif}->{n};
        my $mean = $scores->{$motif}->{avg};
        my $dev  = 0;
        for my $score (@{$scores->{$motif}->{data}})
        {
            $dev += ($score->{score} - $mean)**2;
        }
        $scores->{$motif}->{dev} = sqrt($dev / $scores->{$motif}->{n});
    }

    return $scores;
}

# Access e.g. as:
#
#   for my $iseq (sort {$a<=>$b} keys %$scores)
#   {
#       my $motifs = $$scores{$iseq};
#       for my $imotif (sort {$a<=>$b} keys %$motifs)
#       {
#           my $values = $$motifs{$imotif};
#           for my $value (@$values)
#           {
#               print "$iseq:$imotif .. $$value{aa} $$value{offset} $$value{score}\n";
#           }
#       }
#   }
#
sub read_detailed_scores
{
    my ($fname) = @_;

    open(FILE,"<$fname") || usage("read_detailed_scores: $fname: $!\n");

    my $scores = {};
    while (my $line=<FILE>)
    {
        # 0       0       0       1.943257e-01    S
        # 0       0       1       3.732984e-02    R
        # $1 .. sequence number, $2 .. motif number, $3 .. offset in the sequence, $4 .. score, $5 .. letter 
        if ( !($line=~/^(\d+)\t(\d+)\t(\d+)\t(\S+)\t(\S)$/) ) { usage("$fname: Could not parse: $line\n") }
        my $position = { aa=>$5, score=>$4, offset=>$3 };
        push @{$$scores{$1}{$2}}, $position;
    }
    close(FILE);
    return $scores;
}

sub CMD
{
    my ($cmd) = @_;

    #print STDERR "$cmd\n";
    my @out = `$cmd 2>&1`;
    if ( $? )
    {
        my @msg = ();
        push @msg, "The command \"$cmd\" failed.\n";
        if ( $! )
        {
            push @msg, " (\"$!\").";
        }
        if ( scalar @out )
        {
            push @msg, @out;
        }
        usage(@msg);
    }
}

