#!/opt/conda/conda-bld/repeatmasker_1697737943129/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_/bin/perl
##---------------------------------------------------------------------------##
##  File:
##      @(#) RepeatMasker
##  Author:
##      Arian Smit <asmit@systemsbiology.org>
##      Robert Hubley <rhubley@systemsbiology.org>
##  Description:
##      Takes one or more DNA sequence files, in fasta format, and returns
##      masked sequence file(s) (repetitive DNA is masked) for database
##      searches and a file with detailed annotation of repeat locations.The
##      sequence data are screened against a library of repetitive sequences
##      using the program cross_match (Phil Green, unpublished) or
##      ABBlast ( Gish et al ), RMBlast ( NCBI, Hubley et al ), or
##      nhmmer ( Wheeler et al ).
##
## NOTE: See RepeatMaskerConfig.pm for necessary installation
##       customization.
##
#******************************************************************************
#* Copyright (C) University of Washington 1996-1999 Developed by Arian Smit,
#* Philip Green and Colin Wilson of the University of Washington Department of
#* Genomics.
#*
#* Copyright (C) Arian Smit 2000-2001
#*
#* Copyright (C) Institute for Systems Biology 2002-2021 Developed by
#* Arian Smit and Robert Hubley.
#*
#* This work is licensed under the Open Source License v2.1.  To view a copy
#* of this license, visit http://www.opensource.org/licenses/osl-2.1.php or
#* see the license.txt file contained in this distribution.
#*
###############################################################################
#  ChangeLog:
#
#    $Log$
#
###############################################################################
#
# To Do:
#
#

=head1 NAME

RepeatMasker - Mask repetitive DNA

=head1 SYNOPSIS

  RepeatMasker [-options] <seqfiles(s) in fasta format>

=head1 DESCRIPTION

The options are:

=over 4

=item -h(elp)

Detailed help

=back

Default settings are for masking all type of repeats in a 
primate sequence.

=over 4

=item -e(ngine) [crossmatch|wublast|abblast|ncbi|rmblast|hmmer]

Use an alternate search engine to the default.  Note: 'ncbi' and 'rmblast' 
are both aliases for the rmblastn search engine engine.  The generic NCBI 
blastn program is not sensitive enough for use with RepeatMasker at this 
time.

=item -pa(rallel) [number]   

The number of sequence batch jobs [50kb minimum] to run in parallel.  
RepeatMasker will fork off this number of parallel jobs, each running 
the search engine specified. For each search engine invocation 
( where applicable ) a fixed the number of cores/threads is used:

  RMBlast     4 cores
  ABBlast     4 cores
  nhmmer      2 cores
  crossmatch  1 core

To estimate the number of cores a RepeatMasker run will use simply 
multiply the -pa value by the number of cores the particular search
engine will use.

=item -s             

Slow search; 0-5% more sensitive, 2-3 times slower than default

=item -q             

Quick search; 5-10% less sensitive, 2-5 times faster than default

=item -qq            

Rush job; about 10% less sensitive, 4->10 times faster than default
(quick searches are fine under most circumstances)
repeat options 

=item -nolow   

Does not mask low_complexity DNA or simple repeats

NOTE: This is an important step in RepeatMasker, the identification
of low-divergence simple repeats early in RepeatMasker's search
phase lowers the overall false-positive rate for TE annotations 
considerably.  To simply remove simple repeats from the final output
of RepeatMasker use postprocessing tools such as:

 egrep -v "Simple|Satellite" my_data.out > filtered.out

To remove these annotations from the final output.  The -nolow option
should only be used when there is a specific reason to avoid pre/post
masking tandem/simple/low-complexity sequences.

=item -noint   

Only masks low complex/simple repeats (no interspersed repeats)

=item -norna         

Does not mask small RNA (pseudo) genes

=item -alu           

Only masks Alus (and 7SLRNA, SVA and LTR5)(only for primate DNA)

=item -div [number]  

Masks only those repeats with [number] percent diverged from consensus

=item -lib [filename] 

Allows use of a custom library (e.g. from another species)

=item -cutoff [number]

Sets cutoff score for masking repeats when using -lib (default 225)

=item -species  <query species>   

Specify the species or clade of the input sequence.  The species
name must be a valid NCBI Taxonomy Database species name and be
contained in the RepeatMasker repeat database.  Some examples are:

  -species human
  -species mouse
  -species rattus
  -species "ciona savignyi"
  -species arabidopsis

Other commonly used species:

mammal, carnivore, rodentia, rat, cow, pig, cat, dog, chicken, 
fugu, danio, "ciona intestinalis" drosophila, anopheles, worm, 
diatoaea, artiodactyl, arabidopsis, rice, wheat, and maize 

=back

Contamination options

=over 4

=item -is_only       

Only clips E coli insertion elements out of fasta and .qual files

=item -is_clip       

Clips IS elements before analysis (default: IS only reported)

=item -no_is         

Skips bacterial insertion element check

=back

Running options

=over 4

=item -gc [number]   

Use matrices calculated for 'number' percentage background GC level 

=item -gccalc        

RepeatMasker calculates the GC content even for batch files/small seqs

=item -frag [number] 

Maximum sequence length masked without fragmenting (default 60000)

=item -nocut         

Skips the steps in which repeats are excised

=item -noisy         

Prints search engine progress report to screen (defaults to .stderr file)  

=item -nopost

Do not postprocess the results of the run ( i.e. call ProcessRepeats ).
NOTE: This options should only be used when ProcessRepeats will be run manually
on the results.

=back

output options

=over 4

=item -dir [directory name] 

Writes output to this directory (default is query file directory, 
"-dir ." will write to current directory).

=item -a(lignments) 

Writes alignments in .align output file

=item -inv     

Alignments are presented in the orientation of the repeat (with option -a)

=item -lcambig

Outputs ambiguous DNA transposon fragments using a lower case
name.  All other repeats are listed in upper case.  Ambiguous
fragments match multiple repeat elements and can only be
called based on flanking repeat information.

=item -small   

Returns complete .masked sequence in lower case

=item -xsmall  

Returns repetitive regions in lowercase (rest capitals) rather than masked

=item -x       

Returns repetitive regions masked with Xs rather than Ns

=item -poly    

Reports simple repeats that may be polymorphic (in file.poly)

=item -source

Includes for each annotation the HSP "evidence".  Currently this option
is only available with the "-html" output format listed below.

=item -html

Creates an additional output file in xhtml format.

=item -ace     

Creates an additional output file in ACeDB format

=item -gff     

Creates an additional Gene Feature Finding format output

=item -u       

Creates an additional annotation file not processed by ProcessRepeats

=item -xm      

Creates an additional output file in cross_match format (for parsing)

=item -no_id   

Leaves out final column with unique ID for each element (was default)

=item -e(xcln) 

Calculates repeat densities (in .tbl) excluding runs of >=20 N/Xs 
in the query

=back

=head1 CONFIGURATION OVERRIDES

=head1 SEE ALSO

=over 4

Crossmatch, ProcessRepeats

=back

=head1 COPYRIGHT

2002-2020 Copyright (C) Institute for Systems Biology Developed by
Arian Smit and Robert Hubley.

2000-2001 Copyright (C) Arian Smit.

1996-1999 Copyright (C) University of Washington, Developed by Arian Smit,
Philip Green and Colin Wilson of the University of Washington Department of
Genomics.

=head1 AUTHORS

Arian Smit <asmit@systemsbiology.org>

Robert Hubley <rhubley@systemsbiology.org>

=cut

#
# Module Dependence
#
use strict;
use FindBin;
use lib $FindBin::RealBin;
use Carp;
use Getopt::Long;
use POSIX qw(:sys_wait_h);
use Storable qw(nstore retrieve);
use Pod::Text;
use File::Copy;
use File::Spec;
use File::Path;
use Data::Dumper;
use Cwd;

# RepeatMasker Libraries
use EMBL;
use DFAM;
use RepeatMaskerConfig;
use SearchResult;
use SearchResultCollection;
use Taxonomy;
use CrossmatchSearchEngine;
use WUBlastSearchEngine;
use HMMERSearchEngine;
use NCBIBlastSearchEngine;
use SimpleBatcher;
use FastaDB;
use TRF;
use TRFSearchResult;
use LibraryUtils;
# For timing only
#use Time::HiRes qw(gettimeofday);

# A bug in 5.8 produces way too many warnings
if ( $] && $] >= 5.008003 ) {
  use warnings;
}

#
# Version
#
my $version = $RepeatMaskerConfig::VERSION;
print "RepeatMasker version $version\n";

if ( $ARGV[ 0 ] && $ARGV[ 0 ] eq '-v' ) {
  exit( 0 );
}

my $cmdLine = $0 . join( ' ', @ARGV );

#
# Option processing
#  e.g.
#   -t: Single letter binary option
#   -t=s: String parameters
#   -t=i: Number paramters
#
my @opts = qw( a|alignments ace alu cutoff=s dir=s
    div=s excln frag=s gc=s gccalc gff
    help int noint inv is_clip is_only lib=s
    low nolow lcambig libdir=s no_id no_is nocut
    noisy norna parallel=i poly q qq
    s small species|sp=s u fptest rbrm_only
    x xm xsmall nopost engine|e=s source html debug=s );
# Add configuration parameters as additional command-line options
push @opts, RepeatMaskerConfig::getCommandLineOptions();

#
# Provide the POD text from this file and 
# from the config file by merging them 
# together.  The heading "CONFIGURATION
# OVERRIDES" provides the insertion point
# for the configuration POD.
#
sub usage {
  my $p = Pod::Text->new();
  $p->output_fh(*STDOUT);
  my $pod_str;
  open IN,"<$0" or die "Could not open self ($0) for generating documentation!";
  while (<IN>){
    if ( /^=head1\s+CONFIGURATION OVERRIDES\s*$/ )
    {
      my $c_pod = RepeatMaskerConfig::getPOD();
      if ( $c_pod ) {
        $pod_str .= $_ . $c_pod;
      }
    }else {
      $pod_str .= $_;
    }
  }
  close IN;
  print "$0 - $version\n";
  $p->parse_string_document($pod_str);
  exit(1);
}

#
# Get the supplied command line options, and set flags
#
my %options = ();
unless ( &GetOptions( \%options, @opts ) ) {
  usage();
}

# Print the internal POD documentation if something is missing
if ( $#ARGV == -1 && !$options{'help'} ) {
  print "No query sequence file indicated\n\n";
  usage();
}

# Print out the big help file if requested
my $REPEATMASKER_DIR = $FindBin::RealBin;
my $PAGER = $ENV{PAGER};
$PAGER = "more" if !defined $PAGER;
system( "$PAGER $REPEATMASKER_DIR/repeatmasker.help\n" ),
    exit( 0 )
    if $options{'help'};

#
# Resolve configuration settings using the following precedence: 
# command line first, then environment, followed by config
# file.
#
RepeatMaskerConfig::resolveConfiguration(\%options);
my $config = $RepeatMaskerConfig::configuration;
my $NHMMSCAN_PRGM = $config->{'HMMER_DIR'}->{'value'} . "/nhmmscan";
my $HMMPRESS_PRGM = $config->{'HMMER_DIR'}->{'value'} . "/hmmpress";
my $HMMSTAT_PRGM = $config->{'HMMER_DIR'}->{'value'} . "/hmmstat";
my $RMBLASTN_PRGM = $config->{'RMBLAST_DIR'}->{'value'} . "/rmblastn";
my $NCBIBLASTDB_PRGM = $config->{'RMBLAST_DIR'}->{'value'} . "/makeblastdb";
my $CROSSMATCH_PRGM = $config->{'CROSSMATCH_DIR'}->{'value'} . "/cross_match";
my $WUBLASTP_PRGM = $config->{'ABBLAST_DIR'}->{'value'} . "/blastp";
my $SETDB_PRGM = $config->{'ABBLAST_DIR'}->{'value'} . "/setdb";
my $TRF_PRGM = $config->{'TRF_PRGM'}->{'value'};

# Determine the installation directory using 
# location of the program that was invoked.
my $LIBDIR = $config->{'LIBDIR'}->{'value'};
my $MATDIR = "$REPEATMASKER_DIR/Matrices";

die "The assumed RepeatMasker installation directory\n" .
    "    $REPEATMASKER_DIR\n" . 
    "does not appear to be correct.  E.g it does not\n" .
    "contain a 'Libraries' or 'Matrices' subdirectory.\n" .
    "This can occur if hard links are used to invoke\n" .
    "this script.\n"
    unless ( -d $LIBDIR && -d $MATDIR );

#
# Get the date
#
my $date = localtime( time() );

# Debugging flag
my $DEBUG = 0;
$DEBUG = 1 if ( $RepeatMaskerConfig::DEBUGALL == 1 ||
                ( exists $options{'debug'} 
                  && $options{'debug'} & 1 ) );

# Windows does not support the use of ":" in a filename.
$date =~ s/[ ,\t,\n:]//g;

#
# Setup the search engine
#
my $searchEngine;
my $engine = $config->{'DEFAULT_SEARCH_ENGINE'}->{'value'};
if ( defined $options{'engine'} ) {
  if ( $options{'engine'} =~
       /^(ncbi|rmblast|wublast|abblast|crossmatch|hmmer|nhmmer)$/i )
  {
    $engine = lc( $options{'engine'} );
  }
  else {
    die "I have never heard of the search engine $options{'engine'}.  Please\n"
        . "use rmblast/abblast/hmmer or crossmatch.\n";
  }
}

# Normalize engine names, so that later code does not have to worry about e.g.
# "nhmmer" vs "hmmer" and can assume "hmmer"
$engine = "wublast" if ( $engine eq "abblast" );
$engine = "hmmer" if ( $engine eq "nhmmer" );
$engine = "ncbi" if ( $engine eq "rmblast" );

# Save normalized engine name back into options as well
$options{'engine'} = $engine;

if ( defined $options{'int'} || defined $options{'low'} ) {
  die "\nThe options -int and -low have been deprecated.  Please use either\n"
      . "-noint or -nolow instead.\n\n";
}

if ( defined $options{'nolow'} ) {
  print "\nWARNING: The nolow option should be used with caution.  This option\n" . 
        "         doesn't simply filter out simple repeats and low-complexity\n" .
        "         annotations from the output, rather it doesn't run these\n" .
        "         searches at all.  The simple repeats, and low-complexity\n" . 
        "         sequences may then be falsely annotated as fragments of\n" .
        "         TE families that contain short stretches of them.\n\n";
}

if ( !-e $TRF_PRGM ) {
  die "TRF program not configured! This version of RepeatMasker requires a\n"
      . "local installatoin of TRF.  Please visit:\n"
      . "               http://tandem.bu.edu/trf/trf.html\n"
      . "to obtain the current version.  Once installed please re-run the \n"
      . "RepeatMasker configure script to setup RepeatMasker to use the new\n"
      . "installation.";
}

my $engineDEBUG = 0;
$engineDEBUG = 1 if ( $RepeatMaskerConfig::DEBUGALL == 1 ||
                      ( exists $options{'debug'}
                        && $options{'debug'} & 2 ) );
if ( $engine eq "wublast" ) {
  $searchEngine = WUBlastSearchEngine->new(
                             pathToEngine => $WUBLASTP_PRGM,
                             DEBUG        => $engineDEBUG );

  if ( not defined $searchEngine ) {
    die "Cannot execute $WUBLASTP_PRGM\n";
  }
  print "Search Engine: ABBlast/WUBlast [ "
      . $searchEngine->getVersion() . " ]\n";
}
elsif ( $engine eq "hmmer" ) {
  $searchEngine = HMMERSearchEngine->new(
                             pathToEngine => $NHMMSCAN_PRGM, 
                             DEBUG        => $engineDEBUG );

  # Used to allow a single instance to use all the cores ( ie. default nhmmer
  # prior to 3.2 ).  Now ( like nhmmer 3.2 ) we require the user to be explicit
  # about how many parallel proceses to use.
  # BEFORE:
  #if ( exists $options{'parallel'} && $options{'parallel'} > 1 ) {
  # Limit nhmmer cores to 2 per batch.
  #$searchEngine->setCores( 2 );
  #}
  # NOW:
  # For each parallel batch invoke nhmmer with 2 cores each.  I.e -pa 4
  # would run 4 nhmmer jobs each using 2 cores
  $searchEngine->setCores( 2 );

  if ( not defined $searchEngine ) {
    die "Cannot execute $NHMMSCAN_PRGM\n";
  }
  print "Search Engine: HMMER [ " . $searchEngine->getVersion() . " ]\n";

  # TODO: We do not support IS searches with HMMER yet.
  $options{'no_is'} = 1;

}
elsif ( $engine eq "crossmatch" ) {
  $searchEngine = CrossmatchSearchEngine->new(
                           pathToEngine => $CROSSMATCH_PRGM,
                           DEBUG        => $engineDEBUG );
  if ( not defined $searchEngine ) {
    die "Cannot execute $CROSSMATCH_PRGM\n";
  }
  print "Search Engine: Crossmatch [ " . $searchEngine->getVersion() . " ]\n";
}
elsif ( $engine eq "ncbi" ) {
  $searchEngine = NCBIBlastSearchEngine->new(
                             pathToEngine => $RMBLASTN_PRGM,
                             DEBUG        => $engineDEBUG );
  if ( not defined $searchEngine ) {
    die "Cannot execute $RMBLASTN_PRGM";
  }
  print "Search Engine: NCBI/RMBLAST [ " . $searchEngine->getVersion() . " ]\n";
}
else {
  die "Search engine ( $engine ) is unknown to RepeatMasker.  Please check "
      . "the RepeatMaskerConfig.pm or rerun the configure script!.\n";
}

#
# Verify that the libraries exist
#
my $libType = "CONSENSUS";
$libType = "HMM" if ( $engine eq "hmmer" );

#
# RepeatMasker Batching Parameters
#
my $fragmentSize = 60000;

#
# Selection of a batch overlap length can have large impacts on the program.
# The overlap boundaries are places where edge effects produce partial
# overlapping annotations.  Also matrix differences in flanking batches
# can cause the same repeat to have different divergence, score and alignment
# length characteristics.
#
my $overlapLen = 2000;

#
# User supplied fragment length
#
if ( defined $options{'frag'} ) {
  if ( $options{'frag'} < ( 2 * $overlapLen ) ) {
    warn "RepeatMasker: You may not use a fragment size (-frag "
        . "$options{'frag'} ) which is less than 2 times the overlap "
        . "length (overlapLen = $overlapLen).  Defaulting to $fragmentSize\n";
  }
  else {
    $fragmentSize = $options{'frag'};
  }
}

#
# Parse filenames
#
foreach my $file ( @ARGV ) {
  if ( $file =~ /\s/ ) {
    die "RepeatMasker can not handle filenames with spaces "
        . "like the file \"$file\"\n";
  }
  elsif ( $file =~ /([\`\!\$\^\&\*\(\)\{\}\[\]\|\\\;\"\'\<\>\?])/ ) {
    die "RepeatMasker can not handle filenames with the special "
        . "character \"$1\" as in the file \"$file\"\n";
  }
}

#
# If $file is across a system boundary writing temporary files
# to the file's directory takes a lot of time, so these are written
# to a temporary directory:
my ( $tempdir, $runnumber ) = &createTempDir( \%options, $date, $ARGV[ 0 ] );

#
# Species & Library Setup
#
my $tax =
    Taxonomy->new( famdbfile => "$LIBDIR/RepeatMaskerLib.h5" );

#
# The search path for finding a place to store the cache
#
my @LIBPATH = ( $LIBDIR,
                $ENV{'HOME'} . "/.RepeatMaskerCache" );

# Add tempdir to the end of the library search path
push @LIBPATH, $tempdir;


my ( $resolvedSpecies, $generalLibDir, $speciesLibDir, $customLibDir, $rmLibraryVersion ) =
    &initLibrariesFromFamdb(
                    $options{'species'},
                    $options{'lib'},
                    $LIBDIR,
                    $tempdir,
                    \@LIBPATH,
                    $searchEngine);
my $dbversion = $rmLibraryVersion;
$options{'species'} = $resolvedSpecies;

# Call isA a few times to populate the isACache. This must be done before
# forking so that each fork sees the cached value instead of re-checking
$tax->isA($options{'species'}, 'primates');
$tax->isA($options{'species'}, 'rodentia');

# Read in a frozen hash of element id's which are refineable.  This
# hash is created by initLibraries the first time a library is initialized.
my $refineableHashRef;
if ( !$options{"lib"} ) {
  $refineableHashRef = retrieve( "$speciesLibDir/refineableHash.dat" );
}

# Functions saveOldFiles and cleanUp expect the -dir directory to
# already exist.
if ( $options{'dir'} && !-d $options{'dir'} ) {
  mkdir( $options{'dir'}, 0777 )
      or die "Output directory ( -dir "
      . $options{'dir'}
      . " ) doesn't exist and could not be created.\n";
}

#
# Main loop
#
FILECYCLE:
foreach my $file ( @ARGV ) {

  unless ( -r $file ) {
    print "cannot read file $file in " . cwd() . "\n";
    next;
  }

  unless ( -s $file ) {
    print "File $file appears to be empty.\n";
    next;
  }

  my $compressed = "";
  if ( $file =~ /\.gz$/ ) {
    unless ( `gunzip $file 2>&1` ) {

      # Name $file only changes if gunzip did not complain
      # (file may end with .gz but not be zipped
      $file =~ s/\.gz$//;
      $compressed = "zipped";
    }
  }
  elsif ( $file =~ /\.Z$/ ) {
    unless ( `uncompress $file 2>&1` ) {
      $file =~ s/\.Z$//;
      $compressed = "Zed";
    }
  }

  # With one file $#ARGV == 0
  print "\nanalyzing file $file\n" if ( $#ARGV >= 0 );

  # Don't mess with original query file and remember original
  # location, e.g. for quality file.
  my $fileori = $file;
  my ( $originaldir, $fileend ) = ( File::Spec->splitpath( $file ) )[ 1, 2 ];
  $originaldir = "." if ( $originaldir eq "" );

  my $file = "$tempdir\/$fileend";

  ## Look for quality files and read in
  my $qualFile = "$fileori" . ".qual";

  # foo.seq files have foo.qual quality files;
  $qualFile =~ s/\.seq.qual$/\.qual/;

  # Check if files will be overwritten
  &saveOldFiles( $fileori, $fileend, $originaldir, $date, \%options );

  ## Create a batcher object.  Upon construction the
  ## object will survey the fasta file, check for syntax
  ## errors, and create a byte index for all parseable sequences
  ## in the file.  Copy the file so we don't mess with the
  ## original.
  system( " cp $fileori $file " );
  ## If the input file has read-only permissions simply copying
  ## the file through the OS will retain these permissions
  chmod 0700, $file;
  my $db = FastaDB->new(
                         fileName    => $file,
                         openMode    => SeqDBI::ReadWrite,
                         maxIDLength => 50
  );
  my $batcher = SimpleBatcher->new( $db, $fragmentSize, $overlapLen );

  ## How many sequences are in the fasta file?
  my $totseqcnt = $db->getSeqCount();

  ## Don't process this file unless we have some
  ## unambiguous (ACGT) sequence.  Sublength
  ## is the length of the sequence minus any
  ## ambiguous base codes ( ie. N, S, X, W ... )
  my $sublength = 0;
  unless ( $sublength = $db->getSubtLength() ) {
    &SkipFile( $file );
    &cleanUp( \%options, $runnumber, $tempdir, $fileori, $fileend, $file,
              $originaldir, $compressed )
        unless ( $DEBUG );

    next FILECYCLE;
  }

  # GC level is calculated over the length of the non-ambiguous sequence.
  my $totGClevel = 100 * $db->getGCLength() / $sublength;
  $totGClevel = sprintf "%4.2f", $totGClevel;

  my $maskfile        = "$file.masked";
  my $fullmaskfile    = "$file.masked.all";
  my $fullcatfile     = "$file.cat.all";
  my $fullRefinedFile = "$file.ref";
  my $fullcutfile     = "$file.cut.all";
  my $fullLogFile     = "$file.log";
  my $fullErrFile     = "$file.stderr";
  my $numX      = 1;                  # The number of X's to take the place of a
                                      # repeat base.
  my $totseqlen = $db->getSeqLength();
  my $compressCatFile = 1 if ( $totseqlen > 10000000 );

  my $batchCount = $batcher->getBatchCount();
  my $numberChildren = $options{'parallel'} ? $options{'parallel'} : 1;
  $numberChildren = $batchCount if ( $batchCount < $numberChildren );

  my $badForkCount = 0;     # A failsafe for in case the fork goes badly
  my $badForkMax   = 20;    # The number of bad forks ( in a row ) before exit
  my $retryLimit   = 2;     # Attempt to run a batch 2 times before failing it
  my %children     = ();    # A hash of children PIDs with their batch nums
  my $child_id     = 0;     # The PID of returned by fork
  my %batchStatus  = ();    # A hash which holds the retry count for
                            #  each batch number.
  my $nextBatchToConcatenate = 1;

  # Initialize the batchStatus hash
  for ( my $k = 1 ; $k <= $batchCount ; $k++ ) {
    $batchStatus{$k} = {
                         'retry'    => 0,
                         'childPID' => -1
    };
  }

  #
  # Job processing loop
  #   Process all batches stored in the batchStatus hash.  If
  #   for some reason a job fails the entry in $batchStatus is
  #   incremented.  We continue to re-run this batch until the
  #   $retryLimit is reached.
  #
JOBLOOP:
  while ( keys( %batchStatus ) ) {

    #
    # First check the status of currently running
    # proceses.
    #
    if ( keys( %children ) ) {

      # Wait for at least one to exit;
      print "Waiting for a child to finish or die\n" if ( $DEBUG );
      my $childPID = wait();
      my $retVal   = ( $? >> 8 );

      # Check if we returned with a valid PID
      if ( $childPID > 0 ) {

        ## Child process is gone
        # Find out what batch it was working on
        my $batchNum = $children{$childPID};

        # Delete it from the children list
        delete $children{$childPID};

        my $batchFile = $file . "_batch-" . $batchNum;

        # Check it's status
        if (    $retVal == 0
             && -e "$batchFile.cat"
             && -s "$batchFile.masked" )
        {

          print "Child completed: PID=$childPID, batch=$batchNum\n"
              . " RetVal=$retVal\n"
              if ( $DEBUG );
          ## Child completed ok.
          ## Append log, and stderrs
          system( "cat $batchFile.masked.log >> $fullLogFile" )
              if ( -e "$batchFile.masked.log" );
          system( "cat $batchFile.masked.stderr >> $fullErrFile" )
              if ( -e "$batchFile.masked.stderr" );
          system( "touch $batchFile.cat" ) if ( !-e "$batchFile.cat" );

          # Now remove them
          unlink( "$batchFile.masked", "$batchFile.masked.log",
                  "$batchFile.masked.stderr" )
              unless ( $DEBUG );
          print "Child for batch=$batchNum completed ok....\n" if ( $DEBUG );

          # Delete the batchStatus entry
          delete $batchStatus{$batchNum};
        }
        else {
          ## Process failed.

          print "Child die'd.  Organize the funeral for PID=$childPID, "
              . " batch=$batchNum, RetVal=$retVal\n"
              if ( $DEBUG );

          # Check how many times we have run it
          if ( $batchStatus{$batchNum}->{'retry'} < $retryLimit ) {
            ## Under the retry limit...rerun it!
            print "Child for batch=$batchNum failed ($retVal) " . "retry#"
                . $batchStatus{$batchNum}->{'retry'} . "\n"
                if ( $DEBUG );
            $batchStatus{$batchNum}->{'retry'}++;
            $batchStatus{$batchNum}->{'childPID'} = -1;
            print "WARNING: Retrying batch ( $batchNum ) [ $retVal,"
                . ( -e "$batchFile.cat" ) . ", "
                . ( -s "$batchFile.masked" )
                . "]...\n";
          }
          else {
            ## Too many retries.
            ## We are out of here!
            print "\n\nFATAL ERROR: RepeatMasker giving up. One or more\n"
                . "batches failed!  Unfortunately this type of error\n"
                . "cannot be recovered from. Please submit the following\n"
                . "details to the feedback page at the repeatmasker\n"
                . "website:\n\n"
                . "       http://www.repeatmasker.org\n\n"
                . "RepeatMasker Version: $version\n"
                . "Library Version: $rmLibraryVersion\n"
                . "Search Engine: $engine [ "
                . $searchEngine->getVersion() . " ]\n"
                . "Command Line: $cmdLine\n"
                . "Batch Number: $batchNum\n"
                . "Disk Space:\n"
                . `df $tempdir` . "\n";
            if ( -e "/proc/meminfo" ) {
              print "System Memory:\n";
              open IN, "</proc/meminfo";
              while ( <IN> ) {
                print if ( /Mem|Cache|Swap/ );
              }
              close IN;
            }
            print "Further details about this problem may be found in\n"
                . "the directory: $tempdir\n";

            opendir TDIR, "$tempdir"
                or die "RepeatMasker: Could not open $tempdir for reading!\n";
            my $mostCurrent = 0;
            my $prefix      = "";
            while ( my $tFile = readdir( TDIR ) ) {
              if ( $tFile =~ /^(.*)Results-(\d+)\.out/ ) {
                $prefix = $1;
                $mostCurrent = $2 if ( $2 > $mostCurrent );
              }
            }
            close TDIR;
            if ( $mostCurrent > 0 ) {
              print "The following file(s) in this directory may be useful\n"
                  . "for debugging this failure:\n"
                  . "      $tempdir/$prefix\Results-$mostCurrent.out\n";
              if ( -s "$tempdir/$prefix\Results-$mostCurrent\.err" ) {
                print "      $tempdir/$prefix\Results-$mostCurrent.err\n";
              }
            }
            print "\n\n";
            exit( -1 );
          }
        }
      }
      else {

        # Child is still running
      }
    }    # End if ( keys( %children ...

    # Append annotations in batch-order.  Using -pa # ( multithreading )
    # there is no guarantee they will be ready in batch order.
    while ( $nextBatchToConcatenate <= $batchCount ) {
      if ( !exists $batchStatus{$nextBatchToConcatenate}
           && -e "$file" . "_batch-" . "$nextBatchToConcatenate.cat" )
      {
        if ( $compressCatFile ) {
          system(   "gzip -c $file"
                  . "_batch-"
                  . "$nextBatchToConcatenate.cat "
                  . ">> $fullcatfile.gz" );
        }
        else {
          system(   "cat $file"
                  . "_batch-"
                  . "$nextBatchToConcatenate.cat "
                  . ">> $fullcatfile" );
        }
        unlink( "$file" . "_batch-" . "$nextBatchToConcatenate.cat" )
            unless ( $DEBUG );
        $nextBatchToConcatenate++;
      }
      else {
        last;
      }
    }

    # Gather a list of batches to work on
    my @batchNums = grep { ( $batchStatus{$_}->{'childPID'} < 0 ) }
        sort( { $a <=> $b } keys( %batchStatus ) );

    # Decide how many jobs to start
    my $numberToStart = 0;
    if ( @batchNums > ( $numberChildren - keys( %children ) ) ) {

      # Simply the number requested - the number running
      $numberToStart = ( $numberChildren - keys( %children ) );
    }
    else {

      # Simply the remaining batches
      $numberToStart = @batchNums;
    }

    #
    # Loop through and fork to our hearts
    # content.
    #
    for ( my $k = 0 ; $k < $numberToStart ; $k++ ) {

  FORK:
      if ( $child_id = fork ) {
        # Our children are our future
        print "Parent produced child $child_id\n" if ( $DEBUG );
        $children{$child_id} = $batchNums[ $k ];
        $batchStatus{ $batchNums[ $k ] }->{'childPID'} = $child_id;
      }
      elsif ( $child_id == 0 ) {
        my $batchSeqFile  = $file . "_batch-" . $batchNums[ $k ];
        my $batchCatFile  = $batchSeqFile . ".cat";
        my $batchMaskFile = $batchSeqFile . ".masked";

        ## Get batch parameters
        my $seq_cnt = $batcher->getBatchSeqCount( $batchNums[ $k ] );
        my $seqlen  = $batcher->getBatchSeqLength( $batchNums[ $k ] );
        my $frac_GC = $batcher->getBatchAverageGC( $batchNums[ $k ] );
        print "Creating Batch "
            . $batchNums[ $k ]
            . " seq count = $seq_cnt "
            . "len = $seqlen, average GC = $frac_GC\n"
            if ( $DEBUG );

        ## Create the batch file
        $batcher->writeBatchFile( $batchNums[ $k ], $batchMaskFile );

        my $GC_frac = 0;
        if ( $options{'gc'} ) {

          # user decides GC background
          $GC_frac = $options{'gc'};
        }
        elsif ( ( $seq_cnt > 1 || $seqlen <= 2000 )
                && !$options{'gccalc'} )
        {

          # More than one sequence *or* too short to get
          # acccurate measure of background then take average matrices
          # NOTE: Option -gccalc overules this behaviour
          $GC_frac = 43;
        }
        else {

          # program calculates GC background from sequence
          $GC_frac = $frac_GC;
        }

        # Check for E coli insertion elements
        unless (    $options{'no_is'}
                 || $options{'fptest'}
                 || !-s "$generalLibDir/is.lib" )
        {
          my $sequenceArrayRef;
          my $seqWithNameHashRef;
          &locateISElements(
                             \%options,
                             $batcher,
                             $batchNums[ $k ],
                             $batchMaskFile,
                             $qualFile,
                             $REPEATMASKER_DIR,
                             $searchEngine,
                             $file,
                             $generalLibDir
          );
        }

        unless ( $options{'is_only'} ) {
          my $batchSeqDB = FastaDB->new(
                                         fileName    => "$batchSeqFile.masked",
                                         openMode    => SeqDBI::ReadWrite,
                                         maxIDLength => 50
          );

          if ( $options{'fptest'} ) {
            &runLowComplexTests(
                      \%options,          $REPEATMASKER_DIR,
                      $GC_frac,           $batchSeqFile,
                      $batchMaskFile,     $generalLibDir,
                      $speciesLibDir,     $batchCount,
                      $searchEngine,      $numX,
                      $batchSeqDB,        "batch $batchNums[$k] of $batchCount",
                      $tax,               $customLibDir,
                      $tempdir,           $batchNums[ $k ],
                      $refineableHashRef, $batcher
            );
          }
          else {
            if ( $searchEngine->isa( "HMMERSearchEngine" ) ) {
              &runHMMERSearchStages(
                      \%options,          $REPEATMASKER_DIR,
                      $GC_frac,           $batchSeqFile,
                      $batchMaskFile,     $generalLibDir,
                      $speciesLibDir,     $batchCount,
                      $searchEngine,      $numX,
                      $batchSeqDB,        "batch $batchNums[$k] of $batchCount",
                      $tax,               $customLibDir,
                      $tempdir,           $batchNums[ $k ],
                      $refineableHashRef, $batcher
              );
            }
            else {
              &runSearchStages(
                      \%options,          $REPEATMASKER_DIR,
                      $GC_frac,           $batchSeqFile,
                      $batchMaskFile,     $generalLibDir,
                      $speciesLibDir,     $batchCount,
                      $searchEngine,      $numX,
                      $batchSeqDB,        "batch $batchNums[$k] of $batchCount",
                      $tax,               $customLibDir,
                      $tempdir,           $batchNums[ $k ],
                      $refineableHashRef, $batcher
              );
            }
          }

          ## The rule of thumb is that if we didn't return
          ## results we at least have an empty *.cat file.
          if ( !-e "$batchSeqFile.cat" ) {
            system( "touch $batchSeqFile.cat" );
          }
          unlink( <$batchSeqFile.tmp.* $batchSeqFile.temp.*> );
        }
        else {

          # Create an emtpy cat file.
          system( "touch $batchSeqFile.cat" );
        }

        if ( $batcher->isBatchFragmented( $batchNums[ $k ] ) ) {

          # The batch file included fragments so
          # the annotation positions need updating
          # before we can integrate them into the
          # the final results.
          &adjustFragmentPositions( $batcher, $batchCatFile );
        }

        # Exit child process with clean status
        exit( 0 );

      }    # elsif ( $child_id == 0 )...
      elsif ( $! =~ /No more process/ ) {

        # Supposedly recoverable fork error
        $badForkCount++;
        sleep 5;
        redo FORK;
      }
      else {    # Weird fork error
        print "\nWARNING: Cannot fork...for unknown reasons!\n";
        $badForkCount++;
        last;
      }
    }    # End for ( my $k = 0 ; $k < $numberToStart...

    # Give up if it looks bad for us
    ## TODO: Consider die'ng here...since there
    ##       is nothing more we can do to recover.
    last if ( $badForkCount == $badForkMax );

    #
    # Just so we don't loop endlessly.  Lets
    # hang around here until at least one
    # process quits. NOTE: This may be dangerous
    # consider this more....
    #
    print "Parent waiting for child to finish...\n" if ( $DEBUG );

  }    # End of JOBLOOP

  ## TODO: This is where we used to make the cut file.  This
  ##       option should be rolled into a new utility which
  ##       annotates/cuts based on the annotation database.

  #
  # Report batch overlap boundaries for anyone who cares
  #
  my $overlapBoundariesHashRef = $batcher->getOverlapBoundaries();
  if ( keys( %{$overlapBoundariesHashRef} ) > 0 ) {
    if ( $compressCatFile ) {
      system(
         "echo \"## Batch Overlap Boundaries\" | gzip -c  >> $fullcatfile.gz" );
    }
    else {
      system( "echo \"## Batch Overlap Boundaries\" >> $fullcatfile" );
    }
    foreach my $fragSeqName ( keys( %{$overlapBoundariesHashRef} ) ) {
      my $overlapList =
          join( ", ", @{ $overlapBoundariesHashRef->{$fragSeqName} } );
      if ( $compressCatFile ) {
        system(   "echo \"##   $fragSeqName  $overlapList\" | "
                . "gzip -c  >>  $fullcatfile.gz" );
      }
      else {
        system( "echo \"##   $fragSeqName  $overlapList\" >>  $fullcatfile" );
      }
    }
  }

  # Rename final cat file
  if ( -s "$fullcatfile.gz" ) {
    rename "$fullcatfile.gz", "$file.cat.gz";
  }
  elsif ( -s "$fullcatfile" ) {
    rename "$fullcatfile", "$file.cat";
  }
  rename $fullcutfile, "$file.cut" if -s $fullcutfile;

  #
  # Setup options for processRepeats
  #
  my $speed = "undef";
  if ( $options{'q'} ) {
    $speed = "quick";
  }
  elsif ( $options{'qq'} ) {
    $speed = "rushjob";
  }
  elsif ( $options{'s'} ) {
    $speed = "sensitive";
  }
  else {
    $speed = "default";
  }

  my $engine     = $searchEngine->getPathToEngine();
  my @path       = split( /[\\\/]/, $engine );
  my $engineInfo =
      "run with " . pop( @path ) . " version " . $searchEngine->getVersion();

  if ( $compressCatFile ) {
    open( CATOUT, "| gzip -c >>$file.cat.gz" );
  }
  else {
    open( CATOUT, ">>$file.cat" );
  }

  print CATOUT "## Total Sequences: " . $db->getSeqCount() . "\n";
  print CATOUT "## Total Length: " . $db->getSeqLength() . "\n";
  print CATOUT "## Total NonMask ( excluding >20bp runs of N/X bases ): "
      . $db->getXNLength() . "\n";
  print CATOUT "## Total NonSub ( excluding all non ACGT bases ):"
      . $db->getSubtLength() . "\n";
  print CATOUT "RepeatMasker version $version , $speed mode\n";
  print CATOUT "$engineInfo\nRM Library: $dbversion\n";
  close CATOUT;

  undef $db;

  # TODO: Check on .seqending stuff in SEQDBI

  if ( -s "$fileori.alert" && !$options{'is_clip'} ) {
    if ( $compressCatFile ) {
      &systemint( "gzip -c $fileori.alert >> $file.cat.gz" );
    }
    else {
      &systemint( "cat $fileori.alert >> $file.cat" );
    }
  }

  unless ( exists $options{'nopost'} || $options{'fptest'} ) {

    #
    # Alter source file for masking if we want to go with
    # the clipped IS sequence file.  All annotations will
    # be based on the $file.withoutIS rather than the original
    # file.
    #
    my $maskSourceFile = $file;
    $maskSourceFile = "$file.withoutIS"
        if ( $options{'is_clip'}
             && -s "$file.withoutIS" );

    my $prOptions = "";
    $prOptions .= "-ace " if ( $options{'ace'} );
    $prOptions .= "-a "   if ( $options{'a'} );
    $prOptions .= "-lib " . $options{'lib'} . " "
        if ( $options{'lib'} && !$options{'species'} );
    $prOptions .= "-gff "     if ( $options{'gff'} );
    $prOptions .= "-no_id "   if ( $options{'no_id'} );
    $prOptions .= "-poly "    if ( $options{'poly'} );
    $prOptions .= "-u "       if ( $options{'u'} );
    $prOptions .= "-lcambig " if ( $options{'lcambig'} );
    $prOptions .= "-source "  if ( $options{'source'} || $options{'a'} );
    $prOptions .= "-html "    if ( $options{'html'} );
    $prOptions .= "-xm "      if ( $options{'xm'} );
    $prOptions .= "-excln "   if ( $options{'excln'} );
    $prOptions .= "-noint "   if ( $options{'noint'} );
    $prOptions .= "-species \"" . $options{'species'} . "\" "
        if ( defined $options{'species'}
             && $options{'species'} ne "" );
    $prOptions .= "-orifile $fileori ";
    $prOptions .= "-maskSource $maskSourceFile ";
    $prOptions .= "-x " if ( $options{'x'} );
    $prOptions .= "-xsmall " if ( $options{'xsmall'} );

    if ( $compressCatFile ) {
      $prOptions .= "$file.cat.gz";
    }
    else {
      $prOptions .= "$file.cat";
    }

    #
    # Run processrepeats
    #
    # For timing only
    #my $t0 = gettimeofday( );
    &systemint(
        "$REPEATMASKER_DIR/ProcessRepeats " . "$prOptions" )
        unless $options{'is_only'};

    # For timing only
    #if ( $DEBUG ) {
    #  my $t1 = gettimeofday( );
    #  my $elapsed = $t1 - $t0;
    #  print "ProcessRepeats runtime: $elapsed secs\n"
    #}

    if ( $options{'is_clip'} && -s "$file.withoutIS.masked" ) {
      rename( "$maskSourceFile.masked", "$file.masked" );
    }

  }

  &cleanUp( \%options, $runnumber, $tempdir, $fileori, $fileend, $file,
            $originaldir, $compressed )
      unless ( $DEBUG );
}    # END FILECYCLE

&systemint( "rm -R $tempdir" ) unless ( $DEBUG );

# We are soooo done
exit( 0 );

######################## S U B R O U T I N E S ############################

##-------------------------------------------------------------------------##
## Use:  my   &adjustFragmentPositions( $batcher, $catfile );
##
##  Returns
##
##      Adjust positions of results from a fragmented
##      batch.
##
## Eample cross_match hit line.  All positions are 1-based
##
## Foward Strand:
##
## Column  Description                                      Eg
## ==============================================================
##   0     Smith-Waterman Score                             2334
##   1     Percent Divergence                               8.44
##   2     Percent Deletions                                0.00
##   3     Percent Insertions                               3.25
##   4     Query Sequence                                   Human
##   5     Query Begin Pos                                  127
##   6     Query End Pos                                    737
##   7     Query Left - The bp left after query end         (8222)
##   8     Subject Sequence                                 AluSx#SINE/Alu
##   9     Subject Begin                                    1
##   10    Subject End                                      298
##   11    Subject Left                                     (14)
##   12    ID - A unique id for this run of crossmatch      2
##
## Reverse Strand:
##
## Column  Description                                      Eg
## ==============================================================
##   0     Smith-Waterman Score                             2334
##         ... same as above..
##   8     Strand Designator                                C
##   9     Subject Sequence                                 AluSx#SINE/Alu
##   10    Subject Left                                     (14)
##   11    Subject End                                      298
##   12    Subject Begin                                    1
##   13    ID                                               3
##
##    Globals Used: None
##
##-------------------------------------------------------------------------##
sub adjustFragmentPositions {
  my $batcher = shift;
  my $catfile = shift;

  my $seqDBObj      = $batcher->getSeqDBObj();
  my $searchResults =
      CrossmatchSearchEngine::parseOutput( searchOutput => "$catfile" );

  my %deletedIDs = ();
  for ( my $i = $searchResults->size() - 1 ; $i >= 0 ; $i-- ) {
    my $result     = $searchResults->get( $i );
    my $batchSeqID = $result->getQueryName();
    next if (    $batchSeqID eq "refinement"
              || $result->getId() =~ /\[/ );
    my $origSeqID  = $batcher->getSeqIDFromBatchSeqID( $batchSeqID );
    my $adjustment = $batcher->translateBatchSeqPositionToFastaSeq( $batchSeqID,
                          $result->getQueryStart() ) - $result->getQueryStart();
    $result->setQueryStart( $result->getQueryStart() + $adjustment );
    $result->setQueryEnd( $result->getQueryEnd() + $adjustment );
    $result->setQueryRemaining(
               $seqDBObj->getSeqLength( $origSeqID ) - $result->getQueryEnd() );
    $result->setQueryName( $origSeqID );

    # Truncate repeats starting at the middle of the overlap.
    my ( $beginValidPos, $endValidPos ) =
        $batcher->getSeqIDValidRange( $batchSeqID );

    # Rules for deleting elements outside our boundary
    if (    $result->getQueryStart() <= $beginValidPos
         && $result->getQueryEnd() <= $beginValidPos )
    {
      if ( $DEBUG ) {
        print STDERR " DELETING ANNOT!\n";
        print STDERR $result->toStringFormatted( SearchResult::OutFileFormat );
        print STDERR "beginValidPos = $beginValidPos "
            . "endValidPos = $endValidPos\n";
      }
      $deletedIDs{ "[" . $result->getId() . "]" } = 1;
      $searchResults->remove( $i );
      next;
    }

    if (    $endValidPos > 0
         && $result->getQueryEnd() > $endValidPos
         && $result->getQueryStart() > $endValidPos )
    {
      if ( $DEBUG ) {
        print STDERR " DELETING ANNOT2!\n";
        print STDERR $result->toStringFormatted( SearchResult::OutFileFormat );
        print STDERR "beginValidPos = $beginValidPos "
            . "endValidPos = $endValidPos\n";
      }
      $deletedIDs{ "[" . $result->getId() . "]" } = 1;
      $searchResults->remove( $i );
      next;
    }
  }

  # Cleanup the refinement entries which are children of a removed
  # annotation.
  for ( my $i = $searchResults->size() - 1 ; $i >= 0 ; $i-- ) {
    my $result = $searchResults->get( $i );
    my $id     = $result->getId();
    $searchResults->remove( $i )
        if ( $id =~ /\[/ && exists $deletedIDs{$id} );
  }
  $searchResults->write( "$catfile", SearchResult::AlignWithQuerySeq );

}

##-------------------------------------------------------------------------##
## Use:  my &locateISElements( \%options, $batcher,
##                             $batchNum, $file, $qualFile,
##                             $REPEATMASKER_DIR,
##                             $searchEngine,
##                             $outFilesPrefix, $generalLibDir );
##
##
##  Returns
##
##   Only needs to support two atomic operations.  Report and
##   clip.
##
##   Flow:
##      1. Searches $file for IS elements and saves output
##         to $file.iscat (Note...removes .masked from name if
##         it exists).
##
##      2. If a quality file is handed to us it reads the *entire*
##         quality file into memory ( qualData local datastructure ).
##
##      3. Parse search results and locate IS elements.
##
##      4. Clip IS elements from $file if is_clip or is_only options
##         used.
##
##      5. Clip qual values from qualData local datastructure if
##         is_clip or is_only options used.
##
##      6. Saves a copy of the sequence/qual at this stage
##         if is_clip or is_only options are used.  The
##         saved copies are named:
##                $file.withoutIS, $file.qual.withoutIS.
##
##   NOTE: A basic understanding of Dutch and French is necessary
##         to decypher this subroutine.
##
##   Globals Used: none
##-------------------------------------------------------------------------##
sub locateISElements {
  my %options        = %{ shift() };
  my $batcher        = shift;
  my $batchNum       = shift;
  my $file           = shift;
  my $qualFile       = shift;
  my $DIRECTORY      = shift;
  my $searchEngine   = shift;
  my $outFilesPrefix = shift;
  my $generalLibDir  = shift;

  print "\nChecking for E. coli insertion elements\n";

  my @ISlines     = ();
  my @ISFailed    = ();
  my $ISclipornot = 0;
  my $qualprob    = 0;
  my %clipfailed  = ();

  my $seqDB = FastaDB->new(
                            fileName    => $file,
                            openMode    => SeqDBI::ReadWrite,
                            maxIDLength => 50
  );

  my $filePrefix = $file;
  $filePrefix =~ s/\.masked//;

  my $minscore = 17;
  my $minmatch = 15;
  my $lib      = "$generalLibDir/is.lib";
  my $matrix   = "identity.matrix";
  my ( $gap_initValue, $ins_gap_extValue, $del_gap_extValue ) = ( "", "", "" );
  my $bandwidth = "";
  my $masklevel = "";
  my $maskfile  = $file;
  my $raw       = "";
  my $wordraw   = "";
  my $outfile   = "$filePrefix" . ".iscat";
  my $searchResults;

  ## Perform the search
  ( $minmatch, $bandwidth, $searchResults ) = &search(
                           \%options,         $DIRECTORY,        $outfile,
                           $maskfile,         $lib,              $minmatch,
                           $bandwidth,        $matrix,           $gap_initValue,
                           $ins_gap_extValue, $del_gap_extValue, $minscore,
                           $masklevel,        $searchEngine,     $wordraw,
                           $raw
  );

  #
  # Read in quality file data if available
  #
  #   This should probably be generalized and made
  #   into an object for dealing with quality files.
  #   For now we will leave well enough alone.
  #
  my %qualData  = ();
  my %qualHdr   = ();
  my @qualNames = ();
  my $qualname  = "";
  if ( -s $qualFile ) {
    if ( -r $qualFile ) {
      open( INQUAL, $qualFile );
      while ( <INQUAL> ) {
        chomp;
        if ( /^>/ ) {
          $qualname = $_;
          $qualname =~ s/^>(\S*).*/$1/;
          push @qualNames, $qualname;
          $qualHdr{$qualname} = $_;
        }
        else {
          $qualData{$qualname} .= $_;
        }
      }
      close INQUAL;
    }
    else {
      $qualprob = 1;
    }
  }
  else {
    $qualprob = 1;
  }

  #
  # Open the search results file.
  #
  my $lastname  = "";
  my $lastbegin = "";
  my $lastend   = "";
  my $lastscore = 0;
  my $lastis    = "";
  my $lastor    = "";
  my $naam      = "";
  my $begin     = 0;
  my $end       = 0;
  my $or        = "";
  my $is        = "";
  my $beginis   = 0;
  my $leftis    = 0;
  my $deleted   = 0;
  my $remaining = 0;

  #
  # Loop through search results
  #
  for ( my $i = 0 ; $i < $searchResults->size() ; $i++ ) {
    my $result = $searchResults->get( $i );
    $naam      = $result->getQueryName();
    $begin     = $result->getQueryStart();
    $end       = $result->getQueryEnd();
    $remaining = $result->getQueryRemaining();
    if ( $result->getOrientation() ne "C" ) {
      $or = 'pos';
    }
    else {
      $or = 'neg';
    }
    $is      = $result->getSubjName();
    $leftis  = $result->getSubjRemaining();
    $beginis = $result->getSubjStart();

    $deleted = 0 unless $lastname && $naam eq $lastname;
    ## $deleted keeps track of nr of bp in IS elements already
    ## deleted from the same query sequence;
    $leftis =~ tr/()//d;
    ## remove classification
    $is =~ s/\#\w+//;

    ## Allows to clip an IS element with a gap in the middle
    ## (may be a sequencing gap)

    ## $lastis is set if last IS was incomplete but did either
    ## start at pos 1 and had + orientation or end at last bp
    ## and had - orientation
    if ( $lastis ) {
      ## Following allows to clip an IS element with a gap
      ## in the middle (may be a sequencing gap)
      if (
              $is   eq $lastis
           && $naam eq $lastname
           && $or   eq $lastor
           && (    $or eq 'pos' && $leftis == 0
                || $or eq 'neg' && $beginis == 1 )
           && $begin - $lastend <= 2
          )
      {    # rather conservatively
        $begin   = $lastbegin;
        $beginis = 1 if $or eq 'pos' && $leftis == 0;
        $leftis  = 0 if $or eq 'neg' && $beginis == 1;
      }
      else {
        push @ISFailed, ( $i - 1 ) if $lastscore > 25;
        $lastis = $lastbegin = $lastend = "";
        $lastor = $lastscore = "";
      }
    }
    ## complete element
    if ( $beginis == 1 && $leftis == 0 ) {
      my $length = $end + 1 - $begin;
      ## This section needs occasional updating (new
      ## elements, new information on flexibility in
      ## duplication lengths)
      my @dupLengths = ();
      if ( $is eq 'IS1' ) {
        @dupLengths = ( 9, 8, 10 );
      }
      elsif ( $is eq 'IS2' ) {
        @dupLengths = ( 5 );
      }
      elsif ( $is eq 'IS3' ) {
        @dupLengths = ( 3 );
      }
      elsif ( $is eq 'IS5' ) {
        @dupLengths = ( 4 );
      }
      elsif ( $is eq 'IS10' ) {
        @dupLengths = ( 9 );
      }
      elsif ( $is eq 'IS30' ) {
        @dupLengths = ( 2 );
      }
      elsif ( $is eq 'IS150' ) {
        @dupLengths = ( 3, 4 );
      }
      elsif ( $is eq 'IS186' ) {
        @dupLengths = ( 10, 11 );
      }
      elsif ( $is eq 'Tn1000' ) {
        @dupLengths = ( 5 );
      }
      else {
        @dupLengths = ();
      }
      ## Get the left and right flanking site duplications
      my ( $links, $rechts ) = qw(links rechts);
      my $dupLength = 0;
      ## Try all known flanking site lengths
      while ( @dupLengths && $links ne $rechts ) {
        $dupLength = shift( @dupLengths );
        next if (    $dupLength > ( $begin - 1 )
                  || $dupLength > $remaining );
        $links =
            $seqDB->getSubstr( $naam, $begin - $dupLength - 1 - $deleted,
                               $dupLength );
        $rechts = $seqDB->getSubstr( $naam, $end - $deleted, $dupLength );
      }

      ## If flanking site duplications are identical (these
      ## are cloning artifacts; there was no time for
      ## substitutions to take place to make the sites different
      if ( $links ne "" && $links eq $rechts ) {
        ## Alters $seqWithName by deleting a segment
        ## including the IS element and one flanking site,
        ## thereby reconstructing the original sequence
        if ( $options{'is_only'} || $options{'is_clip'} ) {
          $seqDB->setSubstr( $naam,
                             $begin - 1 - $deleted,
                             $length + $dupLength, "" );
          ## Similarly removes qual values in $qualData
          if ( $qualname ) {
            my @qual = split( / /, $qualData{$naam} );
            splice @qual, $begin - 1 - $deleted, $length + $dupLength;
            $qualData{$naam} = join " ", @qual;
          }
          $deleted += $length + $dupLength;
        }
        push @ISlines, $i;
      }
      else {
        push @ISFailed, $i;
      }
      $lastis = $lastbegin = $lastend = $lastscore = "";
    }
    elsif (    $beginis == 1 && $or eq 'pos'
            || $leftis == 0 && $or eq 'neg' )
    {
      ## save info to see if rest of the IS element follows
      $lastis    = $is;
      $lastbegin = $begin;
      $lastend   = $end;
      $lastor    = $or;
      $lastscore = $result->getScore( $i );
    }
    else {
      $lastis = $lastbegin = $lastend = "";

      # only report if match scores > 25; otherwise may be
      # freak false positive
      push @ISFailed, ( $i ) if ( $result->getScore( $i ) > 25 );
    }
    $lastname = $naam;
  }

  if ( $lastis ) {    #often with IS_only
    push @ISFailed, ( $searchResults->size() - 1 ) if ( $lastscore > 25 );
  }

  ## adjust position for broken up very large sequences
  if ( @ISlines && $batcher->isBatchFragmented( $batchNum ) ) {
    my $seqDBObj = $batcher->getSeqDBObj();
    foreach my $resultIndex ( @ISlines ) {
      my $ISResult = $searchResults->get( $resultIndex );
      my $qryName  = $ISResult->getQueryName();
      my $qryBegin = $ISResult->getQueryStart();
      my $qryEnd   = $ISResult->getQueryEnd();
      my $qryLeft  = $ISResult->getQueryRemaining();

      my $adjustment =
          $batcher->translateBatchSeqPositionToFastaSeq( $qryName, $qryBegin ) -
          $qryBegin;

      $ISResult->setQueryStart( $qryBegin + $adjustment );
      $ISResult->setQueryEnd( $qryEnd + $adjustment );

      $ISResult->setQueryRemaining(
         $seqDBObj->getSeqLength( $batcher->getSeqIDFromBatchSeqID( $qryName ) )
             - $qryEnd );
    }
  }

  if ( @ISlines ) {
    my $qualthere = "";
    if ( $qualprob ) {
      $qualthere =
            "\nNo corresponding Phred file ($qualFile) "
          . "could be read in this directory,\n so no "
          . "modified quality file has been made\n";
    }
    else {
      $qualthere =
            "\nThe quality file $outFilesPrefix.qualwithoutIS "
          . "corresponds to this clipped fasta sequence file\n";
    }
    if ( $options{'is_only'} ) {
      $ISclipornot =
            "These elements and one flanking duplicated "
          . "site have been clipped out\nof the sequence "
          . "in the file $outFilesPrefix.withoutIS\n$qualthere\n";
    }
    elsif ( $options{'is_clip'} ) {
      $ISclipornot =
            "These elements and one flanking duplicated site "
          . "have been clipped out\nbefore the repeatmasker "
          . "run, so that coordinates do not correspond\n"
          . "everywhere with the original sequence. A clipped "
          . "version of the\nsequence(s) is in the file "
          . "$outFilesPrefix.withoutIS\n$qualthere\n";
    }
    else {
      $ISclipornot =
            "These elements can be clipped out with the "
          . "options is_clip or is_only.  The latter does "
          . "not run the 'normal' RepeatMasker routine and "
          . "positions in the current\n.out file will not "
          . "correspond with the -is_only reconstructed "
          . "sequence.\n";
    }

    my $ISreport = "One or more E. coli IS elements were found:\n";
    foreach my $resultIndex ( @ISlines ) {
      my $ISResult = $searchResults->get( $resultIndex );
      $ISreport .= "  "
          . $ISResult->getSubjName() . " in "
          . $ISResult->getQueryName() . ": "
          . $ISResult->getQueryStart() . " - "
          . $ISResult->getQueryEnd() . "\n";
    }
    $ISreport .= "\n$ISclipornot\n";

    if ( @ISFailed ) {
      $ISreport .=
            "The following E coli IS elements could not be "
          . "confidently clipped out:\n";
      foreach my $resultIndex ( @ISFailed ) {
        my $ISResult = $searchResults->get( $resultIndex );
        $ISreport .= "  "
            . $ISResult->getSubjName() . " in "
            . $ISResult->getQueryName() . ": "
            . $ISResult->getQueryStart() . " - "
            . $ISResult->getQueryEnd() . "\n";
      }
    }
    print "$ISreport\n";
    if ( $options{'is_only'} || $options{'is_clip'} ) {
      system( "cat $file >> $outFilesPrefix.withoutIS" );
      if ( $qualname ) {
        my $qualout = "$outFilesPrefix.qual.withoutIS";
        $qualout =~ s/.seq.qual.withoutIS/.qual.withoutIS/;
        open( QUALOUT, ">$qualout" );
      }
      ## $nom French too!
      foreach my $nom ( @qualNames ) {

        # fixed with \n sept 03
        print QUALOUT "$qualHdr{$nom}\n$qualData{$nom}\n";
      }
      close QUALOUT if $qualname;
    }
    open( ALERTOUT, ">>$outFilesPrefix.alert" );
    print ALERTOUT "$ISreport";
    close ALERTOUT;
  }

  #elsif ( $clipfailed{$file} ) {
  elsif ( @ISFailed ) {
    my $ISreport =
          "The following E coli IS elements could not be "
        . "confidently clipped out:\n";
    foreach my $resultIndex ( @ISFailed ) {
      my $ISResult = $searchResults->get( $resultIndex );
      $ISreport .= "  "
          . $ISResult->getSubjName() . " in "
          . $ISResult->getQueryName() . ": "
          . $ISResult->getQueryStart() . " - "
          . $ISResult->getQueryEnd() . "\n";
    }
    open( ALERTOUT, ">>$outFilesPrefix.alert" );
    print ALERTOUT "$ISreport";
    close ALERTOUT;
    print "$ISreport";
  }

  #unlink $iscat;
}

##-------------------------------------------------------------------------##
## Use:  my ( $minmatch, $bandwidth, $resultsCollection ) =
##                   &search( \%options, $DIRECTORY, $outfile, $maskfile,
##                                $lib, $minmatch, $bandwidth, $matrix,
##                                $gap_initValue, $ins_gap_extValue,
##                                $del_gap_extValue, $minscore, $masklevel,
##                                $searchEngine, $wordraw,
##                                $raw );
##
##    Runs the search engine using the settings specified in the
##    subroutine runSearchStages, traps error results and aborts
##    on search engine failure.
##
##  Input
##
##       \%options : The RepeatMasker option hashtable
##
##  Returns
##
##       $minmatch : The new minmatch ( as it may have changed )
##       $bandwidth : The new bandwidth ( dito )
##       $resultsCollection :
##
##
##  Globals Used: None
##  Globals Modified: None
##
##-------------------------------------------------------------------------##
sub search {
  my %options          = %{ shift() };    # The RepeatMasker option hashtable
  my $DIRECTORY        = shift;
  my $outfile          = shift;
  my $maskfile         = shift;
  my $lib              = shift;
  my $minmatch         = shift;
  my $bandwidth        = shift;
  my $matrix           = shift;
  my $gap_initValue    = shift;
  my $ins_gap_extValue = shift;
  my $del_gap_extValue = shift;
  my $minscore         = shift;
  my $masklevel        = shift;
  my $searchEngine     = shift;
  my $wordraw          = shift;
  my $raw              = shift;

  my $alignments = "";
  my $poly       = "";
  my $quiet      = "";

  print "RepeatMasker::search( \%options, $DIRECTORY, $outfile, $maskfile,\n"
      . "                      $lib, $minmatch, $bandwidth, $matrix\n"
      . "                      $gap_initValue, $ins_gap_extValue, $del_gap_extValue\n"
      . "                      $minscore, $masklevel, \$searchEngine, $wordraw\n"
      . "                      $raw );\n"
      if ( $DEBUG );

  # Set options for searchengine
  $poly = "-poly" if $options{'poly'};
  $quiet = "2>> $outfile.stderr" unless $options{'noisy'};

  if ( $options{'div'} && $options{'div'} <= 20 ) {
    ++$minmatch;
    ++$minmatch if $options{'div'} <= 10;
  }

  my $matrixPrefix = "crossmatch";
  if ( $searchEngine->isa( "WUBlastSearchEngine" ) ) {
    $matrixPrefix = "wublast/aa";
  }
  elsif (    $searchEngine->isa( "NCBIBlastSearchEngine" )
          || $searchEngine->isa( "HMMERSearchEngine" ) )
  {
    $matrixPrefix = "ncbi/nt";
  }

  if (    $searchEngine->isa( "WUBlastSearchEngine" )
       && $options{'s'}
       && $lib !~ /simple|at\.lib/ )
  {
    $minmatch -= 1;
    $bandwidth += 15;
  }

  ##
  ## Special case.  If we are refining a previous alignment we are
  ## starting with a fixed region from which we want to make a pseudo
  ## global alignment.  We expect the refined alignments to be close
  ## to be close to the length of the full region.  If we pass -1
  ## to the bandwidth parameter of NCBIBlastSearchEngine it will
  ## respond by relaxing the -xdrop_gap_final parameter to something
  ## higher than the minscore threshold.
  ##
  if (    $searchEngine->isa( "NCBIBlastSearchEngine" )
       && $lib =~ /refine/ )
  {
    $bandwidth = -1;
  }

  my $cycle = 1;

  $searchEngine->setQuery( $maskfile );
  $searchEngine->setSubject( $lib );
  $searchEngine->setMatrix( "$DIRECTORY/Matrices/$matrixPrefix/$matrix" );
  if ( $wordraw ) {
    $searchEngine->setWordRaw( 1 );
  }
  else {
    $searchEngine->setWordRaw( 0 );
  }
  if ( $raw ) {
    $searchEngine->setScoreMode( SearchEngineI::basicScoreMode );
  }
  else {
    $searchEngine->setScoreMode( SearchEngineI::complexityAdjustedScoreMode );
  }

  ## RMH: 11/20/12
  ##   We now insist upon alignments for all search engines, even if the user
  ##   doesn't need them.  Now the -a flag simply tells the program whether or
  ##   not to generate a final *.align file.  The alignment data is really
  ##   critical to the program's annotation decisions.
  $searchEngine->setGenerateAlignments( 1 );
  $searchEngine->setGapInit( $gap_initValue );
  $searchEngine->setInsGapExt( $ins_gap_extValue );
  $searchEngine->setDelGapExt( $del_gap_extValue );

  my $maskLevelNum = $masklevel;
  if ( $masklevel =~ /-masklevel\s+(\d+)\s*/ ) {
    $maskLevelNum = $1;
  }
  $searchEngine->setMaskLevel( $maskLevelNum );
  $searchEngine->setMinScore( $minscore );

  my $cmd     = "";
  my $status  = 0;
  my $outFile = "";
  my $errFile = "";
  my $retry   = 0;
  my $resultCollection;
  while ( $cycle ) {

    ( $status, $resultCollection, $outFile, $errFile ) = $searchEngine->search(
                                                         minMatch  => $minmatch,
                                                         bandwidth => $bandwidth
    );

    if ( $status ) {
      if ( $searchEngine->isa( "HMMERSearchEngine" ) ) {

        # HMMER Errors
        print "WARNING: The search engine returned an error ("
            . ( $? >> 8 )
            . ", status = $status )\n"
            . "Engine parameters: "
            . $searchEngine->getParameters() . "\n"
            . "A search phase could not complete on this batch.\n"
            . "The batch file will be re-run and if possible the\n"
            . "program will resume.\n";
        exit( -1 );
      }
      elsif ( $bandwidth > 14 ) {
        $bandwidth = 14;
        warn "WARNING: Comparison failed. Retrying with smaller bandwidth\n";
      }
      elsif ( $bandwidth == 4 ) {    # second or third simple repeats check
            # Extreme measures for very long simple satellites
        $bandwidth = 1;
        warn "WARNING: Comparison failed. Retrying with smaller bandwidth\n";
      }
      elsif ( $minmatch < 10 ) {
        $minmatch++;
        warn "WARNING: Comparison failed. Retrying with larger minmatch "
            . "($minmatch)\n";
      }
      else {
        print "WARNING: The search engine returned an error ("
            . ( $? >> 8 )
            . ", status = $status )\n"
            . "Engine parameters: "
            . $searchEngine->getParameters() . "\n"
            . "A search phase could not complete on this batch.\n"
            . "The batch file will be re-run and if possible the\n"
            . "program will resume.\n";
        exit( -1 );
      }
    }
    else {
      $cycle = 0;
    }
  }

  return ( $minmatch, $bandwidth, $resultCollection );
}    # sub search

##-------------------------------------------------------------------------##
## Use:  my $searchRecipeHashRef = &getSearchRecipes();
##
##  Returns
##    A data structure holding the parameters used for
##    all our searches.  These are recipes that can be
##    used in multiple locations in this program.
##
##    Globals Used: None
##    Globals Modified: None
##-------------------------------------------------------------------------##
sub getSearchRecipes {
  my %searchRecipes = (
           "perfect_simple_repeats" => {
                                         'description'      => "",
                                         'minscore'         => 180,
                                         'minmatch'         => [ 8, 9, 10, 11 ],
                                         'max_matrix_gc'    => -1,
                                         'matrix'           => "simple1.matrix",
                                         'gap_initValue'    => -40,
                                         'ins_gap_extValue' => -15,
                                         'del_gap_extValue' => -15,
                                         'bandwidth'        => 1,
                                         'filterContained'  => 0,
                                         'masklevel'        => 1,
                                         'raw'              => 1,
                                         'wordraw'          => 1,
                                         'chooseClass'      => "simple",
                                         'excise'           => 1,
           },
           "general_search_parameters" => {
                                            'description'   => "",
                                            'minscore'      => 225,
                                            'minmatch'      => [ 8, 9, 11, 13 ],
                                            'max_matrix_gc' => -1,
                                            'matrix'        => "20p##g.matrix",
                                            'gap_initValue' => -30,
                                            'ins_gap_extValue' => -6,
                                            'del_gap_extValue' => -5,
                                            'bandwidth'        => 14,
                                            'filterContained'  => 0,
                                            'masklevel'        => 90,
                                            'raw'              => 0,
                                            'wordraw'          => 0,
                                            'chooseClass'      => "masking",
                                            'excise'           => 0,
           },
           "cut_young_sines_in_primates" => {
                                              'description' => "primates",
                                              'minscore'    => 1200,
                                              'minmatch'    => [ 7, 8, 10, 12 ],
                                              'max_matrix_gc' => -1,
                                              'matrix' => "14p##g.matrix",
                                              'gap_initValue'    => -35,
                                              'ins_gap_extValue' => -7,
                                              'del_gap_extValue' => -6,
                                              'bandwidth'        => 20,
                                              'filterContained'  => 0,
                                              'masklevel'        => 1,
                                              'raw'              => 0,
                                              'wordraw'          => 0,
                                              'chooseClass'      => "alu",
                                              'excise'           => 1,
           },
           "mask_young_sines_in_primates" => {
                                     'description' => "primates - nocut option",
                                     'minscore'    => 1500,
                                     'minmatch'    => [ 7, 8, 10, 12 ],
                                     'max_matrix_gc'    => -1,
                                     'matrix'           => "14p##g.matrix",
                                     'gap_initValue'    => -35,
                                     'ins_gap_extValue' => -7,
                                     'del_gap_extValue' => -6,
                                     'bandwidth'        => 20,
                                     'filterContained'  => 0,
                                     'masklevel'        => 80,
                                     'raw'              => 0,
                                     'wordraw'          => 0,
                                     'chooseClass'      => "alumask",
                                     'excise'           => 0,
           },
           "mask_sines_in_primates" => {
                                         'description'      => "primates",
                                         'minscore'         => 225,
                                         'minmatch'         => [ 7, 8, 8, 9 ],
                                         'max_matrix_gc'    => -1,
                                         'matrix'           => "20p##g.matrix",
                                         'gap_initValue'    => -30,
                                         'ins_gap_extValue' => -6,
                                         'del_gap_extValue' => -5,
                                         'bandwidth'        => 14,
                                         'filterContained'  => 0,
                                         'masklevel'        => 80,
                                         'raw'              => 0,
                                         'wordraw'          => 0,
                                         'chooseClass'      => "alumask",
                                         'excise'           => 0,
           },
           "mask_sines_in_non_primate_mammals" => {
                                         'description' => "non-primate-mammals",
                                         'minscore'    => 225,
                                         'minmatch'    => [ 6, 7, 8, 10 ],
                                         'max_matrix_gc'    => -1,
                                         'matrix'           => "18p##g.matrix",
                                         'gap_initValue'    => -30,
                                         'ins_gap_extValue' => -6,
                                         'del_gap_extValue' => -5,
                                         'bandwidth'        => 14,
                                         'filterContained'  => 0,
                                         'masklevel'        => 1,
                                         'raw'              => 0,
                                         'wordraw'          => 0,
                                         'chooseClass'      => "cut1",
                                         'excise'           => 1,
           },
           "general_full_length_repeats" => {
             'description' => "larger bandwidth allows spanning of larger gaps",
             'minscore'    => 300,
             'minmatch'    => [ 9, 10, 11, 13 ],
             'max_matrix_gc'    => -1,
             'matrix'           => "18p##g.matrix",
             'gap_initValue'    => -33,
             'ins_gap_extValue' => -5,
             'del_gap_extValue' => -4,
             'bandwidth'        => 40,
             'filterContained'  => 0,
             'masklevel'        => 1,
             'raw'              => 0,
             'wordraw'          => 0,
             'chooseClass'      => "cut1",
             'excise'           => 1,
           },
           "complete_3end_of_young_line1s" => {
                                                'description' => "",
                                                'minscore'    => 300,
                                                'minmatch' => [ 9, 10, 11, 13 ],
                                                'max_matrix_gc' => -1,
                                                'matrix' => "18p##g.matrix",
                                                'gap_initValue'    => -33,
                                                'ins_gap_extValue' => -5,
                                                'del_gap_extValue' => -4,
                                                'bandwidth'        => 40,
                                                'filterContained'  => 0,
                                                'masklevel'        => 90,
                                                'raw'              => 0,
                                                'wordraw'          => 0,
                                                'chooseClass'      => "cut2",
                                                'excise'           => 1,
           },
           "older_ALUs_in_primates" => {
                                         'description'      => "",
                                         'minscore'         => 800,
                                         'minmatch'         => [ 7, 8, 10, 12 ],
                                         'max_matrix_gc'    => -1,
                                         'matrix'           => "14p##g.matrix",
                                         'gap_initValue'    => -35,
                                         'ins_gap_extValue' => -7,
                                         'del_gap_extValue' => -6,
                                         'bandwidth'        => 20,
                                         'filterContained'  => 1,
                                         'masklevel'        => 1,
                                         'raw'              => 0,
                                         'wordraw'          => 0,
                                         'chooseClass'      => "alumask",
                                         'excise'           => 0,
           },
           "more_ALUs_in_primates" => {
                                        'description'      => "",
                                        'minscore'         => 400,
                                        'minmatch'         => [ 7, 8, 9, 11 ],
                                        'max_matrix_gc'    => -1,
                                        'matrix'           => "18p##g.matrix",
                                        'gap_initValue'    => -30,
                                        'ins_gap_extValue' => -6,
                                        'del_gap_extValue' => -5,
                                        'bandwidth'        => 14,
                                        'filterContained'  => 0,
                                        'masklevel'        => 10,
                                        'raw'              => 0,
                                        'wordraw'          => 0,
                                        'chooseClass'      => "alumask",
                                        'excise'           => 0,
           },
           "short_repeats_and_satellites_rodents" => {
                                               'description' => "rodentia only",
                                               'minscore'    => 210,
                                               'minmatch'    => [ 7, 8, 9, 10 ],
                                               'max_matrix_gc' => -1,
                                               'matrix' => "25p##g.matrix",
                                               'gap_initValue'    => -27,
                                               'ins_gap_extValue' => -5,
                                               'del_gap_extValue' => -5,
                                               'bandwidth'        => 14,
                                               'filterContained'  => 0,
                                               'masklevel'        => 90,
                                               'raw'              => 0,
                                               'wordraw'          => 0,
                                               'chooseClass'      => "sines",
                                               'excise'           => 0,
           },
           "short_repeats_and_satellites" => {
                                               'description' => "",
                                               'minscore'    => 225,
                                               'minmatch' => [ 7, 8, 10, 12 ],
                                               'max_matrix_gc' => -1,
                                               'matrix' => "20p##g.matrix",
                                               'gap_initValue'    => -30,
                                               'ins_gap_extValue' => -6,
                                               'del_gap_extValue' => -5,
                                               'bandwidth'        => 14,
                                               'filterContained'  => 0,
                                               'masklevel'        => 90,
                                               'raw'              => 0,
                                               'wordraw'          => 0,
                                               'chooseClass'      => "sines",
                                               'excise'           => 0,
           },
           "long_interspersed_repeats" => {
                                            'description'   => "",
                                            'minscore'      => 225,
                                            'minmatch'      => [ 7, 8, 10, 12 ],
                                            'max_matrix_gc' => -1,
                                            'matrix'        => "20p##g.matrix",
                                            'gap_initValue' => -30,
                                            'ins_gap_extValue' => -6,
                                            'del_gap_extValue' => -5,
                                            'bandwidth'        => 14,
                                            'filterContained'  => 0,
                                            'masklevel'        => 90,
                                            'raw'              => 0,
                                            'wordraw'          => 0,
                                            'chooseClass'      => "longlib",
                                            'excise'           => 0,
           },
           "ancient_repeats" => {
                                  'description'      => "",
                                  'minscore'         => 180,
                                  'minmatch'         => [ 6, 6, 8, 9 ],
                                  'max_matrix_gc'    => -1,
                                  'matrix'           => "25p##g.matrix",
                                  'gap_initValue'    => -27,
                                  'ins_gap_extValue' => -6,
                                  'del_gap_extValue' => -5,
                                  'bandwidth'        => [ 25, 14, 14, 14 ],
                                  'filterContained'  => 0,
                                  'masklevel'        => 90,
                                  'raw'              => 0,
                                  'wordraw'          => 0,
                                  'chooseClass'      => "mirs",
                                  'excise'           => 0,
           },
           "tough_ancient_repeats" => {
                                        'description'      => "",
                                        'minscore'         => 250,
                                        'minmatch'         => [ 6, 6, 8, 9 ],
                                        'max_matrix_gc'    => -1,
                                        'matrix'           => "25p##g.matrix",
                                        'gap_initValue'    => -27,
                                        'ins_gap_extValue' => -6,
                                        'del_gap_extValue' => -5,
                                        'bandwidth'       => [ 25, 14, 14, 14 ],
                                        'filterContained' => 0,
                                        'masklevel'       => 90,
                                        'raw'             => 1,
                                        'wordraw'         => 0,
                                        'chooseClass'     => "masking",
                                        'excise'          => 0,
           },
           "retroviruses" => {
                               'description'      => "",
                               'minscore'         => 250,
                               'minmatch'         => [ 9, 10, 11, 13 ],
                               'max_matrix_gc'    => -1,
                               'matrix'           => "20p##g.matrix",
                               'gap_initValue'    => -30,
                               'ins_gap_extValue' => -6,
                               'del_gap_extValue' => -5,
                               'bandwidth'        => 14,
                               'filterContained'  => 0,
                               'masklevel'        => 90,
                               'raw'              => 0,
                               'wordraw'          => 0,
                               'chooseClass'      => "masking",
                               'excise'           => 0,
           },
           "tough_line1s_in_eutheria" => {
                                           'description'   => "Eutheria",
                                           'minscore'      => 300,
                                           'minmatch'      => [ 7, 8, 9, 11 ],
                                           'max_matrix_gc' => 49,
                                           'matrix'        => "25p##g.matrix",
                                           'gap_initValue' => -27,
                                           'ins_gap_extValue' => -6,
                                           'del_gap_extValue' => -5,
                                           'bandwidth'        => 14,
                                           'filterContained'  => 0,
                                           'masklevel'        => 90,
                                           'raw'              => 1,
                                           'wordraw'          => 0,
                                           'chooseClass'      => "l1",
                                           'excise'           => 0,
           },
           "simple_repeats_again" => {
                                       'description'      => "",
                                       'minscore'         => 180,
                                       'minmatch'         => [ 7, 8, 9, 10 ],
                                       'max_matrix_gc'    => -1,
                                       'matrix'           => "simple1.matrix",
                                       'gap_initValue'    => -40,
                                       'ins_gap_extValue' => -15,
                                       'del_gap_extValue' => -15,
                                       'bandwidth'        => 4,
                                       'filterContained'  => 0,
                                       'masklevel'        => 25,
                                       'raw'              => 1,
                                       'wordraw'          => 1,
                                       'chooseClass'      => "masking",
                                       'excise'           => 0,
           },
           "simple_repeats_flanking" => {
                                          'description'      => "",
                                          'minscore'         => 200,
                                          'minmatch'         => [ 6, 7, 8, 9 ],
                                          'max_matrix_gc'    => -1,
                                          'matrix'           => "simple.matrix",
                                          'gap_initValue'    => -35,
                                          'ins_gap_extValue' => -10,
                                          'del_gap_extValue' => -10,
                                          'bandwidth'        => 4,
                                          'filterContained'  => 0,
                                          'masklevel'        => 75,
                                          'raw'              => 1,
                                          'wordraw'          => 1,
                                          'chooseClass'      => "masking",
                                          'excise'           => 0,
           },
           "low_complexity" => {
                                 'description'      => "",
                                 'minscore'         => 21,
                                 'minmatch'         => [ 5, 5, 5, 5 ],
                                 'max_matrix_gc'    => -1,
                                 'matrix'           => "at.matrix",
                                 'gap_initValue'    => -10,
                                 'ins_gap_extValue' => -3,
                                 'del_gap_extValue' => -3,
                                 'bandwidth'        => 2,
                                 'filterContained'  => 0,
                                 'masklevel'        => 95,
                                 'raw'              => 1,
                                 'wordraw'          => 1,
                                 'chooseClass'      => "",
                                 'excise'           => 0,
           },
           "refinement" => {
                             'description'      => "",
                             'minscore'         => 180,
                             'minmatch'         => [ 7, 8, 9, 11 ],
                             'max_matrix_gc'    => -1,
                             'matrix'           => "18p##g.matrix",
                             'gap_initValue'    => -30,
                             'ins_gap_extValue' => -6,
                             'del_gap_extValue' => -5,
                             'bandwidth'        => 40,
                             'filterContained'  => 0,
                             'masklevel'        => 101,
                             'raw'              => 1,
                             'wordraw'          => 1,
                             'chooseClass'      => "",
                             'excise'           => 0,
           }
  );
  return \%searchRecipes;
}

##-------------------------------------------------------------------------##
## Use:  my $param = &selectParameter( $options, $arrayRef );
##
##  Returns
##
##    Globals Used: None
##    Globals Modified: None
##-------------------------------------------------------------------------##
sub selectParameter {
  my $options  = shift;
  my $arrayRef = shift;

  if ( ref( $arrayRef ) eq "ARRAY" ) {
    if ( $options->{'qq'} ) {
      return $arrayRef->[ 3 ];
    }
    elsif ( $options->{'q'} ) {
      return $arrayRef->[ 2 ];
    }
    elsif ( $options->{'s'} ) {
      return $arrayRef->[ 0 ];
    }
    else {
      return $arrayRef->[ 1 ];
    }
  }
  else {
    return ( $arrayRef );
  }
}

##-------------------------------------------------------------------------##
## Use:  &runTestStage( $options, ... );
##
##      In Development:  This routine is meant to test sub
##                       regions of repeat models/sequences that may
##                       give rise to false positives.
##
##  Returns
##
##    Globals Used: None
##    Globals Modified: None
##-------------------------------------------------------------------------##
sub runTestStage {
  my $options             = shift;
  my $stageText           = shift;
  my $batchIdentifierText = shift;
  my $searchParams        = shift;
  my $lib                 = shift;
  my $outfile             = shift;
  my $maskfile            = shift;
  my $searchEngine        = shift;
  my $batchNum            = shift;
  my $batcher             = shift;
  my $tax                 = shift;
  my $excisions           = shift;
  my $numX                = shift;
  my $refineableHashRef   = shift;
  my $refinementHash      = shift;
  my $stage               = shift;
  my $filterType          = shift;
  my $DIRECTORY           = shift;
  my $seqDB               = shift;

  if ( $stageText ne "" ) {
    print $stageText;
    if ( $batchIdentifierText ne "" ) {
      print " in " . $batchIdentifierText . "\n";
    }
    else {
      print "\n";
    }
  }

  # Or better
  # $batcher->isBatchFragmented( $batchNum );
  # Or even better
  # $batcher->isBatchFragmented( $seqId, $batchNum ) not implemented
  my $fragCnt = $batcher->getBatchCount();

  #
  # Resolve matrix parameter
  #
  my $matrix = $searchParams->{'matrix'};
  if ( $searchParams->{'matrix'} =~ /(\d+)p\#+g.matrix/ ) {
    my $div = $1;
    my $GC_frac;
    if ( $options->{'gc'} ) {

      # user decides GC background
      $GC_frac = $options->{'gc'};
    }
    else {
      my $seq_cnt = $batcher->getBatchSeqCount( $batchNum );
      my $seqlen  = $batcher->getBatchSeqLength( $batchNum );
      if ( $options->{'gccalc'} || ( $seq_cnt == 1 && $seqlen > 2000 ) ) {
        $GC_frac = $batcher->getBatchAverageGC( $batchNum );
      }
      else {
        $GC_frac = 43;
      }
    }
    if (    defined $searchParams->{'max_matrix_gc'}
         && $searchParams->{'max_matrix_gc'} > 0
         && $GC_frac > $searchParams->{'max_matrix_gc'} )
    {
      $GC_frac = $searchParams->{'max_matrix_gc'};
    }
    my $GC = &chooseMatrices( $GC_frac );
    $matrix = $div . "p" . $GC . ".matrix";
  }
  my $minscore =
      $searchParams->{'minscore'} - int( $searchParams->{'minscore'} * 0.075 );
  print "minscore is now $minscore\n";
  my $minmatch = selectParameter( $options, $searchParams->{'minmatch'} );
  my $gap_initValue = $searchParams->{'gap_initValue'};
  my $ins_gap_extValue = $searchParams->{'ins_gap_extValue'};
  my $del_gap_extValue = $searchParams->{'del_gap_extValue'};
  my $bandwidth   = selectParameter( $options, $searchParams->{'bandwidth'} );
  my $masklevel   = 101;
  my $raw         = $searchParams->{'raw'};
  my $wordraw     = $searchParams->{'wordraw'};
  my $chooseClass = $searchParams->{'chooseClass'};
  my $excise      = $searchParams->{'excise'};

  my ( $minmatch, $bandwidth, $resultsCollection ) = &search(
                           $options,          $DIRECTORY,        $outfile,
                           $maskfile,         $lib,              $minmatch,
                           $bandwidth,        $matrix,           $gap_initValue,
                           $ins_gap_extValue, $del_gap_extValue, $minscore,
                           $masklevel,        $searchEngine,     $wordraw,
                           $raw
  );

  for ( my $k = 0 ; $k < $resultsCollection->size() ; $k++ ) {
    my $result = $resultsCollection->get( $k );
    $result->setId( $stage );
  }

  $resultsCollection->write( $outfile, SearchResult::AlignWithQuerySeq );

  return ( $resultsCollection->size() );
}

##-------------------------------------------------------------------------##
## Use:  &runTRFStage( $options, ... );
##
##      TRF replacement for Simple/Low_complexity Repeat consensus
##      based search stages.  TODO: Document
##
##  Returns
##
##    Globals Used: None
##    Globals Modified: None
##-------------------------------------------------------------------------##
sub runTRFStage {
  my $options             = shift;
  my $stageText           = shift;
  my $batchIdentifierText = shift;
  my $searchParams        = shift;
  my $lib                 = shift;
  my $outfile             = shift;
  my $maskfile            = shift;
  my $searchEngine        = shift;
  my $batchNum            = shift;
  my $batcher             = shift;
  my $tax                 = shift;
  my $excisions           = shift;
  my $numX                = shift;
  my $refineableHashRef   = shift;
  my $refinementHash      = shift;
  my $stage               = shift;
  my $filterType          = shift;
  my $DIRECTORY           = shift;
  my $seqDB               = shift;

  $DEBUG = 0;
  my ( $workingVol, $workingDir, $workingFile ) =
      File::Spec->splitpath( $outfile );
  $workingDir = "." if ( $workingDir eq "" );

  if ( $stageText ne "" ) {
    print $stageText;
    if ( $batchIdentifierText ne "" ) {
      print " in " . $batchIdentifierText . "\n";
    }
    else {
      print "\n";
    }
  }

  my $trf = TRF->new( pathToEngine => $TRF_PRGM,
                      workDir      => $workingDir );

  my $minCopyNumber;
  my $excise = 0;
  my $lambda = 0;
  my @mu     = ();
  if ( $searchParams eq "PERFECT" ) {

    #
    # Search for young tandem repeats
    #
    $trf->setMatchWeight( 2 );
    $trf->setMismatchPenalty( 7 );
    $trf->setDelta( 7 );
    $trf->setPm( 80 );
    $trf->setPi( 10 );
    $trf->setMinScore( 50 );
    $trf->setMaxPeriod( 10 );
    $excise        = 1;
    $minCopyNumber = 4;
    $lambda        = 0.41;
    @mu = ( 8.51, 1.04, 6.26, 8.65, 10.36, 7.19, 8.81, 10.84, 12.97, 15.07 );
  }
  else {

    #
    # Search for old tandem repeats
    #
    $trf->setMatchWeight( 2 );
    $trf->setMismatchPenalty( 3 );
    $trf->setDelta( 5 );
    $trf->setPm( 75 );
    $trf->setPi( 20 );
    $trf->setMinScore( 33 );
    $trf->setMaxPeriod( 7 );
    $excise        = 0;
    $minCopyNumber = 5;
    $lambda        = 0.32;
    @mu            = ( 0.79, 1.00, 4.78, 6.94, 8.68, 9.84, 11.94 );
  }

  my ( $retCode, $trfResults, $trfOutFile, $trfErrFile ) = $trf->search(
                                                      sequenceFile => $maskfile,
                                                      workDir => $workingDir );

  if ( defined $trfErrFile && $trfErrFile ne "" ) {
    # TRF Errors
    my $signal = $retCode & 127;
    my $prgcode = $retCode >> 8;
    print "WARNING: TRF returned an error ("
        . "Return code = $prgcode, signal = $signal)\n"
        . "TRF parameters: "
        . $trf->getParameters() . "\n"
        . "A search phase could not complete on this batch.\n"
        . "The batch file will be re-run and if possible the\n"
        . "program will resume.\n";
    exit( -1 );
  }

  my $matrix =
      Matrix->new(
                  fileName => "$DIRECTORY/Matrices/crossmatch/simple1.matrix" );
  my $newResultCol = SearchResultCollection->new();
  if ( $DEBUG ) {
    print "  TRF Returned: " . $trfResults->size() . " results\n";
  }
  for ( my $i = $trfResults->size() - 1 ; $i >= 0 ; $i-- ) {
    my $result = $trfResults->get( $i );
    bless $result, "TRFSearchResult";

    if ( $result->getCopyNumber() > $minCopyNumber ) {
      my ( $newScore, $kimura, $CpGSites, $percIns, $percDel, $scoreArray,
           $goodRegions )
          = $result->rescoreAlignment(
                                       scoreMatrix    => $matrix,
                                       gapOpenPenalty => -30,
                                       gapExtPenalty  => -15,
                                       xDrop          => 500
          );

      # Cutoff for simple1.matrix score.  NOTE: This score is comparable
      # to other interpersed repeat scores from crossmatch/abblast/rmblast
      # but not from the bitscores of hmmer. Looking into converting TRF
      # raw scores into bitscores directly for hmmer runs.
      #
      # Now that we are using bitscores perhaps we should base this
      # cutoff on the bitscore?
      next if ( $newScore < 20 );

      my $bitScore = sprintf(
                              "%.0d",
                              $result->rawToBitScore(
                                        $lambda, $mu[ $result->getPeriod() - 1 ]
                              )
      );
      $result->setScore( $bitScore );

      $result->setPctDiverge( sprintf( "%0.2f", $kimura ) );
      $result->setPctInsert( sprintf( "%0.2f",  $percIns ) );
      $result->setPctDelete( sprintf( "%0.2f",  $percDel ) );
      if ( $#{$goodRegions} > 1 ) {
        my $frags = $result->fragmentSearchResult( regionList => $goodRegions );
        foreach my $frag ( @{$frags} ) {
          $newResultCol->add( $frag );
        }
      }
      else {
        $newResultCol->add( $result );
      }
    }
  }
  $trfResults = $newResultCol;
  if ( $DEBUG ) {
    print "  Filtered TRF Set: " . $trfResults->size() . " results\n";
  }

  #
  # Mask level filtering
  #   - Only important if we plan to excise the repeats.
  #     Then we can't afford overlap.
  #
  if ( $searchParams eq "PERFECT" ) {
    $trfResults->maskLevelFilter( value => 1 );
    if ( $DEBUG ) {
      print "  TRF Set After Masklevel: " . $trfResults->size() . " results\n";
    }
  }

  # Or better
  # $batcher->isBatchFragmented( $batchNum );
  # Or even better
  # $batcher->isBatchFragmented( $seqId, $batchNum ) not implemented
  my $fragCnt = $batcher->getBatchCount();

  &filterResults( $options, $filterType, $fragCnt, $lib, $trfResults, $tax );

  &postProcessSearch(
                      $options,          $trfResults, $excisions,
                      $excise,           $numX,       $seqDB,
                      $options->{'inv'}, $outfile,    $refineableHashRef,
                      $refinementHash,   $batchNum,   $stage
  );
  $DEBUG = 0;

  return ( $trfResults->size() );
}

##-------------------------------------------------------------------------##
## Use:  &runStage( $options, ... );
##
##  Returns
##
##    Globals Used: None
##    Globals Modified: None
##-------------------------------------------------------------------------##
sub runStage {
  my $options             = shift;
  my $stageText           = shift;
  my $batchIdentifierText = shift;
  my $searchParams        = shift;
  my $lib                 = shift;
  my $outfile             = shift;
  my $maskfile            = shift;
  my $searchEngine        = shift;
  my $batchNum            = shift;
  my $batcher             = shift;
  my $tax                 = shift;
  my $excisions           = shift;
  my $numX                = shift;
  my $refineableHashRef   = shift;
  my $refinementHash      = shift;
  my $stage               = shift;
  my $filterType          = shift;
  my $DIRECTORY           = shift;
  my $seqDB               = shift;

  if ( $stageText ne "" ) {
    print $stageText;
    if ( $batchIdentifierText ne "" ) {
      print " in " . $batchIdentifierText . "\n";
    }
    else {
      print "\n";
    }
  }

  # Or better
  # $batcher->isBatchFragmented( $batchNum );
  # Or even better
  # $batcher->isBatchFragmented( $seqId, $batchNum ) not implemented
  my $fragCnt = $batcher->getBatchCount();

  #
  # Resolve matrix parameter
  #
  my $matrix = $searchParams->{'matrix'};
  if ( $searchParams->{'matrix'} =~ /(\d+)p\#+g.matrix/ ) {
    my $div = $1;
    my $GC_frac;
    if ( $options->{'gc'} ) {

      # user decides GC background
      $GC_frac = $options->{'gc'};
    }
    else {
      my $seq_cnt = $batcher->getBatchSeqCount( $batchNum );
      my $seqlen  = $batcher->getBatchSeqLength( $batchNum );
      if ( $options->{'gccalc'} || ( $seq_cnt == 1 && $seqlen > 2000 ) ) {
        $GC_frac = $batcher->getBatchAverageGC( $batchNum );
      }
      else {
        $GC_frac = 43;
      }
    }
    if (    defined $searchParams->{'max_matrix_gc'}
         && $searchParams->{'max_matrix_gc'} > 0
         && $GC_frac > $searchParams->{'max_matrix_gc'} )
    {
      $GC_frac = $searchParams->{'max_matrix_gc'};
    }
    my $GC = &chooseMatrices( $GC_frac );
    $matrix = $div . "p" . $GC . ".matrix";
  }

  my $minscore      = $searchParams->{'minscore'};
  my $minmatch      = selectParameter( $options, $searchParams->{'minmatch'} );
  my $gap_initValue = $searchParams->{'gap_initValue'};
  my $ins_gap_extValue = $searchParams->{'ins_gap_extValue'};
  my $del_gap_extValue = $searchParams->{'del_gap_extValue'};
  my $bandwidth = selectParameter( $options, $searchParams->{'bandwidth'} );
  my $filterContained = $searchParams->{'filterContained'};
  my $masklevel       = $searchParams->{'masklevel'};
  my $raw             = $searchParams->{'raw'};
  my $wordraw         = $searchParams->{'wordraw'};
  my $chooseClass     = $searchParams->{'chooseClass'};
  my $excise          = $searchParams->{'excise'};

  #
  #  Turn off masklevel filtering at the SearchEngine level.  This
  #  is a hack to get around a bug in the way cross_match is handling
  #  the filtering of overlapping hits.  We apply the masklevel after
  #  we get back the results now for all engines.
  #
  my $cmMaskLevel = 101;

  if ( $DEBUG ) {
    print "Saving pre-stage maskfile to: $maskfile-$stage\n";
    system( "cp $maskfile $maskfile-$stage" );
  }

  my ( $minmatch, $bandwidth, $resultsCollection ) = &search(
                           $options,          $DIRECTORY,        $outfile,
                           $maskfile,         $lib,              $minmatch,
                           $bandwidth,        $matrix,           $gap_initValue,
                           $ins_gap_extValue, $del_gap_extValue, $minscore,
                           $cmMaskLevel,      $searchEngine,     $wordraw,
                           $raw
  );

  if ( $filterContained ) {
    $resultsCollection->filterContainedResults( value => $masklevel );
  }

  if ( $DEBUG ) {
    print "Pre-MaskLevel Annotations:\n";
    for ( my $i = 0 ; $i < $resultsCollection->size() ; $i++ ) {
      print "#$i:  "
          . $resultsCollection->get( $i )
          ->toStringFormatted( SearchResult::NoAlign );
    }
    print "\n";
  }

  # shortcutlib & shortlib & masklib
  if ( $stage == 401 || $stage == 501 || $stage == 502 || $stage == 452 ) {
    &preMaskLevelFilter( $resultsCollection );
  }

  #
  # Mask level filtering
  #
  print "Using: masklevel = $masklevel\n" if ( 0 );
  $resultsCollection->maskLevelFilter( value => $masklevel );

  #print $CLASS
  #    . "::search: "
  #    . $resultsCollection->size()
  #    . " hits "
  #    . "after masklevel filtering\n"
  #  if ( $this->getDEBUG() );

  &filterResults( $options, $filterType, $fragCnt, $lib, $resultsCollection,
                  $tax );

  &postProcessSearch(
                      $options,           $resultsCollection,
                      $excisions,         $excise,
                      $numX,              $seqDB,
                      $options->{'inv'},  $outfile,
                      $refineableHashRef, $refinementHash,
                      $batchNum,          $stage
  );

  return ( $resultsCollection->size() );
}

##-------------------------------------------------------------------------##
## Use:  &runLowComplexTests (
##           \%options, $REPEATMASKER_DIR, $GC,
##           $file, $maskfile,
##           $generalLibDir,
##           $speciesLibDir, $fragCnt,
##           $searchEngine,
##           $numX, $seqDB, [$batchIdentifierText],
##           $tax, $customLibDir, $tmpDir, $batchNum, $refineableHashRef );
##
##  Returns
##
##         Phases of a search:
##          - Search for simple repeats ( simple.lib )
##
##          Species Specific:
##            - Search user supplied library
##
##          Mammal Specific:
##          - Search sinecutlib if it exists ( full length abundant SINEs )
##          - Search shortcutlib if it exists ( full-length IRs )
##          - Search cutlib if it exists
##
##          Homo:
##              - Search sinecutlib again
##
##          - Search shortlib if it exists
##          - Search longlib if it exists
##          - Search mirs.lib
##          - Search mir.lib
##          - Search retro.lib ***if it exists***
##          - Search l1.lib
##
##          - Search simple.lib (again?)
##          - Search at.lib
##
##
##    Globals Used: None
##    Globals Modified: None
##-------------------------------------------------------------------------##
sub runLowComplexTests {
  my %options   = %{ shift() };    # The RepeatMasker option hashtable
  my $DIRECTORY = shift;
  my $GC_frac   = shift;
  my $file      = shift;
  my $maskfile  = shift;

  #  my $sinecutlib          = shift;
  #  my $cutlib              = shift;
  #  my $shortcutlib         = shift;
  #  my $shortlib            = shift;
  #  my $longlib             = shift;
  #  my $retrolib            = shift;
  my $generalLibDir       = shift;
  my $speciesLibDir       = shift;
  my $fragCnt             = shift;
  my $searchEngine        = shift;
  my $numX                = shift;
  my $seqDB               = shift;
  my $batchIdentifierText = shift;
  my $tax                 = shift;
  my $customLibDir        = shift;
  my $tempdir             = shift;
  my $batchNum            = shift;
  my $refineableHashRef   = shift;
  my $batcher             = shift;

  my $searchRecipes = &getSearchRecipes();

  my $repBoundRef;
  my $stage = 0;
  my $resultsCollection;
  my %refinementHash = ();
  my $cutAlus        = 0;

  # reused to hold overlaplist
  my $excisions = [];

  my (
       $minscore,         $minmatch,      $lib,
       $matrix,           $gap_initValue, $ins_gap_extValue,
       $del_gap_extValue, $bandwidth,     $masklevel,
       $raw,              $wordraw,       $outfile
      )
      = ();

  # Stages block.
  {

    $options{'species'} = "human";
    my ( $custLibVol, $custLibDir, $custLibFile ) =
        File::Spec->splitpath( $options{'lib'} );
    my $message = "identifying matches to " . $custLibFile . " sequences";
    $lib = "$customLibDir/$custLibFile";

    my $db = FastaDB->new(
                           fileName    => $options{'lib'},
                           openMode    => SeqDBI::ReadOnly,
                           maxIDLength => 50
    );

    # >SINEC_Fc#SINE/tRNA-Lys @Felis @Carnivora  [S:40,50]

    my %stageToLibName = (
                           35 => "sinecutlib",
                           40 => "shortcutlib",
                           45 => "cutlib",
                           50 => "shortlib",
                           55 => "longlib",
                           60 => "mirslib",
                           65 => "mirlib",
                           70 => "retrolib",
                           75 => "l1.lib",
                           85 => "refinelib",
                           80 => "specieslib"
    );

    my %inStage = ();

    #print "$lib contains " . $db->getSeqCount() . "\n";
    foreach my $seqID ( $db->getIDs() ) {
      my $desc = $db->getDescription( $seqID );

      #print "Desc = $desc\n";
      if ( $desc =~ /\[S:([\d,]+)\]/ ) {
        my @values = split( /,/, $1 );
        foreach my $value ( @values ) {
          $inStage{ $stageToLibName{$value} } = 1;
        }
      }
      else {
        $inStage{'specieslib'} = 1;
      }
    }

    #print "Stages: " . Dumper( \%inStage ) . "\n";

    # all species but mammals are currently treated in a naive fashion
    my $message = "Testing using species_specific params";
    &runTestStage(
            \%options,            $message,
            $batchIdentifierText, $searchRecipes->{'general_search_parameters'},
            $lib,                 "$file.tmp.t$stage",
            $maskfile,            $searchEngine,
            $batchNum,            $batcher,
            $tax,                 $excisions,
            $numX,                $refineableHashRef,
            \%refinementHash,     "general_search_parameters-" . $stage++,
            "masking",            $DIRECTORY,
            $seqDB
        )
        if ( $inStage{'specieslib'} );

    if ( $tax->isA( $options{'species'}, "primates" ) == 1 ) {
      &runTestStage(
                     \%options,
                     "Testing using full-length ALUs (masking) params",
                     $batchIdentifierText,
                     $searchRecipes->{'mask_young_sines_in_primates'},
                     $lib,
                     "$file.tmp.t$stage",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "mask_young_sines_in_primates-" . $stage++,
                     "alumask",
                     $DIRECTORY,
                     $seqDB
          )
          if ( $inStage{'sinecutlib'} );
      &runTestStage(
                     \%options,
                     "Testing using full-length ALUs (cut) params",
                     $batchIdentifierText,
                     $searchRecipes->{'cut_young_sines_in_primates'},
                     $lib,
                     "$file.tmp.t$stage",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "cut_young_sines_in_primates-" . $stage++,
                     "alu",
                     $DIRECTORY,
                     $seqDB
          )
          if ( $inStage{'sinecutlib'} );
      &runTestStage(
                     \%options,
                     "",
                     $batchIdentifierText,
                     $searchRecipes->{'cut_young_sines_in_primates'},
                     $lib,
                     "$file.tmp.t$stage",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "cut_young_sines_in_primates-" . $stage++,
                     "alu",
                     $DIRECTORY,
                     $seqDB
      );

      &runTestStage(
               \%options,            "Testing using remaining ALU params",
               $batchIdentifierText, $searchRecipes->{'mask_sines_in_primates'},
               $lib,                 "$file.tmp.t$stage",
               $maskfile,            $searchEngine,
               $batchNum,            $batcher,
               $tax,                 $excisions,
               $numX,                $refineableHashRef,
               \%refinementHash,     "mask_sines_in_primates-" . $stage++,
               "alumask",            $DIRECTORY,
               $seqDB
          )
          if ( $inStage{'sinecutlib'} );

      ##
      ## Non-primate mammals
      ##
    }
    else {
      &runTestStage(
                     \%options,
                     "Testing using young abundant SINEs params",
                     $batchIdentifierText,
                     $searchRecipes->{'mask_sines_in_non_primate_mammals'},
                     $lib,
                     "$file.tmp.t$stage",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "mask_sines_in_non_primate_mammals-" . $stage++,
                     "cut1",
                     $DIRECTORY,
                     $seqDB
          )
          if ( $inStage{'sinecutlib'} );
    }

    ##
    ## shortcut lib exists
    ##
    &runTestStage(
                   \%options,
                   "Testing using full-length interspersed repeats params",
                   $batchIdentifierText,
                   $searchRecipes->{'general_full_length_repeats'},
                   $lib,
                   "$file.tmp.t$stage",
                   $maskfile,
                   $searchEngine,
                   $batchNum,
                   $batcher,
                   $tax,
                   $excisions,
                   $numX,
                   $refineableHashRef,
                   \%refinementHash,
                   "general_full_length_repeats-" . $stage++,
                   "cut1",
                   $DIRECTORY,
                   $seqDB
        )
        if ( $inStage{'shortcutlib'} );

    &runTestStage(
                   \%options,
                   "",
                   $batchIdentifierText,
                   $searchRecipes->{'complete_3end_of_young_line1s'},
                   $lib,
                   "$file.tmp.t$stage",
                   $maskfile,
                   $searchEngine,
                   $batchNum,
                   $batcher,
                   $tax,
                   $excisions,
                   $numX,
                   $refineableHashRef,
                   \%refinementHash,
                   "complete_3end_of_young_line1s-" . $stage++,
                   "cut2",
                   $DIRECTORY,
                   $seqDB
        )
        if ( $inStage{'cutlib'} );

    ##
    ##  primates
    ##
    if ( $tax->isA( $options{'species'}, "primates" ) == 1 ) {
      ######  mask more alus #####
      &runTestStage(
               \%options,            "Testing using remaining ALUs params",
               $batchIdentifierText, $searchRecipes->{'older_ALUs_in_primates'},
               $lib,                 "$file.tmp.t$stage",
               $maskfile,            $searchEngine,
               $batchNum,            $batcher,
               $tax,                 $excisions,
               $numX,                $refineableHashRef,
               \%refinementHash,     "older_ALUs_in_primates-" . $stage++,
               "alumask",            $DIRECTORY,
               $seqDB
          )
          if ( $inStage{'sinecutlib'} );

      ######  mask even more alus #####
      &runTestStage(
                \%options,            "",
                $batchIdentifierText, $searchRecipes->{'more_ALUs_in_primates'},
                $lib,                 "$file.tmp.t$stage",
                $maskfile,            $searchEngine,
                $batchNum,            $batcher,
                $tax,                 $excisions,
                $numX,                $refineableHashRef,
                \%refinementHash,     "more_ALUs_in_primates-" . $stage++,
                "alumask",            $DIRECTORY,
                $seqDB
          )
          if ( $inStage{'sinecutlib'} );
    }

    if ( $tax->isA( $options{'species'}, "rodentia" ) == 1 ) {
      &runTestStage(
                     \%options,
                     "Testing using most interspersed repeats params",
                     $batchIdentifierText,
                     $searchRecipes->{'short_repeats_and_satellites_rodents'},
                     $lib,
                     "$file.tmp.t$stage",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "short_repeats_and_satellites_rodents-" . $stage++,
                     "sines",
                     $DIRECTORY,
                     $seqDB
          )
          if ( $inStage{'shortlib'} );
    }
    else {
      &runTestStage(
                     \%options,
                     "Testing using most interspersed repeats params",
                     $batchIdentifierText,
                     $searchRecipes->{'short_repeats_and_satellites'},
                     $lib,
                     "$file.tmp.t$stage",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "short_repeats_and_satellites" . $stage++,
                     "sines",
                     $DIRECTORY,
                     $seqDB
          )
          if ( $inStage{'shortlib'} );
    }

    # currently long and short together for
    # non-primate/non-rodent mammals
    ##### mask longer rep seqs  #####
    &runTestStage(
            \%options,            "identifying long interspersed repeats",
            $batchIdentifierText, $searchRecipes->{'long_interspersed_repeats'},
            $lib,                 "$file.tmp.t$stage",
            $maskfile,            $searchEngine,
            $batchNum,            $batcher,
            $tax,                 $excisions,
            $numX,                $refineableHashRef,
            \%refinementHash,     "long_interspersed_repeats-" . $stage++,
            "longlib",            $DIRECTORY,
            $seqDB
        )
        if ( $inStage{'longlib'} );
    &runTestStage(
                   \%options,            "identifying ancient repeats",
                   $batchIdentifierText, $searchRecipes->{'ancient_repeats'},
                   $lib,                 "$file.tmp.t$stage",
                   $maskfile,            $searchEngine,
                   $batchNum,            $batcher,
                   $tax,                 $excisions,
                   $numX,                $refineableHashRef,
                   \%refinementHash,     "ancient_repeats-" . $stage++,
                   "mirs",               $DIRECTORY,
                   $seqDB
        )
        if ( $inStage{'mirslib'} );

    &runTestStage(
                \%options,            "",
                $batchIdentifierText, $searchRecipes->{'tough_ancient_repeats'},
                $lib,                 "$file.tmp.t$stage",
                $maskfile,            $searchEngine,
                $batchNum,            $batcher,
                $tax,                 $excisions,
                $numX,                $refineableHashRef,
                \%refinementHash,     "tough_ancient_repeats-" . $stage++,
                "masking",            $DIRECTORY,
                $seqDB
        )

#        if ( $inStage{'mirlib'} );
#    &runTestStage(
#                  \%options,            "identifying retrovirus-like sequences",
#                  $batchIdentifierText, $searchRecipes->{'retroviruses'},
#                  $lib,                 "$file.tmp.t$stage",
#                  $maskfile,            $searchEngine,
#                  $batchNum,            $batcher,
#                  $tax,                 $excisions,
#                  $numX,                $refineableHashRef,
#                  \%refinementHash,     "retroviruses-" . $stage++,
#                  "masking",            $DIRECTORY,
#                  $seqDB
#        )
#        if ( $inStage{'retrolib'} );

#    if ( $tax->isA( $options{'species'}, "eutheria" ) == 1 ) {
#
#      # these LINEs are not scanned in marsupials; perhaps will
#      # find LINEs to put in later
#      ##### mask undetected LINE1 bodies #####
#      &runTestStage(
#             \%options,            "identifying tough LINE1s",
#             $batchIdentifierText, $searchRecipes->{'tough_line1s_in_eutheria'},
#             $lib,                 "$file.tmp.t$stage",
#             $maskfile,            $searchEngine,
#             $batchNum,            $batcher,
#             $tax,                 $excisions,
#             $numX,                $refineableHashRef,
#             \%refinementHash,     "tough_line1s_in_eutheria-" . $stage++,
#             "l1",                 $DIRECTORY,
#             $seqDB
#          )
#          if ( $inStage{'l1.lib'} );
#    }    # if eutheria
  }

  if ( <$file.tmp.t*> ) {
    systemint( "cat $file.tmp.t* > $file.cat" );
  }

  return;
}
##-------------------------------------------------------------------------##
## Use:  &runHMMERSearchStages (
##           \%options, $REPEATMASKER_DIR, $GC,
##           $file, $maskfile,
##           $generalLibDir,
##           $speciesLibDir, $fragCnt,
##           $searchEngine,
##           $numX, $seqDB, [$batchIdentifierText],
##           $tax, $customLibDir, $tmpDir, $batchNum, $refineableHashRef );
##
##  Returns
##
##         Phases of a search:
##          - Search for simple repeats ( simple.lib )
##
##          Species Specific:
##            - Search user supplied library
##
##          Mammal Specific:
##          - Search sinecutlib if it exists ( full length abundant SINEs )
##          - Search shortcutlib if it exists ( full-length IRs )
##          - Search cutlib if it exists
##
##          Homo:
##              - Search sinecutlib again
##
##          - Search shortlib if it exists
##          - Search longlib if it exists
##          - Search mirs.lib
##          - Search mir.lib
##          - Search retro.lib ***if it exists***
##          - Search l1.lib
##
##          - Search simple.lib (again?)
##          - Search at.lib
##
##
##    Globals Used: None
##    Globals Modified: None
##-------------------------------------------------------------------------##
sub runHMMERSearchStages {
  my %options   = %{ shift() };    # The RepeatMasker option hashtable
  my $DIRECTORY = shift;
  my $GC_frac   = shift;
  my $file      = shift;
  my $maskfile  = shift;

  #  my $sinecutlib          = shift;
  #  my $cutlib              = shift;
  #  my $shortcutlib         = shift;
  #  my $shortlib            = shift;
  #  my $longlib             = shift;
  #  my $retrolib            = shift;
  my $generalLibDir       = shift;
  my $speciesLibDir       = shift;
  my $fragCnt             = shift;
  my $searchEngine        = shift;
  my $numX                = shift;
  my $seqDB               = shift;
  my $batchIdentifierText = shift;
  my $tax                 = shift;
  my $customLibDir        = shift;
  my $tempdir             = shift;
  my $batchNum            = shift;
  my $refineableHashRef   = shift;
  my $batcher             = shift;

  my $searchRecipes = &getSearchRecipes();

  my $repBoundRef;
  my $resultsCollection;
  my %refinementHash = ();
  my $cutAlus        = 0;

  #
  # Data structures which help in recording the locations
  # of repeat excision.
  #
  my $excisions = {};

  my (
       $minscore,         $minmatch,      $lib,
       $matrix,           $gap_initValue, $ins_gap_extValue,
       $del_gap_extValue, $bandwidth,     $masklevel,
       $raw,              $wordraw,       $outfile
      )
      = ();

  # Stages block.
  {
    ##
    ## Simple Repeats
    ##    - excise almost perfect simple repeats
    ##
    # 11/9/2015
    #unless ( $options{'nocut'} || $options{'low'} || $options{'alu'} ) {
    unless ( $options{'nolow'} || $options{'alu'} ) {
      &runTRFStage(
                    \%options,            "identifying Simple Repeats",
                    $batchIdentifierText, "PERFECT",
                    "",                   "$file.tmp.simple1",
                    $maskfile,            $searchEngine,
                    $batchNum,            $batcher,
                    $tax,                 $excisions,
                    $numX,                $refineableHashRef,
                    \%refinementHash,     "251",
                    "simple",             $DIRECTORY,
                    $seqDB
      );
    }    # Simple repeats

    ##
    ## High complexity repeat searches
    ##
    unless ( $options{'noint'} ) {

      #unless only low complexity DNA is to be masked

      ##
      ## Single species specific library
      ##
      if (    defined $options{'lib'}
           || -s "$speciesLibDir/specieslib"
           || -s "$speciesLibDir/specieslib.bsq"
           || -s "$speciesLibDir/specieslib.hmm"
           || -s "$speciesLibDir/specieslib.xps" )
      {
        my $message = "";
        if ( defined $options{'lib'} ) {
          my ( $custLibVol, $custLibDir, $custLibFile ) =
              File::Spec->splitpath( $options{'lib'} );
          $message = "identifying matches to " . $custLibFile . " sequences";
          $lib     = "$customLibDir/$custLibFile";
        }
        else {
          $message =
              "identifying matches to " . $options{'species'} . " sequences";
          $lib = "$speciesLibDir/specieslib";
        }

        # all species but mammals are currently treated in a naive fashion
        my $searchParams = $searchRecipes->{'general_search_parameters'};

        # BAD BAD BAD...this changes the DS.
        $searchParams->{'minscore'} = $options{'cutoff'} if $options{'cutoff'};
        &runStage(
                   \%options,            $message,
                   $batchIdentifierText, $searchParams,
                   $lib,                 "$file.tmp.custom",
                   $maskfile,            $searchEngine,
                   $batchNum,            $batcher,
                   $tax,                 $excisions,
                   $numX,                $refineableHashRef,
                   \%refinementHash,     "001",
                   "masking",            $DIRECTORY,
                   $seqDB
        );
        last if ( $seqDB->getSubtLength() < 15 );
      }    # User supplied lib

      ##
      ## RepeatMasker specific libraries
      ##
      else {
        ##
        ## Primates
        ##
        my $aluIdx = 1;
        if ( $tax->isA( $options{'species'}, "primates" ) == 1 ) {

          # 11/9/2015
          #if ( $options{'nocut'} ) {
          if ( 0 ) {
            &runStage(
                       \%options,
                       "identifying full-length ALUs",
                       $batchIdentifierText,
                       $searchRecipes->{'mask_young_sines_in_primates'},
                       "$speciesLibDir/sinecutlib",
                       "$file.tmp.alu0",
                       $maskfile,
                       $searchEngine,
                       $batchNum,
                       $batcher,
                       $tax,
                       $excisions,
                       $numX,
                       $refineableHashRef,
                       \%refinementHash,
                       "351",
                       "alumask",
                       $DIRECTORY,
                       $seqDB
            );
          }
          else {
            $cutAlus = &runStage(
                                \%options,
                                "identifying full-length ALUs",
                                $batchIdentifierText,
                                $searchRecipes->{'cut_young_sines_in_primates'},
                                "$speciesLibDir/sinecutlib",
                                "$file.tmp.alu0",
                                $maskfile,
                                $searchEngine,
                                $batchNum,
                                $batcher,
                                $tax,
                                $excisions,
                                $numX,
                                $refineableHashRef,
                                \%refinementHash,
                                "352",
                                "alu",
                                $DIRECTORY,
                                $seqDB
            );

            if ( $cutAlus ) {

              # Any following full-length Alus only were
              # exposed after excising previous Alus.
              &runStage(
                         \%options, "",
                         $batchIdentifierText,
                         $searchRecipes->{'cut_young_sines_in_primates'},
                         "$speciesLibDir/sinecutlib",
                         "$file.tmp.alu1", $maskfile, $searchEngine,
                         $batchNum, $batcher, $tax, $excisions,
                         $numX, $refineableHashRef, \%refinementHash,
                         "353", "alu", $DIRECTORY, $seqDB
              );
            }    # if ( $cutAlus )
          }

          if ( $options{'alu'} ) {
            ######  mask remaining Alus  #####
            &runStage(
                       \%options,
                       "",
                       $batchIdentifierText,
                       $searchRecipes->{'mask_sines_in_primates'},
                       "$speciesLibDir/sinecutlib",
                       "$file.tmp.alu4",
                       $maskfile,
                       $searchEngine,
                       $batchNum,
                       $batcher,
                       $tax,
                       $excisions,
                       $numX,
                       $refineableHashRef,
                       \%refinementHash,
                       "354",
                       "alumask",
                       $DIRECTORY,
                       $seqDB
            );
            last;
          }    # if ( $options{'alu'} )

          ##
          ## Non-primate mammals
          ##
        }
        elsif (    -s "$speciesLibDir/sinecutlib"
                || -s "$speciesLibDir/sinecutlib.bsq"
                || -s "$speciesLibDir/sinecutlib.hmm"
                || -s "$speciesLibDir/sinecutlib.xps" )
        {
          &runStage(
                     \%options,
                     "identifying young abundant SINEs",
                     $batchIdentifierText,
                     $searchRecipes->{'mask_sines_in_non_primate_mammals'},
                     "$speciesLibDir/sinecutlib",
                     "$file.tmp.alu1",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "355",
                     "cut1",
                     $DIRECTORY,
                     $seqDB
          );
        }

        ##
        ## shortcut lib exists
        ##
        if (    -s "$speciesLibDir/shortcutlib"
             || -s "$speciesLibDir/shortcutlib.bsq"
             || -s "$speciesLibDir/shortcutlib.hmm"
             || -s "$speciesLibDir/shortcutlib.xps" )
        {
          ###### excise all short full-length elements ######
          &runStage(
                     \%options,
                     "identifying full-length interspersed repeats",
                     $batchIdentifierText,
                     $searchRecipes->{'general_full_length_repeats'},
                     "$speciesLibDir/shortcutlib",
                     "$file.tmp.cut1",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "401",
                     "cut1",
                     $DIRECTORY,
                     $seqDB
          );

          if (    -s "$speciesLibDir/cutlib"
               || -s "$speciesLibDir/cutlib.bsq"
               || -s "$speciesLibDir/cutlib.hmm"
               || -s "$speciesLibDir/cutlib.xps" )
          {

            # cut out complete 3' ends of young L1
            # elements (same parameters)
            &runStage(
                       \%options, "",
                       $batchIdentifierText,
                       $searchRecipes->{'complete_3end_of_young_line1s'},
                       "$speciesLibDir/cutlib",
                       "$file.tmp.cut2", $maskfile, $searchEngine,
                       $batchNum, $batcher, $tax, $excisions,
                       $numX, $refineableHashRef, \%refinementHash,
                       "451", "cut2", $DIRECTORY, $seqDB
            );
          }    # if ( -s $cutlib )
        }    # if ( $shortcublib

        if ( $options{'cut'} && !$fragCnt ) {
          my $cutfile = $file . ".cut";
          copy( $maskfile, $cutfile );
        }

        ##
        ##  primates
        ##
        if ( $tax->isA( $options{'species'}, "primates" ) == 1 ) {
          ######  mask more alus #####
          &runStage(
                     \%options,
                     "identifying remaining ALUs",
                     $batchIdentifierText,
                     $searchRecipes->{'older_ALUs_in_primates'},
                     "$speciesLibDir/sinecutlib",
                     "$file.tmp.alu2",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "356",
                     "alumask",
                     $DIRECTORY,
                     $seqDB
          );
          last if ( $seqDB->getSubtLength() < 15 );

        }

        ##
        ##  Most interspersed repeats
        ##
        if ( -s "$speciesLibDir/masklib.hmm" ) {
          &runStage(
                     \%options,
                     "identifying most interspersed repeats",
                     $batchIdentifierText,
                     $searchRecipes->{'long_interspersed_repeats'},
                     "$speciesLibDir/masklib",
                     "$file.tmp.reps",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "452",
                     "longlib",
                     $DIRECTORY,
                     $seqDB
          );
          last if ( $seqDB->getSubtLength() < 15 );
        }
      }    # if ( not user supplied lib or species only search
      last if ( $seqDB->getSubtLength() < 15 );
    }    # unless only low complexity sequences are masked

    unless ( $options{'nolow'} ) {

      &runTRFStage(
                    \%options,            "identifying Simple Repeats",
                    $batchIdentifierText, "DIVERGED",
                    "",                   "$file.tmp.simple2",
                    $maskfile,            $searchEngine,
                    $batchNum,            $batcher,
                    $tax,                 $excisions,
                    $numX,                $refineableHashRef,
                    \%refinementHash,     "252",
                    "masking",            $DIRECTORY,
                    $seqDB
      );

    }    # unless low complexity masking is skipped
  }    # end of stages block

  if (
       keys( %refinementHash )
       && (    -s "$speciesLibDir/refinelib"
            || -s "$speciesLibDir/refinelib.bsq"
            || -s "$speciesLibDir/refinelib.hmm"
            || -s "$speciesLibDir/refinelib.xps" )
      )
  {

    # The design goal is to have each repeat class have it's own refinement
    # library.  For now we just search against the same library.
    foreach my $refinementClass ( keys( %refinementHash ) ) {
      print "refining $refinementClass elements";
      if ( $batchIdentifierText ne "" ) {
        print " in " . $batchIdentifierText . "\n";
      }
      else {
        print "\n";
      }

      open OUT, ">$file.unRefinedEles.fa";
      my $index = 0;

      my $refIdx = 0;
      foreach my $ele ( @{ $refinementHash{$refinementClass} } ) {
        print OUT ">ref" . $refIdx++ . "\n";
        $index++;
        my $seq = $ele->{'seq'};
        $seq =~ s/(\S{50})/$1\n/g;
        $seq .= "\n"
            unless ( $seq =~ /.*\n+$/s );
        print OUT $seq;
      }
      close OUT;

      # At some point we may make each repeat class have
      # it's own refinment lib
      $lib       = "$speciesLibDir/refinelib";
      $minscore  = 180;                          # N/A
      $bandwidth = 40;                           # N/A
      $masklevel = "-masklevel 101";
      $raw       = "-raw";                       # N/A
      $wordraw   = "-word_raw";                  # N/A

      $minmatch = selectParameter( \%options, [ 7, 8, 9, 11 ] );

      # TODO...fix the refinement section
      my $GC = &chooseMatrices( $GC_frac );      # N/A
      $matrix = "18p" . "$GC" . ".matrix";       # N/A
      ( $gap_initValue, $ins_gap_extValue, $del_gap_extValue ) =
          ( -30, -6, -5 );                       # N/A

      $maskfile = "$file.unRefinedEles.fa";
      $outfile  = "$file.tmp.ref";
      ( $minmatch, $bandwidth, $resultsCollection ) = &search(
                           \%options,         $DIRECTORY,        $outfile,
                           $maskfile,         $lib,              $minmatch,
                           $bandwidth,        $matrix,           $gap_initValue,
                           $ins_gap_extValue, $del_gap_extValue, $minscore,
                           $masklevel,        $searchEngine,     $wordraw,
                           $raw
      );

      ## Adjust positions
      for ( my $i = $resultsCollection->size() - 1 ; $i >= 0 ; $i-- ) {
        my $current = $resultsCollection->get( $i );
        my $seqID   = $current->getQueryName();
        my $refIdx;
        if ( $seqID =~ /ref(\d+)/ ) {
          $refIdx = $1;
        }
        else {
          warn "Something went wrong with refinement ids $seqID\n";
        }
        my $refEntry   = $refinementHash{$refinementClass}->[ $refIdx ];
        my $origID     = $refEntry->{'sSeqID'};
        my $start      = $refEntry->{'qStart'};
        my $end        = $refEntry->{'qEnd'};
        my $origLen    = $refEntry->{'len'};
        my $parentID   = $refEntry->{'id'};
        my $refinedLen =
            $current->getQueryEnd() - $current->getQueryStart() + 1;

        if ( $origLen - $refinedLen > 10 ) {

          # Unlikely candidate -- does not fully align
          #print "Delete: $origLen $refinedLen\n";
          $resultsCollection->remove( $i );
        }
        else {
          $current->setQueryName( "refinement" );
          $current->setId( "[$parentID]" );
        }
      }

      $resultsCollection->write( $outfile, SearchResult::AlignWithQuerySeq );
      unlink( $maskfile );
      unlink( "$maskfile.log" );

    }
  }

  if ( <$file.tmp.*> ) {
    systemint( "cat $file.tmp.* > $file.cat" );
  }

  if ( -s "$file.cat" ) {

    # Once and for all let's create a sorted cat file...what do you say?
    my $combinedSearchResults =
        CrossmatchSearchEngine::parseOutput( searchOutput => "$file.cat" );
    $combinedSearchResults->sort(
      sub ($$) {
        (    ( !( $_[ 0 ]->getQueryName() cmp "refinement" ) )
          || ( $_[ 0 ]->getQueryName() cmp $_[ 1 ]->getQueryName() )
          || ( $_[ 0 ]->getQueryStart() <=> $_[ 1 ]->getQueryStart() )
          || ( $_[ 0 ]->getLineageId() cmp $_[ 1 ]->getLineageId() ) );
      }
    );

    $combinedSearchResults->write( "$file.cat",
                                   SearchResult::AlignWithQuerySeq );
  }

  return;
}

##-------------------------------------------------------------------------##
## Use:  &runSearchStages (
##           \%options, $REPEATMASKER_DIR, $GC,
##           $file, $maskfile,
##           $generalLibDir,
##           $speciesLibDir, $fragCnt,
##           $searchEngine,
##           $numX, $seqDB, [$batchIdentifierText],
##           $tax, $customLibDir, $tmpDir, $batchNum, $refineableHashRef );
##
##  Returns
##
##         Phases of a search:
##          - Search for simple repeats ( simple.lib )
##
##          Species Specific:
##            - Search user supplied library
##
##          Mammal Specific:
##          - Search sinecutlib if it exists ( full length abundant SINEs )
##          - Search shortcutlib if it exists ( full-length IRs )
##          - Search cutlib if it exists
##
##          Homo:
##              - Search sinecutlib again
##
##          - Search shortlib if it exists
##          - Search longlib if it exists
##          - Search mirs.lib
##          - Search mir.lib
##          - Search retro.lib ***if it exists***
##          - Search l1.lib
##
##          - Search simple.lib (again?)
##          - Search at.lib
##
##
##    Globals Used: None
##    Globals Modified: None
##-------------------------------------------------------------------------##
sub runSearchStages {
  my %options   = %{ shift() };    # The RepeatMasker option hashtable
  my $DIRECTORY = shift;
  my $GC_frac   = shift;
  my $file      = shift;
  my $maskfile  = shift;

  #  my $sinecutlib          = shift;
  #  my $cutlib              = shift;
  #  my $shortcutlib         = shift;
  #  my $shortlib            = shift;
  #  my $longlib             = shift;
  #  my $retrolib            = shift;
  my $generalLibDir       = shift;
  my $speciesLibDir       = shift;
  my $fragCnt             = shift;
  my $searchEngine        = shift;
  my $numX                = shift;
  my $seqDB               = shift;
  my $batchIdentifierText = shift;
  my $tax                 = shift;
  my $customLibDir        = shift;
  my $tempdir             = shift;
  my $batchNum            = shift;
  my $refineableHashRef   = shift;
  my $batcher             = shift;

  my $searchRecipes = &getSearchRecipes();

  my $repBoundRef;
  my $resultsCollection;
  my %refinementHash = ();
  my $cutAlus        = 0;

  #
  # Data structures which help in recording the locations
  # of repeat excision.
  #
  my $excisions = {};

  my (
       $minscore,         $minmatch,      $lib,
       $matrix,           $gap_initValue, $ins_gap_extValue,
       $del_gap_extValue, $bandwidth,     $masklevel,
       $raw,              $wordraw,       $outfile
      )
      = ();

  # Stages block.
  {
    ##
    ## Simple Repeats
    ##    - delete almost perfect simple repeats
    ##
    # 11/9/2015
    #unless ( $options{'nocut'} || $options{'low'} || $options{'alu'} ) {
    unless ( $options{'nolow'} || $options{'alu'} ) {
      &runTRFStage(
                    \%options,            "identifying Simple Repeats",
                    $batchIdentifierText, "PERFECT",
                    "",                   "$file.tmp.simple1",
                    $maskfile,            $searchEngine,
                    $batchNum,            $batcher,
                    $tax,                 $excisions,
                    $numX,                $refineableHashRef,
                    \%refinementHash,     "251",
                    "simple",             $DIRECTORY,
                    $seqDB
      );

    }    # Simple repeats

    ##
    ## High complexity repeat searches
    ##
    unless ( $options{'noint'} ) {

      #unless only low complexity DNA is to be masked

      ##
      ## Single species specific library
      ##
      if (    defined $options{'lib'}
           || -s "$speciesLibDir/specieslib"
           || -s "$speciesLibDir/specieslib.bsq"
           || -s "$speciesLibDir/specieslib.hmm"
           || -s "$speciesLibDir/specieslib.xps" )
      {
        my $message = "";
        if ( defined $options{'lib'} ) {
          my ( $custLibVol, $custLibDir, $custLibFile ) =
              File::Spec->splitpath( $options{'lib'} );
          $message = "identifying matches to " . $custLibFile . " sequences";
          $lib     = "$customLibDir/$custLibFile";
        }
        else {
          $message =
              "identifying matches to " . $options{'species'} . " sequences";
          $lib = "$speciesLibDir/specieslib";
        }

        # all species but mammals are currently treated in a naive fashion
        my $searchParams = $searchRecipes->{'general_search_parameters'};

        # BAD BAD BAD...this changes the DS.
        $searchParams->{'minscore'} = $options{'cutoff'} if $options{'cutoff'};
        &runStage(
                   \%options,            $message,
                   $batchIdentifierText, $searchParams,
                   $lib,                 "$file.tmp.custom",
                   $maskfile,            $searchEngine,
                   $batchNum,            $batcher,
                   $tax,                 $excisions,
                   $numX,                $refineableHashRef,
                   \%refinementHash,     "001",
                   "masking",            $DIRECTORY,
                   $seqDB
        );
        last if ( $seqDB->getSubtLength() < 15 );
      }    # User supplied lib

      ##
      ## RepeatMasker specific libraries
      ##
      else {
        ##
        ## Primates
        ##
        my $aluIdx = 1;
        if ( $tax->isA( $options{'species'}, "primates" ) == 1 ) {

          # 11/9/2015
          #if ( $options{'nocut'} ) {
          if ( 0 ) {
            &runStage(
                       \%options,
                       "identifying full-length ALUs",
                       $batchIdentifierText,
                       $searchRecipes->{'mask_young_sines_in_primates'},
                       "$speciesLibDir/sinecutlib",
                       "$file.tmp.alu0",
                       $maskfile,
                       $searchEngine,
                       $batchNum,
                       $batcher,
                       $tax,
                       $excisions,
                       $numX,
                       $refineableHashRef,
                       \%refinementHash,
                       "351",
                       "alumask",
                       $DIRECTORY,
                       $seqDB
            );
          }
          else {
            $cutAlus = &runStage(
                                \%options,
                                "identifying full-length ALUs",
                                $batchIdentifierText,
                                $searchRecipes->{'cut_young_sines_in_primates'},
                                "$speciesLibDir/sinecutlib",
                                "$file.tmp.alu0",
                                $maskfile,
                                $searchEngine,
                                $batchNum,
                                $batcher,
                                $tax,
                                $excisions,
                                $numX,
                                $refineableHashRef,
                                \%refinementHash,
                                "352",
                                "alu",
                                $DIRECTORY,
                                $seqDB
            );

            if ( $cutAlus ) {

              # Any following full-length Alus only were
              # exposed after excising previous Alus.
              &runStage(
                         \%options, "",
                         $batchIdentifierText,
                         $searchRecipes->{'cut_young_sines_in_primates'},
                         "$speciesLibDir/sinecutlib",
                         "$file.tmp.alu1", $maskfile, $searchEngine,
                         $batchNum, $batcher, $tax, $excisions,
                         $numX, $refineableHashRef, \%refinementHash,
                         "353", "alu", $DIRECTORY, $seqDB
              );
            }    # if ( $cutAlus )
          }

          if ( $options{'alu'} ) {
            ######  mask remaining Alus  #####
            &runStage(
                       \%options,
                       "",
                       $batchIdentifierText,
                       $searchRecipes->{'mask_sines_in_primates'},
                       "$speciesLibDir/sinecutlib",
                       "$file.tmp.alu4",
                       $maskfile,
                       $searchEngine,
                       $batchNum,
                       $batcher,
                       $tax,
                       $excisions,
                       $numX,
                       $refineableHashRef,
                       \%refinementHash,
                       "354",
                       "alumask",
                       $DIRECTORY,
                       $seqDB
            );
            last;
          }    # if ( $options{'alu'} )

          ##
          ## Non-primate mammals
          ##
        }
        elsif (    -s "$speciesLibDir/sinecutlib"
                || -s "$speciesLibDir/sinecutlib.bsq"
                || -s "$speciesLibDir/sinecutlib.hmm"
                || -s "$speciesLibDir/sinecutlib.xps" )
        {
          &runStage(
                     \%options,
                     "identifying young abundant SINEs",
                     $batchIdentifierText,
                     $searchRecipes->{'mask_sines_in_non_primate_mammals'},
                     "$speciesLibDir/sinecutlib",
                     "$file.tmp.alu1",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "355",
                     "cut1",
                     $DIRECTORY,
                     $seqDB
          );
        }

        ##
        ## shortcut lib exists
        ##
        if (    -s "$speciesLibDir/shortcutlib"
             || -s "$speciesLibDir/shortcutlib.bsq"
             || -s "$speciesLibDir/shortcutlib.hmm"
             || -s "$speciesLibDir/shortcutlib.xps" )
        {
          ###### excise all short full-length elements ######
          &runStage(
                     \%options,
                     "identifying full-length interspersed repeats",
                     $batchIdentifierText,
                     $searchRecipes->{'general_full_length_repeats'},
                     "$speciesLibDir/shortcutlib",
                     "$file.tmp.cut1",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "401",
                     "cut1",
                     $DIRECTORY,
                     $seqDB
          );

          # for testing
          #copy( $maskfile, "/tmp/backup1" );

          if (    -s "$speciesLibDir/cutlib"
               || -s "$speciesLibDir/cutlib.bsq"
               || -s "$speciesLibDir/cutlib.hmm"
               || -s "$speciesLibDir/cutlib.xps" )
          {

            # cut out complete 3' ends of young L1
            # elements (same parameters)
            &runStage(
                       \%options, "",
                       $batchIdentifierText,
                       $searchRecipes->{'complete_3end_of_young_line1s'},
                       "$speciesLibDir/cutlib",
                       "$file.tmp.cut2", $maskfile, $searchEngine,
                       $batchNum, $batcher, $tax, $excisions,
                       $numX, $refineableHashRef, \%refinementHash,
                       "451", "cut2", $DIRECTORY, $seqDB
            );
          }    # if ( -s $cutlib )
        }    # if ( $shortcublib

        if ( $options{'cut'} && !$fragCnt ) {
          my $cutfile = $file . ".cut";
          copy( $maskfile, $cutfile );
        }

        ##
        ##  primates
        ##
        if ( $tax->isA( $options{'species'}, "primates" ) == 1 ) {
          ######  mask more alus #####
          &runStage(
                     \%options,
                     "identifying remaining ALUs",
                     $batchIdentifierText,
                     $searchRecipes->{'older_ALUs_in_primates'},
                     "$speciesLibDir/sinecutlib",
                     "$file.tmp.alu2",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "356",
                     "alumask",
                     $DIRECTORY,
                     $seqDB
          );
          last if ( $seqDB->getSubtLength() < 15 );

          ######  mask even more alus #####
          &runStage(
                     \%options,
                     "",
                     $batchIdentifierText,
                     $searchRecipes->{'more_ALUs_in_primates'},
                     "$speciesLibDir/sinecutlib",
                     "$file.tmp.alu3",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "357",
                     "alumask",
                     $DIRECTORY,
                     $seqDB
          );
          last if ( $seqDB->getSubtLength() < 15 );
        }

        # for testing
        #copy( $maskfile, "/tmp/backup" );

        ######  mask the remaining other short repeats and satellites#####
        if (    -s "$speciesLibDir/shortlib"
             || -s "$speciesLibDir/shortlib.bsq"
             || -s "$speciesLibDir/shortlib.hmm"
             || -s "$speciesLibDir/shortlib.xps" )
        {

          if ( $tax->isA( $options{'species'}, "rodentia" ) == 1 ) {

            # TODO: Test that this isn't true when -lib is used!!!!
            &runStage(
                    \%options, "identifying most interspersed repeats",
                    $batchIdentifierText,
                    $searchRecipes->{'short_repeats_and_satellites_rodents'},
                    "$speciesLibDir/shortlib",
                    "$file.tmp.sines", $maskfile, $searchEngine,
                    $batchNum, $batcher,           $tax,       $excisions,
                    $numX,     $refineableHashRef, \%refinementHash,
                    "501",     "sines",            $DIRECTORY, $seqDB
            );
          }
          else {
            &runStage(
                       \%options,
                       "identifying most interspersed repeats",
                       $batchIdentifierText,
                       $searchRecipes->{'short_repeats_and_satellites'},
                       "$speciesLibDir/shortlib",
                       "$file.tmp.sines",
                       $maskfile,
                       $searchEngine,
                       $batchNum,
                       $batcher,
                       $tax,
                       $excisions,
                       $numX,
                       $refineableHashRef,
                       \%refinementHash,
                       "502",
                       "sines",
                       $DIRECTORY,
                       $seqDB
            );
          }
          last if ( $seqDB->getSubtLength() < 15 );
        }    # if ( -s $shortlib )

        if (    -s "$speciesLibDir/longlib"
             || -s "$speciesLibDir/longlib.bsq"
             || -s "$speciesLibDir/longlib.hmm"
             || -s "$speciesLibDir/longlib.xps" )
        {

          # currently long and short together for
          # non-primate/non-rodent mammals
          ##### mask longer rep seqs  #####
          &runStage(
                     \%options,
                     "identifying long interspersed repeats",
                     $batchIdentifierText,
                     $searchRecipes->{'long_interspersed_repeats'},
                     "$speciesLibDir/longlib",
                     "$file.tmp.reps",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "551",
                     "longlib",
                     $DIRECTORY,
                     $seqDB
          );
          ## I could run bodies and UTRs separately, ensuring
          ## better coverage at overlapping regions
          ## and allowing to set masklevel to 80 or so for UTRs,
          ## so that oddment extensions are avoided
          last if ( $seqDB->getSubtLength() < 15 );
        }

        if (    -s "$speciesLibDir/mirslib"
             || -s "$speciesLibDir/mirslib.bsq"
             || -s "$speciesLibDir/mirslib.hmm"
             || -s "$speciesLibDir/mirslib.xps" )
        {
          ##### mask MIRs #####
          &runStage(
                  \%options,                "identifying ancient repeats",
                  $batchIdentifierText,     $searchRecipes->{'ancient_repeats'},
                  "$speciesLibDir/mirslib", "$file.tmp.mirs",
                  $maskfile,                $searchEngine,
                  $batchNum,                $batcher,
                  $tax,                     $excisions,
                  $numX,                    $refineableHashRef,
                  \%refinementHash,         "601",
                  "mirs",                   $DIRECTORY,
                  $seqDB
          );

          last if ( $seqDB->getSubtLength() < 15 );
        }    # if ( -s "$speciesLibDir/mirslib....

        #if (    -s "$speciesLibDir/mirlib"
        #     || -s "$speciesLibDir/mirlib.bsq"
        #     || -s "$speciesLibDir/mirlib.hmm"
        #     || -s "$speciesLibDir/mirlib.xps" )
        #{
        #
        #          # This has to do with trying to distinguish
        #          # very low scoring MIRs which get complexity adjusted
        #          # away by previous search.
        #          ##### mask MIRs #####
        #          &runStage(
        #                     \%options,
        #                     "",
        #                     $batchIdentifierText,
        #                     $searchRecipes->{'tough_ancient_repeats'},
        #                     "$speciesLibDir/mirlib",
        #                     "$file.tmp.mir",
        #                     $maskfile,
        #                     $searchEngine,
        #                     $batchNum,
        #                     $batcher,
        #                     $tax,
        #                     $excisions,
        #                     $numX,
        #                     $refineableHashRef,
        #                     \%refinementHash,
        #                     "651",
        #                     "masking",
        #                     $DIRECTORY,
        #                     $seqDB
        #          );
        #          last if ( $seqDB->getSubtLength() < 15 );
        #        }    # if ( -s "$speciesLibDir/mirlib...

        if (    -s "$speciesLibDir/retrolib"
             || -s "$speciesLibDir/retrolib.bsq"
             || -s "$speciesLibDir/retrolib.hmm"
             || -s "$speciesLibDir/retrolib.xps" )
        {
          ##### mask retroviral internal sequences #####
          &runStage(
                     \%options,
                     "identifying retrovirus-like sequences",
                     $batchIdentifierText,
                     $searchRecipes->{'retroviruses'},
                     "$speciesLibDir/retrolib",
                     "$file.tmp.retro",
                     $maskfile,
                     $searchEngine,
                     $batchNum,
                     $batcher,
                     $tax,
                     $excisions,
                     $numX,
                     $refineableHashRef,
                     \%refinementHash,
                     "701",
                     "masking",
                     $DIRECTORY,
                     $seqDB
          );
          last if ( $seqDB->getSubtLength() < 15 );
        }

        #if ( $tax->isA( $options{'species'}, "eutheria" ) == 1 ) {

        # these LINEs are not scanned in marsupials; perhaps will
        # find LINEs to put in later
        ##### mask undetected LINE1 bodies #####
        #  &runStage(
        #             \%options,
        #             "identifying tough LINE1s",
        #             $batchIdentifierText,
        #             $searchRecipes->{'tough_line1s_in_eutheria'},
        #             "$generalLibDir/l1.lib",
        #             "$file.tmp.l1",
        #             $maskfile,
        #             $searchEngine,
        #             $batchNum,
        #             $batcher,
        #             $tax,
        #             $excisions,
        #             $numX,
        #             $refineableHashRef,
        #             \%refinementHash,
        #             "751",
        #             "l1",
        #             $DIRECTORY,
        #             $seqDB
        #  );
        #}    # if eutheria
      }    # if ( not user supplied lib or species only search
      last if ( $seqDB->getSubtLength() < 15 );
    }    # unless only low complexity sequences are masked

    unless ( $options{'nolow'} ) {

      &runTRFStage(
                    \%options,            "identifying Simple Repeats",
                    $batchIdentifierText, "DIVERGED",
                    "",                   "$file.tmp.simple2",
                    $maskfile,            $searchEngine,
                    $batchNum,            $batcher,
                    $tax,                 $excisions,
                    $numX,                $refineableHashRef,
                    \%refinementHash,     "252",
                    "masking",            $DIRECTORY,
                    $seqDB
      );

    }    # unless low complexity masking is skipped

  }    # end of stages block

  if (
       keys( %refinementHash )
       && (    -s "$speciesLibDir/refinelib"
            || -s "$speciesLibDir/refinelib.bsq"
            || -s "$speciesLibDir/refinelib.hmm"
            || -s "$speciesLibDir/refinelib.xps" )
      )
  {

    # The design goal is to have each repeat class have it's own refinement
    # library.  For now we just search against the same library.
    foreach my $refinementClass ( keys( %refinementHash ) ) {
      print "refining $refinementClass elements";
      if ( $batchIdentifierText ne "" ) {
        print " in " . $batchIdentifierText . "\n";
      }
      else {
        print "\n";
      }

      open OUT, ">$file.unRefinedEles.fa";
      my $index = 0;

      my $refIdx = 0;
      foreach my $ele ( @{ $refinementHash{$refinementClass} } ) {
        print OUT ">ref" . $refIdx++ . "\n";
        $index++;
        my $seq = $ele->{'seq'};
        $seq =~ s/(\S{50})/$1\n/g;
        $seq .= "\n"
            unless ( $seq =~ /.*\n+$/s );
        print OUT $seq;
      }
      close OUT;

      # At some point we may make each repeat class have
      # it's own refinment lib
      $lib       = "$speciesLibDir/refinelib";
      $minscore  = 180;
      $bandwidth = 40;
      $masklevel = "-masklevel 101";
      $raw       = "-raw";
      $wordraw   = "-word_raw";

      $minmatch = selectParameter( \%options, [ 7, 8, 9, 11 ] );

      # TODO...fix the refinement section
      my $GC = &chooseMatrices( $GC_frac );
      $matrix = "18p" . "$GC" . ".matrix";
      ( $gap_initValue, $ins_gap_extValue, $del_gap_extValue ) =
          ( -30, -6, -5 );

      $maskfile = "$file.unRefinedEles.fa";
      $outfile  = "$file.tmp.ref";
      ( $minmatch, $bandwidth, $resultsCollection ) = &search(
                           \%options,         $DIRECTORY,        $outfile,
                           $maskfile,         $lib,              $minmatch,
                           $bandwidth,        $matrix,           $gap_initValue,
                           $ins_gap_extValue, $del_gap_extValue, $minscore,
                           $masklevel,        $searchEngine,     $wordraw,
                           $raw
      );

      ## Adjust positions
      for ( my $i = $resultsCollection->size() - 1 ; $i >= 0 ; $i-- ) {
        my $current = $resultsCollection->get( $i );
        my $seqID   = $current->getQueryName();
        my $refIdx;
        if ( $seqID =~ /ref(\d+)/ ) {
          $refIdx = $1;
        }
        else {
          warn "Something went wrong with refinement ids $seqID\n";
        }
        my $refEntry   = $refinementHash{$refinementClass}->[ $refIdx ];
        my $origID     = $refEntry->{'sSeqID'};
        my $start      = $refEntry->{'qStart'};
        my $end        = $refEntry->{'qEnd'};
        my $origLen    = $refEntry->{'len'};
        my $parentID   = $refEntry->{'id'};
        my $refinedLen =
            $current->getQueryEnd() - $current->getQueryStart() + 1;

        if ( $origLen - $refinedLen > 10 ) {

          # Unlikely candidate -- does not fully align
          $resultsCollection->remove( $i );
        }
        else {
          $current->setQueryName( "refinement" );
          $current->setId( "[$parentID]" );
        }
      }

      $resultsCollection->write( $outfile, SearchResult::AlignWithQuerySeq );
      unlink( $maskfile );
      unlink( "$maskfile.log" );

    }
  }

  if ( <$file.tmp.*> ) {
    systemint( "cat $file.tmp.* > $file.cat" );
  }

  # Once and for all let's create a sorted cat file...what do you say?
  my $combinedSearchResults =
      CrossmatchSearchEngine::parseOutput( searchOutput => "$file.cat" );
  $combinedSearchResults->sort(
    sub ($$) {
      (    ( !( $_[ 0 ]->getQueryName() cmp "refinement" ) )
        || ( $_[ 0 ]->getQueryName() cmp $_[ 1 ]->getQueryName() )
        || ( $_[ 0 ]->getQueryStart() <=> $_[ 1 ]->getQueryStart() )
        || ( $_[ 0 ]->getLineageId() cmp $_[ 1 ]->getLineageId() ) );
    }
  );

  $combinedSearchResults->write( "$file.cat", SearchResult::AlignWithQuerySeq );

  return;
}
##-------------------------------------------------------------------------##
## Use:  my $matrix = &chooseMatrices( $GC_frac );
##
##
##  Returns
##
##   Globals Used: None
##-------------------------------------------------------------------------##
sub chooseMatrices {
  my $GC_frac = shift;

  my $matrix = "";

  if ( $GC_frac <= 36 ) {
    $matrix = "35g";
  }
  elsif ( $GC_frac <= 38 ) {
    $matrix = "37g";
  }
  elsif ( $GC_frac <= 40 ) {
    $matrix = "39g";
  }
  elsif ( $GC_frac <= 42 ) {
    $matrix = "41g";
  }
  elsif ( $GC_frac <= 44 ) {
    $matrix = "43g";
  }
  elsif ( $GC_frac <= 46 ) {
    $matrix = "45g";
  }
  elsif ( $GC_frac <= 48 ) {
    $matrix = "47g";
  }
  elsif ( $GC_frac <= 50 ) {
    $matrix = "49g";
  }
  elsif ( $GC_frac <= 52 ) {
    $matrix = "51g";
  }
  else {
    $matrix = "53g";
  }

  return ( $matrix );
}

##-------------------------------------------------------------------------##
## Use:  my addToRefinementCollection( $resultsCollection );
##
##    IN DEVELOPMENT.  This routine decides if a result should be
##    be refined further ( by another search against a refinement library ).
##
##    $refinementHash =  { 'SINE/Alu' => [ { 'seed' => SearchResult,
##                                           'seq' => '' }, { .. } ] }
## Now:
##    $refinementHash =  { 'SINE/Alu' => [ { 'fasta_id' => "qId:qStart-...",
##                                           'id' => "m_b1s328i38",
##                                           'seq' => '' }, { .. } ] }
##
##  Returns
##
##    Globals Used: None
##
##-------------------------------------------------------------------------##
sub addToRefinementCollection {
  my $current        = shift;
  my $refinementHash = shift;
  my $seqDB          = shift;
  my $batchNum       = shift;
  my $stage          = shift;
  my $elementIdx     = shift;
  my $adjStart       = shift;
  my $adjEnd         = shift;
  my $excisions      = shift;

  # Get element fields
  my $subjName = $current->getSubjName();
  my $seqID    = $current->getQueryName();
  my $qStart   = $current->getQueryStart();
  my $qEnd     = $current->getQueryEnd();
  my $parentId = $current->getId();

  # Break up the name in to a classification Type/Subtype
  my ( $sName, $classification ) = ( $subjName =~ /(.*)\#(.*)/ );

  die "RepeatMasker: Error while parsing $subjName into "
      . "( $sName#$classification )\n"
      if ( $classification eq "" || $sName eq "" );

  # Add to class/subclass refinement collection
  my $refColl = $refinementHash->{$classification};
  if ( !defined $refColl ) {
    $refColl = [];
    $refinementHash->{$classification} = $refColl;
  }
  my $querySeq = $current->getQueryString();
  if ( $querySeq eq "" ) {
    die "Search engine did not return an alignment for an annotation!\n";

    # Search engine did not return the alignment so we must
    # grab it ourselves from the DB -- can't anymore, we have already
    # clipped out this element ( if it's in a cut stage ).
    #$querySeq = $seqDB->getSubstr( $seqID, $qStart - 1, $qEnd - $qStart + 1 );
  }
  $querySeq =~ s/-//g;

  ##
  ## Remove excision marker "x" from the alignment data.  This alignment marker is
  ## no longer useful in prohibiting overextension and should not be aligned
  ## to the sequence.  It would have been easier to simply remove the "x" from the
  ## sequence, however nhmmer/nhmmscan convert the "X" into an "N" before printing
  ## the alignment.  Only recourse is to look for recorded excisions:
  ##
  if ( $excisions->{$seqID} ) {
    my @markerRemoval = ();

    # Loop over previous cut out elements for this $seqID
    my $internalBegin  = -1;
    my $internalEnd    = -1;
    my $newResultBegin = $qStart;

    foreach my $hit ( @{ $excisions->{$seqID} } ) {
      my $hitBegin = $hit->[ 1 ];
      my $hitEnd   = $hitBegin + $hit->[ 2 ] - 1;
      last if ( $hitBegin > $qEnd );
      next if ( $hitBegin < $qStart );
      next
          if (    $hitBegin == $qStart
               && $hitEnd == $qEnd );

      $internalBegin = $hitBegin;
      $internalEnd   = $hitEnd;

      #print "Found internal: $internalBegin - $internalEnd\n"
      #    if ( $DEBUG );
      if ( ( $internalBegin - 1 ) > $newResultBegin ) {
        if ( substr( $querySeq, $internalBegin - $qStart, 1 ) =~ /[xn]/i ) {
          push @markerRemoval, ( $internalBegin - $qStart );
        }
      }
      $newResultBegin = $hitEnd + 1;
    }
    if ( @markerRemoval ) {
      foreach my $marker ( sort { $b <=> $a } @markerRemoval ) {

        #print "Removing marker $marker\n";
        substr( $querySeq, $marker, 1 ) = "";
      }

      #print "querySeq = $querySeq ( " . length($querySeq) . " )\n";
    }
  }
  my $len = length( $querySeq );

  #my $fastaID = "$seqID:$adjStart-$adjEnd-$len-$parentId-$subjName";
  # If 'seq' is blank the sequence can be obtained from the seed result.
  push @$refColl,
      {
        'seed'   => $current,
        'qSeqID' => $seqID,
        'qStart' => $adjStart,
        'qEnd'   => $adjEnd,
        'len'    => $len,
        'sSeqID' => $sName,
        'id'     => $parentId,
        'seq'    => $querySeq
      };

}

##-------------------------------------------------------------------------##
## Use:  my isRefineable( $searchResult );
##
##    This routine returns the refineable status
##    given an element.  A refineable element can
##    be searched against a highly detailed library
##    to return a more refined set of alignments.
##
##  Returns
##
##        1 = true, 0 = false.
##
##    Globals Used: None
##
##-------------------------------------------------------------------------##
sub isRefineable {
  my $result            = shift;
  my $refineableHashRef = shift;

  my $name = $result->getSubjName();
  my ( $id, $classification ) = ( $name =~ /(.*)\#(.*)/ );
  if ( defined $refineableHashRef->{$id} ) {
    return 1;
  }
  return 0;
}

##-------------------------------------------------------------------------##
## Use:  my preMaskLevelFilter( $resultsCollection );
##
##
##  Returns
##
##    Currently handles cases where we want to filter out results
##    prior to applying the masklevel filter.  Right now this
##    only applies to LAVA and their problematic Alu sequences.
##
##    Globals Used: None
##
##-------------------------------------------------------------------------##
sub preMaskLevelFilter {
  my $searchResults = shift;

  $DEBUG = 0;
  print "preMaskLevelFilter( \$searchResults ); Called\n"
      if ( $DEBUG );

  if ( 0 && $DEBUG ) {
    print "Original Annotations:\n";
    for ( my $i = 0 ; $i < $searchResults->size() ; $i++ ) {
      print "#$i:  "
          . $searchResults->get( $i )
          ->toStringFormatted( SearchResult::NoAlign );
    }
    print "\n";
  }

  my @deleteList = ();
  for ( my $i = 0 ; $i < $searchResults->size() ; $i++ ) {

    my $current = $searchResults->get( $i );
    my $name    = $current->getSubjName();
    my $begin   = $current->getSubjStart();
    my $end     = $current->getSubjEnd();

    # SVA Contains a bit of Alu.  Originally we searched SVA in
    # sinecutlib along with Alu sequences so that we could
    # compete the Alu with the internal SVA alu-like sequences
    # ( which are diverged enough to fairly compete ).  We
    # also checked for and removed fragments that only match
    # the SVA region which is nearly identical to LTR5 int-ltr
    # pairs.
    # Now we have SVA ( and it's cousin LAVA ) in shortcut lib
    # and buffer the Alu matches in sinecut lib.  In the step
    # below we remove the SVA fragments aligned only to the
    # and LTR5 regions.
    if (
      $name =~ /^SVA/
      && (
           $begin >= 855     && $end < 1270    #  End of the internal
                                               #    LTR5 sequence
           || $begin >= 1263 && $end < 1372
      )                                     #  LTR5 LTR sequence
        )
    {
      print
"Deleting annotation ($i): preMaskFilter #1 - SVA LTR5 misidentification\n"
          if ( $DEBUG );
      push @deleteList, $i;
    }
    elsif (    $name =~ /^LAVA/
            && $current->getScore() < 500 )
    {
      print "Deleting annotation ($i): preMaskFilter #2 - LAVA false positive\n"
          if ( $DEBUG );
      push @deleteList, $i;
    }
  }
  if ( @deleteList ) {
    print "Doing actual removals:\n" if ( $DEBUG );
    foreach my $index ( sort { $b <=> $a } @deleteList ) {
      print "Removing element $index\n" if ( $DEBUG );
      $searchResults->remove( $index );
    }
  }

  # DEBUG output
  if ( 0 && $DEBUG ) {
    print "Final Annotations:\n";
    for ( my $i = 0 ; $i < $searchResults->size() ; $i++ ) {
      print "#$i:  "
          . $searchResults->get( $i )
          ->toStringFormatted( SearchResult::NoAlign );
    }
    print "\n";
  }
  $DEBUG = 0;
}

##-------------------------------------------------------------------------##
## Use:  my filterResults( \%options, $what, $fragCnt, $lib,
##                  $resultsCollection, $tax );
##
##
##  Returns
##
##    Globals Used: None
##
##-------------------------------------------------------------------------##
sub filterResults {
  my %options       = %{ shift() };
  my $chooseClass   = shift;
  my $fragCnt       = shift;
  my $lib           = shift;
  my $searchResults = shift;
  my $tax           = shift;

  $DEBUG = 0;
  print "filterResults( \%options, $chooseClass, $fragCnt,"
      . " $lib, \$searchResults ); Called\n"
      if ( $DEBUG );

  my (
       $printed_one,   $lastoneisfineorf2, $lastoneisfine3utr,
       $lastonecut,    $delayprint,        $lastlineName,
       $lastlinebegin, $lastlineend,       $lastlineleft
  );

  my $undelPrevious = undef;
  my $previous;
  my $current;
  my $next;

  my @deleteList = ();

  # DEBUG output
  if ( $DEBUG ) {
    print "Original Annotations:\n";
    for ( my $i = 0 ; $i < $searchResults->size() ; $i++ ) {
      print "#$i:  "
          . $searchResults->get( $i )
          ->toStringFormatted( SearchResult::NoAlign );
    }
    print "\n";
  }

  #
  # Modify search results and flag annotations for deletion
  #
  for ( my $i = 0 ; $i < $searchResults->size() ; $i++ ) {

    $current = $searchResults->get( $i );

    my $name = $current->getSubjName();

    # Look for "#buffer" annotations and remove them.  Buffer
    # sequences are sequence (fragments) thrown into a library
    # to protect a sequence from being matched.  This is often
    # used when there is some overlap between libraries.  Matches
    # to #buffer sequences are more likely elements which will
    # be checked at a later stage.
    if ( $name =~ /\#buffer/ ) {    # like MT2B in rodcutsines.lib
      print "Deleting annotation ($i): It's a #buffer sequence.\n"
          if ( $DEBUG );
      push @deleteList, $i;
      next;
    }

    # Grab the current line of data into variables for easy reference
    my $score = $current->getScore();

    # This change works around a problem with NHMMSCAN allowing huge
    # insertions in an alignment.  The percIns goes through the roof
    # and we got division by zero errors here.
    my $diverge;
    if ( ( 100 - $current->getPctInsert() ) <= 0 ) {
      $diverge = $current->getPctDiverge();
    }
    else {
      $diverge = 100 *
          ( $current->getPctDiverge() / ( 100 - $current->getPctInsert() ) );
    }

    my $gaps          = $current->getPctDelete() + $current->getPctInsert();
    my $queryleftover = $current->getQueryRemaining();
    my $reverse       = 0;
    if ( $current->getOrientation() eq "C" ) {
      $reverse = 1;
    }
    my $begin = $current->getSubjStart();
    my $end   = $current->getSubjEnd();
    my $left  = $current->getSubjRemaining();

    #
    # Simple filter on divergence level
    #
    if ( $options{'div'} && $diverge > $options{'div'} ) {
      print "Deleting annotation ($i): Divergence too high.\n" if ( $DEBUG );
      push @deleteList, $i;
      next;
    }

    # Only consider previous elements if they are
    # contained in the same sequence.  Also do not
    # consider previous elements if they were marked
    # for deletion.
    my $havePreviousElement = 0;
    if ( $i > 0 ) {
      $havePreviousElement = 1;
      $previous            = $searchResults->get( $i - 1 );
      if ( $current->getQueryName() ne $previous->getQueryName()
           || ( @deleteList && $deleteList[ $#deleteList ] == $i - 1 ) )
      {
        $havePreviousElement = 0;
        $previous            = undef;
      }
    }

    # Similarly for next elements
    # except elements can't have been deleted yet
    my $haveNextElement = 0;
    if ( $i < $searchResults->size() - 1 ) {
      $next = $searchResults->get( $i + 1 );
      $haveNextElement = 1 if $current->getQueryName() eq $next->getQueryName();
    }

    #
    # Class cut2 lib : CASE #2
    #
    if ( $chooseClass eq 'cut2' ) {

      # cut2.lib
      # cutting out youngish LINE1 3' ends; a tricky business
      # but can have a large (positive) effect only the younger
      # subset of elements in the repeat library are cut; the
      # other consensus seqs are only there to correctly classify
      # matches

# this (high) limit prevents cutting out ancient elements
# misassigned as young elements
#
# WARNING: This only looks back one.  So...in a case like this:
#
#  Original Annotations:
#  #0:  511 15.96 0.00 0.00 CebAlb_disco_line_24154 693 786 (615) C L1PA8_3end#LINE/L1 (1) 918 825
#  #1:  341 24.37 9.92 0.84 CebAlb_disco_line_24154 693 811 (590) C L1PA8A_3end#LINE/L1 (0) 878 748
#  #2:  508 19.81 0.00 0.00 CebAlb_disco_line_24154 693 798 (603) C L1PA7_3end#LINE/L1 (1) 900 795
#
#  Deleting annotation (1): Filter #9 - < 75% identical.
#  Doing actual removals:
#  Removing element 1
#  Final Annotations:
#  #0:  511 15.96 0.00 0.00 CebAlb_disco_line_24154 693 786 (615) C L1PA8_3end#LINE/L1 (1) 918 825
#  #1:  508 19.81 0.00 0.00 CebAlb_disco_line_24154 693 798 (603) C L1PA7_3end#LINE/L1 (1) 900 795
#
# This should be fixed.
#
      if ( $diverge + $gaps < 25 ) {

        if ( $name =~ /_3end\#/ && $left < 20 ) {

          # If we overlap with the previous element by more than 20bp
          if (    $havePreviousElement
               && $previous->getQueryEnd() > $current->getQueryStart() + 20 )
          {

            # Delete this element if we are contained by the previous element
            # or if the previous element was cut.
            # not necessary to look at next element:
            # longer extending element is always presented first
            if (    $previous->getQueryEnd() >= $current->getQueryEnd()
                 || $lastonecut )
            {
              $lastoneisfine3utr = "";
              $lastonecut        = 0;
              print "Deleting annotation ($i): Filter #3 - Contained\n"
                  . "by another annot.\n"
                  if ( $DEBUG );
              push @deleteList, $i;
            }
            ## Overlap of body and 3' end
            # If the previous element was a good body (orf2) and this is
            # in the correct orientation (forward).
            elsif ( $lastoneisfineorf2 && !$reverse ) {
              print "Modifying annotation ($i): Filter #4 - Extending \n"
                  . "line 3' end to include body.\n"
                  if ( $DEBUG );
              if ( $previous->getScore() > $score ) {

                # body scored better so use it's score and pctSub
                $current->setScore( $previous->getScore() );
                $current->setPctDiverge( $previous->getPctDiverge() );
              }

              # Include previous element boundries
              $current->setQueryStart( $previous->getQueryStart() );
              my $tmpName = $name;
              $tmpName =~ s/\#/extended\#/;
              $current->setSubjName( $tmpName );    # used in ProcessRepeats
              $current->setSubjBegin( $lastlinebegin );
              if ( $lastoneisfineorf2 eq 'L1P_orf2' ) {
                $current->setSubjEnd( $end + 3144 );
              }
              else {  # currently only rodent as alternative; need to generalize
                $current->setSubjEnd( $end + 4384 );
              }

              # Remove the body annotation and keep the 3' extended annotation.
              $lastonecut        = 1;
              $lastoneisfine3utr = "";
              print "Deleting annotation ($i-1): Filter #4 - Body assimilated\n"
                  . " by 3' end annot.\n"
                  if ( $DEBUG );
              push @deleteList, $i - 1;
            }
            else {

              # Some freakish combination remove it
              $lastonecut        = 0;
              $lastlinebegin     = "";
              $lastoneisfineorf2 = "";
              print "Deleting annotation ($i-1): Filter #5 - Unknown 3' "
                  . "overlap combination.\n"
                  if ( $DEBUG );
              push @deleteList, $i;
            }
          }    # If we overlap with the previous element by more than 20bp...
               # Overlap <= 20 and several other factors.
               # L1 ORFs and body consensus sequences are built with a 150bp
               # overlap.  If the alignment starts at > 150bp then it truely
               # matches the 3'end part.
          elsif (
            ( $begin > 150 || !$reverse && $name =~ /L1PA\d\#|L1Hs|L1Rn|L1Md/ )

            # L1PA, L1Hs, L1Rn...are young elements. It's safe to assume
            # that no extensions will be found with a shorter wordlength.
            # Therefore clip just the 3' consensus portion out as an
            # independent 3' end.
            && (    !$fragCnt
                 || $current->getQueryStart() > 50 && !$reverse
                 || $queryleftover > 50            && $reverse )

            # Handle the case where this end could be extended but its
            # to close to a boundry ( of a fragmented sequence ) to tell.
              )
          {

         # at the edge of query fragments (< 50 bp left); extension may be found
         # in overlapping fragment
            if (    $havePreviousElement
                 && $previous->getQueryEnd() >= $current->getQueryEnd() )
            {

              #accidental overlap < 20 bp
              if ( !$reverse || $lastonecut ) {
                print "Modifying annotation ($i): Filter #5a - 3' end "
                    . "start adjustment\n"
                    . " (accidental overlap)\n"
                    if ( $DEBUG );
                $current->setQueryStart( $previous->getQueryEnd() + 1 );
              }
            }
            $lastonecut        = 1;
            $lastoneisfine3utr = "";
          }
          else {

            # Ok....this is strange.  If this is set....and for some
            # reason a body is not quickly located...this annotation
            # will never be clipped.
            $lastoneisfine3utr = 1 if $reverse;
            $lastonecut        = 0;

            # Strange that we don't keep it if its a singleton
            print "Deleting annotation ($i): Filter #6 - "
                . "Assimilation or singleton\n"
                if ( $DEBUG );
            push @deleteList, $i;
          }
        }    # if ($name =~ /_3end\#/...
        elsif (
             $name =~ /_orf2/
          && $begin > 100
          && $left < 20

          # only the younger L1 body consensi are suffixed '_orf2';
          # they are all 5' shortened in part because no 5' ends
          # (which overlap with the full consensi) are included
          # in this comparison only matches labeled 'extended'
          # are clipped, wich are treated separately in ProcessRepeats
          && ( !$fragCnt || $current->getQueryStart() > 50 )
            )
        {

          # forward: may extend into previous frag; reverse: lack of
          # complete 3' UTR guaranteed
          if (    $havePreviousElement
               && $lastoneisfine3utr
               && $previous->getQueryEnd() > $current->getQueryStart()
               && $reverse
               && ( !$fragCnt || $queryleftover > 50 ) )
          {
            print "Modifying annotation ($i): Filter #7 - Reverse "
                . "body can be combined with 3'.\n"
                if ( $DEBUG );
            if ( $previous->getScore() > $score ) {
              $current->setScore( $previous->getScore() );
              $current->setPctDiverge( $previous->getPctDiverge() );
            }
            $current->setQueryStart( $previous->getQueryStart() );
            $current->setSubjName( ( $lastlineName =~ s/\#/extended\#/ ) );
            $current->setSubjLeft( $lastlineleft );
            if ( $tax->isA( $options{'species'}, "primates" ) == 1 ) {
              $current->setSubjEnd( $lastlineend + 3144 );
            }
            else {    #right now only alternative is rodents;
              $current->setSubjEnd( $lastlineend + 4384 );
            }
            $lastonecut        = 1;
            $lastoneisfineorf2 = "";
          }
          else {
            $lastonecut        = 0;
            $lastoneisfineorf2 = $name if !$reverse;
            $lastlinebegin     = $begin;
            print "Deleting annotation ($i): Filter #7 - Not extended (yet).\n"
                if ( $DEBUG );
            push @deleteList, $i;
          }
          $lastoneisfine3utr = "";
        }
        else {

          # I'm not cutting out 3' UTRs on opposite strand that begin < 150
          # and do not overlap with ORF2
          $lastoneisfine3utr = $lastoneisfineorf2 = "";
          $lastonecut = 0;
          print "Deleting annotation ($i): Filter #8 - A bodyless 3' UTR.\n"
              if ( $DEBUG );
          push @deleteList, $i;
        }
      }
      else {    # if <75% identical
        $lastonecut = 0;
        print "Deleting annotation ($i): Filter #9 - < 75% identical.\n"
            if ( $DEBUG );
        push @deleteList, $i;
      }
    }    # Class cut2.lib
         #
         # Class mirs
         #
    elsif ( $chooseClass eq 'mirs' ) {

      my $classnow  = $name;
      my $classthen = $lastlineName;
      $classnow  =~ s/.+\#//;
      $classthen =~ s/.+\#//;

      # Mirslib:
      # The MIR and MIR3 SINEs share 51 and 78 bp (inexactly) with the L2
      # and L3 LINEs, respectively. Occasionally, a fragment is matched to
      # both a SINE and a LINE, confusing ProcessRepeats and often extending
      # the match (and the masking) too far.  This happens quite often
      # > 1,000 times in HG19.
      #
      # Here is an example:
      # 392  28.7 19.6  3.2  chr1 65671617 65671936 +  L2a  3022 3393   (33)
      # 191  25.0  0.0  0.0  chr1 65671907 65671942 +  MIR   206  241   (21)
      #
      # Ratios appear to hold for nhmmer ( see linesine.fa example sequence ).
      #
      if (    $name =~ /^L2a|^L2b|^L3|^MIR/i
           && $havePreviousElement
           && $previous->getQueryEnd() > $current->getQueryStart() + 33
           && $classnow ne $classthen )
      {
        if (
             $current->getQueryEnd() - $previous->getQueryEnd() <= 30
             && (    $previous->getScore() >= 1.05 * $score
                  || $current->getQueryEnd() - $previous->getQueryEnd() <
                  $current->getQueryStart() - $previous->getQueryStart()
                  && $previous->getScore() >= 0.83 * $score )
            )
        {
          print "Deleting annotation ($i): Filter #9a - "
              . "Preceeding L2/L3/Mir better.\n"
              if ( $DEBUG );
          push @deleteList, $i;
        }
        elsif (
                $current->getQueryStart() - $previous->getQueryStart() <= 30
                && (    $score >= 1.05 * $previous->getScore()
                     || $current->getQueryEnd() - $previous->getQueryEnd() >
                     $current->getQueryStart() - $previous->getQueryStart()
                     && $score >= 0.83 * $previous->getScore() )
            )
        {
          print "Deleting annotation ($i-1): Filter #9a - "
              . "Following L2/L3/Mir better.\n"
              if ( $DEBUG );
          push @deleteList, $i - 1;
        }
      }
      else {

        # The crossmatch/blast thresholds for the mirslib are too
        # low for this one element.  NHMMSCAN uses per-model threshold
        # so we should not filter if using NHMMSCAN.
        # NOTE: This element is also in shortlib/shortcutlib
        if (    $options{'engine'} ne "hmmer"
             && $name =~ /MER91/
             && $score < 200 )
        {
          print "Deleting annotation ($i): Filter #9b - A poor MER91.\n"
              if ( $DEBUG );
          push @deleteList, $i;
        }
      }
    }    #class mirs
         #
         # Class sines
         #
    elsif ( $chooseClass eq 'sines' ) {

      if ( $name =~ /^MLT2B/ ) {

        # Occasionally the match to Ricksha is broken up by a gap
        # while spanned in one piece  by a (much more diverged) match to MLT2
        if (
             (
                  $havePreviousElement
               && $previous->getSubjName() =~ /^Ricksha/
               && $current->getQueryStart() < $previous->getQueryEnd() - 50
               && $score < $previous->getScore()
             )
             || (    $haveNextElement
                  && $next->getSubjName() =~ /^Ricksha/
                  && $current->getQueryEnd() > $next->getQueryStart() + 50
                  && $score < $next->getScore() )
            )
        {
          print
"Deleting annotation ($i): Filter \#9c - MLT2 really is Ricksha.\n"
              if ( $DEBUG );
          push @deleteList, $i;
        }
      }
      elsif (    $name eq 'MER3'
              && $current->getSubjStart() > 30
              && $current->getSubjEnd() < 125 )
      {

# MER3 and MER33 share TIRs, but MER3 has 47 bp extraTIReal seqeunce at the 5' end
# this sometimes leads to MER3 and MER33 alignments overlapping because
# the alignments can be (falsely) extended a few bases further to the MER3 consensus
        if (
             (
                  $havePreviousElement
               && $previous->getSubjName()   eq 'MER33'
               && $current->getOrientation() eq 'C'
               && $current->getQueryEnd() - $previous->getQueryEnd() < 20
             )
             || (    $haveNextElement
                  && $next->getSubjName()       eq 'MER33'
                  && $current->getOrientation() eq '+'
                  && $next->getQueryStart() - $current->getQueryStart() < 20 )
            )
        {
          print
              "Deleting annotation ($i): Filter \#9d - MER3 really is MER33.\n"
              if ( $DEBUG );
          push @deleteList, $i;
        }
      }

      # This was handled before the divergence filter.

      if ( $tax->isA( $options{'species'}, "rodentia" ) == 1 ) {

        #
        # TODO:  For now we don't have a HMM models for rodent.  When
        #        we do...we need to revisit the score ratios used below.
        #
        # Similar to above MER3/MER33 case, B4 contains a B1 and alignments
        # can continue a few meaningless bases beyond a B1
        if (    $havePreviousElement
             && $name                    =~ /^B1/
             && $previous->getSubjName() =~ /^B4/
             && $current->getQueryStart() < $previous->getQueryEnd() - 50
             && $score > 1.5 * $previous->getScore()
             && $previous->getSubjStart() >= 80 )
        {

          # deleted requirement of score > 226; this was in original code
          # as the B4 element would otherwise not be considered anyway
          print "Deleting annotation ($i-1): Filter \#10a.\n" if ( $DEBUG );
          push @deleteList, $i - 1;
        }
        elsif (    $haveNextElement
                && $name                =~ /^B4/
                && $next->getSubjName() =~ /^B1/
                && $next->getQueryStart() < $current->getQueryEnd() - 50
                && $next->getScore() > 1.5 * $score
                && $begin >= 80 )
        {
          print "Deleting annotation ($i): Filter \#10b.\n" if ( $DEBUG );
          push @deleteList, $i;
        }
        elsif ( $name =~ /SINE/ && $begin <= 100 )
        {    # check for false positives ID??
              # instead of setting $score < 225" in parameters, here we
              # set the cutoff for all none-SINEs allows SINEs with score
              # 200-225 to get masked
              # TODO: Check...this should never happen these days
          if ( $name eq "RSINE1" && $begin > 50 && $score < 225 ) {
            print "Deleting annotation ($i): Filter \#10bb.\n" if ( $DEBUG );
            push( @deleteList, $i );
          }

          # TODO: Is this even correct...we don't use getOverlap here?
        }
        elsif ( $name =~ /^ORR1.*-int/ && $current->getOverlap() eq '*' ) {

          # location of partially masked B1 in consensus;
          # only skipped if there is a better match overlapping it
          # this may occasionally leave a genuine small bit of
          # ORR1 internal unmasked if it overlaps with something
          # else than a B1
          # (MaLR internals are screened in the shortlib session)
          if (    $name =~ /^ORR1A1-int/ && $begin > 530 && $end < 680
               || $name =~ /^ORR1A3-int/ && $begin >= 595 && $end <= 910
               || $name =~ /^ORR1B1-int/ && $begin >= 585 && $end <= 860 )
          {
            print "Deleting annotation ($i): Filter #11.\n" if ( $DEBUG );
            push @deleteList, $i;
          }
        }
        elsif (    $name eq "RatSatRep1"
                && $begin > 1150
                && $end < 1750
                && $diverge > 16 )
        {

# this fragment matches a retroviral internal sequence (screened only in the next step)
          print "Deleting annotation ($i): Filter #12.\n" if ( $DEBUG );
          push @deleteList, $i;
        }
        elsif ( $score < 225 ) {
          print "Deleting annotation ($i): Filter #13.\n" if ( $DEBUG );
          push @deleteList, $i;
        }
        elsif ( $score <= 350 ) {

       # bits of low complexity in consensi that lead to matches in reversed DNA
          if (
               $name =~ /^RLTR21/   && $begin > 1090 && $end < 1365
            || $name =~ /^RMER12\#/ && $begin > 750  && $end < 1025
            || $score < 310 && (
              $name =~ /^RMER17/ && (
                   $name =~ /^RMER17A\#/  && $begin > 205 && $end < 460
                || $name =~ /^RMER17A2\#/ && $begin > 290 && $end < 495
                || $name =~ /^RMER17B\#/  && $begin > 840 && $end < 940
                ||    # that's an unexpected one
                   $name =~ /^RMER17B\#/  && $begin > 250 && $end < 445
                || $name =~ /^RMER17C\#/  && $begin > 80  && $end < 250
                || $name =~ /^RMER17D\#/  && $begin > 245 && $end <= 500
                || $name =~ /^RMER17D2\#/ && $begin > 255 && $end <= 525
              )
              || $name =~ /^RNLTR15A/
              && (    $name =~ /^RNLTR15A\#/ && $begin > 1440 && $end < 1550
                   || $name =~ /^RNLTR15A2\#/ && $begin > 1310 && $end < 1420 )
            )
              )
          {
            print "Deleting annotation ($i): Filter #14.\n" if ( $DEBUG );
            push @deleteList, $i;
          }
        }
      }
      elsif ( $tax->isA( $options{'species'}, "primates" ) == 1 ) {
        if (  $name =~ /^LTR66/ && $begin > 380 && $end < 470
           || $name =~ /^MER45R/ && $score < 275 && $begin > 310 && $end < 400 )
        {
          print "Deleting annotation ($i): Filter #15.\n" if ( $DEBUG );
          push @deleteList, $i;
        }
      }
    }    # class sines
         #
         # Class longlib
         #
         # Low complexity regions of LINEs.  NHMMSCAN masks out these
         # sections of the model so we do not need to filter out
         # hits like this.
    elsif ( $options{'engine'} ne "hmmer" && $chooseClass eq 'longlib' ) {

      if (
              $name =~ /L1MC3_3end/ && $begin > 1310 && $end < 1400
           || $name =~ /L1MC4_3end/ && $begin > 1325 && $end < 1475
           || $name =~ /^Lx9/       && $begin > 1860 && $end < 1940
           || $score <= 300
           && (    $name =~ /^L1_Mur2\#/ && $begin > 825 && $end < 932
                || $name =~ /L1MCa_5end/ && $begin > 1880 && $end < 1980
                || $name =~ /^L1_Mur3\#/ && $begin > 835  && $end < 905 )
          )
      {
        print "Deleting annotation ($i): Filter #16.\n" if ( $DEBUG );
        push @deleteList, $i;
      }
    }    # class longlib
         #
         # Class l1
         #
         # Low complexity regions of LINEs.  NHMMSCAN masks out these
         # sections of the model so we do not need to filter out
         # hits like this.
    elsif (    $options{'engine'} ne "hmmer"
            && $chooseClass eq 'l1'
            && $score < 425 )
    {    # run with -raw

      if (
           $name =~ /^HAL1\#/    && $begin > 900  && $end < 1350 && $score < 375
        || $name =~ /^HAL1b\#/   && $begin > 695  && $end < 910  && $score < 350
        || $name =~ /L1M4_orf2/  && $begin > 700  && $end < 1340 && $score < 375
        || $name =~ /L1MC3_3end/ && $begin > 1275 && $end < 1430 && $score < 350
        || $name =~ /L1MC4_3end/ && $begin > 1260 && $end < 1490 && $score < 400
        || $name =~ /L1MC4_5end/ && $begin > 850  && $end < 1340 && $score < 415
        || $name =~ /L1MCa_5end/ && $begin > 1855 && $end < 1995 && $score < 350
        || $name =~ /L1M4b_5end/ && (    $begin > 2800 && $end < 3350
                                      || $begin > 2180 && $end < 2300 )
        && $score < 400
        || $name =~ /L1MDa_5end/ && $begin > 2420 && $end < 2505 && $score < 350
        || $name =~ /L1ME4a_3end/ && $begin > 475 && $end < 725 && $score < 350
        || $name =~ /L1MEb_5end/  && $begin > 620 && $end < 800 && $score < 350
        || $name =~ /L1MEc_5end/ && $begin > 2100 && $end < 2500 && $score < 360
          )
      {
        print "Deleting annotation ($i): Filter #17.\n" if ( $DEBUG );
        push @deleteList, $i;
      }
    }    # class l1
         #
         # Class simple
         #
    elsif ( $chooseClass eq "simple" ) {

      # TODO: Check that now in the TRF era we are handling this correctly.
      #       My feeling is that we need to check for TTTT|(T)n now as well.
      if ( $name !~ /AAAAA|\(A\)n/ && $name !~ /Satellite/ ) {

        # avoid cutting out polyA tails
        # decision to cut out simple repeat dependent on the
        # complexity of the unit
        my ( $length, $complex ) = &calcSimpleRepeatComplexityFromName( $name );
        my $cutoff = 16 - 3 * ( $length - $complex );
        if (    $havePreviousElement
             && $previous->getQueryEnd() >= $current->getQueryStart() )
        {

# cutting out overlapping simple repeats leads to problems
# as some nucleotides are cut out twice (some flanking DNA
# will be deleted instead)
# masklevel is set to 1; masklevel 0 would avoid any
# overlaps, but behaves oddly ( only in crossmatch and not our implementation? )
          my $newQueryStart = $previous->getQueryEnd() + 1;
          $current->setQueryStart( $newQueryStart );
        }
        unless (    $name !~ /\([GA]*\)/ && $diverge + $gaps / 1.5 <= $cutoff
                 || $diverge + $gaps < ( $score - 200 ) / 50
                 || $diverge + $gaps <= 5 )
        {
          print "Deleting annotation ($i): Filter #18.\n" if ( $DEBUG );
          push @deleteList, $i;
        }
      }
      else {
        print "Deleting annotation ($i): Filter #19.\n" if ( $DEBUG );
        push @deleteList, $i;
      }
    }    # class simple
         #
         # Class alu
         #
    elsif ( $chooseClass eq "alu" ) {
      if (    $havePreviousElement
           && $previous->getQueryEnd() >= $current->getQueryStart() )
      {

        #we're stuck with a masklevel 1; 0 behaves odd
        my $newfield5 = $previous->getQueryEnd() + 1;

        # Again...see above....but why?
        #$current->setQueryName( quotemeta $current->getQueryName() );
        $current->setQueryStart( $newfield5 );

        #s/(.*$fields[4]\s+)$fields[5](\s$fields[6].*)/$1$newfield5$2/;
      }

      #
      # Ensure the element is full length
      #   For ALUs we are tolerant of a missing Poly-A tail.  The
      #   general case uses consensus remaining to ensure it's near
      #   full length.
      #
      #   FLAM/FRAM/FAM in primates are special cases
      #      They represent the monomers of the Alu sequence.
      #      These models should *not* be clipped out as removing full
      #      length Alus might succeed in uncovering two arms of
      #      another Alu that will score better than two F[L/R]AM
      #      elements. This is avoided by using a buffer for FLAM/FRAM
      #      in sinecutlib.
      #
      #   In non-primate species there are also sines we attempt
      #   to clip out.  Since we do not have the length of these
      #   elements hardcoded we use the begin/left fields to define
      #   what we mean by near-full length.
      #
      unless (    $name =~ /^Alu/ && ( $begin < 70 && $end > 185 )
               || $begin < 6 && $left < 5 )
      {
        print "Deleting annotation ($i): Filter #20.\n" if ( $DEBUG );
        push @deleteList, $i;
      }
    }    # class alu
         #
         # Class cut1
         #
    elsif ( $chooseClass eq "cut1" ) {

# for non-LINEs criteria are only that 5 bp or less are missing from ends;
# no need to require a maximum divergence, since element is complete (can't get better)
      if (    $begin < 6 && $left < 5 && $name !~ /LINE/
           || $name =~ /SINE/ && $begin < 6 && $left < 20 )
      {
        ## used to be restricted to Alu and SINE/B..;
        if (    $havePreviousElement
             && $previous->getQueryEnd() >= $current->getQueryStart() )
        {
          $current->setQueryStart( $previous->getQueryEnd() + 1 );

          # since both last and current are full length, no overlaps
          #of more than a few bases occur
        }

        # TODO: This looks like we are trying to buffer BC[12]* elements in
        # this stage.  Perhaps it would be better to just buffer them?  See
        # similar comment under the "alu" filter for FAM/FLAM/FRAM.
        if ( ( $name =~ /RNA$/ || $name =~ /^7SL/ ) && $options{'norna'}
             || $name =~ /^BC[12]\S+SINE/ )
        {

          #fixed feb 03; was RNA &&
          print "Deleting annotation ($i): Filter #21. Avoid cutting out RNAs"
              . "when the user has shut this off.  Also if this is a BC[12] SINE "
              . "we don't wish to cut this out in the cut1 phase???\n"
              if ( $DEBUG );
          push @deleteList, $i;
        }
      }
      else {
        print "Deleting annotation ($i): Filter #22: Non-complete elements "
            . "could overlap with full length elements.  We want to avoid "
            . "shortening full length elements when the evidence is weak.\n"
            if ( $DEBUG );
        push @deleteList, $i;
      }
    }    #class cut1
         #
         # Class alumask
         #
    elsif ( $chooseClass eq "alumask" ) {

      # NO LONGER USED
    }    # class alumask
         #
         # Catchall class
         #
    else {    # anything that give false positives in the other libraries
              # Arian: it's possible that this doesn't matter
              # TODO: Check this.
              # NHMMSCAN certainly doesn't need this
      if ( $options{'engine'} ne "hmmer" && $score < 300 ) {
        if (    $name =~ /PRIMA4-int/ && $begin > 475 && $end < 575
             || $name eq "LTR39-int" && $begin > 3900 && $end < 3990
             || $name eq "LTR38-int" && $begin > 1350 && $end < 1450 )
        {
          print "Deleting annotation ($i): Filter #24.\n" if ( $DEBUG );
          push @deleteList, $i;
        }
      }
    }

    if ( $name =~ /RNA$/ && $options{'norna'} ) {
      print "Deleting annotation ($i): Filter #25.\n" if ( $DEBUG );
      push @deleteList, $i;
    }

    #
    # Contained elements surviving to this point can cause problems in
    # the excision routine.  Ie.
    #    Excising repeat ( step = 4 ): 53276 - 53394
    #    Excising repeat ( step = 4 ): 53262 - 53394
    #       FastaDB::substr - Error index out of bounds!
    #             at ./RepeatMasker line 4589
    # Delete only if in a cut class, has not already been deleted,
    # is in the same sequence, and overlaps at all with the previous
    # undeleted element.
    #
    if (
         $chooseClass =~ /simple|alu|cut1|cut2/
         && ( !@deleteList
              || $deleteList[ $#deleteList ] != $i )
         && $undelPrevious
         && $current->getQueryName() eq $undelPrevious->getQueryName()
         && $undelPrevious->getQueryEnd() >= $current->getQueryEnd()
        )
    {
      print "Deleting annotation ($i): Filter #26 - Contained by filter\n"
          if ( $DEBUG );
      push @deleteList, $i;
    }

    $lastlineName = $name;
    $lastlineleft = $left;
    $lastlineend  = $end;

    $undelPrevious = $current
        if ( !@deleteList
             || $deleteList[ $#deleteList ] != $i );

  }    # for ( my $i = 0 ; $i < $searchResults->size()...

  # Return a boolean indicating if there were hits left
  # after this process completed
  if ( @deleteList ) {

    # Remove duplicates - not sure if the above generates duplicates but
    # to be on the safe side
    my %seen = ();
    my @uniqDeleteList = grep { !$seen{$_}++ } @deleteList;
    print "Doing actual removals:\n" if ( $DEBUG );
    foreach my $index ( sort { $b <=> $a } @uniqDeleteList ) {
      print "Removing element $index\n" if ( $DEBUG );
      $searchResults->remove( $index );
    }
  }

  # DEBUG output
  if ( $DEBUG ) {
    print "Final Annotations:\n";
    for ( my $i = 0 ; $i < $searchResults->size() ; $i++ ) {
      print "#$i:  "
          . $searchResults->get( $i )
          ->toStringFormatted( SearchResult::NoAlign );
    }
    print "\n";
  }
  $DEBUG = 0;
}    # sub filterResults

##-------------------------------------------------------------------------##
## Use:  my
##
##
##  Returns
##
##  Globals Used: None
##-------------------------------------------------------------------------##
sub calcSimpleRepeatComplexityFromName {
  my %num = ( "A", 0, "C", 0, "G", 0, "T", 0 );
  my %log = ( "A", 0, "C", 0, "G", 0, "T", 0 );
  my $name = shift;
  $name =~ s/^\((\w+).*/$1/;
  my $length = length $name;
  my @bases = split( //, $name );
  foreach my $base ( @bases ) {
    ++$num{$base};
  }
  $log{A} = $num{A} * log( $num{A} ) if $num{A};
  $log{C} = $num{C} * log( $num{C} ) if $num{C};
  $log{G} = $num{G} * log( $num{G} ) if $num{G};
  $log{T} = $num{T} * log( $num{T} ) if $num{T};
  my $complex =
      ( $log{A} + $log{C} + $log{G} + $log{T} - $length * log( $length ) ) / -
      log( 4 );
  return ( $length, $complex );
}

##-------------------------------------------------------------------------##
## Use:  my (\%begin, \%end) = getSegments( $resultsCollection );
##
##
##  Returns
##
##    Looks like this was intended to read the search engine
##    output and look for hit lines.
##
##    From these lines it creates two data structures.  One
##    holds a list (in ascending order) of the start positions
##    and one holds the end positions of all hits in the output.
##
##     %begin = { name1 => [ pos1, pos2, pos3, ... ],
##                name2 => [ pos1, pos2, ... ] };
##     %end   = { name1 => [ pos1, pos2, pos3, ... ],
##                name2 => [ pos1, pos2, ... ] };
##
##     revamped to use a SearchResultCollection.
##
##  NO Globals Used
##-------------------------------------------------------------------------##
sub getSegments {
  my $resultsCollection = shift;

  my %begin = ();    # associative arrays which assign lists (begin and ends)
  my %end   = ();    # to each sequence name: Technically, this assigns a list
                     # reference to each sequence name.
  my ( $lastend, $lastname ) = ();
  for ( my $i = 0 ; $i < $resultsCollection->size() ; $i++ ) {
    my $result = $resultsCollection->get( $i );
    my $name   = $result->getQueryName();
    my $begin  = $result->getQueryStart();
    my $end    = $result->getQueryEnd();

    # Blunt end overlapping annotations
    if (    $lastend
         && $name eq $lastname
         && $lastend >= $begin )
    {

      # otherwise get negative lengths
      $begin = $lastend + 1 if ( ( $lastend + 1 ) < $end );
    }

    push @{ $begin{$name} }, $begin;
    push @{ $end{$name} },   $end;
    $lastname = $name;
    $lastend  = $end;
  }
  return ( \%begin, \%end );
}

sub postProcessSearch {
  my $options           = shift;
  my $resultsCollection = shift;
  my $excisions         = shift;
  my $cutResults        = shift;
  my $sentinelLength    = shift;
  my $seqDB             = shift;
  my $optInv            = shift;
  my $outfile           = shift;
  my $refineableHashRef = shift;
  my $refinementHash    = shift;
  my $batchNum          = shift;
  my $stage             = shift;

  my $subroutine = ( caller( 0 ) )[ 0 ] . "::" . ( caller( 0 ) )[ 3 ];

  #
  # The resultsCollection is expected to be in seqID,queryPos (ascending)
  # sorted order.  Therefore we can get SeqID,queryPos(descending) by
  # starting from the end and work backwards.
  #
  my $excisionRanges = new SearchResultCollection();
  my $prevBeg        = 0;
  my $prevName       = "";

  for ( my $i = $resultsCollection->size() - 1 ; $i >= 0 ; $i-- ) {
    my $result = $resultsCollection->get( $i );
    my $name   = $result->getQueryName();
    my $begin  = $result->getQueryStart();
    my $end    = $result->getQueryEnd();

    #
    # Do all functions based on found coordinates
    #

    # Cut or Mask SeqDB
    my $len = $end - $begin + 1;
    if ( $cutResults && !$options{'nocut'} ) {

      # Check for overlaps
      if ( $prevName eq $name && $prevBeg <= $end ) {
        $len = $prevBeg - $begin;

        # Handle special case where something like this occurs:
        #        --------->
        #        ----->
        if ( $len > 0 ) {
          $seqDB->setSubstr( $name, $begin - 1, $len, 'x' x $sentinelLength );
          $excisionRanges->add(
                                new SearchResult(
                                                  queryName  => $name,
                                                  queryStart => $begin,
                                                  queryEnd   => $prevBeg - 1
                                )
          );
        }
      }
      else {
        $seqDB->setSubstr( $name, $begin - 1, $len, 'x' x $sentinelLength );
        $excisionRanges->add(
                              new SearchResult(
                                                queryName  => $name,
                                                queryStart => $begin,
                                                queryEnd   => $end
                              )
        );
      }
    }
    else {
      eval { $seqDB->setSubstr( $name, $begin - 1, $len, 'X' x $len ) };
      if ( $@ ) {
        croak "$subroutine: $@\nAttempting to mask $name from "
            . ( $begin - 1 ) . " to "
            . ( $end )
            . " ( len = $len )\n";
      }
    }
    $prevBeg  = $begin;
    $prevName = $name;

    #  Modify divergence
    #   TODO: Expand on the motivation here. Divergence should not
    #         be spread over large insertions.  It should be related
    #         to the aligned bases only.
    my $percDiv = $result->getPctDiverge();
    my $percIns = $result->getPctInsert();
    my $adjDiv  = $percDiv;
    if ( $percIns < 100 ) {
      $adjDiv = 100 * ( $percDiv / ( 100 - $percIns ) );
    }
    $result->setPctDiverge( sprintf( "%4.2f", $adjDiv ) );

    #
    #  New ID Scheme:
    #      action:  m = Mask, c = Cut
    #      batch:   Batch number
    #      stage:   Search Stage
    #      element: Position in search results
    #
    #  i.e The 3rd element coming from batch 2 and search stage 7 and
    #      was cut would be labeled:  c_b2s7i3
    #
    my $newID;
    if ( $cutResults && !$options{'nocut'} ) {
      $newID .= "c_b";
    }
    else {
      $newID .= "m_b";
    }
    $newID .= $batchNum . "s" . $stage . "i" . $i;
    $result->setId( $newID );

    # Adjust coordinates
    my ( $globalBegin, $globalEnd ) =
        &queryExcision( $result->getQueryName(), $result->getQueryStart(),
                        $result->getQueryEnd(), $excisions, $sentinelLength );

    # RMH: 11/26/12
    # Must run before $result is modified and after new coordinates are
    # obtained.
    if ( &isRefineable( $result, $refineableHashRef ) ) {
      addToRefinementCollection(
                                 $result,      $refinementHash,
                                 $seqDB,       $batchNum,
                                 $stage,       $i,
                                 $globalBegin, $globalEnd,
                                 $excisions
      );
    }

    print "TRANSLATING: $begin-$end to $globalBegin-$globalEnd\n" if ( $DEBUG );
    $result->setQueryStart( $globalBegin );
    $result->setQueryEnd( $globalEnd );
  }

  # Fragment annotations
  &fragmentAnnotations( $resultsCollection, $excisions, $sentinelLength );

  # Update excise list.
  if ( $cutResults && !$options{'nocut'} ) {
    $excisionRanges->sort(
      sub ($$) {
        (    ( $_[ 0 ]->getQueryName() cmp $_[ 1 ]->getQueryName() )
          || ( $_[ 0 ]->getQueryStart() <=> $_[ 1 ]->getQueryStart() ) );
      }
    );

    &addExcisionSet( $excisionRanges, $excisions, $sentinelLength );
  }

  # Write out results
  if ( $optInv ) {
    $resultsCollection->write( $outfile, SearchResult::AlignWithSubjSeq );
  }
  else {
    $resultsCollection->write( $outfile, SearchResult::AlignWithQuerySeq );
  }

  return;

}

sub addExcisionSet {
  my $resultCollection = shift;
  my $excisions        = shift;
  my $sentinelLength   = shift;

  my $DEBUG      = 0;
  my $subroutine = ( caller( 0 ) )[ 0 ] . "::" . ( caller( 0 ) )[ 3 ];

  my %deleteList = ();

  #
  # Merge overlaping ranges
  #
  # NOTE: This expect a set sorted on queryStart.
  #       Overlaps will not be detected if it
  #       is not sorted.
  #
  my @ranges = ();
  my $i      = 0;
  while ( $i < $resultCollection->size() ) {
    my $result = $resultCollection->get( $i );

    my $id    = $result->getQueryName();
    my $start = $result->getQueryStart();
    my $end   = $result->getQueryEnd();

    my $j = $i + 1;
    while ( $j < $resultCollection->size() ) {
      my $nextResult = $resultCollection->get( $j );
      if (    $id eq $nextResult->getQueryName()
           && $end > $nextResult->getQueryStart() )
      {
        print "$subroutine: Overlap detected: $start-$end and "
            . $nextResult->getQueryStart() . "-"
            . $nextResult->getQueryEnd() . "\n"
            if ( $DEBUG );
        $end = $nextResult->getQueryEnd()
            if ( $end < $nextResult->getQueryEnd() );
        $i++;
      }
      else {
        last;
      }
      $j++;
    }
    push @ranges, [ $id, $start, $end ];
    $i++;
  }

  #
  # Add ranges to excision list
  #
  foreach my $range ( @ranges ) {
    my $id    = $range->[ 0 ];
    my $start = $range->[ 1 ];
    my $end   = $range->[ 2 ];

    print "$subroutine: Adding range: $id:$start-$end ( "
        . ( $end - $start + 1 ) . " )\n"
        if ( $DEBUG );

    my $posAdjustment = 0;

    my $deleteStart = -1;
    my $startPosAdj = -1;
    my $added       = 0;

    if ( defined $excisions->{$id} ) {
      my $i;
      for ( $i = 0 ; $i <= $#{ $excisions->{$id} } ; $i++ ) {
        my $excisionRec = $excisions->{$id}->[ $i ];

        print "$subroutine: Considering excision: $excisionRec->[0]:"
            . "$excisionRec->[1]-$excisionRec->[2] "
            . "( posAdj = $posAdjustment )\n"
            if ( $DEBUG );

        next if ( $excisionRec->[ 0 ] eq "new" );

        if ( $excisionRec->[ 1 ] - $posAdjustment > $end ) {

          # This excision is beyond us. No need to continue.
          print
              "$subroutine: Left of this one. posAdjustment = $posAdjustment\n"
              if ( $DEBUG );
          last;
        }
        elsif ( $excisionRec->[ 1 ] - $posAdjustment <= $start ) {

          # must account for this previous excision
          print "$subroutine: Accounting for prev..\n" if ( $DEBUG );
          $posAdjustment += $excisionRec->[ 2 ] - $sentinelLength;
        }
        elsif (    $excisionRec->[ 1 ] - $posAdjustment >= $start
                && $excisionRec->[ 1 ] - $posAdjustment <= $end )
        {
          print "$subroutine: Contained. $posAdjustment makes this x sit at:"
              . ( $excisionRec->[ 1 ] - $posAdjustment ) . "\n"
              if ( $DEBUG );

          # Flag for deletion
          $deleteStart = $i if ( $deleteStart == -1 || $deleteStart > $i );
          $excisions->{$id}->[ $i ]->[ 0 ] = "del";

          $startPosAdj = $posAdjustment if ( $startPosAdj == -1 );
          $posAdjustment += $excisionRec->[ 2 ] - $sentinelLength;
        }
      }    # for all excisions

      if ( $startPosAdj == -1 ) {
        print "$subroutine: Adding to middle or end ( "
            . ( $start + $posAdjustment ) . ","
            . ( $end - $start + 1 ) . " )\n"
            if ( $DEBUG );
        splice( @{ $excisions->{$id} },
                $i, 0, [ "new", $start + $posAdjustment, $end - $start + 1 ] );
        next;
      }
      else {
        print "$subroutine: Splicing out contained startPosAdj = $startPosAdj "
            . "posAdj = $posAdjustment : new "
            . ( $start + $startPosAdj ) . ","
            . ( ( $end + $posAdjustment ) - ( $start + $startPosAdj ) + 1 )
            . "\n"
            if ( $DEBUG );

        # Add just before deletion
        splice(
                @{ $excisions->{$id} },
                $deleteStart,
                0,
                [
                  "new",
                  $start + $startPosAdj,
                  ( $end + $posAdjustment ) - ( $start + $startPosAdj ) + 1
                ]
        );
        next;
      }
    }
    else {
      print "Adding first\n" if ( $DEBUG );
      push @{ $excisions->{$id} }, [ "new", $start, $end - $start + 1 ];
    }
  }    # Results

  foreach my $idKey ( keys( %{$excisions} ) ) {
    for ( my $j = $#{ $excisions->{$idKey} } ; $j >= 0 ; $j-- ) {
      if ( $excisions->{$idKey}->[ $j ]->[ 0 ] eq "del" ) {
        print "Deleting $idKey:$j $excisions->{$idKey}->[$j]->[0] \n"
            if ( $DEBUG );
        splice( @{ $excisions->{$idKey} }, $j, 1 );
      }
      elsif ( $excisions->{$idKey}->[ $j ]->[ 0 ] eq "new" ) {
        $excisions->{$idKey}->[ $j ]->[ 0 ] = "old";
      }
    }
  }
}

sub queryExcision {
  my $id             = shift;
  my $start          = shift;
  my $end            = shift;
  my $excisions      = shift;
  my $sentinelLength = shift;

  my $DEBUG      = 0;
  my $subroutine = ( caller( 0 ) )[ 0 ] . "::" . ( caller( 0 ) )[ 3 ];

  my $posAdjustment = 0;
  my $deleteStart   = -1;
  my $startPosAdj   = -1;

  if ( defined $excisions->{$id} ) {
    for ( my $i = 0 ; $i <= $#{ $excisions->{$id} } ; $i++ ) {
      my $excisionRec = $excisions->{$id}->[ $i ];

      print
"$subroutine: Considering excision: $excisionRec->[0]:$excisionRec->[1]-$excisionRec->[2] "
          . "( posAdj = $posAdjustment )\n"
          if ( $DEBUG );

      if ( $excisionRec->[ 1 ] - $posAdjustment > $end ) {

        # This excision is beyond us. No need to continue.
        print
"$subroutine: This excision is beyond us. No need to consider others.\n"
            if ( $DEBUG );
        last;
      }
      elsif ( $excisionRec->[ 1 ] - $posAdjustment <= $start ) {

        # must account for this previous excision
        print "Accounting for prev.. ( "
            . ( $excisionRec->[ 1 ] - $posAdjustment ) . ")\n"
            if ( $DEBUG );
        $posAdjustment += $excisionRec->[ 2 ] - $sentinelLength;
      }
      elsif (    $excisionRec->[ 1 ] - $posAdjustment >= $start
              && $excisionRec->[ 1 ] - $posAdjustment <= $end )
      {
        $startPosAdj = $posAdjustment if ( $startPosAdj == -1 );
        $posAdjustment += $excisionRec->[ 2 ] - $sentinelLength;
        print "Contained.. $startPosAdj, $posAdjustment\n" if ( $DEBUG );
      }
    }
    if ( $startPosAdj > -1 ) {
      return ( $start + $startPosAdj, $end + $posAdjustment );
    }
    else {
      return ( $start + $posAdjustment, $end + $posAdjustment );
    }
  }
  else {
    return ( $start, $end );
  }
}

##-------------------------------------------------------------------------##
## Use: $groupedAnnots = &fragmentAnnotations( $resultsCollection, $excisions,
##                            $numX );
##
## Returns
##
##    Determine which annotations contain previously excised elements and
##    fragment them.
##
##-------------------------------------------------------------------------##
sub fragmentAnnotations {
  my $resultsCollection = shift;
  my $excisions         = shift;
  my $numX              = shift;

  my $DEBUG            = 0;
  my $newSearchResults = SearchResultCollection->new();
  my %deleteHash       = ();
  my @groupedAnnots    = ();
  for ( my $j = 0 ; $j < $resultsCollection->size() ; $j++ ) {
    my $result      = $resultsCollection->get( $j );
    my $resultBegin = $result->getQueryStart();
    my $resultEnd   = $result->getQueryEnd();
    my $queryID     = $result->getQueryName();

    if ( $DEBUG ) {
      print "Considering fragmenting:\n";
      print ""
          . $result->toStringFormatted( SearchResult::AlignWithQuerySeq )
          . "\n";
      print "" . $result->toString . "\n";
    }

    if ( $excisions->{$queryID} ) {

      # Loop over previous cut out elements for this $seqID
      my $internalBegin = -1;
      my $internalEnd   = -1;

      my @subSegmentList = ();
      my $newResultBegin = $resultBegin;

      print "Element boundaries: $resultBegin - $resultEnd\n" if ( $DEBUG );

      foreach my $hit ( @{ $excisions->{$queryID} } ) {
        my $hitBegin = $hit->[ 1 ];
        my $hitEnd   = $hitBegin + $hit->[ 2 ] - 1;
        last if ( $hitBegin > $resultEnd );
        next if ( $hitBegin < $resultBegin );
        next
            if (    $hitBegin == $resultBegin
                 && $hitEnd == $resultEnd );

        # Deal with this one
        $internalBegin = $hitBegin;
        $internalEnd   = $hitEnd;

        print "Found internal: $internalBegin - $internalEnd\n"
            if ( $DEBUG );

        if ( ( $internalBegin - 1 ) >= $newResultBegin ) {
          print "  - Pushing $newResultBegin - " . ( $internalBegin - 1 ) . "\n"
              if ( $DEBUG );
          push @subSegmentList,
              {
                'begin' => $newResultBegin,
                'end'   => $internalBegin - 1
              };
        }

        $newResultBegin = $hitEnd + 1;
      }
      if (    $internalBegin > $result->getQueryStart()
           && $internalEnd < $resultEnd )
      {

        print "last Annotation from "
            . ( $internalEnd + 1 )
            . " to $resultEnd\n"
            if ( $DEBUG );
        push @subSegmentList,
            {
              'begin' => $internalEnd + 1,
              'end'   => $resultEnd
            };
      }

      if ( $#subSegmentList >= 0 ) {
        my $resultSubCollection =
            &createSubElements( $result, \@subSegmentList, $numX );
        $deleteHash{$j} = 1;    # Signal that this element has been fragmented
        my @fragGroup;
        for ( my $i = 0 ; $i < $resultSubCollection->size() ; $i++ ) {
          push @fragGroup, $resultSubCollection->get( $i );
        }
        push @groupedAnnots, [ @fragGroup ];
        $newSearchResults->addAll( $resultSubCollection );

        if ( $DEBUG ) {
          print "Fragmenting Element:\n"
              . $result->toStringFormatted( SearchResult::AlignWithQuerySeq )
              . "\n";
          for ( my $i = 0 ; $i < $resultSubCollection->size() ; $i++ ) {
            print "New Segment:\n";
            print ""
                . $resultSubCollection->get( $i )
                ->toStringFormatted( SearchResult::AlignWithQuerySeq ) . "\n";
          }
        }
      }
      else {
        push @groupedAnnots, [ $result ];
      }
    }
  }    # for

  # Delete all fragment parents
  foreach my $index ( sort { $b <=> $a } keys( %deleteHash ) ) {
    $resultsCollection->remove( $index );
  }

  # Add all fragments to the results collection
  $resultsCollection->addAll( $newSearchResults );

  # Sort the results collection
  $resultsCollection->sort(
    sub ($$) {
      $_[ 0 ]->getQueryStart() <=> $_[ 1 ]->getQueryStart();
    }
  );

  $DEBUG = 0;

  return ( \@groupedAnnots );
}

##-------------------------------------------------------------------------##
## Use: my $fragResults = &createSubElements( $parentElement,
##                                            $subSegmentList,
##                                            $numX );
##
## Results
##
##   Given a SearchResult and a list of start/end positions fragment
##   the element and return a SearchResultCollection containing the
##   new fragments.
##
##-------------------------------------------------------------------------##
sub createSubElements {
  my $parentElement  = shift;
  my $subSegmentList = shift;
  my $numX           = shift;

  my $DEBUG = 0;

  my $parentQueryStart  = $parentElement->getQueryStart();
  my $parentQueryEnd    = $parentElement->getQueryEnd();
  my $parentQueryLength = $parentQueryEnd - $parentQueryStart + 1;

  my $parentQuerySeq = $parentElement->getQueryString() || "";
  my $parentSubjSeq  = $parentElement->getSubjString()  || "";
  my $parentSubjStart = $parentElement->getSubjStart();
  my $parentSubjEnd   = $parentElement->getSubjEnd();
  my $parentSubjLen   = abs( $parentSubjEnd - $parentSubjStart ) + 1;

  my $newResultColl = SearchResultCollection->new();

  my $numSegments = $#{$subSegmentList} + 1;

  my $segSubjStart = $parentSubjStart;
  my $segSubjEnd   = $parentSubjEnd;

  my $realQueryLength = 0;
  for ( my $j = 0 ; $j < $numSegments ; $j++ ) {
    $realQueryLength +=
        $subSegmentList->[ $j ]->{'end'} - $subSegmentList->[ $j ]->{'begin'} +
        1;
  }

  for ( my $j = 0 ; $j < $numSegments ; $j++ ) {

    my $segQueryStart = $subSegmentList->[ $j ]->{'begin'};
    my $segQueryEnd   = $subSegmentList->[ $j ]->{'end'};
    my $segQueryLen   = $segQueryEnd - $segQueryStart + 1;
    print "RepeatMasker::createSubElements: segQueryStart = $segQueryStart,"
        . " segQueryEnd = $segQueryEnd, segQueryLen = $segQueryLen, "
        . " segSubjStart = $segSubjStart segSubjEnd = $segSubjEnd\n "
        if ( $DEBUG );

    # Do not produce very small alignemnts
    # Note: This can create small gaps ( usually in low quality
    #       regions ) that must be taken into account in ProcessRepeats.
    if ( 0 && $segQueryLen < 5 ) {
      print "RepeatMasker::createSubElements: Annotation is to "
          . "small to report ( len = $segQueryLen, numX = $numX, "
          . "len(parentQuerySeq) = "
          . length( $parentQuerySeq ) . " )\n"
          if ( $DEBUG );

      my $adjLen = 0;
      if ( $segQueryLen < 1 ) {

        # No bases...just move down by numX
        $adjLen = $numX;
      }
      else {
        my $bSeen  = 0;
        my $skipTo = 0;
        while (    $bSeen < $segQueryLen
                && $skipTo < length( $parentQuerySeq ) )
        {
          $bSeen++ if ( substr( $parentQuerySeq, $skipTo++, 1 ) ne "-" );
        }
        print "RepeatMasker::createSubElements: skipTo=$skipTo, bSeen=$bSeen\n"
            if ( $DEBUG );
        $adjLen = $skipTo + $numX;
      }

      if ( $adjLen <= length( $parentQuerySeq ) ) {

        # Adjust subject start
        my $sbjBases = substr( $parentSubjSeq, 0, $adjLen );
        $sbjBases =~ s/-//g;
        $segSubjStart += length( $sbjBases );
        $parentQuerySeq = substr( $parentQuerySeq, $adjLen );
        $parentSubjSeq  = substr( $parentSubjSeq,  $adjLen );
      }

      next;
    }

    my $newSegment = $parentElement->clone();
    $newSegment->setQueryStart( $segQueryStart );
    $newSegment->setQueryEnd( $segQueryEnd );
    $newSegment->setQueryRemaining(
         $parentQueryEnd - $segQueryEnd + $parentElement->getQueryRemaining() );

    # If alignment info
    if ( $parentQuerySeq ne "" ) {

      # Count through query seq until we reach the breakpoint ( discount "-" )
      my $seqCount = 0;
      my $i        = 0;
      while ( $seqCount <= $segQueryLen ) {
        $seqCount++ unless ( substr( $parentQuerySeq, $i++, 1 ) eq '-' );
      }

      # Don't include the X
      my $newQuerySeq = substr( $parentQuerySeq, 0, $i - 1 );
      my $newSubjSeq  = substr( $parentSubjSeq,  0, $i - 1 );

      $newSegment->setQueryString( $newQuerySeq );
      $newSegment->setSubjString( $newSubjSeq );

      # Set parent to remaining sequence if any
      if ( ( $i + $numX ) <= length( $parentQuerySeq ) ) {
        $parentQuerySeq = substr( $parentQuerySeq, $i - 1 + $numX );
        $parentSubjSeq  = substr( $parentSubjSeq,  $i - 1 + $numX );
      }

      $newSubjSeq =~ s/-//g;

      #print "segQueryStart = $segQueryStart - $segQueryEnd\n";
      #print "newSubjSeq = $newSubjSeq\n";
      if ( $parentElement->getOrientation() eq "C" ) {
        $newSegment->setSubjEnd( $segSubjEnd );
        $newSegment->setSubjRemaining(
            $parentSubjEnd - $segSubjEnd + $parentElement->getSubjRemaining() );
        if ( $newSubjSeq eq "" ) {
          $newSegment->setSubjStart( $segSubjEnd );
        }
        else {
          $segSubjEnd -= length( $newSubjSeq );
          $newSegment->setSubjStart( $segSubjEnd + 1 );
        }
      }
      else {
        $newSegment->setSubjStart( $segSubjStart );
        if ( $newSubjSeq eq "" ) {
          $segSubjEnd = $segSubjStart;
        }
        else {
          $segSubjEnd = $segSubjStart + length( $newSubjSeq ) - 1;
          $segSubjStart += length( $newSubjSeq );
        }
        $newSegment->setSubjEnd( $segSubjEnd );
        $newSegment->setSubjRemaining(
            $parentSubjEnd - $segSubjEnd + $parentElement->getSubjRemaining() );
      }

    }
    else {

      # We have no alignment information.  We cannot be sure
      # how many subject characters match this subsegment of
      # the alignment.  Realignment can be too costly so we
      # will guestimate the number.  This is not really proper
      # and we should make a note of this in the output.  Currently
      # ProcessRepeats is doing this!
      my $segSubjLength = 0;
      if ( $j == ( $numSegments - 1 ) ) {
        if ( $parentElement->getOrientation() eq "C" ) {
          $segSubjLength = $segSubjEnd - $parentSubjStart;
        }
        else {
          $segSubjLength = $parentSubjEnd - $segSubjStart;
        }
      }
      else {
        my $percQuerySegLength =
            ( $segQueryEnd - $segQueryStart + 1 ) / $realQueryLength;
        $segSubjLength = int( $parentSubjLen * $percQuerySegLength );
      }

      if ( $parentElement->getOrientation() eq "C" ) {
        $newSegment->setSubjEnd( $segSubjEnd );
        $newSegment->setSubjRemaining(
            $parentSubjEnd - $segSubjEnd + $parentElement->getSubjRemaining() );
        $segSubjEnd -= $segSubjLength;
        $newSegment->setSubjStart( $segSubjEnd );
        $segSubjEnd--;
      }
      else {
        $newSegment->setSubjStart( $segSubjStart );
        $segSubjEnd = $segSubjStart + $segSubjLength;
        $newSegment->setSubjEnd( $segSubjEnd );
        $segSubjStart = $segSubjEnd + 1;
        $newSegment->setSubjRemaining(
            $parentSubjEnd - $segSubjEnd + $parentElement->getSubjRemaining() );
      }

    }
    $newResultColl->add( $newSegment );
  }

  return $newResultColl;

}

##-------------------------------------------------------------------------##
## Use:  my saveOldFiles( $fijl, $fileend, $originaldir, $date, \%options );
##
##
##  Returns
##
##  Globals Used: None
##-------------------------------------------------------------------------##
sub saveOldFiles {
  my $fijl        = shift;
  my $fileend     = shift;
  my $originaldir = shift;
  my $date        = shift;
  my %options     = %{ shift() };

  $fijl = $options{'dir'} . "\/$fileend" if $options{'dir'};
  if ( $options{'is_only'} ) {
    rename( "$fijl.alert", "$fijl.alert.pre$date" )
        && print "\nOld file $fijl.alert renamed to $fijl.alert.pre$date\n\n"
        if -s "$fijl.alert";
    unlink "$fijl.withoutIS" if -s "$fijl.withoutIS";
  }
  else {
    my $savedir = "$originaldir\/$fileend.pre$date.RMoutput";
    $savedir = $options{'dir'} . "\/$fileend.pre$date.RMoutput"
        if $options{'dir'};
    mkdir $savedir, 0777;
    rename( "$fijl.cat",    "$savedir\/$fileend.cat" )    if -s "$fijl.cat";
    rename( "$fijl.stderr", "$savedir\/$fileend.stderr" ) if -s "$fijl.stderr";
    rename( "$fijl.out",    "$savedir\/$fileend.out" )    if -s "$fijl.out";
    rename( "$fijl.masked", "$savedir\/$fileend.masked" ) if -s "$fijl.masked";
    rename( "$fijl.tbl",    "$savedir\/$fileend.tbl" )    if -s "$fijl.tbl";
    rename( "$fijl.cut",    "$savedir\/$fileend.cut" )
        if -s "$fijl.cut" && $options{'cut'};
    rename( "$fijl.align", "$savedir\/$fileend.align" )
        if -s "$fijl.align" && $options{'a'};
    rename( "$fijl.alert", "$savedir\/$fileend.alert" )
        if -s "$fijl.alert" && !$options{'no_is'};
    rename( "$fijl.withoutIS", "$savedir\/$fileend.withoutIS" )
        if -s "$fijl.withoutIS" && $options{'is_clip'};
    rmdir $savedir || print "
Some previous RepeatMasker output files were moved to the directory 
$savedir 
in order not to overwrite them.\n\n";
  }
}

##-------------------------------------------------------------------------##
## Use:  my &SkipFile( $file );
##
##
##  Returns
##
##  Globals Used: None
##-------------------------------------------------------------------------##
sub SkipFile {
  my $file = shift;

  copy( $file, "$file.masked" );
  ( my $tempfile = $file ) =~ s/.+\///;
  print
"RepeatMasker quit because the file $tempfile only contains ambiguous bases, if any.
To accomodate automated processes the file has been copied to $tempfile.masked and this message has been printed to $tempfile.out\n\n";
  open( OUT, ">$file.out" );
  print OUT
"RepeatMasker quit because the file $tempfile only contains ambiguous bases, if any.\n";
  close OUT;
}

##-------------------------------------------------------------------------##
## Use:  my ( $tempdir, $runnumber ) = &createTempDir( \%options, $date,
##                                                     $file );
##
##
##  Returns
##
##   TODO: Clean this up!!!  It uses globals and makes assumptions
##         about how files are passed to repeatmasker etc..
##
##   Globals Used: ARGV[0]
##-------------------------------------------------------------------------##
sub createTempDir {
  my %options = %{ shift() };
  my $date    = shift;
  my $file    = shift;

  my $curdir = cwd();

  # To make cygwin happy - Contributed by Mike Seivers of TimeLogic
  $curdir =~ s/ /\\ /go;

  my ( $querydir, $fileendname ) =
      ( File::Spec->splitpath( $ARGV[ 0 ] ) )[ 1, 2 ];
  $querydir = "." if ( $querydir eq "" );

  # Used to avoid including existing files in output even
  # if $options{'dir'} chosen, preferred to write the temporary
  # files to a temporary subdirectory of the home directory, as
  # $options{'dir'} may be across system boundaries:
  my $runnumber = "$$" . ".$date";

  my $tempdir = "$curdir\/RM_$runnumber";
  unless ( -r "$tempdir\/$fileendname" || mkdir $tempdir, 0777 ) {
    if ( $options{'dir'} ) {
      $tempdir = $options{'dir'} . "\/RM_$runnumber";
      die "Can't write to " . $options{'dir'} . "\n"
          unless mkdir $tempdir, 0777;
    }
    else {    # no writing to current directory
      $tempdir = "$querydir\/RM_$runnumber";
      if ( mkdir $tempdir, 0777 ) {
        my $temptestfile = "$file" . "_$runnumber";
        copy( $file, "$temptestfile" )
            || die "Can not create a $curdir subdirectory nor write "
            . "full output to $querydir.\n Change operating "
            . "directory or use the option "
            . $options{'dir'}
            . " to indicate where files should be written.\n";
        unlink $temptestfile;
      }
      else {
        die "There is no writing access to the current directory "
            . "($curdir) nor to the directory containing the query "
            . "sequence.\nConsider using \"-dir\" or changing "
            . "current directory.";
      }
    }
  }
  return ( $tempdir, $runnumber );
}

##-------------------------------------------------------------------------##
## Use:  my
##
##
##  Returns
##
##  Globals Used: None
##-------------------------------------------------------------------------##
###  Interrupt handler used by systemint() ###
sub handler {
  my ( $sig ) = @_;

  print "\nAborting with a SIG$sig\n";
  exit( -1 );
}

##-------------------------------------------------------------------------##
## Use:  my
##
##
##  Returns
##
##  Globals Used: None
##-------------------------------------------------------------------------##
###  systemint -- Interruptible system call routine.  ###
sub systemint {
  my ( $cmd ) = @_;
  my ( $pid );
  my ( $flag ) = 0;

  local $SIG{INT}  = sub { &handler( @_ ) if ( $flag ) };    #^C
  local $SIG{QUIT} = sub { &handler( @_ ) if ( $flag ) };    #^\
  local $SIG{TERM} =
      sub { &handler( @_ ) if ( $flag ) };    #kill command or system crash
  local $SIG{HUP} = sub { &handler( @_ ) if ( $flag ) };

  #    local $SIG{CHLD} = 'IGNORE';

FORK:
  {
    if ( $pid = fork ) {
      $flag = 1;
      waitpid( $pid, 0 );                     #Waits for child to finish...
      my ( $status ) = $?;
      if ( WIFSTOPPED( $status ) ) {
        my ( $signal ) = WSTOPSIG( $status );
        print "\nforksys:  Program terminated by a signal $signal.\n";
        print "The executing command was:  $cmd\n";
        return 1;
      }
      if ( WIFEXITED( $status ) ) {
        my ( $temp ) = WEXITSTATUS( $status );
        return $temp;
      }
      if ( WIFSIGNALED( $status ) ) {
        my ( $signal ) = WTERMSIG( $status );
        print "\nforksys:  Program terminated by a signal $signal.\n";
        print "The executing command was:  $cmd\n";
        return 1;
      }
    }
    elsif ( defined $pid ) {
      exec( "$cmd" ) or die "Exec $cmd failed\n";
    }
    elsif ( $! =~ /No more process/o ) {
      print "$!\n";
      sleep 5;
      redo FORK;
    }
    else {
      die "Can't fork!  Errorcode: $!\n";
    }
  }
}

##-------------------------------------------------------------------------##
## Use:  my &cleanUp( \%options, $runnumber, $tempdir, $fileori,
##                    $fileend, $file, $originaldir, $compressed );
##
##  Returns
##       --
##       Expects $options{'dir'} ( if set ) to already exist.
##
##  Globals Used: None
##-------------------------------------------------------------------------##
sub cleanUp {
  my %options     = %{ shift() };
  my $runnumber   = shift;
  my $tempdir     = shift;
  my $fileori     = shift;
  my $fileend     = shift;
  my $file        = shift;
  my $originaldir = shift;
  my $compressed  = shift;

  unlink "$tempdir\/$fileend";    # eq $file, but unlinking $file seems scary
                                  # copying it to originaldirectory would
                                  # change the date, priviliges, etc.
  unlink "$file.masked.log";      # pretty darn useless little file
  opendir TEMP, "$tempdir";
  my @outputfiles = readdir TEMP;
  closedir TEMP;

  # default is writing output to query directory
  my $targetdir = $originaldir;
  if ( $options{'dir'} ) {
    $targetdir = $options{'dir'};
  }
  if ( open TEMP2, ">$targetdir\/temp.$runnumber" ) {
    unlink "$targetdir\/temp.$runnumber";
    close TEMP2;
    foreach my $outputfile ( @outputfiles ) {
      next unless $outputfile =~ /^$fileend/;

      #rename doesn't cross system boundaries
      copy( "$tempdir\/$outputfile", "$targetdir\/$outputfile" )
          || die "Can't write all output files to $targetdir "
          . "(over quota?)n.  Files can be found in $tempdir "
          . "(and perhaps a few in $targetdir).  Run "
          . "\"ProcessRepeats\" on the .cat file or redo "
          . "analysis.\n\n";
      unlink "$tempdir\/$outputfile";
    }
  }
  else {
    print "\nOutput files can not be written to $targetdir. "
        . "They can be found in the directory $tempdir instead. \n"
        . "Consider using the -dir option.\n\n";
  }

  if ( $compressed ) {
    &systemint( "gzip $fileori" )     if $compressed eq 'zipped';
    &systemint( "compress $fileori" ) if $compressed eq 'Zed';
  }
}

##----
# DEPRECATED
##----
#sub getDBStats {
#  my $options     = %{ shift() };
#  my $db          = shift;
#  my $specPattern = shift;
#  my $tax         = shift;
#
#  print "RepeatMasker::getDBStats( \$db, " . "$specPattern, \$tax );\n"
#      if ( $DEBUG );
#
#  my $seqCount = $db->getRecordCount();
#
#  my $cladeCnt  = 0;
#  my $ancestCnt = 0;
#
#  # For each sequence in the master library
#  for ( my $i = 0 ; $i < $seqCount ; $i++ ) {
#    my $record = $db->getRecord( $i );
#    foreach my $name ( $record->getRMSpeciesArray() ) {
#      $name =~ s/_/ /g;
#      my $isDescendant = $tax->isA( $name,        $specPattern );
#      my $isAncestor   = $tax->isA( $specPattern, $name );
#
#      if ( $isDescendant == 1 ) {
#        $cladeCnt++;
#        last;
#      }
#      else {
#        if ( $isAncestor ) {
#          $ancestCnt++;
#          last;
#        }
#      }
#    }
#  }
#
#  print
#      "   - $ancestCnt ancestral and ubiquitous sequence(s) for $specPattern\n";
#  print "   - $cladeCnt lineage specific sequence(s) for $specPattern\n";
#}


#### DEPRECATED
##-------------------------------------------------------------------------##
## Use:  my $libSize = createLib( \%options, $db, $libName, $specPattern,
##                                $stageNum, $tax);
##
##         \%options     :  RepeatMasker options hash
##         $db           :  A FastaDB or EMBL object open to
##                          the repeat datbase.
##         $libName      :  The name of the library to create.
##         $specPattern  :  The name of the species to include seqs for.
##         $stageNum:  The name of the old RM database.  Used to
##                          screen the repeats ( will generalize in the
##                          future ).
##         $tax          :  The Taxonomy.pm object.
##
##  Returns
##
##     Creates a library by filtering the RepeatMasker.lib file
##     given specific filtering parameters ( specPattern and stageNum).
##     If wublast is being used it also creates the binary versions
##     of the fasta library.  Returns the number of sequences stored
##     in the library.  Removes the library file if there are no
##     matching sequences.
##
##  Globals Used: None
##-------------------------------------------------------------------------##
#sub createLib {
#  my $options      = %{ shift() };
#  my $db           = shift;
#  my $libName      = shift;
#  my $specPattern  = shift;
#  my $stageNum     = shift;
#  my $tax          = shift;
#  my $searchEngine = shift;
#
#  print "RepeatMasker::createLib( \$db, "
#      . "$libName, $specPattern, $stageNum, \$tax );\n"
#      if ( $DEBUG );
#
#  my @ids;
#  my @descs;
#  my $seqCount = $db->getRecordCount();
#
#  my $outFile = $libName;
#  $outFile = "$libName-wublast"
#      if ( $searchEngine->isa( "WUBlastSearchEngine" ) );
#
#  $outFile .= ".hmm" if ( $searchEngine->isa( "HMMERSearchEngine" ) );
#
#  open OUT, ">$outFile"
#      or die "RepeatMasker::createLib(): Could "
#      . "not open library file $outFile!\n";
#
#  # The number of sequences stored in this library
#  my $librarySize = 0;
#
#  # For each sequence in the master library
#  for ( my $i = 0 ; $i < $seqCount ; $i++ ) {
#    my $match   = 0;
#    my @buffers = ();
#    my $seq     = "";
#    my $id      = "";
#    my $type    = "";
#    my $desc    = "";
#    my $record  = $db->getRecord( $i );
#    foreach my $name ( $record->getRMSpeciesArray() ) {
#      $name =~ s/_/ /g;
#      if (    $tax->isA( $name, $specPattern ) > 0
#           || $tax->isA( $specPattern, $name ) > 0 )
#      {
#        if ( $stageNum == 80 ) {
#
#          # For the specieslib it is sufficient to be in
#          # the clade or in the ancestral species.  No
#          # need to breakout into seperate search stages
#          # yet.
#          $match = 1;
#        }
#        else {
#
#          # Full length sequence non-buffered
#          my @stages = $record->getRMSearchStagesArray();
#          foreach my $stage ( @stages ) {
#            if (
#                 $stage eq $stageNum
#                 || (
#                      $stageNum == 95
#                      && (    $stage == 35
#                           || $stage == 50
#                           || $stage == 55
#                           || $stage == 60
#                           || $stage == 65
#                           || $stage == 70
#                           || $stage == 75 )
#                 )
#                )
#            {
#              $match = 1;
#            }
#          }
#
#          # Buffered Sequence
#          @stages = $record->getRMBufferStagesArray();
#          foreach my $stage ( @stages ) {
#            if ( $stage =~ /(\d+)\[(\d+)\-(\d+)\]/ ) {
#              if ( $1 == $stageNum ) {
#                push @buffers, "$2-$3";
#              }
#            }
#            elsif ( $stage =~ /(\d+)/ ) {
#              if ( $1 == $stageNum ) {
#                push @buffers, "full";
#              }
#            }
#            else {
#              print "RepeatMasker::createLib: Warning buffer stage $stage "
#                  . "understood!\n";
#            }
#          }
#        }
#        last if ( $match == 1 );
#      }
#    }
#
#    if ( $match > 0 || @buffers ) {
#      $librarySize++;
#      $id = $record->getId();
#      if ( $id =~ /D[FR]\d+/ && defined $record->getName() ) {
#        $id = $record->getName();
#      }
#      $type = "#" . $record->getRMType();
#      if ( $record->getRMSubType() ne "" ) {
#        $type .= "/" . $record->getRMSubType();
#      }
#
#      if ( $match > 0 ) {
#        if ( $searchEngine->isa( "HMMERSearchEngine" ) ) {
#
#          # Use species-specific thresholds if supplied
#          my @thresholds    = $record->getThreshArray();
#          my %subThresholds = ();
#          foreach my $thresh ( @thresholds ) {
#            if ( $tax->isSpecies( $thresh->{'taxname'} ) eq
#                 $tax->isSpecies( $specPattern ) )
#            {
#              $subThresholds{'GA'} = $thresh->{'hit_ga'};
#              $subThresholds{'TC'} = $thresh->{'hit_tc'};
#              $subThresholds{'NC'} = $thresh->{'hit_nc'};
#              last;
#            }
#          }
#          if ( exists $subThresholds{'GA'}
#               && $subThresholds{'GA'} > 0 )
#          {
#            my $recordLines = $record->getRecordLines();
#            while ( $recordLines =~ /(.*)[\n\r]+/ig ) {
#              my $line = $1;
#              if ( $line =~ /^(GA|TC|NC) / ) {
#                print OUT "$1   " . $subThresholds{$1} . ";\n";
#              }
#              else {
#                print OUT $line . "\n";
#              }
#            }
#          }
#          else {
#            print OUT $record->getRecordLines();
#          }
#        }
#        else {
#          $desc = $record->getDescription();
#          $seq  = $record->getSequence();
#          if ( $searchEngine->isa( "WUBlastSearchEngine" ) ) {
#            my $rseq = uc( $seq );
#            $rseq =~ tr/ACGTRYWSKMNXBDHV/TGCAYRSWMKNXVHDB/;
#            $rseq = reverse $rseq;
#
#            die "Repeat consensus ( $id ) contains the "
#                . "word \"anti\" in it's name.  This will cause "
#                . "incorrect orientation calls in the output when "
#                . "running with wublast."
#                if ( $id =~ /anti/ );
#
#            print OUT ">$id" . $type;
#            print OUT " (anti)\n";
#            $rseq =~ s/(\S{50})/$1\n/g;
#            $rseq .= "\n"
#                unless ( $rseq =~ /.*\n+$/s );
#            print OUT $rseq;
#          }
#
#          print OUT ">" . $id . "$type\n";
#          $seq =~ s/(\S{50})/$1\n/g;
#          $seq .= "\n"
#              unless ( $seq =~ /.*\n+$/s );
#          print OUT $seq;
#        }
#      }
#
#      if ( @buffers ) {
#        foreach my $buffer ( @buffers ) {
#          if ( $searchEngine->isa( "HMMERSearchEngine" ) ) {
#
#            # TODO: Consider need for buffering with HMMs
#            #warn "Currently we do not support sequence "
#            #    . "buffers for HMMs $id ( $buffer )\n";
#          }
#          else {
#            $desc = $record->getDescription();
#            $seq  = $record->getSequence();
#            if ( $buffer eq "full" ) {
#              $type = "#buffer";
#            }
#            elsif ( $buffer =~ /(\d+)-(\d+)/ ) {
#              $seq = substr( $seq, $1 - 1, $2 - $1 + 1 );
#              $type = "_$1" . "_$2#buffer";
#            }
#            if ( $searchEngine->isa( "WUBlastSearchEngine" ) ) {
#              my $rseq = uc( $seq );
#              $rseq =~ tr/ACGTRYWSKMNXBDHV/TGCAYRSWMKNXVHDB/;
#              $rseq = reverse $rseq;
#
#              die "Repeat consensus ( $id ) contains the "
#                  . "word \"anti\" in it's name.  This will cause "
#                  . "incorrect orientation calls in the output when "
#                  . "running with wublast."
#                  if ( $id =~ /anti/ );
#
#              print OUT ">" . $id . "$type";
#              print OUT " (anti)\n";
#              $rseq =~ s/(\S{50})/$1\n/g;
#              $rseq .= "\n"
#                  unless ( $rseq =~ /.*\n+$/s );
#              print OUT $rseq;
#            }
#
#            print OUT ">" . $id . "$type\n";
#            $seq =~ s/(\S{50})/$1\n/g;
#            $seq .= "\n"
#                unless ( $seq =~ /.*\n+$/s );
#            print OUT $seq;
#          }
#        }
#      }
#    }
#  }
#  close OUT;
#
#  if ( $librarySize == 0 ) {
#    unlink( $outFile );
#  }
#  else {
#    my ( $outFileVol, $outFileDir, $outFileBasename ) =
#        File::Spec->splitpath( $outFile );
#    $outFileDir = "." if ( $outFileDir eq "" );
#
#    if ( $searchEngine->isa( "WUBlastSearchEngine" ) ) {
#      system(   "$SETDB_PRGM $outFile > "
#              . "$outFileDir/setdb.log 2>&1" ) == 0
#          or die "RepeatMasker::createLib(): Error invoking setdb on file "
#          . "$outFile.  We tried using the setdb program ( "
#          . "$SETDB_PRGM ).\n";
#      unlink( $outFile ) unless ( $DEBUG );
#      move( "$outFile.ahd", "$libName.ahd" );
#      move( "$outFile.atb", "$libName.atb" );
#      move( "$outFile.bsq", "$libName.bsq" );
#    }
#    elsif ( $searchEngine->isa( "NCBIBlastSearchEngine" ) ) {
#      system(   "$NCBIBLASTDB_PRGM -dbtype nucl "
#              . "-in $outFile > $outFileDir/rmblastdb.log 2>&1" ) == 0
#          or die "RepeatMasker::createLib(): Error invoking "
#          . "$NCBIBLASTDB_PRGM"
#          . " on file $outFile.\n";
#    }
#    elsif ( $searchEngine->isa( "HMMERSearchEngine" ) ) {
#      system(   "$HMMPRESS_PRGM"
#              . " $outFile > $outFileDir/hmmPress.log 2>&1" ) == 0
#          or die "RepeatMasker::createLib(): Error invoking "
#          . "$HMMPRESS_PRGM on file "
#          . "$outFile.\n";
#    }
#  }
#
#  return ( $librarySize );
#
#}

#sub createValidIDList {
#  my $db          = shift;
#  my $specPattern = shift;
#  my $speciesDir  = shift;
#  my $tax         = shift;
#
#  print "RepeatMasker::createValidIDList( \$db, "
#      . "$specPattern, $speciesDir, \$tax );\n"
#      if ( $DEBUG );
#
#  open OUT, ">$speciesDir/speciesMeta.pm"
#      or die "RepeatMasker::createValidIDList(): Could "
#      . "not open library file $speciesDir/speciesMeta.pm!\n";
#
#  my %validIDs = ();
#  my $seqCount = $db->getRecordCount();
#
#  # For each sequence in the master library
#  for ( my $i = 0 ; $i < $seqCount ; $i++ ) {
#    my $record = $db->getRecord( $i );
#    my $id     = $record->getId();
#
#    my $match = 0;
#    foreach my $name ( $record->getRMSpeciesArray() ) {
#      $name =~ s/_/ /g;
#      if (    $tax->isA( $name, $specPattern ) > 0
#           || $tax->isA( $specPattern, $name ) > 0 )
#      {
#        $match = 1;
#        last;
#      }
#    }
#    if ( $match ) {
#      $validIDs{ lc( $id ) } = 1;
#    }
#  }
#
#  ##
#  ##
#  ##
#  print OUT "package speciesMeta;\n";
#  print OUT "require Exporter;\n";
#  print OUT "\@EXPORT_OK   = qw( \%validIDs );\n";
#  print OUT "\%EXPORT_TAGS = ( all => [ \@EXPORT_OK ] );\n";
#  print OUT "\@ISA         = qw(Exporter);\n";
#  print OUT "BEGIN {\n";
#  print OUT "  \%validIDs = (\n";
#  my $output = Dumper( \%validIDs );
#  $output =~ s/^\$VAR1 = \{//;
#  $output =~ s/\};//;
#  print OUT $output;
#  print OUT "\n";
#  print OUT "  ); }\n";
#  close OUT;
#
#}
#
##-------------------------------------------------------------------------##
## Use:  my &processCustomLib( $libFile, $wublastSetDB, $tempdir );
##
##         $libFile      :  RepeatMasker options hash
##         $wublastSetDB :  The full path to the wublast "setdb" program.
##         $tempdir      :  The temporary directory for this run
##         $searchEngine :  The searchEngine being used
##
##  Returns
##
##     Processes a custom library ( in FASTA format ) supplied by the user.
##     This involves checking that the repeat names supplied by the user's
##     library conform to the RepeatMasker nomenclature.  Secondarily this
##     will create the frozen databases for WUBlast.
##
##  Globals Used: None
##-------------------------------------------------------------------------##
sub processCustomLib {
  my $libFile      = shift;
  my $tempdir      = shift;
  my $searchEngine = shift;

  print "RepeatMasker::processCustomLib()\n" if ( $DEBUG );

  my ( $custLibVol, $custLibDir, $custLibFile ) =
      File::Spec->splitpath( $libFile );
  $custLibDir = "." if ( $custLibDir eq "" );

  # TODO: Check name syntax

  if ( $searchEngine->isa( "WUBlastSearchEngine" ) ) {

    my $libDB = FastaDB->new(
                              fileName    => $libFile,
                              openMode    => SeqDBI::ReadOnly,
                              maxIDLength => 80
    );

    open OUT, ">$tempdir/$custLibFile.anti"
        or die "RepeatMasker::processCustomLib(): Could "
        . "not create wublast compatable library file "
        . "$custLibDir/$custLibFile.anti!\n";

    foreach my $seqID ( $libDB->getIDs() ) {
      my $seq  = $libDB->getSequence( $seqID );
      my $rseq = uc( $seq );
      my $desc = $libDB->getDescription( $seqID );

      die "Repeat consensus ( $seqID ) contains the "
          . "word \"anti\" in it's name.  This will cause "
          . "incorrect orientation calls in the output when "
          . "running with wublast."
          if ( $seqID =~ /anti/ );

      print OUT ">$seqID\n";
      $seq =~ s/(.{50})/$1\n/g;
      print OUT "$seq\n";

      $rseq =~ tr/ACGTRYWSKMNXBDHV/TGCAYRSWMKNXVHDB/;
      $rseq = reverse $rseq;

      print OUT ">$seqID (anti)\n";
      $rseq =~ s/(.{50})/$1\n/g;
      print OUT "$rseq\n";
    }
    close OUT;

    if ( $searchEngine->isa( "WUBlastSearchEngine" ) ) {
      my $currdir = cwd();
      chdir( $tempdir )
          or die "RepeatMasker::processCustomLib(): "
          . "Cannot change directory to $tempdir";
      system(   "$SETDB_PRGM -o $custLibFile "
              . "$tempdir/$custLibFile.anti "
              . " > setdb.log 2>&1" ) == 0
          or die "RepeatMasker::processCustomLib(): Error invoking setdb "
          . "on file $tempdir/$custLibFile.anti.  We tried using "
          . "the setdb program ( $SETDB_PRGM ).\n";
      chdir( $currdir )
          or die "RepeatMasker::processCustomLib(): "
          . "Cannot change directory to $currdir";
    }
    undef $libDB;

  }
  elsif ( $searchEngine->isa( "NCBIBlastSearchEngine" ) ) {
    system(   "$NCBIBLASTDB_PRGM -out $tempdir/$custLibFile "
            . "-dbtype nucl -in $libFile > "
            . "$tempdir/makeblastdb.log 2>&1" );
  }
  elsif ( $searchEngine->isa( "HMMERSearchEngine" ) ) {
    system( "cp $libFile $tempdir/$custLibFile" );
    system(   "$HMMPRESS_PRGM "
            . " $tempdir/$custLibFile > $tempdir/hmmPress.log 2>&1" ) == 0
        or die "RepeatMasker::createLib(): Error invoking "
        . "$HMMPRESS_PRGM on file "
        . "$tempdir/$custLibFile.\n";
  }
}

##-------------------------------------------------------------------------##
##  my ( $refineableHashRef ) = builRefineableHash( $EMBLDBRef );
##
##  Returns
##            A hash: $refineableHashRef = { 'AluJo' => 1,
##                                           'AluSc' => 1, .. }
##            which contains all the id's of repeats which can be
##            refined.
##
##  Globals Used: None
##-------------------------------------------------------------------------##
sub buildRefineableHash {
  my $db = shift;

  my %refineableHash = ();

  if ( $db->isa( "EMBL" ) || $db->isa( "DFAM" ) ) {
    my $seqCount = $db->getRecordCount();

    # For each sequence in the master library
    for ( my $i = 0 ; $i < $seqCount ; $i++ ) {
      my $record     = $db->getRecord( $i );
      my $refineable = $record->getRMRefineable();
      if ( $refineable ) {
        $refineableHash{ $record->getId() } = 1;
      }
    }
  }

  return ( \%refineableHash );
}

sub commify {
    my $text = reverse $_[0];
    $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
    return scalar reverse $text;
}

##-------------------------------------------------------------------------##
##  my ( $species, $generalCacheDir, $speciesCacheDir, 
##       $customLibDir, $rmLibLabel ) =
##                             initLibrariesFromFamdb();
##
##      species       : "-species" option provided by the user or undef.
##      customLibFile : "-lib" option provided by the user or undef.
##      rmLibDir      : Directory containing the RepeatMaskerLib.h5 file.
##      workingDir    : The programs working directory (e.g "RM_#####..").
##      libraryPath   : Search path for cached RM libraries.
##      searchEngine  : The search engine object.
##
##  Initialize and collect data on libraries for use by RepeatMasker.
##  The expectation is that libraries now come in two forms either 
##  a simple FASTA/HMM file provided directly by the user with the 
##  -lib option or a FAMDB (hdf5) library with taxonomic labels for
##  multi-purpose searching.
##
##  Returns
##     orgFlag : 
##
##     Or it fails validation and exits the program.
##
##  Globals Used: None
##-------------------------------------------------------------------------##
sub initLibrariesFromFamdb {
  my $species       = shift;
  my $customLibFile = shift;
  my $rmLibDir      = shift;
  my $workingDir    = shift;
  my @libraryPath   = @{ shift() };
  my $searchEngine  = shift;

  if ( $species ne "" && $customLibFile ne "" ) {
    die "The custom library (-lib) option may not be combined with the\n" .
        "species (-species) option.\n\n";
  }

  my $speciesCacheDir = "";          # The directory where we will find the
                                     #   cache files for species spec. libraries
  my $generalCacheDir = "";          # The directory where we will find the
                                     #   cache files for general libraries
  my $customCacheDir  = "$workingDir";  # The directory where we will find the
                                     #   "-lib" custom library.

  my $orgFlag;
  my $dbLabel;

  ##
  ## User supplied FASTA/HMM library
  ##
  if ( $customLibFile ne "" ) {
    if ( -s $customLibFile ) {
      # Validate/Generate required pre-processed files for
      # the search engine requested.
      if ( $searchEngine->isa( "WUBlastSearchEngine" ) ) {
        if (    -s "$customLibFile.bsq"
             || -s "$customLibFile.xps" )
        {
          my ( $custLibVol, $custLibDir, $custLibFile ) =
              File::Spec->splitpath( $customLibFile );
          $customCacheDir = $custLibDir || ".";
          warn "NOTE: Compressed versions of your custom library were\n"
              . "found in the $custLibDir directory.  The program will\n"
              . "use these by default. If these databases do not contain\n"
              . "reverse complemented copies of your sequences the reverse\n"
              . "strand hits will not be returned!";
        }
        else {
          &processCustomLib( $customLibFile, $workingDir, $searchEngine );
        }
      }
      elsif ( $searchEngine->isa( "NCBIBlastSearchEngine" ) ) {
        if (    -s "$customLibFile.nsq"
             || -s "$customLibFile.nhr" )
        {
          my ( $custLibVol, $custLibDir, $custLibFile ) =
              File::Spec->splitpath( $customLibFile );
          $customCacheDir = $custLibDir || ".";
        }
        else {
          &processCustomLib( $customLibFile, $workingDir, $searchEngine );
        }
      }
      elsif ( $searchEngine->isa( "HMMERSearchEngine" ) ) {
        # mylib.hmm.h3f
        # mylib.hmm.h3i
        # mylib.hmm.h3m
        # mylib.hmm.h3p
        if (    -s "$customLibFile.h3f"
             && -s "$customLibFile.h3i"
             && -s "$customLibFile.h3m"
             && -s "$customLibFile.h3p" )
        {
          my ( $custLibVol, $custLibDir, $custLibFile ) =
              File::Spec->splitpath( $customLibFile );
          $customCacheDir = $custLibDir || ".";
        }
        else {
          &processCustomLib( $customLibFile, $workingDir, $searchEngine );
        }
      }
      else {
        my ( $custLibVol, $custLibDir, $custLibFile ) =
            File::Spec->splitpath( $customLibFile );
        $customCacheDir = $custLibDir || ".";
      }
    }
    else {
      die "RepeatMasker::setspecies: Could not find user specified library "
          . $customLibFile . ", or the file is empty.\n";
    }
    print "Using Custom Repeat Library: $customLibFile\n\n";
  }
  else {

    # Default species if non given.
    $species = "homo sapiens" if ( $species eq "" );

    my $modelPrefix = "CONS-";
    $modelPrefix = "HMM-" if ( $searchEngine->isa( "HMMERSearchEngine" ) ); 

    my $famdbCmd = "$REPEATMASKER_DIR/famdb.py -i $rmLibDir/RepeatMaskerLib.h5 info ";
    open IN,"$famdbCmd 2>&1 |" or die "Could not execute famdb.py using: $famdbCmd\n";
    my $dbTitle;
    my $dbVersion;
    my $dbDate;
    my $dbConsCnt;
    my $dbHMMCnt;
    while (<IN>){
      # Database: Dfam
      # Version: 3.1
      # Date: 2019-06-20
      #
      # Dfam - A database of transposable element (TE) sequence alignments and HMMs.
      #
      # Total consensus sequences: 273655
      # Total HMMs: 273655
      if ( /Database:\s*(\S.*)/ ) { 
        $dbTitle = $1;
      }elsif ( /Version:\s*(\S.*)/ ) {
        $dbVersion = $1;
      }elsif ( /Date:\s*(\S.*)/ ) {
        $dbDate = $1;
      }elsif ( /Total consensus sequences:\s*(\d+)/ ) {
        $dbConsCnt = $1;
      }elsif ( /Total HMMs:\s*(\d+)/ ) {
        $dbHMMCnt = $1;
      }
    }
    close IN;
    $dbLabel = $dbTitle;
    $dbLabel = s/^\s+//;
    $dbLabel = s/\s+$//;
    $dbLabel = s/\s+/_/g;
    my $sanitizedTitle = $dbTitle;
    $sanitizedTitle =~ s/[\[\(\{\]\)\]]+//g;
    $sanitizedTitle =~ s/[^a-zA-Z\d]+/_/g;
    $dbLabel = $modelPrefix . $sanitizedTitle . "_" . $dbVersion;
    print "\nUsing Master RepeatMasker Database: $rmLibDir/RepeatMaskerLib.h5\n";
    print "  Title    : $dbTitle\n";
    print "  Version  : $dbVersion\n";
    print "  Date     : $dbDate\n";
    if ( $dbConsCnt > $dbHMMCnt ) {
      print "  Families : " . commify($dbConsCnt) . "\n\n";
    }else {
      print "  Families : " . commify($dbHMMCnt) . "\n\n";
    }
    # TODO Print total Taxa count
 
    # Lookup species in famdb and resolve orgFlag
    # NOTE: orgFlag is a bit of a legacy concept that will probably disappear
    #       in a future RM version. 
    # TODO: Should REPEATMASKER_DIR be global???
    $famdbCmd = "$REPEATMASKER_DIR/famdb.py -i $rmLibDir/RepeatMaskerLib.h5 lineage '" .
                    $species . "' -f semicolon";
    #print "FamdbCMD: $famdbCmd\n";
    open IN,"$famdbCmd 2>&1 |" or die "Could not execute famdb.py using: $famdbCmd\n";
    my $lineage = "";
    my $NCBITaxID;
    my $NCBITaxName;
    while ( <IN> ) {
      # famdb lineage output:
      # 370040: root;cellular organisms;Eukaryota;Opisthokonta;Metazoa;Eumetazoa;Bilateria;Deuterostomia;Chordata;Craniata;Vertebrata;Gnathostomata;Teleostomi;Euteleostomi;Actinopterygii;Actinopteri;Neopterygii;Teleostei;Osteoglossocephalai;Clupeocephala;Euteleosteomorpha;Neoteleostei;Eurypterygia;Ctenosquamata;Acanthomorphata;Euacanthomorphacea;Percomorphaceae;Ovalentaria;Mugilomorphae;Mugiliformes;Mugilidae;Planiliza;Planiliza haematocheilus [1469]
      #1299 entries in ancestors; 8 lineage-specific entries
      $orgFlag = "mammalia" if ( /^\d+:\s.*;Mammalia(;| \[)/ );
      $orgFlag = "rodentia" if ( /^\d+:\s.*;Rodentia(;| \[)/ );
      $orgFlag = "primates" if ( /^\d+:\s.*;Primates(;| \[)/ );
      if ( /^(\d+):\s+(root.*)\;(\S.*)\s+\[.*/ ) {
        $NCBITaxID = $1;
        $lineage = $2;
        $NCBITaxName = $3;
      }
      # TODO: Catch stderr... and print out ambiguous matches up to 10...
      # Ambiguous search term 'mouse' (found 6 results).
      # Please use a more specific name or taxa ID, which can be looked
      # up with the 'names' command.
      #
    }
    close IN;

    $famdbCmd = "$REPEATMASKER_DIR/famdb.py -i $rmLibDir/RepeatMaskerLib.h5 lineage '" .
                    $species . "' --ancestors --descendants -f totals";
    #print "famdb_command: $famdbCmd\n";
    open IN,"$famdbCmd 2>&1 |" or die "Could not execute famdb.py using: $famdbCmd\n";
    while ( <IN> ) {
      #1299 entries in ancestors; 8 lineage-specific entries
      if ( /^(\d+)\s+entries in ancestors;\s*(\d+)\s+lineage-specific/ ) {
        my $ancestral = $1;
        my $linSpecific = $2;
        print "Species/Taxa Search:\n";
        print "  $NCBITaxName [NCBI Taxonomy ID: $NCBITaxID]\n";
        my $lineageBlock = "";
        my $lineageLine = "";
        foreach my $component ( split(/;/,$lineage) ){
          my $prefix;
          if ( $lineageBlock eq "" ) {
            $prefix = "  Lineage: ";
          }else {
            $prefix = "           ";
          }
          if ( length($lineageLine . ";" . $component) > 60 ) {
            $lineageBlock .= $prefix . $lineageLine . "\n";
            $lineageLine = "";
          }
          $lineageLine .= "$component;";
        }
        $lineageBlock =~ s/;$//;
        $lineageBlock .= "\n"
            unless ( $lineageBlock =~ /.*\n+$/s );
        print "$lineageBlock";
        print "  $ancestral families in ancestor taxa; $linSpecific lineage-specific families\n\n";
      }
    }
    close IN;

    if ( $lineage eq "" ) {
      die "\n\nSpecies \""
          . $species
          . "\" is not known to RepeatMasker.  There may\n"
          . "not be any TE families defined in the libraries for this\n"
          . "species/clade or there may be an error in the spelling.\n"
          . "Please check your entry against the NCBI Taxonomy database\n"
          . "and/or try using a broader clade or related species instead.\n"
          . "The full list of species/clades defined in the library may be\n"
          . "obtained using the famdb.py script.\n\n";
    }
  }

  # Check library search path for cached versions
  # of libraries.  NOTE: cached versions are
  # stored as follows:
  #
  #     @libraryPath/$dbLabel/general/foo.lib
  #     @libraryPath/$dbLabel/$species/foolib
  #
  # The first directory in the search path containing
  # the desired library type is used.
  #
  my $speciesWord = $species || "";
  $speciesWord =~ s/\s+/_/g;
  foreach my $path ( @libraryPath ) {
    if ( -d "$path/$dbLabel" ) {
      if ( -d "$path/$dbLabel/$speciesWord" ) {
        if (
          (
            $searchEngine->isa( "WUBlastSearchEngine" )
            && ( my @pathFiles = glob( "$path/$dbLabel/$speciesWord/*.ahd" ) )
          )
          || ( $searchEngine->isa( "NCBIBlastSearchEngine" )
            && ( my @pathFiles = glob( "$path/$dbLabel/$speciesWord/*.nhr" ) )
          )
          || ( $searchEngine->isa( "CrossmatchSearchEngine" )
             && ( my @pathFiles = glob( "$path/$dbLabel/$speciesWord/*lib" ) )
          )
          || ( $searchEngine->isa( "HMMERSearchEngine" )
             && ( my @pathFiles = glob( "$path/$dbLabel/$speciesWord/*hmm" ) )
          )
            )
        {
          $speciesCacheDir = "$path/$dbLabel/$speciesWord";
        }
      }elsif ( -d "$path/$dbLabel/$speciesWord" . ".working" ) {
        # Make sure checkpointing name has been cleared in previous run
        print "\n  It appears that RepeatMasker attempted to generate a cached library for this\n"
              ."  species before but didn't complete it.  Attempting to remove and rebuild this\n"
              ."  cache: $path/$dbLabel/$speciesWord.working\n\n";
        rmtree("$path/$dbLabel/$speciesWord" . ".working",0,1);
      }
      if ( -d "$path/$dbLabel/general" ) {
        if (
             (
                  $searchEngine->isa( "WUBlastSearchEngine" )
               && ( my @pathFiles = glob( "$path/$dbLabel/general/*.ahd" ) )
             )
             || (   $searchEngine->isa( "NCBIBlastSearchEngine" )
                 && ( my @pathFiles = glob( "$path/$dbLabel/general/*.nhr" ) )
                )
             || (    $searchEngine->isa( "CrossmatchSearchEngine" )
                  && -s "$path/$dbLabel/general/l1.lib"
                  && -s "$path/$dbLabel/general/refineableHash.dat" )
             || (   $searchEngine->isa( "HMMERSearchEngine" ) )
            )
        {
          $generalCacheDir = "$path/$dbLabel/general";
        }
      }elsif ( -d "$path/$dbLabel/general" . ".working" ){
        # Make sure checkpointing name has been cleared in previous run
        print "\n  It appears that RepeatMasker attempted to generate a cached general library\n"
              ."  before but didn't complete it.  Attempting to remove and rebuild this\n"
              ."  cache: $path/$dbLabel/general.working\n\n";
        rmtree("$path/$dbLabel/general.working",0,1);
      }
    }
  }

  #
  # If we could not find either the general library cache
  # or the species library cache we need to build them.
  #
  if ( $generalCacheDir eq "" || $speciesCacheDir eq "" ) {
    # Cached libraries are missing -- generate them
    #
    # Determine the highest level writable directory
    #
    my $writableCacheDir = "";
    foreach my $path ( @libraryPath ) {
      if ( -d $path ) {
        if ( open( TEST, ">$path/rmwritetest.deleteme" ) ) {
          close TEST;
          unlink "$path/rmwritetest.deleteme";

          # Just in case there is a read-only version of the
          # db already extracted by the installer.
          if ( -d "$path/$dbLabel" ) {
            if ( open( TEST, ">$path/$dbLabel/rmwritetest.deleteme" ) ) {
              close TEST;
              unlink "$path/rmwritetest.deleteme";
              $writableCacheDir = $path;
              last;
            }
          }
          else {
            $writableCacheDir = $path;
            last;
          }
        }
      }
      elsif ( mkdir "$path", 0777 ) {
        $writableCacheDir = $path;
        last;
      }
    }

    if ( $generalCacheDir eq "" ) {
      # Need to build libraries:   at.lib,  simple.lib
      #                            l1.lib, mirs.lib, mir.lib, is.lib
      print "Building general libraries in: "
          . "$writableCacheDir/$dbLabel/general\n";

      # Make cache dir
      if ( !-d "$writableCacheDir/$dbLabel/general.working" ) {
        eval { mkpath( "$writableCacheDir/$dbLabel/general.working" ) };
        if ( $@ ) {
          die "RepeatMasker::setspecies: Can't creat dir path "
              . "$writableCacheDir/$dbLabel/general! $@\n";
        }
      }

      &createLibFromFamdb( $rmLibDir, 
                           "$writableCacheDir/$dbLabel/general.working/is.lib", 
                           'root', 10, $searchEngine );
      # Rename or copy upon successful completion
      if ( -d "$writableCacheDir/$dbLabel/general" ) {
        # Directory exists but setup for a different search engine - copy files over
        system("cp $writableCacheDir/$dbLabel/general.working/* $writableCacheDir/$dbLabel/general");
        rmtree("$writableCacheDir/$dbLabel/general.working",0,1);
      }else {
        rename("$writableCacheDir/$dbLabel/general.working", "$writableCacheDir/$dbLabel/general");
      }
      $generalCacheDir = "$writableCacheDir/$dbLabel/general";
    }
    else {
      print STDERR "Using general libraries in:\n  $generalCacheDir\n"
          if ( $DEBUG );
    }

    if ( !$customLibFile ) {
      if ( $speciesCacheDir eq "" ) {

        # Need to build species specific libraries
        print "Building species libraries in: "
            . "$writableCacheDir/$dbLabel/$speciesWord\n";

        # Make cache dir
        if ( !-d "$writableCacheDir/$dbLabel/$speciesWord.working" ) {
          eval { mkpath( "$writableCacheDir/$dbLabel/$speciesWord.working" ) };
          if ( $@ ) {
            die "RepeatMasker::setspecies: Can't create dir path "
                . "$writableCacheDir/$dbLabel/$speciesWord! $@\n";
          }
        }

        # Build the cached refineable elements hash.  These are the
        # IDs of sequences which can be refined by searching against
        # the refinelib.
        my $famdbCmd = "$REPEATMASKER_DIR/famdb.py -i $rmLibDir/RepeatMaskerLib.h5 families '" .
                       $species . "' --ancestors --descendants -f embl_meta";
        #print "Running $famdbCmd\n";
        open IN,"$famdbCmd|" or die "Could not execute famdb.py using: $famdbCmd\n";
        my $acc;
        my $name;
        my $ref;
        my %validIDs = ();
        my %refineableHash = ();
        while ( <IN> ) {
           if ( /^\/\// ) {
             if ( $name ) {
               $validIDs{ lc( $name ) } = 1;
               $refineableHash{ $name } = 1 if ( $ref );
             }else {
               $validIDs{ lc( $acc ) } = 1;
               $refineableHash{ $acc } = 1 if ( $ref );
             }
             $acc = undef;
             $name = undef;
             $ref = 0; 
           }
           #ID   DF0000003; SV 4; linear; DNA; STD; UNC; 309 BP.
           $acc = $1 if ( /^ID\s+(\S+)\;/ );
           #NM   AluSc
           $name = $1 if ( /^NM\s+(\S+)/ );
           #CC        Refineable
           $ref = 1 if ( /^CC\s+Refineable/ );
        }
        close IN;
        nstore \%refineableHash,
            "$writableCacheDir/$dbLabel/$speciesWord.working/refineableHash.dat";
   
        ##
        ##
        ##
        open OUT,">$writableCacheDir/$dbLabel/$speciesWord.working/speciesMeta.pm" or 
            die "Could not open $writableCacheDir/$dbLabel/$speciesWord.working/speciesMeta.pm " . 
                "for writing!\n";
        print OUT "package speciesMeta;\n";
        print OUT "require Exporter;\n";
        print OUT "\@EXPORT_OK   = qw( \%validIDs );\n";
        print OUT "\%EXPORT_TAGS = ( all => [ \@EXPORT_OK ] );\n";
        print OUT "\@ISA         = qw(Exporter);\n";
        print OUT "BEGIN {\n";
        print OUT "  \%validIDs = (\n";
        my $output = Dumper( \%validIDs );
        $output =~ s/^\$VAR1 = \{//;
        $output =~ s/\};//;
        print OUT $output;
        print OUT "\n";
        print OUT "  ); }\n";
        close OUT;

        if ( $orgFlag eq "mammalia" || $orgFlag eq "primates" ) {
      
          # alu.lib, rodcutsines.lib => sinecutlib
          &createLibFromFamdb( $rmLibDir, 
                               "$writableCacheDir/$dbLabel/$speciesWord.working/sinecutlib", 
                               $species, 35, $searchEngine );

          # cut1.lib, rodcut.lib => shortcutlib
          &createLibFromFamdb( $rmLibDir, 
                               "$writableCacheDir/$dbLabel/$speciesWord.working/shortcutlib", 
                               $species, 40, $searchEngine );


          # cut2.lib, rodcut2.lib, cetartiocut.lib => cutlib
          &createLibFromFamdb( $rmLibDir, 
                               "$writableCacheDir/$dbLabel/$speciesWord.working/cutlib", 
                               $species, 45, $searchEngine );

          unless ( $searchEngine->isa( "HMMERSearchEngine" ) ) {

            # humsines.lib, rod1.lib, cetartio1.lib => shortlib
            &createLibFromFamdb( $rmLibDir, 
                                 "$writableCacheDir/$dbLabel/$speciesWord.working/shortlib", 
                                 $species, 50, $searchEngine );

            # humlines.lib, rod2.lib => longlib
            &createLibFromFamdb( $rmLibDir, 
                                 "$writableCacheDir/$dbLabel/$speciesWord.working/longlib", 
                                 $species, 55, $searchEngine );

            # mirs.lib => mirslib
            &createLibFromFamdb( $rmLibDir, 
                                 "$writableCacheDir/$dbLabel/$speciesWord.working/mirslib", 
                                 $species, 60, $searchEngine );

            # mir.lib => mirlib
            &createLibFromFamdb( $rmLibDir, 
                                 "$writableCacheDir/$dbLabel/$speciesWord.working/mirlib", 
                                 $species, 65, $searchEngine );

            # retrovirus.lib => retrolib
            &createLibFromFamdb( $rmLibDir, 
                                 "$writableCacheDir/$dbLabel/$speciesWord.working/retrolib", 
                                 $species, 70, $searchEngine );
          }
          else {
            # HMM search combines 35,50,60,65,70 into one search lib
            &createLibFromFamdb( $rmLibDir, 
                                 "$writableCacheDir/$dbLabel/$speciesWord.working/masklib", 
                                 $species, 95, $searchEngine );
          }

          # refinelib
          &createLibFromFamdb( $rmLibDir, 
                               "$writableCacheDir/$dbLabel/$speciesWord.working/refinelib", 
                               $species, 85, $searchEngine );
        }
        else {
          # Need to separate into a species.lib
          &createLibFromFamdb( $rmLibDir, 
                               "$writableCacheDir/$dbLabel/$speciesWord.working/specieslib", 
                               $species, 80, $searchEngine );
        }
        # Rename or copy upon successful completion
        if ( -d "$writableCacheDir/$dbLabel/$speciesWord" ) {
          # Directory exists but setup for a different search engine - copy files over
          system("cp $writableCacheDir/$dbLabel/$speciesWord.working/* $writableCacheDir/$dbLabel/$speciesWord");
          rmtree("$writableCacheDir/$dbLabel/$speciesWord.working",0,1);
        }else {
          rename("$writableCacheDir/$dbLabel/$speciesWord.working", "$writableCacheDir/$dbLabel/$speciesWord");
        }
        $speciesCacheDir = "$writableCacheDir/$dbLabel/$speciesWord";
      }
      else {
        print STDERR "Using species libraries in:\n  $speciesCacheDir\n"
            if ( $DEBUG );
      }
    }
  }

  return ( $species, $generalCacheDir, $speciesCacheDir, $customCacheDir, $dbLabel );

}

##-------------------------------------------------------------------------##
## Use:  createLibFromFamdb( $rmLibDir, $libName, $species,
##                           $stageNum, $searchEngine );
##
##         $rmLibDir     :
##         $libName      :  The name of the library to create.
##         $species      :  The name of the species to include seqs for.
##         $stageNum     :  The name of the old RM database.  Used to
##                          screen the repeats ( will generalize in the
##                          future ).
##         $searchEngine :
##
##  Returns
##
##     Creates a library by filtering the RepeatMasker.lib file
##     given specific filtering parameters ( species and stageNum).
##     If wublast is being used it also creates the binary versions
##     of the fasta library. Removes the library file if there are no
##     matching sequences.
##
##  Globals Used: None
##-------------------------------------------------------------------------##
sub createLibFromFamdb {
  my $rmLibDir     = shift;
  my $libName      = shift;
  my $species      = shift;
  my $stageNum     = shift;
  my $searchEngine = shift;

  print "RepeatMasker::createLib( "
      . "$libName, $species, $stageNum );\n"
      if ( $DEBUG );

  my $outFile = $libName;
  my $format = "fasta_name";
  if ( $searchEngine->isa( "HMMERSearchEngine" ) ) {
    $outFile .= ".hmm";
    $format = "hmm_species";
  }elsif ( $searchEngine->isa( "WUBlastSearchEngine" ) ) {
    $outFile = "$libName-wublast";
  }

  # NOTE: Stage 95 is a catch all for 35 50 55 60 65 70 75....basically
  #       excluding 25/40/45. Stage '80' is all stages.  Currently famdb.py handles these
  #       special case selections internally.
  my $famdbCmd = "$REPEATMASKER_DIR/famdb.py -i $rmLibDir/RepeatMaskerLib.h5 families '" . 
              $species . "' --ancestors --descendants --include-class-in-name --stage $stageNum -f $format";
  if ( $searchEngine->isa( "WUBlastSearchEngine" ) ) {
    $famdbCmd .= " --add-reverse-complement";
  }
  $famdbCmd .= " > $outFile";
  system($famdbCmd);

  if ( `grep -v -c "^#" $outFile` == 0 ) {
    print "createLibFromFamdb: Removing empty library $outFile\n" if ( $DEBUG );
    unlink( $outFile );
  }
  else {
    my ( $outFileVol, $outFileDir, $outFileBasename ) =
        File::Spec->splitpath( $outFile );
    $outFileDir = "." if ( $outFileDir eq "" );

    if ( $searchEngine->isa( "WUBlastSearchEngine" ) ) {
      system(   "$SETDB_PRGM $outFile > "
              . "$outFileDir/setdb.log 2>&1" ) == 0
          or die "RepeatMasker::createLib(): Error invoking setdb on file "
          . "$outFile.  We tried using the setdb program ( "
          . "$SETDB_PRGM ).\n";
      unlink( $outFile ) unless ( $DEBUG );
      move( "$outFile.ahd", "$libName.ahd" );
      move( "$outFile.atb", "$libName.atb" );
      move( "$outFile.bsq", "$libName.bsq" );
    }
    elsif ( $searchEngine->isa( "NCBIBlastSearchEngine" ) ) {
      system(   "$NCBIBLASTDB_PRGM -dbtype nucl "
              . "-in $outFile > $outFileDir/rmblastdb.log 2>&1" ) == 0
          or die "RepeatMasker::createLib(): Error invoking "
          . "$NCBIBLASTDB_PRGM"
          . " on file $outFile.\n";
    }
    elsif ( $searchEngine->isa( "HMMERSearchEngine" ) ) {
      #
      # With the addition of DR families we now have the potential of generating
      # families without any thresholds set.  We can generate thresholds for a
      # given genome size and E-Value using the HMM using the hmmstat program.
      # Ideally the genome size and E-Value would be parameters for RepeatMasker
      # but for now they are fixed at 3GB genome size ( typical for mammals ) and
      # the Dfam theoretical FDR E-value threshold of 0.02.  Below we calculate
      # this for all families in the library ( because it's fast ) and then 
      # add them only if a family is missing a GA.
      #
      system("mv $outFile $outFile.preThresh");
      open IN,"<$outFile.preThresh" or die "Could not open library $outFile.preThresh for reading!\n";
      open OUT,">$outFile" or die "Could not open library $outFile for writing!\n";
      my $acc;
      my $preHeader;
      my $postHeader;
      my $beforeChecksum = 1;
      my $hmm_maxl = 0;
      my $hmm_tau = 0;
      my $hmm_lambda = 0;
      my $hasGA = 0;
      my $Zval = 3000;
      my $evalue = 0.02;
      while ( <IN> ) {
        if ( /^ACC\s+(\S+)/ ) {
          $acc = $1;
          $hmm_maxl = 0;
          $hmm_tau = 0;
          $hmm_lambda = 0;
        }
        if ( /^MAXL\s+(\d+)/ ) {
          $hmm_maxl = $1;
        }
        if ( /^STATS\s+LOCAL\s+FORWARD\s+([\-\d\.]+)\s+([\-\d\.]+)/ ) {
          $hmm_tau = $1;
          $hmm_lambda = $2;
        }
        if ( /^GA\s+\d+\.\d+;/ ) {
          $hasGA = 1;
        }
        if ( $beforeChecksum ) {
          $preHeader .= $_;
        }else {
          $postHeader .= $_;
        }
        if ( /^CKSUM/ ) {
          $beforeChecksum = 0;
        }
        if ( /^\/\// ) {
          print OUT $preHeader;
          if ( $hasGA == 0 ){
            die "Could not find HMM Lambda is zero in accession $acc!\n" if ( $hmm_lambda == 0 );
            die "Could not find HMM MAXL value for $acc!\n" if ( $hmm_maxl == 0 );
            # From eval2score() originally in hmmstat and now in dfthresh ( see dfthresh for details )
            my $nseq = $Zval / $hmm_maxl;
            my $sc = $hmm_tau - ((1/$hmm_lambda) * log($evalue/$nseq));
            print OUT "GA    " . sprintf("0.2f",$sc) . ";\n";
            print OUT "TC    " . sprintf("0.2f",$sc) . ";\n";
            print OUT "NC    " . sprintf("0.2f",$sc) . ";\n";
          }
          print OUT $postHeader;
          $hasGA = 0;
          $acc = "";
          $preHeader = "";
          $postHeader = "";
          $beforeChecksum = 1;
        }
      }
      close IN;
      close OUT;

      if ( -s $outFile ) {
        unlink("$outFile.preThresh");
      }

      system(   "$HMMPRESS_PRGM"
              . " $outFile > $outFileDir/hmmPress.log 2>&1" ) == 0
          or die "RepeatMasker::createLib(): Error invoking "
          . "$HMMPRESS_PRGM on file "
          . "$outFile.\n";
    }
  }
}

1;