#!/bin/bash
# autopkgtest script for the sortmerna package: stages the packaged example
# data in a scratch directory, decompresses it and runs the checks below.
set -e

pkg=sortmerna

export LC_ALL=C.UTF-8
# When AUTOPKGTEST_TMP is unset or empty, create a private scratch directory
# and remove it again when the script exits or is interrupted.
if [ -z "${AUTOPKGTEST_TMP}" ] ; then
  AUTOPKGTEST_TMP=$(mktemp -d "/tmp/${pkg}-test.XXXXXX")
  # Double quote below to expand the temporary directory variable now versus
  # later is on purpose.
  # shellcheck disable=SC2064
  trap "rm -rf ${AUTOPKGTEST_TMP}" 0 INT QUIT ABRT PIPE TERM
fi

# The glob has to stay outside the quotes so that it still expands.
cp -a "/usr/share/doc/${pkg}/examples"/* "${AUTOPKGTEST_TMP}"

cd "${AUTOPKGTEST_TMP}"

# The ./ prefix keeps any file name starting with '-' from being parsed as a
# gunzip option.
gunzip -r ./*
# Work directories referenced by the tests (--tmpdir temp, output/...).
mkdir temp output

printf '%s\n' '=== Test 0 (superficial verifications) ==='
# Invoke the binary once with --version and once with -h.
for flag in --version -h; do
  sortmerna "${flag}"
done
# FIXME: the test items that follow have become irrelevant with contemporary
# sortmerna usage.  Ideally they should be updated so that the superficial
# marking associated with the present autopkgtest can be dropped.
# Until then, the script stops at this early point.
exit

echo '=== Test 1 ==='
echo 'Writing to --tmpdir'
# Index the GQ099317 reference, passing the temp directory as --tmpdir.
indexdb_rna --ref data/ref_GQ099317_forward_and_rc.fasta,output/test1 --tmpdir temp
# Check every expected index file and name the missing ones on stderr.
missing=0
for expected in \
    output/test1.kmer_0.dat \
    output/test1.bursttrie_0.dat \
    output/test1.pos_0.dat \
    output/test1.stats; do
  if [ ! -f "${expected}" ]; then
    echo "missing expected output: ${expected}" >&2
    missing=1
  fi
done
if [ "${missing}" -eq 0 ]; then
  echo PASS
else
  echo FAIL
  # Echoing FAIL alone leaves the exit status at 0; fail the run explicitly.
  exit 1
fi

echo
echo '=== Test 2 ==='
echo 'Indexing a database using SortMeRNA'
indexdb_rna --ref data/gg_13_8_ref_set.fasta,output/test2
# Check every expected index file and name the missing ones on stderr.
missing=0
for expected in \
    output/test2.kmer_0.dat \
    output/test2.bursttrie_0.dat \
    output/test2.pos_0.dat \
    output/test2.stats; do
  if [ ! -f "${expected}" ]; then
    echo "missing expected output: ${expected}" >&2
    missing=1
  fi
done
if [ "${missing}" -eq 0 ]; then
  echo PASS
else
  echo FAIL
  # Echoing FAIL alone leaves the exit status at 0; fail the run explicitly.
  exit 1
fi

echo
echo '=== Test 3 ==='
echo 'Test indexing a database using SortMeRNA with m = 0.05, that is 7 parts'
indexdb_rna --ref data/gg_13_8_ref_set.fasta,output/test3 -m 0.05
# Expect parts 0..6 of each of the kmer, bursttrie and pos files plus the
# stats file.  Brace expansion produces the same 22 paths that were
# previously spelled out one by one.
missing=0
for expected in \
    output/test3.{kmer,bursttrie,pos}_{0..6}.dat \
    output/test3.stats; do
  if [ ! -f "${expected}" ]; then
    echo "missing expected output: ${expected}" >&2
    missing=1
  fi
done
if [ "${missing}" -eq 0 ]; then
  echo PASS
else
  echo FAIL
  # Echoing FAIL alone leaves the exit status at 0; fail the run explicitly.
  exit 1
fi

printf '\n%s\n' 'Generating index files for the remaining tests.'
# Reference FASTA files that the index builds below are based on.
bac_ref=data/silva-bac-16s-database-id85.fasta
arc_ref=data/silva-arc-16s-database-id95.fasta
gq_ref=data/ref_GQ099317_forward_and_rc.fasta
# Indexes for tests 4, 5 and 6.
indexdb_rna --ref "${bac_ref},output/bac.idx:${arc_ref},output/arc.idx"
# Index for tests 7 and 8, built with --max_pos 250.
indexdb_rna --ref "${bac_ref},output/bac_250.idx" --max_pos 250
# Index for test 9.
indexdb_rna --ref "${gq_ref},output/GQ099317.idx"
printf '%s\n' 'Done'


echo
echo '=== Test 4 ==='
echo 'Test sortmerna on 3 reads against arc-16s and bac-16s databases.'
echo '2/3 reads match both arc-16s and bac-16s and 1/3 is a random read.'
sortmerna --ref data/silva-bac-16s-database-id85.fasta,output/bac.idx:data/silva-arc-16s-database-id95.fasta,output/arc.idx \
          --aligned output/test4aligned \
          --reads data/set7_arc_bac_16S_database_match.fasta \
          --log --fastx
# Check that the aligned reads and the log were written, naming any missing
# file on stderr.
missing=0
for expected in \
    output/test4aligned.fasta \
    output/test4aligned.log; do
  if [ ! -f "${expected}" ]; then
    echo "missing expected output: ${expected}" >&2
    missing=1
  fi
done
if [ "${missing}" -eq 0 ]; then
  echo PASS
else
  echo FAIL
  # Echoing FAIL alone leaves the exit status at 0; fail the run explicitly.
  exit 1
fi

echo
echo '=== Test 5 ==='
echo 'Test sortmerna on simulated data, 10000 reads with 1% error - --aligned,'
echo '10000 reads with 10% error - de novo, 10000 reads random - --other'
echo 'Conditions: reference index and input query FASTA file both processed as one section.'
sortmerna --ref data/silva-bac-16s-database-id85.fasta,output/bac.idx \
          --aligned output/test5aligned --other output/test5other \
          --reads data/set5_simulated_amplicon_silva_bac_16s.fasta \
          --id 0.97 --coverage 0.97 --log --otu_map --de_novo_otu --blast "1 cigar qcov" --fastx
# Check every expected output file and name the missing ones on stderr.
missing=0
for expected in \
    output/test5aligned.blast \
    output/test5aligned.fasta \
    output/test5aligned.log \
    output/test5aligned_denovo.fasta \
    output/test5aligned_otus.txt \
    output/test5other.fasta; do
  if [ ! -f "${expected}" ]; then
    echo "missing expected output: ${expected}" >&2
    missing=1
  fi
done
if [ "${missing}" -eq 0 ]; then
  echo PASS
else
  echo FAIL
  # Echoing FAIL alone leaves the exit status at 0; fail the run explicitly.
  exit 1
fi

echo
echo '=== Test 6 ==='
echo 'Test sortmerna on simulated data, 10000 reads with 1% error - --aligned,'
echo '10000 reads with 10% error - de novo, 10000 reads random - --other'
echo 'Conditions: reference index processed as one unit and input query FASTA file in 6 sections.'
# Same invocation as test 5 with "-m 1" added.
sortmerna --ref data/silva-bac-16s-database-id85.fasta,output/bac.idx \
          --aligned output/test6aligned --other output/test6other \
          --reads data/set5_simulated_amplicon_silva_bac_16s.fasta \
          --id 0.97 --coverage 0.97 --log --otu_map --de_novo_otu --blast "1 cigar qcov" --fastx -m 1
# Check every expected output file and name the missing ones on stderr.
missing=0
for expected in \
    output/test6aligned.blast \
    output/test6aligned.fasta \
    output/test6aligned.log \
    output/test6aligned_denovo.fasta \
    output/test6aligned_otus.txt \
    output/test6other.fasta; do
  if [ ! -f "${expected}" ]; then
    echo "missing expected output: ${expected}" >&2
    missing=1
  fi
done
if [ "${missing}" -eq 0 ]; then
  echo PASS
else
  echo FAIL
  # Echoing FAIL alone leaves the exit status at 0; fail the run explicitly.
  exit 1
fi

echo
echo '=== Test 7 ==='
echo 'Test outputting FASTA file for de novo clustering using environmental data.'
echo 'Conditions: input FASTA file is processed in one mapped section.'
sortmerna --ref data/silva-bac-16s-database-id85.fasta,output/bac_250.idx \
          --aligned output/test7aligned --id 0.97 --coverage 0.97 --log \
          --otu_map --de_novo_otu --fastx --reads data/set2_environmental_study_550_amplicon.fasta
# Check every expected output file and name the missing ones on stderr.
missing=0
for expected in \
    output/test7aligned.fasta \
    output/test7aligned.log \
    output/test7aligned_denovo.fasta \
    output/test7aligned_otus.txt; do
  if [ ! -f "${expected}" ]; then
    echo "missing expected output: ${expected}" >&2
    missing=1
  fi
done
if [ "${missing}" -eq 0 ]; then
  echo PASS
else
  echo FAIL
  # Echoing FAIL alone leaves the exit status at 0; fail the run explicitly.
  exit 1
fi

echo
echo '=== Test 8 ==='
echo 'Test outputting FASTQ files for merged mate pair reads.'
# Three runs on the same input: without a pairing option, with --paired_in
# and with --paired_out.  All three write to the same output base names.
sortmerna --ref data/silva-bac-16s-database-id85.fasta,output/bac_250.idx \
          --aligned output/test8aligned --other output/test8nonaligned \
          --fastx --reads data/set4_mate_pairs_metatranscriptomics.fastq --log
sortmerna --ref data/silva-bac-16s-database-id85.fasta,output/bac_250.idx \
          --aligned output/test8aligned --other output/test8nonaligned \
          --paired_in  --fastx --reads data/set4_mate_pairs_metatranscriptomics.fastq --log
sortmerna --ref data/silva-bac-16s-database-id85.fasta,output/bac_250.idx \
          --aligned output/test8aligned --other output/test8nonaligned \
          --paired_out --fastx --reads data/set4_mate_pairs_metatranscriptomics.fastq --log
# Check every expected output file and name the missing ones on stderr.
missing=0
for expected in \
    output/test8aligned.fastq \
    output/test8aligned.log \
    output/test8nonaligned.fastq; do
  if [ ! -f "${expected}" ]; then
    echo "missing expected output: ${expected}" >&2
    missing=1
  fi
done
if [ "${missing}" -eq 0 ]; then
  echo PASS
else
  echo FAIL
  # Echoing FAIL alone leaves the exit status at 0; fail the run explicitly.
  exit 1
fi

echo
echo '=== Test 9 ==='
echo 'Test SortMeRNAs option --num_alignments 0 which should'
echo 'search both forward and reverse-complement query for alignments'
sortmerna --ref data/ref_GQ099317_forward_and_rc.fasta,output/GQ099317.idx \
          --aligned output/test9aligned --reads data/illumina_GQ099317.fasta --num_alignments 0 --sam
# Only the existence of the SAM output is checked.
if [ -f output/test9aligned.sam ]; then
  echo PASS
else
  echo "missing expected output: output/test9aligned.sam" >&2
  echo FAIL
  # Echoing FAIL alone leaves the exit status at 0; fail the run explicitly.
  exit 1
fi
