# The goal of this workflow is to check in on the 2019 data, compare the different flow cell runs, and make files that will be ready to group with 2018 and 2021 data. 

# 

# Here is the FWD primer: TATGGTAATT GT GTGYCAGCMGCCGCGGTAA	31 bp, needs to be trimmed
# Here is the REV primer: AGTCAGCCAG CC GGACTACNVGGGTWTCTAAT 32 bp, needs to be trimmed

# on Lovelace

# original data is in: /cluster/metagenomes/Iceland-2019
# fastq files are in: /cluster/metagenomes/Iceland-2019/GSF2432-2019-12/16S/Solo/Casava1.8 and /cluster/metagenomes/Iceland-2019/GSF2706-2020-12/16S/Solo/Casava1.8 - the 2706 folder holds re-runs of spot10, spot3, spot6 and spot8. Only the spot10 re-run was used, because the original runs of spots 3, 6 and 8 gave better results (at least as far as the initial sequencing files showed).
# working directory is: /mounts/lovelace/16S/Iceland-2019/qiime2-solo-prep-for-grouping
# files are stored in: /cluster/fieldscience/artifacts/Iceland-16S/2019-prep-for-grouping

# Load QIIME 2 on Lovelace: activate the qiime2-2021.4 environment module first,
# then source the tab-qiime helper into the current shell
# (presumably enables tab completion for the qiime CLI — TODO confirm).
module load qiime2/qiime2-2021.4
source tab-qiime

# Fetch the manifest and metadata sheets as TSV files.
# Each entry below is "<document id> <output file name>".
for sheet in \
	'1dSBNOXFZ0Yv-nVz63t-2BF4uwwVwrFxRMjjeUgT6Fi0 Iceland-2019-Manifest.tsv' \
	'1s5yzNfd5o4o5ASccJaRq78PDjY1ndb6GZtr6Q2pO8rQ Iceland-2019-Metadata.tsv'
do
	# Text before the first space is the document id; text after it is the output file.
	fetch-gdrive-object.py -d "${sheet%% *}" -t sheet -f tsv -o "${sheet#* }"
done

# copy correct reads into the working folder and gzip them

# Import the forward/reverse reads listed in the manifest as one paired-end
# QIIME 2 artifact, then build the demux summary visualization for it.
raw_artifact=Iceland-2019-raw.qza

qiime tools import \
	--input-path Iceland-2019-Manifest.tsv \
	--input-format PairedEndFastqManifestPhred33V2 \
	--type 'SampleData[PairedEndSequencesWithQuality]' \
	--output-path "$raw_artifact"

# The visualization shares the artifact's base name, with a .qzv extension.
qiime demux summarize \
	--i-data "$raw_artifact" \
	--o-visualization "${raw_artifact%.qza}.qzv"

# move files to /cluster/fieldscience/artifacts/Iceland-16S/2019-prep-for-grouping

# Now trim off the primers and truncate based on quality score. We truncated at the first position where the lower quartile of the box-and-whisker plot lands below 20. This should leave us with an estimated overlap of 78 bp, assuming our target fragment was 450 bp long (300+300-450-2-70 = 78, i.e. 2 bp truncated from the forward reads and 70 bp from the reverse reads).

# First DADA2 run: trim exactly the primer lengths (31 bp forward, 32 bp reverse)
# and truncate the reads at 298 (forward) and 230 (reverse).
# The job is launched under nohup in the background. Its output artifacts only
# exist once it has finished, so remember its PID and wait for it before
# tabulating the denoising stats; otherwise the tabulate step runs against a
# stats file that has not been written yet.
nohup qiime dada2 denoise-paired \
	--p-n-threads 8 \
	--i-demultiplexed-seqs Iceland-2019-raw.qza \
	--p-trunc-len-f 298 --p-trim-left-f 31 \
	--p-trunc-len-r 230 --p-trim-left-r 32 \
	--o-representative-sequences Iceland-2019-rep-seqs-dada2.qza \
	--o-table Iceland-2019-table-dada2.qza \
	--o-denoising-stats Iceland-2019-stats-dada2.qza &
dada2_run1_pid=$!

# Only tabulate when denoising succeeded; a failed run leaves no usable stats.
if wait "$dada2_run1_pid"; then
	qiime metadata tabulate \
		--m-input-file Iceland-2019-stats-dada2.qza \
		--o-visualization Iceland-2019-stats-dada2.qzv
else
	printf '%s\n' 'dada2 denoise-paired (first run) failed; skipping stats tabulation' >&2
fi

# move files to /cluster/fieldscience/artifacts/Iceland-16S/2019-prep-for-grouping

# Second DADA2 run: same truncation lengths as the first run (298 forward,
# 230 reverse) but a larger 5' trim (89 bp forward, 102 bp reverse); outputs
# carry a "2" suffix so they do not overwrite the first run's artifacts.
# NOTE(review): the rationale for the 89/102 trim values is not recorded here — confirm.
# The job is launched under nohup in the background. Its output artifacts only
# exist once it has finished, so remember its PID and wait for it before
# tabulating the denoising stats; otherwise the tabulate step runs against a
# stats file that has not been written yet.
nohup qiime dada2 denoise-paired \
	--p-n-threads 8 \
	--i-demultiplexed-seqs Iceland-2019-raw.qza \
	--p-trunc-len-f 298 --p-trim-left-f 89 \
	--p-trunc-len-r 230 --p-trim-left-r 102 \
	--o-representative-sequences Iceland-2019-rep-seqs2-dada2.qza \
	--o-table Iceland-2019-table2-dada2.qza \
	--o-denoising-stats Iceland-2019-stats2-dada2.qza &
dada2_run2_pid=$!

# Only tabulate when denoising succeeded; a failed run leaves no usable stats.
if wait "$dada2_run2_pid"; then
	qiime metadata tabulate \
		--m-input-file Iceland-2019-stats2-dada2.qza \
		--o-visualization Iceland-2019-stats2-dada2.qzv
else
	printf '%s\n' 'dada2 denoise-paired (second run) failed; skipping stats tabulation' >&2
fi

# Use the second dada2 run in downstream apps - it's not the best but slightly better than before. 

# CHECK BARCODES BEFORE MOVING ON