######################### 2019 Iceland 16S ######################

####### UPDATED 1:28 PM local time (Iceland) May 24th ###########

## find data here: http://lovelace.cluster.earlham.edu/mounts/lovelace/16S/Iceland-2019/Skalanes-Test-Pit/2024_workflow/ 

# ssh to smithem3@cluster.earlham.edu  
# EJcG5pFkGchln3
ssh lovelace
cd /mounts/lovelace/16S/Iceland-2019/Skalanes-Test-Pit/2024_workflow

## created manifest file: https://docs.google.com/spreadsheets/d/1Hnu-4gPQfmLv6Fb-n5E8Gpg7dcCnQz84F9uEHxF4XnQ/edit#gid=0
## and only test pit samples: https://docs.google.com/spreadsheets/d/1Fg6BWqgH5M5avdLudQcJ4Rufxfp23Q0iqbUz_Uxe62o/edit#gid=0

## load qiime2

# module load qiime2/2023.9-amplicon
module load qiime2/2024 
source tab-qiime

## to find out about the availalbe modules use: module avail
## note that in 2023 qiime2 split and now has -amplicon and -shotgun 


## get the single-end manifest file from Drive: 
fetch-gdrive-object.py -d 1Hnu-4gPQfmLv6Fb-n5E8Gpg7dcCnQz84F9uEHxF4XnQ -t sheet -f tsv -o STP-16S-All-Manifest.tsv

fetch-gdrive-object.py -d 1Fg6BWqgH5M5avdLudQcJ4Rufxfp23Q0iqbUz_Uxe62o -t sheet -f tsv -o STP-16S-TPonly-Manifest.tsv

####################### Single-end!!! #################################

### Re-ran ALL on 20240423 just to change the sample names so things will plot in order - aka changed STP2 to STP02

qiime tools import \
  --type 'SampleData[SequencesWithQuality]' \
  --input-path STP-16S-All-Manifest.tsv \
  --output-path STP-16S-All-single-end-demux.qza \
  --input-format SingleEndFastqManifestPhred33V2

qiime demux summarize \
	--i-data STP-16S-All-single-end-demux.qza \
	--o-visualization STP-16S-All-single-end-demux.qzv

qiime tools import \
  --type 'SampleData[SequencesWithQuality]' \
  --input-path STP-16S-TPonly-Manifest.tsv \
  --output-path STP-16S-TPonly-single-end-demux.qza \
  --input-format SingleEndFastqManifestPhred33V2

qiime demux summarize \
	--i-data STP-16S-TPonly-single-end-demux.qza \
	--o-visualization STP-16S-TPonly-single-end-demux.qzv


# CUTADAPT  
# Add the flags --p-match-read-wildcards and --p-match-adapter-wildcards to your command. 
# This will allow matching of IUPAC wild cards such as H, V, N, etc..
# Furthermore, I'd advise adding the flag --p-discard-untrimmed as well. 
# This will discard any sequences in which both primers could not be trimmed from the sequences.

qiime cutadapt trim-single \
  --i-demultiplexed-sequences STP-16S-All-single-end-demux.qza \
  --p-cores 8 \
  --p-front TATGGTAATTGTGTGCCAGCMGCCGCGGTAA \
  --p-match-read-wildcards \
  --p-match-adapter-wildcards \
  --p-discard-untrimmed \
  --o-trimmed-sequences STP-16S-All-single-end-trimmed-sequences.qza 

qiime demux summarize \
	--i-data STP-16S-All-single-end-trimmed-sequences.qza  \
	--o-visualization STP-16S-All-single-end-trimmed-sequences.qzv

qiime cutadapt trim-single \
  --i-demultiplexed-sequences STP-16S-TPonly-single-end-demux.qza \
  --p-cores 8 \
  --p-front TATGGTAATTGTGTGCCAGCMGCCGCGGTAA \
  --p-match-read-wildcards \
  --p-match-adapter-wildcards \
  --p-discard-untrimmed \
  --o-trimmed-sequences STP-16S-TPonly-single-end-trimmed-sequences.qza 

qiime demux summarize \
	--i-data STP-16S-TPonly-single-end-trimmed-sequences.qza  \
	--o-visualization STP-16S-TPonly-single-end-trimmed-sequences.qzv


# DADA2, feature table summarize
# for All: trunc at 226 where QS bottom of box drops below 25
# for Test Pit Only (TPonly): trunc at 226 where QS bottom of box drops below 25

nohup qiime dada2 denoise-single \
	--p-n-threads 8 \
	--i-demultiplexed-seqs STP-16S-All-single-end-trimmed-sequences.qza \
	--p-trunc-len 226 \
	--o-representative-sequences STP-16S-All-single-end-trimmed-repseqs.qza \
	--o-table STP-16S-All-single-end-trimmed-table.qza \
	--o-denoising-stats STP-16S-All-single-end-trimmed-stats.qza & 

qiime metadata tabulate \
	--m-input-file STP-16S-All-single-end-trimmed-stats.qza \
	--o-visualization STP-16S-All-single-end-trimmed-stats.qzv 

#  metadata: https://docs.google.com/spreadsheets/d/1wQ3XPlk0CN7bIoiAifaVTu-wNjsc91IdTTFByCKgkAY/edit#gid=0 

fetch-gdrive-object.py -d 1wQ3XPlk0CN7bIoiAifaVTu-wNjsc91IdTTFByCKgkAY -t sheet -f tsv -o STP-16S-Metadata.tsv

## made an updated metadata (May 22) including sequencing and DADA2 info

fetch-gdrive-object.py -d 1wQ3XPlk0CN7bIoiAifaVTu-wNjsc91IdTTFByCKgkAY -t sheet -f tsv -o STP-16S-large-Metadata.tsv

qiime feature-table summarize \
	--i-table STP-16S-All-single-end-trimmed-table.qza \
	--o-visualization STP-16S-All-single-end-trimmed-table.qzv \
	--m-sample-metadata-file STP-16S-Metadata.tsv

qiime feature-table tabulate-seqs \
  --i-data STP-16S-All-single-end-trimmed-repseqs.qza \
  --o-visualization STP-16S-All-single-end-trimmed-repseqs.qzv

nohup qiime dada2 denoise-single \
	--p-n-threads 8 \
	--i-demultiplexed-seqs STP-16S-TPonly-single-end-trimmed-sequences.qza \
	--p-trunc-len 226 \
	--o-representative-sequences STP-16S-TPonly-single-end-trimmed-repseqs.qza \
	--o-table STP-16S-TPonly-single-end-trimmed-table.qza \
	--o-denoising-stats STP-16S-TPonly-single-end-trimmed-stats.qza & 

qiime metadata tabulate \
	--m-input-file STP-16S-TPonly-single-end-trimmed-stats.qza \
	--o-visualization STP-16S-TPonly-single-end-trimmed-stats.qzv 

qiime feature-table summarize \
	--i-table STP-16S-TPonly-single-end-trimmed-table.qza \
	--o-visualization STP-16S-TPonly-single-end-trimmed-table.qzv \
	--m-sample-metadata-file STP-16S-Metadata.tsv

qiime feature-table tabulate-seqs \
  --i-data STP-16S-TPonly-single-end-trimmed-repseqs.qza \
  --o-visualization STP-16S-TPonly-single-end-trimmed-repseqs.qzv

#### Charlie noticed that we can modify the number of reads that DADA2 uses: 
# The number of reads to use when training the error model. Smaller numbers will result in a shorter run
# time but a less reliable error model.[default: 1,000,000]

# When I summed up the number of reads we got in the 2019 16S STP data, we have 1,453,471
# Somewhere on the forum (sorry I couldn't find the exact thread) the dada2 developer mentions that 
#the default 1M is based on some prior benchmarking and there is only modest improvements beyond that, say up to 2Mil.


qiime dada2 denoise-single \
	--p-n-threads 8 \
	--p-n-reads-learn 3000000 \
	--i-demultiplexed-seqs STP-16S-All-single-end-trimmed-sequences.qza \
	--p-trunc-len 226 \
	--o-representative-sequences STP-16S-All-single-end-trimmed-repseqs.qza \
	--o-table STP-16S-All-single-end-trimmed-table.qza \
	--o-denoising-stats STP-16S-All-single-end-trimmed-stats.qza

qiime metadata tabulate \
	--m-input-file STP-16S-All-single-end-trimmed-stats.qza \
	--o-visualization STP-16S-All-single-end-trimmed-stats.qzv 

# For "all" STP data: Sampling Depth of 38914 Retained 622,624 (71.75%) features in 16 (100.00%) samples at the specifed sampling depth.

# For "test pit only" data: Sampling Depth of 38891 Retained 505,583 (75.36%) features in 13 (100.00%) samples at the specifed sampling depth.

qiime diversity alpha-rarefaction \
  --i-table STP-16S-All-single-end-trimmed-table.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --o-visualization STP-16S-All-alpha-rarefaction-38914.qzv \
  --p-min-depth 10 \
  --p-max-depth 38914

qiime diversity alpha-rarefaction \
  --i-table STP-16S-TPonly-single-end-trimmed-table.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --o-visualization STP-16S-TPonly-alpha-rarefaction-38891.qzv \
  --p-min-depth 10 \
  --p-max-depth 38891

# at these two levels of rarefaction we are beyond the exponential growth of the curve. 

## re-upload metadata after changing the "floor" category to include "floor-dung" so the sample size is more than 2

fetch-gdrive-object.py -d 1wQ3XPlk0CN7bIoiAifaVTu-wNjsc91IdTTFByCKgkAY -t sheet -f tsv -o STP-16S-Grouped-Floors-Metadata.tsv


nohup qiime phylogeny align-to-tree-mafft-fasttree \
	--p-n-threads 8 \
	--i-sequences STP-16S-All-single-end-trimmed-repseqs.qza \
	--o-alignment STP-16S-All-single-end-trimmed-repseqs-aligned.qza \
	--o-masked-alignment STP-16S-All-single-end-trimmed-repseqs-masked-aligned.qza \
	--o-tree STP-16S-All-single-end-trimmed-unrooted-tree.qza \
	--o-rooted-tree STP-16S-All-single-end-trimmed-rooted-tree.qza &

nohup qiime phylogeny align-to-tree-mafft-fasttree \
	--p-n-threads 8 \
	--i-sequences STP-16S-TPonly-single-end-trimmed-repseqs.qza \
	--o-alignment STP-16S-TPonly-single-end-trimmed-repseqs-aligned.qza \
	--o-masked-alignment STP-16S-TPonly-single-end-trimmed-repseqs-masked-aligned.qza \
	--o-tree STP-16S-TPonly-single-end-trimmed-unrooted-tree.qza \
	--o-rooted-tree STP-16S-TPonly-single-end-trimmed-rooted-tree.qza &


nohup qiime diversity core-metrics-phylogenetic \
	--i-phylogeny STP-16S-All-single-end-trimmed-rooted-tree.qza \
	--i-table STP-16S-All-single-end-trimmed-table.qza  \
	--p-sampling-depth 38914 \
	--m-metadata-file STP-16S-Metadata.tsv \
	--output-dir STP-16S-All-core-metrics-38914 &

qiime diversity alpha-group-significance \
	--i-alpha-diversity STP-16S-All-core-metrics-38914/faith_pd_vector.qza \
	--m-metadata-file STP-16S-Metadata.tsv \
	--o-visualization STP-16S-All-core-metrics-38914/faith-pd-group-significance.qzv

qiime diversity alpha-group-significance \
	--i-alpha-diversity STP-16S-All-core-metrics-38914/evenness_vector.qza \
	--m-metadata-file STP-16S-Metadata.tsv \
	--o-visualization STP-16S-All-core-metrics-38914/evenness-group-significance.qzv

qiime diversity alpha-group-significance \
	--i-alpha-diversity STP-16S-All-core-metrics-38914/shannon_vector.qza \
	--m-metadata-file STP-16S-Metadata.tsv \
	--o-visualization STP-16S-All-core-metrics-38914/shannon-group-significance.qzv


nohup qiime diversity core-metrics-phylogenetic \
	--i-phylogeny STP-16S-TPonly-single-end-trimmed-rooted-tree.qza \
	--i-table STP-16S-TPonly-single-end-trimmed-table.qza \
	--p-sampling-depth 38891 \
	--m-metadata-file STP-16S-Metadata.tsv \
	--output-dir STP-16S-TPonly-core-metrics-38891 &

qiime diversity alpha-group-significance \
	--i-alpha-diversity STP-16S-TPonly-core-metrics-38891/faith_pd_vector.qza \
	--m-metadata-file STP-16S-Metadata.tsv \
	--o-visualization STP-16S-TPonly-core-metrics-38891/faith-pd-group-significance.qzv

qiime diversity alpha-group-significance \
	--i-alpha-diversity STP-16S-TPonly-core-metrics-38891/evenness_vector.qza \
	--m-metadata-file STP-16S-Metadata.tsv \
	--o-visualization STP-16S-TPonly-core-metrics-38891/evenness-group-significance.qzv

qiime diversity alpha-group-significance \
	--i-alpha-diversity STP-16S-TPonly-core-metrics-38891/shannon_vector.qza \
	--m-metadata-file STP-16S-Metadata.tsv \
	--o-visualization STP-16S-TPonly-core-metrics-38891/shannon-group-significance.qzv

###################################################
### PHYLOGENY ####

nohup qiime phylogeny align-to-tree-mafft-fasttree \
	--p-n-threads 8 \
	--i-sequences STP-16S-All-single-end-trimmed-repseqs.qza \
	--o-alignment STP-16S-All-single-end-trimmed-repseqs-aligned.qza \
	--o-masked-alignment STP-16S-All-single-end-trimmed-repseqs-masked-aligned.qza \
	--o-tree STP-16S-All-single-end-trimmed-unrooted-tree.qza \
	--o-rooted-tree STP-16S-All-single-end-trimmed-rooted-tree.qza &

nohup qiime phylogeny align-to-tree-mafft-fasttree \
	--p-n-threads 8 \
	--i-sequences STP-16S-TPonly-single-end-trimmed-repseqs.qza \
	--o-alignment STP-16S-TPonly-single-end-trimmed-repseqs-aligned.qza \
	--o-masked-alignment STP-16S-TPonly-single-end-trimmed-repseqs-masked-aligned.qza \
	--o-tree STP-16S-TPonly-single-end-trimmed-unrooted-tree.qza \
	--o-rooted-tree STP-16S-TPonly-single-end-trimmed-rooted-tree.qza &


## ignored this next section on the second run-through on 20240423 because the new metadata file has the grouped floors ##
## on 20240423 jumped to PERMANOVA analysis ##
## re-upload metadata after changing the "floor" category to include "floor-dung" so the sample size is more than 2

fetch-gdrive-object.py -d 1wQ3XPlk0CN7bIoiAifaVTu-wNjsc91IdTTFByCKgkAY -t sheet -f tsv -o STP-16S-Grouped-Floors-Metadata.tsv

##############
nohup qiime diversity core-metrics-phylogenetic \
	--i-phylogeny STP-16S-All-single-end-trimmed-rooted-tree.qza \
	--i-table STP-16S-All-single-end-trimmed-table.qza  \
	--p-sampling-depth 38914 \
	--m-metadata-file STP-16S-Grouped-Floors-Metadata.tsv \
	--output-dir STP-16S-All-core-metrics-grouped-floor-38914 &

qiime diversity alpha-group-significance \
	--i-alpha-diversity STP-16S-All-core-metrics-grouped-floor-38914/faith_pd_vector.qza \
	--m-metadata-file STP-16S-Grouped-Floors-Metadata.tsv \
	--o-visualization STP-16S-All-core-metrics-grouped-floor-38914/faith-pd-group-significance.qzv

qiime diversity alpha-group-significance \
	--i-alpha-diversity STP-16S-All-core-metrics-grouped-floor-38914/evenness_vector.qza \
	--m-metadata-file STP-16S-Grouped-Floors-Metadata.tsv \
	--o-visualization STP-16S-All-core-metrics-grouped-floor-38914/evenness-group-significance.qzv

qiime diversity alpha-group-significance \
	--i-alpha-diversity STP-16S-All-core-metrics-grouped-floor-38914/shannon_vector.qza \
	--m-metadata-file STP-16S-Grouped-Floors-Metadata.tsv \
	--o-visualization STP-16S-All-core-metrics-grouped-floor-38914/shannon-group-significance.qzv


nohup qiime diversity core-metrics-phylogenetic \
	--i-phylogeny STP-16S-TPonly-single-end-trimmed-rooted-tree.qza \
	--i-table STP-16S-TPonly-single-end-trimmed-table.qza \
	--p-sampling-depth 38891 \
	--m-metadata-file STP-16S-Grouped-Floors-Metadata.tsv \
	--output-dir STP-16S-TPonly-core-metrics-grouped-floor-38891 &

qiime diversity alpha-group-significance \
	--i-alpha-diversity STP-16S-TPonly-core-metrics-grouped-floor-38891/faith_pd_vector.qza \
	--m-metadata-file STP-16S-Grouped-Floors-Metadata.tsv \
	--o-visualization STP-16S-TPonly-core-metrics-grouped-floor-38891/faith-pd-group-significance.qzv

qiime diversity alpha-group-significance \
	--i-alpha-diversity STP-16S-TPonly-core-metrics-grouped-floor-38891/evenness_vector.qza \
	--m-metadata-file STP-16S-Grouped-Floors-Metadata.tsv \
	--o-visualization STP-16S-TPonly-core-metrics-grouped-floor-38891/evenness-group-significance.qzv

qiime diversity alpha-group-significance \
	--i-alpha-diversity STP-16S-TPonly-core-metrics-grouped-floor-38891/shannon_vector.qza \
	--m-metadata-file STP-16S-Grouped-Floors-Metadata.tsv \
	--o-visualization STP-16S-TPonly-core-metrics-grouped-floor-38891/shannon-group-significance.qzv

### PERMANOVA - with grouped floor data 

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-All-core-metrics-38914/unweighted_unifrac_distance_matrix.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-All-core-metrics-38914/unweighted-unifrac-era-significance.qzv \
  --p-pairwise

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-All-core-metrics-38914/weighted_unifrac_distance_matrix.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-All-core-metrics-38914/weighted-unifrac-era-significance.qzv \
  --p-pairwise

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-All-core-metrics-38914/bray_curtis_distance_matrix.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-All-core-metrics-38914/bray-curtis-era-significance.qzv \
  --p-pairwise

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-All-core-metrics-38914/jaccard_distance_matrix.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-All-core-metrics-38914/jaccard-era-significance.qzv \
  --p-pairwise

##
qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-TPonly-core-metrics-grouped-floor-38891/unweighted_unifrac_distance_matrix.qza \
  --m-metadata-file STP-16S-Grouped-Floors-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-TPonly-core-metrics-grouped-floor-38891/unweighted-unifrac-era-significance.qzv \
  --p-pairwise

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-TPonly-core-metrics-grouped-floor-38891/weighted_unifrac_distance_matrix.qza \
  --m-metadata-file STP-16S-Grouped-Floors-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-TPonly-core-metrics-grouped-floor-38891/weighted-unifrac-era-significance.qzv \
  --p-pairwise

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-TPonly-core-metrics-grouped-floor-38891/bray_curtis_distance_matrix.qza \
  --m-metadata-file STP-16S-Grouped-Floors-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-TPonly-core-metrics-grouped-floor-38891/bray-curtis-era-significance.qzv \
  --p-pairwise

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-TPonly-core-metrics-grouped-floor-38891/jaccard_distance_matrix.qza \
  --m-metadata-file STP-16S-Grouped-Floors-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-TPonly-core-metrics-grouped-floor-38891/jaccard-era-significance.qzv \
  --p-pairwise


### PERMDISP

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-All-core-metrics-38914/unweighted_unifrac_distance_matrix.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-All-core-metrics-38914/unweighted-unifrac-area-significance-disp.qzv \
  --p-method permdisp

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-All-core-metrics-38914/weighted_unifrac_distance_matrix.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-All-core-metrics-38914/weighted-unifrac-area-significance-disp.qzv \
  --p-method permdisp

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-All-core-metrics-38914/bray_curtis_distance_matrix.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-All-core-metrics-38914/bray_curtis_distance-area-significance-disp.qzv \
  --p-method permdisp

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-All-core-metrics-38914/jaccard_distance_matrix.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-All-core-metrics-38914/jaccard_distance-area-significance_disp.qzv \
  --p-method permdisp




qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-TPonly-core-metrics-grouped-floor-38891/unweighted_unifrac_distance_matrix.qza \
  --m-metadata-file STP-16S-Grouped-Floors-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-TPonly-core-metrics-grouped-floor-38891/unweighted-unifrac-area-significance-disp.qzv \
  --p-method permdisp

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-TPonly-core-metrics-grouped-floor-38891/weighted_unifrac_distance_matrix.qza \
  --m-metadata-file STP-16S-Grouped-Floors-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-TPonly-core-metrics-grouped-floor-38891/weighted-unifrac-area-significance-disp.qzv \
  --p-method permdisp

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-TPonly-core-metrics-grouped-floor-38891/bray_curtis_distance_matrix.qza \
  --m-metadata-file STP-16S-Grouped-Floors-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-TPonly-core-metrics-grouped-floor-38891/bray_curtis_distance-area-significance-disp.qzv \
  --p-method permdisp

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-TPonly-core-metrics-grouped-floor-38891/jaccard_distance_matrix.qza \
  --m-metadata-file STP-16S-Grouped-Floors-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-TPonly-core-metrics-grouped-floor-38891/jaccard_distance-area-significance-disp.qzv \
  --p-method permdisp



### TAXONOMY (as in https://docs.qiime2.org/2024.2/tutorials/pd-mice/)

# start with SILVA classifier 

qiime feature-classifier classify-sklearn \
  --i-reads STP-16S-All-single-end-trimmed-repseqs.qza \
  --i-classifier silva-138.1-ssu-nr99-2029-16S-lca-classifier.qza \
  --o-classification STP-16S-All-single-end-SILVA-taxonomy.qza 

qiime metadata tabulate \
  --m-input-file STP-16S-All-single-end-SILVA-taxonomy.qza \
  --o-visualization STP-16S-All-single-end-SILVA-taxonomy.qzv

###### this SILVA classifier is the one I made, it is not giving a good classification
# try again with the pre-made classifier - 20240445

nohup qiime feature-classifier classify-sklearn \
  --i-reads STP-16S-All-single-end-trimmed-repseqs.qza \
  --i-classifier silva-138-99-nb-classifier.qza \
  --o-classification STP-16S-All-single-end-SILVA-untrained-taxonomy.qza &

qiime metadata tabulate \
  --m-input-file STP-16S-All-single-end-SILVA-untrained-taxonomy.qza \
  --o-visualization STP-16S-All-single-end-SILVA-untrained-taxonomy.qzv

########## Made some more classifiers with different parameters, try them too
## this one uses shorter primer sequences to train the classifier: silva-138.1-ssu-nr99-2019-16S-lca-classifier-2.qza

nohup qiime feature-classifier classify-sklearn \
  --i-reads STP-16S-All-single-end-trimmed-repseqs.qza \
  --i-classifier silva-138.1-ssu-nr99-2019-16S-lca-classifier-2.qza \
  --o-classification STP-16S-All-single-end-SILVA-lca2-taxonomy.qza &

qiime metadata tabulate \
  --m-input-file STP-16S-All-single-end-SILVA-lca2-taxonomy.qza \
  --o-visualization STP-16S-All-single-end-SILVA-lca2-taxonomy.qzv

qiime taxa barplot \
  --i-table STP-16S-All-single-end-trimmed-table.qza \
  --i-taxonomy STP-16S-All-single-end-SILVA-lca2-taxonomy.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --o-visualization STP-16S-All-single-end-trimmed-taxa-lca2-barplot.qzv

## this one uses a different p-mode so we can use clawback: silva-138.1-ssu-nr99-2019-uniq-classifier-2.qza

nohup qiime feature-classifier classify-sklearn \
  --i-reads STP-16S-All-single-end-trimmed-repseqs.qza \
  --i-classifier silva-138.1-ssu-nr99-2019-uniq-classifier-2.qza \
  --o-classification STP-16S-All-single-end-SILVA-uniq2-taxonomy.qza &

qiime metadata tabulate \
  --m-input-file STP-16S-All-single-end-SILVA-uniq2-taxonomy.qza \
  --o-visualization STP-16S-All-single-end-SILVA-uniq2-taxonomy.qzv

qiime taxa barplot \
  --i-table STP-16S-All-single-end-trimmed-table.qza \
  --i-taxonomy STP-16S-All-single-end-SILVA-uniq2-taxonomy.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --o-visualization STP-16S-All-single-end-trimmed-taxa-uniq2-barplot.qzv

## this is the classifier I made with clawback: soil-nonsaline-16S-uniq-2-classifier.qza

nohup qiime feature-classifier classify-sklearn \
  --i-reads STP-16S-All-single-end-trimmed-repseqs.qza \
  --i-classifier soil-nonsaline-16S-uniq-2-classifier.qza \
  --o-classification STP-16S-All-single-end-SILVA-soil-nonsaline-taxonomy.qza &

## HERE on 1:11 pm May 22

qiime metadata tabulate \
  --m-input-file STP-16S-All-single-end-SILVA-soil-nonsaline-taxonomy.qza \
  --o-visualization STP-16S-All-single-end-SILVA-soil-nonsaline-taxonomy.qzv

qiime taxa barplot \
  --i-table STP-16S-All-single-end-trimmed-table.qza \
  --i-taxonomy STP-16S-All-single-end-SILVA-soil-nonsaline-taxonomy.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --o-visualization STP-16S-All-single-end-trimmed-taxa-uniq2-soil-nonsaline-barplot.qzv


### GREENGENES2 classifier

nohup qiime feature-classifier classify-sklearn \
  --i-reads STP-16S-All-single-end-trimmed-repseqs.qza \
  --i-classifier gg2.2022.10.backbone.v4.nb.qza \
  --o-classification STP-16S-All-single-end-GG2-taxonomy.qza & 

qiime metadata tabulate \
  --m-input-file STP-16S-All-single-end-GG2-taxonomy.qza \
  --o-visualization STP-16S-All-single-end-GG2-taxonomy.qzv

##### Visualize taxonomic data ##### 

qiime feature-table filter-samples \
  --i-table STP-16S-All-single-end-trimmed-table.qza \
  --p-min-frequency 10 \
  --o-filtered-table STP-16S-All-single-end-trimmed-table-10.qza

qiime taxa barplot \
  --i-table STP-16S-All-single-end-trimmed-table-10.qza \
  --i-taxonomy STP-16S-All-single-end-GG2-taxonomy.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --o-visualization STP-16S-All-single-end-trimmed-10-GG2-taxa-barplot.qzv

qiime feature-table filter-samples \
  --i-table STP-16S-All-single-end-trimmed-table-10.qza \
  --p-min-frequency 38914 \
  --o-filtered-table STP-16S-All-Solo-single-end-trimmed-table-10-filtered-38914.qza

qiime taxa barplot \
  --i-table STP-16S-All-Solo-single-end-trimmed-table-10-filtered-38914.qza \
  --i-taxonomy STP-16S-All-single-end-GG2-taxonomy.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --o-visualization STP-16S-All-single-end-trimmed-table-10-filtered-38914-GG2-taxa-barplot.qzv

qiime taxa barplot \
  --i-table STP-16S-All-Solo-single-end-trimmed-table-10-filtered-38914.qza \
  --i-taxonomy STP-16S-All-single-end-SILVA-taxonomy.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --o-visualization STP-16S-All-single-end-trimmed-table-10-filtered-38914-SILVA-taxa-barplot.qzv

## Weighted Greengenes2 Classifier

nohup qiime feature-classifier classify-sklearn \
  --i-reads STP-16S-All-single-end-trimmed-repseqs.qza \
  --i-classifier GG2-V4-soil-nonsaline-weights-classifier.qza \
  --o-classification STP-16S-All-single-end-GG2-weighted-taxonomy.qza & 

qiime metadata tabulate \
  --m-input-file STP-16S-All-single-end-GG2-weighted-taxonomy.qza \
  --o-visualization STP-16S-All-single-end-GG2-weighted-taxonomy.qzv

qiime taxa barplot \
  --i-table STP-16S-All-Solo-single-end-trimmed-table-10-filtered-38914.qza \
  --i-taxonomy STP-16S-All-single-end-GG2-weighted-taxonomy.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --o-visualization STP-16S-All-single-end-trimmed-table-10-filtered-38914-GG2-weighted-taxa-barplot.qzv

#### KRONA
module load qiime2/2024

#qiime krona collapse-and-plot \
#--i-table table_2k.qza \
#--i-taxonomy taxonomy.qza \
#--o-krona-plot krona.qzv

qiime krona collapse-and-plot \
--i-table STP-16S-All-single-end-trimmed-table.qza \
--i-taxonomy STP-16S-All-single-end-SILVA-taxonomy.qza \
--o-krona-plot STP-16S-All-single-end-SILVA-krona.qzv

qiime krona collapse-and-plot \
--i-table STP-16S-All-single-end-trimmed-table.qza \
--i-taxonomy STP-16S-All-single-end-GG2-taxonomy.qza \
--o-krona-plot STP-16S-All-single-end-GG2-krona.qzv

qiime krona collapse-and-plot \
--i-table STP-16S-All-Solo-single-end-trimmed-table-10-filtered-38914.qza \
--i-taxonomy STP-16S-All-single-end-SILVA-taxonomy.qza \
--o-krona-plot STP-16S-All-single-end-trimmed-10-filtered-38914-SILVA-krona.qzv

qiime krona collapse-and-plot \
--i-table STP-16S-All-Solo-single-end-trimmed-table-10-filtered-38914.qza \
--i-taxonomy STP-16S-All-single-end-GG2-taxonomy.qza \
--o-krona-plot STP-16S-All-single-end-trimmed-10-filtered-38914-GG2-krona.qzv

# Differential abundance with ANCOM-BC

# Before we begin, we will filter out low abundance/low prevalence ASVs. 
# Filtering can provide better resolution and limit false discovery rate (FDR) 
# penalty on features that are too far below the noise threshhold to be applicable 
# to a statistical test.

qiime feature-table filter-features \
  --i-table Iceland-2023-Solo-single-end-trimmed-table-113416.qza \
  --p-min-frequency 10 \
  --p-min-samples 4 \
  --o-filtered-table Iceland-2023-Solo-single-end-trimmed-table-113416_abund.qza

qiime composition ancombc \
  --i-table Iceland-2023-Solo-single-end-trimmed-table-113416_abund.qza \
  --m-metadata-file Iceland-2023-Solo-Metadata.tsv \
  --p-formula 'era' \
  --o-differentials Iceland-2023-Solo-single-end-trimmed-table-113416-ancombc-era.qza

qiime composition da-barplot \
  --i-data Iceland-2023-Solo-single-end-trimmed-table-113416-ancombc-era.qza \
  --p-significance-threshold 0.001 \
  --o-visualization Iceland-2023-Solo-single-end-trimmed-table-113416-ancombc-era-barplot.qzv

# Longitutidinal and paired sample comparisons
# https://docs.qiime2.org/2024.2/tutorials/longitudinal/
# q2-longitudinal could compare the effects of different collection methods, storage methods, 
# DNA extraction methods, or any bioinformatic processing steps on the feature composition of 
# individual samples.

# PCoA-based analyses 



##############################################################################################################
##############################################################################################################
#################################### with targeted data set!! ################################################
##############################################################################################################
##############################################################################################################

## May 23, made a "targeted" metadata file which excludes STP09, all negative controls, riverbed and sheep pasture

fetch-gdrive-object.py -d 1hTo2PGfxUjFlUXTPvaldEYgeHWzSSLkbtKO-ieq-jRc -t sheet -f tsv -o STP-16S-Targeted-Metadata.tsv

fetch-gdrive-object.py -d 1Fg6BWqgH5M5avdLudQcJ4Rufxfp23Q0iqbUz_Uxe62o -t sheet -f tsv -o STP-16S-Targeted-Manifest-FWDs.tsv

qiime tools import \
  --type 'SampleData[SequencesWithQuality]' \
  --input-path STP-16S-Targeted-Manifest-FWDs.tsv \
  --output-path STP-16S-Targeted-single-end-demux.qza \
  --input-format SingleEndFastqManifestPhred33V2

qiime demux summarize \
	--i-data STP-16S-Targeted-single-end-demux.qza \
	--o-visualization STP-16S-Targeted-single-end-demux.qzv

##############################################################################################################
##############################################################################################################
# CUTADAPT  


# Add the flags --p-match-read-wildcards and --p-match-adapter-wildcards to your command. 
# This will allow matching of IUPAC wild cards such as H, V, N, etc..
# Furthermore, I'd advise adding the flag --p-discard-untrimmed as well. 
# This will discard any sequences in which both primers could not be trimmed from the sequences.

# TATGGTAATTGTGTGCCAGCMGCCGCGGTAA - this was what I used originally, in the prior workflow
# GTGYCAGCMGCCGCGGTAA - this is what I will use this time, which is the V4 primer sequenced I used for the classifier

qiime cutadapt trim-single \
  --i-demultiplexed-sequences STP-16S-Targeted-single-end-demux.qza \
  --p-cores 8 \
  --p-front GTGYCAGCMGCCGCGGTAA \
  --p-match-read-wildcards \
  --p-match-adapter-wildcards \
  --p-discard-untrimmed \
  --o-trimmed-sequences STP-16S-Targeted-single-end-trimmed-short-sequences.qza 

qiime demux summarize \
	--i-data STP-16S-Targeted-single-end-trimmed-short-sequences.qza   \
	--o-visualization STP-16S-Targeted-single-end-trimmed-short-sequences.qzv

qiime cutadapt trim-single \
  --i-demultiplexed-sequences STP-16S-Targeted-single-end-demux.qza \
  --p-cores 8 \
  --p-front TATGGTAATTGTGTGCCAGCMGCCGCGGTAA \
  --p-match-read-wildcards \
  --p-match-adapter-wildcards \
  --p-discard-untrimmed \
  --o-trimmed-sequences STP-16S-Targeted-single-end-trimmed-long-sequences.qza 

qiime demux summarize \
	--i-data STP-16S-Targeted-single-end-trimmed-long-sequences.qza   \
	--o-visualization STP-16S-Targeted-single-end-trimmed-long-sequences.qzv

# Either way, both have similar QC scores: the first position to drop below 25 is 226 (same as used for All Samples)
# the first position to drop below 20 is 269 (drop below being the bottom bound of the box)
# Using the long primer sequence, we end up with 788563 reads. 
# Using the short primer sequence, we end up with 781231 reads - a difference of 7,332 reads.
# how to know which is better??? I think I should use the "short" data because that's the same sequence I use in the 
# taxonomy to train the classifiers, and the feedback was that part of the "long" sequence shouldn't be present in the DNA


##############################################################################################################
##############################################################################################################
# DADA2, feature table summarize
# trunc at 268, sequence falls below 20QC at position 269
# don't change the number of reads for the error model from default, based on forum post.

nohup qiime dada2 denoise-single \
	--p-n-threads 8 \
	--i-demultiplexed-seqs STP-16S-Targeted-single-end-trimmed-short-sequences.qza \
	--p-trunc-len 268 \
	--o-representative-sequences STP-16S-Targeted-single-end-trimmed-repseqs.qza \
	--o-table STP-16S-Targeted-single-end-trimmed-table.qza \
	--o-denoising-stats STP-16S-Targeted-single-end-trimmed-stats.qza & 

qiime metadata tabulate \
	--m-input-file STP-16S-Targeted-single-end-trimmed-stats.qza \
	--o-visualization STP-16S-Targeted-single-end-trimmed-stats.qzv 

qiime feature-table summarize \
	--i-table STP-16S-Targeted-single-end-trimmed-table.qza \
	--o-visualization STP-16S-Targeted-single-end-trimmed-table.qzv \
	--m-sample-metadata-file STP-16S-Targeted-Metadata.tsv

qiime feature-table tabulate-seqs \
  --i-data STP-16S-Targeted-single-end-trimmed-repseqs.qza \
  --o-visualization STP-16S-Targeted-single-end-trimmed-repseqs.qzv

### and try again truncaging at 226?? 

nohup qiime dada2 denoise-single \
	--p-n-threads 8 \
	--i-demultiplexed-seqs STP-16S-Targeted-single-end-trimmed-short-sequences.qza \
	--p-trunc-len 226 \
	--o-representative-sequences STP-16S-Targeted-single-end-trimmed-226-repseqs.qza \
	--o-table STP-16S-Targeted-single-end-trimmed-226-table.qza \
	--o-denoising-stats STP-16S-Targeted-single-end-trimmed-226-stats.qza & 

qiime metadata tabulate \
	--m-input-file STP-16S-Targeted-single-end-trimmed-226-stats.qza \
	--o-visualization STP-16S-Targeted-single-end-trimmed-226-stats.qzv 

### that did it! much better results. Move forward with trunc @ 226 - replace other files

qiime feature-table summarize \
	--i-table STP-16S-Targeted-single-end-trimmed-226-table.qza \
	--o-visualization STP-16S-Targeted-single-end-trimmed-226-table.qzv \
	--m-sample-metadata-file STP-16S-Targeted-Metadata.tsv

qiime feature-table tabulate-seqs \
  --i-data STP-16S-Targeted-single-end-trimmed-226-repseqs.qza \
  --o-visualization STP-16S-Targeted-single-end-trimmed-226repseqs.qzv

##############################################################################################################
##############################################################################################################
# Diversity and Phylogeny 

# Sampling Depth of 38762: Retained 465,144 (73.87%) features in 12 (100.00%) samples at the specifed sampling depth.

qiime diversity alpha-rarefaction \
  --i-table STP-16S-Targeted-single-end-trimmed-226-table.qza \
  --m-metadata-file STP-16S-Targeted-Metadata.tsv \
  --o-visualization STP-16S-Targeted-alpha-rarefaction-38762.qzv \
  --p-min-depth 5 \
  --p-max-depth 38762

# at this level of rarefaction we are beyond the exponential growth of the curve. 

nohup qiime phylogeny align-to-tree-mafft-fasttree \
	--p-n-threads 8 \
	--i-sequences STP-16S-Targeted-single-end-trimmed-226-repseqs.qza \
	--o-alignment STP-16S-Targeted-single-end-trimmed-repseqs-aligned.qza \
	--o-masked-alignment STP-16S-Targeted-single-end-trimmed-repseqs-masked-aligned.qza \
	--o-tree STP-16S-Targeted-single-end-trimmed-unrooted-tree.qza \
	--o-rooted-tree STP-16S-Targeted-single-end-trimmed-rooted-tree.qza &




nohup qiime diversity core-metrics-phylogenetic \
	--i-phylogeny STP-16S-Targeted-single-end-trimmed-rooted-tree.qza \
	--i-table STP-16S-Targeted-single-end-trimmed-226-table.qza  \
	--p-sampling-depth 38914 \
	--m-metadata-file STP-16S-Targeted-Metadata.tsv \
	--output-dir STP-16S-Targeted-core-metrics-rooted-38914 &

qiime diversity alpha-group-significance \
	--i-alpha-diversity STP-16S-Targeted-core-metrics-rooted-38914/faith_pd_vector.qza \
	--m-metadata-file STP-16S-Targeted-Metadata.tsv \
	--o-visualization STP-16S-Targeted-core-metrics-rooted-38914/faith-pd-group-significance.qzv

qiime diversity alpha-group-significance \
	--i-alpha-diversity STP-16S-Targeted-core-metrics-rooted-38914/evenness_vector.qza \
	--m-metadata-file STP-16S-Targeted-Metadata.tsv \
	--o-visualization STP-16S-Targeted-core-metrics-rooted-38914/evenness-group-significance.qzv

qiime diversity alpha-group-significance \
	--i-alpha-diversity STP-16S-Targeted-core-metrics-rooted-38914/shannon_vector.qza \
	--m-metadata-file STP-16S-Targeted-Metadata.tsv \
	--o-visualization STP-16S-Targeted-core-metrics-rooted-38914/shannon-group-significance.qzv

## can't use the unrooted tree? Invalid value for '--i-phylogeny': Expected an artifact of at least
#  type Phylogeny[Rooted]. An artifact of type Phylogeny[Unrooted] was
#  provided.
#nohup qiime diversity core-metrics-phylogenetic \
#	--i-phylogeny STP-16S-Targeted-single-end-trimmed-unrooted-tree.qza \
#	--i-table STP-16S-Targeted-single-end-trimmed-226-table.qza \
#	--p-sampling-depth 38914 \
#	--m-metadata-file STP-16S-Targeted-Metadata.tsv  \
#	--output-dir STP-16S-Targeted-core-metrics-unrooted-38914 &

##############################################################################################################
##############################################################################################################
### PERMANOVA - with grouped floor data 

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-Targeted-core-metrics-rooted-38914/unweighted_unifrac_distance_matrix.qza \
  --m-metadata-file STP-16S-Targeted-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-Targeted-core-metrics-rooted-38914/unweighted-unifrac-era-significance.qzv \
  --p-pairwise

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-Targeted-core-metrics-rooted-38914/weighted_unifrac_distance_matrix.qza \
  --m-metadata-file STP-16S-Targeted-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-Targeted-core-metrics-rooted-38914/weighted-unifrac-era-significance.qzv \
  --p-pairwise

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-Targeted-core-metrics-rooted-38914/bray_curtis_distance_matrix.qza \
  --m-metadata-file STP-16S-Targeted-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-Targeted-core-metrics-rooted-38914/bray-curtis-era-significance.qzv \
  --p-pairwise

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-Targeted-core-metrics-rooted-38914/jaccard_distance_matrix.qza \
  --m-metadata-file STP-16S-Targeted-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-Targeted-core-metrics-rooted-38914/jaccard-era-significance.qzv \
  --p-pairwise

##############################################################################################################
##############################################################################################################
### PERMDISP

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-Targeted-core-metrics-rooted-38914/unweighted_unifrac_distance_matrix.qza \
  --m-metadata-file STP-16S-Targeted-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-Targeted-core-metrics-rooted-38914/unweighted-unifrac-area-significance-disp.qzv \
  --p-method permdisp

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-Targeted-core-metrics-rooted-38914/weighted_unifrac_distance_matrix.qza \
  --m-metadata-file STP-16S-Targeted-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-Targeted-core-metrics-rooted-38914/weighted-unifrac-area-significance-disp.qzv \
  --p-method permdisp

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-Targeted-core-metrics-rooted-38914/bray_curtis_distance_matrix.qza \
  --m-metadata-file STP-16S-Targeted-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-Targeted-core-metrics-rooted-38914/bray_curtis_distance-area-significance-disp.qzv \
  --p-method permdisp

qiime diversity beta-group-significance \
  --i-distance-matrix STP-16S-Targeted-core-metrics-rooted-38914/jaccard_distance_matrix.qza \
  --m-metadata-file STP-16S-Targeted-Metadata.tsv \
  --m-metadata-column area.numeric \
  --o-visualization STP-16S-Targeted-core-metrics-rooted-38914/jaccard_distance-area-significance_disp.qzv \
  --p-method permdisp


##############################################################################################################
##############################################################################################################
### TAXONOMY (as in https://docs.qiime2.org/2024.2/tutorials/pd-mice/)

# start with SILVA classifier 

qiime feature-classifier classify-sklearn \
  --i-reads STP-16S-All-single-end-trimmed-repseqs.qza \
  --i-classifier silva-138.1-ssu-nr99-2029-16S-lca-classifier.qza \
  --o-classification STP-16S-All-single-end-SILVA-taxonomy.qza 

qiime metadata tabulate \
  --m-input-file STP-16S-All-single-end-SILVA-taxonomy.qza \
  --o-visualization STP-16S-All-single-end-SILVA-taxonomy.qzv

###### this SILVA classifier is the one I made, it is not giving a good classification
# try again with the pre-made classifier - 20240445

nohup qiime feature-classifier classify-sklearn \
  --i-reads STP-16S-All-single-end-trimmed-repseqs.qza \
  --i-classifier silva-138-99-nb-classifier.qza \
  --o-classification STP-16S-All-single-end-SILVA-untrained-taxonomy.qza &

qiime metadata tabulate \
  --m-input-file STP-16S-All-single-end-SILVA-untrained-taxonomy.qza \
  --o-visualization STP-16S-All-single-end-SILVA-untrained-taxonomy.qzv

########## Made some more classifiers with different parameters, try them too
## this one uses shorter primer sequences to train the classifier: silva-138.1-ssu-nr99-2019-16S-lca-classifier-2.qza

nohup qiime feature-classifier classify-sklearn \
  --i-reads STP-16S-All-single-end-trimmed-repseqs.qza \
  --i-classifier silva-138.1-ssu-nr99-2019-16S-lca-classifier-2.qza \
  --o-classification STP-16S-All-single-end-SILVA-lca2-taxonomy.qza &

qiime metadata tabulate \
  --m-input-file STP-16S-All-single-end-SILVA-lca2-taxonomy.qza \
  --o-visualization STP-16S-All-single-end-SILVA-lca2-taxonomy.qzv

qiime taxa barplot \
  --i-table STP-16S-All-single-end-trimmed-table.qza \
  --i-taxonomy STP-16S-All-single-end-SILVA-lca2-taxonomy.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --o-visualization STP-16S-All-single-end-trimmed-taxa-lca2-barplot.qzv

## this one uses a different p-mode so we can use clawback: silva-138.1-ssu-nr99-2019-uniq-classifier-2.qza

nohup qiime feature-classifier classify-sklearn \
  --i-reads STP-16S-All-single-end-trimmed-repseqs.qza \
  --i-classifier silva-138.1-ssu-nr99-2019-uniq-classifier-2.qza \
  --o-classification STP-16S-All-single-end-SILVA-uniq2-taxonomy.qza &

qiime metadata tabulate \
  --m-input-file STP-16S-All-single-end-SILVA-uniq2-taxonomy.qza \
  --o-visualization STP-16S-All-single-end-SILVA-uniq2-taxonomy.qzv

qiime taxa barplot \
  --i-table STP-16S-All-single-end-trimmed-table.qza \
  --i-taxonomy STP-16S-All-single-end-SILVA-uniq2-taxonomy.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --o-visualization STP-16S-All-single-end-trimmed-taxa-uniq2-barplot.qzv

## this is the classifier I made with clawback: soil-nonsaline-16S-uniq-2-classifier.qza

nohup qiime feature-classifier classify-sklearn \
  --i-reads STP-16S-All-single-end-trimmed-repseqs.qza \
  --i-classifier soil-nonsaline-16S-uniq-2-classifier.qza \
  --o-classification STP-16S-All-single-end-SILVA-soil-nonsaline-taxonomy.qza &

## HERE on 1:11 pm May 22

qiime metadata tabulate \
  --m-input-file STP-16S-All-single-end-SILVA-soil-nonsaline-taxonomy.qza \
  --o-visualization STP-16S-All-single-end-SILVA-soil-nonsaline-taxonomy.qzv

qiime taxa barplot \
  --i-table STP-16S-All-single-end-trimmed-table.qza \
  --i-taxonomy STP-16S-All-single-end-SILVA-soil-nonsaline-taxonomy.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --o-visualization STP-16S-All-single-end-trimmed-taxa-uniq2-soil-nonsaline-barplot.qzv


### GREENGENES2 classifier

nohup qiime feature-classifier classify-sklearn \
  --i-reads STP-16S-All-single-end-trimmed-repseqs.qza \
  --i-classifier gg2.2022.10.backbone.v4.nb.qza \
  --o-classification STP-16S-All-single-end-GG2-taxonomy.qza & 

qiime metadata tabulate \
  --m-input-file STP-16S-All-single-end-GG2-taxonomy.qza \
  --o-visualization STP-16S-All-single-end-GG2-taxonomy.qzv

##### Visualize taxonomic data ##### 

qiime feature-table filter-samples \
  --i-table STP-16S-All-single-end-trimmed-table.qza \
  --p-min-frequency 10 \
  --o-filtered-table STP-16S-All-single-end-trimmed-table-10.qza

qiime taxa barplot \
  --i-table STP-16S-All-single-end-trimmed-table-10.qza \
  --i-taxonomy STP-16S-All-single-end-GG2-taxonomy.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --o-visualization STP-16S-All-single-end-trimmed-10-GG2-taxa-barplot.qzv

qiime feature-table filter-samples \
  --i-table STP-16S-All-single-end-trimmed-table-10.qza \
  --p-min-frequency 38914 \
  --o-filtered-table STP-16S-All-Solo-single-end-trimmed-table-10-filtered-38914.qza

qiime taxa barplot \
  --i-table STP-16S-All-Solo-single-end-trimmed-table-10-filtered-38914.qza \
  --i-taxonomy STP-16S-All-single-end-GG2-taxonomy.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --o-visualization STP-16S-All-single-end-trimmed-table-10-filtered-38914-GG2-taxa-barplot.qzv

qiime taxa barplot \
  --i-table STP-16S-All-Solo-single-end-trimmed-table-10-filtered-38914.qza \
  --i-taxonomy STP-16S-All-single-end-SILVA-taxonomy.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --o-visualization STP-16S-All-single-end-trimmed-table-10-filtered-38914-SILVA-taxa-barplot.qzv

## Weighted Greengenes2 Classifier

nohup qiime feature-classifier classify-sklearn \
  --i-reads STP-16S-All-single-end-trimmed-repseqs.qza \
  --i-classifier GG2-V4-soil-nonsaline-weights-classifier.qza \
  --o-classification STP-16S-All-single-end-GG2-weighted-taxonomy.qza & 

qiime metadata tabulate \
  --m-input-file STP-16S-All-single-end-GG2-weighted-taxonomy.qza \
  --o-visualization STP-16S-All-single-end-GG2-weighted-taxonomy.qzv

qiime taxa barplot \
  --i-table STP-16S-All-Solo-single-end-trimmed-table-10-filtered-38914.qza \
  --i-taxonomy STP-16S-All-single-end-GG2-weighted-taxonomy.qza \
  --m-metadata-file STP-16S-Metadata.tsv \
  --o-visualization STP-16S-All-single-end-trimmed-table-10-filtered-38914-GG2-weighted-taxa-barplot.qzv

#### KRONA
module load qiime2/2024

#qiime krona collapse-and-plot \
#--i-table table_2k.qza \
#--i-taxonomy taxonomy.qza \
#--o-krona-plot krona.qzv

qiime krona collapse-and-plot \
--i-table STP-16S-All-single-end-trimmed-table.qza \
--i-taxonomy STP-16S-All-single-end-SILVA-taxonomy.qza \
--o-krona-plot STP-16S-All-single-end-SILVA-krona.qzv

qiime krona collapse-and-plot \
--i-table STP-16S-All-single-end-trimmed-table.qza \
--i-taxonomy STP-16S-All-single-end-GG2-taxonomy.qza \
--o-krona-plot STP-16S-All-single-end-GG2-krona.qzv

qiime krona collapse-and-plot \
--i-table STP-16S-All-Solo-single-end-trimmed-table-10-filtered-38914.qza \
--i-taxonomy STP-16S-All-single-end-SILVA-taxonomy.qza \
--o-krona-plot STP-16S-All-single-end-trimmed-10-filtered-38914-SILVA-krona.qzv

qiime krona collapse-and-plot \
--i-table STP-16S-All-Solo-single-end-trimmed-table-10-filtered-38914.qza \
--i-taxonomy STP-16S-All-single-end-GG2-taxonomy.qza \
--o-krona-plot STP-16S-All-single-end-trimmed-10-filtered-38914-GG2-krona.qzv

# Differential abundance with ANCOM-BC

# Before we begin, we will filter out low abundance/low prevalence ASVs. 
# Filtering can provide better resolution and limit false discovery rate (FDR) 
# penalty on features that are too far below the noise threshhold to be applicable 
# to a statistical test.

qiime feature-table filter-features \
  --i-table Iceland-2023-Solo-single-end-trimmed-table-113416.qza \
  --p-min-frequency 10 \
  --p-min-samples 4 \
  --o-filtered-table Iceland-2023-Solo-single-end-trimmed-table-113416_abund.qza

qiime composition ancombc \
  --i-table Iceland-2023-Solo-single-end-trimmed-table-113416_abund.qza \
  --m-metadata-file Iceland-2023-Solo-Metadata.tsv \
  --p-formula 'era' \
  --o-differentials Iceland-2023-Solo-single-end-trimmed-table-113416-ancombc-era.qza

qiime composition da-barplot \
  --i-data Iceland-2023-Solo-single-end-trimmed-table-113416-ancombc-era.qza \
  --p-significance-threshold 0.001 \
  --o-visualization Iceland-2023-Solo-single-end-trimmed-table-113416-ancombc-era-barplot.qzv

# Longitutidinal and paired sample comparisons
# https://docs.qiime2.org/2024.2/tutorials/longitudinal/
# q2-longitudinal could compare the effects of different collection methods, storage methods, 
# DNA extraction methods, or any bioinformatic processing steps on the feature composition of 
# individual samples.

# PCoA-based analyses 
