#!/usr/bin/env python3

import argparse
import csv
import json
import os
import subprocess
import sys

import ncbitaxonomist

def resolve_to_name(id):
    """Return the NCBI scientific (Latin) name for a taxid, or None.

    Runs ``ncbi-taxonomist resolve -t <id>`` and reads the ``taxon.names``
    mapping of the JSON document it prints, in which each key is a name and
    each value is that name's type.

    The command is started from an argument list rather than a shell string,
    so characters in ``id`` (``;``, ``|``, quotes, ...) are never interpreted
    by a shell.

    Args:
        id: Taxonomy id to resolve; converted with ``str()`` and stripped of
            surrounding whitespace. The parameter name shadows the builtin
            but is kept so keyword-argument callers keep working.

    Returns:
        The name whose type is ``"scientific_name"``, or ``None`` when the id
        is blank, the tool cannot be started, the tool prints nothing, or no
        scientific name is listed.

    Raises:
        json.JSONDecodeError: If the tool's output is not a single JSON
            document.
        KeyError: If the JSON lacks the ``taxon`` / ``names`` entries.
    """
    # A shell would have dropped surrounding whitespace during word
    # splitting; do it explicitly now that no shell is involved.
    taxid = str(id).strip()
    if not taxid:
        return None

    try:
        completed = subprocess.run(
            ["ncbi-taxonomist", "resolve", "-t", taxid],
            stdout=subprocess.PIPE,
            universal_newlines=True,
            check=False,  # exit status is ignored; only stdout matters here
        )
    except OSError as err:
        # Tool missing or not executable: report it, but keep the
        # best-effort "no name" result instead of aborting the whole run.
        print("ncbi-taxonomist could not be run: " + str(err), file=sys.stderr)
        return None

    output = completed.stdout
    if not output.strip():
        return None

    names = json.loads(output)["taxon"]["names"]
    for name, name_type in names.items():
        if name_type == "scientific_name":
            return name
    return None

def write_csv(arr, output_name):
    """Write rows to a CSV file, echoing each row to stdout as it is written.

    Args:
        arr: Iterable of rows; each row is a sequence of field values.
        output_name: Path of the file to create or overwrite.
    """
    # newline='' lets the csv writer control line endings itself; without it,
    # platforms that translate "\n" emit "\r\r\n" and readers see blank rows.
    with open(output_name, 'w', newline='') as output:
        wr = csv.writer(output)
        for line in arr:
            print(line)
            wr.writerow(line)
    
# process() is called with (args.input, args.padding, args.taxid_col, args.latin_name_col).
def process(input_file, padding, taxid_col_name, output_col_name):
    """Read a CSV and append a resolved-name column to every row.

    The first ``padding`` rows are passed through with an empty extra field,
    the next row is treated as the header and gets ``output_col_name``
    appended, and every later row gets the value returned by
    ``resolve_to_name`` for its taxid cell (or the string ``"None"`` when the
    cell is empty or nothing was resolved). A progress message is printed for
    each row.

    Data rows shorter than the header (including blank lines, which the csv
    reader yields as empty rows) are padded with empty fields up to the
    header width, so they are handled as "no taxid" and the new column stays
    aligned.

    Args:
        input_file: Path of the CSV file to read.
        padding: Number of rows before the header row.
        taxid_col_name: Header text of the column holding taxonomy ids.
        output_col_name: Header text for the appended column.

    Returns:
        A list of rows (lists of strings), each with one extra trailing field.

    Raises:
        ValueError: If ``padding`` is negative or ``taxid_col_name`` is not in
            the header row.
        IndexError: If the file has no row at position ``padding``.
    """
    if padding < 0:
        raise ValueError("padding must not be negative, got " + str(padding))

    with open(input_file, newline='') as csvfile:
        data = list(csv.reader(csvfile))

    if padding >= len(data):
        raise IndexError(
            "no header row: expected it on line " + str(padding + 1)
            + " but the file has " + str(len(data)) + " line(s)"
        )

    headers = data[padding]
    try:
        id_index = headers.index(taxid_col_name)
    except ValueError:
        raise ValueError(
            "column " + repr(taxid_col_name)
            + " not found in header row " + repr(headers)
        ) from None

    # Captured now: the header list itself grows by one field inside the loop.
    width = len(headers)

    outdata = []
    for line_count, line in enumerate(data, start=1):
        prefix = "LINE " + str(line_count) + ": "

        if line_count <= padding:
            print(prefix + "PADDING LINE")
            line.append("")

        elif line_count == padding + 1:
            print(prefix + "HEADER LINE")
            line.append(output_col_name)

        else:
            if len(line) < width:
                # Blank or truncated row: fill the missing cells so indexing
                # is safe and the appended value lands in the new column.
                line.extend([""] * (width - len(line)))

            taxid = line[id_index].strip()
            if not taxid:
                print(prefix + "NO TAXID FOUND.")
                line.append("None")
            else:
                latin_name = resolve_to_name(taxid)
                if latin_name is None:
                    print(prefix + "NO NCBI LATIN NAME FOUND.")
                    line.append("None")
                else:
                    print(prefix + latin_name)
                    line.append(latin_name)

        outdata.append(line)
    return outdata


def build_arg_parser():
    """Build the command-line parser for this script.

    Returns:
        An ``argparse.ArgumentParser`` with the options ``--input``,
        ``--output``, ``--padding``, ``--taxid_col`` and ``--latin_name_col``.
    """
    parser = argparse.ArgumentParser(description="Output Filter Script")
    parser.add_argument('-i', '--input', help='Input CSV file', required=True, dest="input")
    parser.add_argument('-o', '--output', help='Output CSV file', required=True, dest="output")
    # type=int makes argparse reject non-numeric values with a usage error
    # instead of a traceback from a later int() call.
    parser.add_argument('-p', '--padding', help='Lines to skip from input before the header row',
                        required=False, dest="padding", type=int, default=1)
    parser.add_argument('-t', '--taxid_col', help='Name of the TaxID column in CSV',
                        required=False, dest="taxid_col", default="taxid")
    parser.add_argument('-l', '--latin_name_col', help='Name of the Latin name column to add to the CSV',
                        required=False, dest="latin_name_col", default="latin_name")
    return parser


def main(argv=None):
    """Parse the command line, process the input CSV and write the output CSV.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = build_arg_parser().parse_args(argv)
    data = process(args.input, args.padding, args.taxid_col, args.latin_name_col)
    write_csv(data, args.output)


if __name__ == '__main__':
    main()
