#!/usr/bin/env python3

# REMOVE_ZEROS_FROM_OUTPUT.PY
#
#   Removes lines from an output file whose read count is below a given threshold.
#   Works with a folder of .out files or a single file.
#   Example: python3 remove_zeros_from_output.py -i outputs -t 5
#

import sys, os, argparse

def main(PATH, t):
    """Filter a single file, or every ``.out`` file found under a directory.

    Args:
        PATH: Path to one input file or to a directory. A directory is walked
            recursively and every file whose extension is ``.out``
            (case-insensitive) and whose name does not start with ``.`` is
            handed to ``process_file``.
        t: Minimum read count threshold, passed through to ``process_file``.

    NOTE: ``*_trimmed.out`` files left by an earlier run also match the
    ``.out`` filter and will be processed again.
    """
    if not os.path.isdir(PATH):  # pointed at a single file
        process_file(PATH, t)
        return

    for subdir, _dirs, files in os.walk(PATH):
        for file in files:
            if file.startswith("."):  # skip hidden files
                continue
            extension = os.path.splitext(file)[1]
            if extension.lower() != ".out":
                continue
            # os.walk yields nested directory paths without a trailing
            # separator, so the path must be joined rather than concatenated.
            process_file(os.path.join(subdir, file), t)

def process_file(p, t):
    """Write a copy of ``p`` with low read-count lines removed.

    The output goes next to the input as ``<input without extension>_trimmed.out``.
    Each non-blank line is split on whitespace and its third field is read as
    an integer read count: lines with a count of at least ``t`` are kept, the
    rest are dropped. Blank (or whitespace-only) lines are copied through
    unchanged. A summary of the line counts is printed to stdout.

    Args:
        p: Path of the input file.
        t: Minimum read count for a line to be kept; any value ``int()`` accepts.

    Raises:
        IndexError: A non-blank line has fewer than three fields.
        ValueError: ``t`` or a line's third field is not an integer.
    """
    filename = os.path.splitext(p)[0] + "_trimmed.out"  # output file name
    threshold = int(t)  # convert once instead of once per line

    out_lines = []
    r_count = 0  # lines rejected
    a_count = 0  # lines accepted
    s_count = 0  # blank lines passed through
    t_count = 0  # total lines read

    # Iterate the file directly: unlike text.split('\n'), this does not yield
    # a phantom empty line after the final newline, and the handle is closed.
    with open(p, "r") as in_file:
        for raw_line in in_file:
            line = raw_line.rstrip("\n")
            t_count += 1
            vals = line.split()
            if not vals:  # blank line: keep as-is
                out_lines.append(line)
                s_count += 1
            elif int(vals[2]) >= threshold:  # vals[2] is read count (per Emmett)
                out_lines.append(line)
                a_count += 1
            else:
                r_count += 1

    # Written only after the whole input parsed cleanly, so a malformed line
    # never leaves a partial output file behind.
    with open(filename, "w") as txt_file:
        txt_file.writelines(line + "\n" for line in out_lines)

    # terminal output
    print(f"Input: {p}")
    print(f"Output: {filename}")
    print(f"Accepted: {a_count} lines, Rejected: {r_count} lines, Skipped: {s_count} lines, TOTAL: {t_count} lines")
    print("-----")
                 
                 
if __name__ == '__main__':
    # Command-line entry point: parse the input path and threshold, then filter.
    parser = argparse.ArgumentParser(description="Output Filter Script")
    parser.add_argument('-i', '--input', help='Enter the directory of outputs, or a single file', required=True, dest="input")
    # type=int makes argparse reject a non-integer threshold up front with a
    # usage error, instead of failing later inside the per-line comparison.
    parser.add_argument('-t', '--threshold', type=int, help='enter an integer threshold for minimum read count', required=True, dest="threshold")
    args = parser.parse_args()

    main(args.input, args.threshold)
