#!/usr/bin/python

__author__="BU"


"""This script converts mitranscriptome to gencode-like gtf"""

# Usage python mitranscriptome.py -i mitranscriptome.gtf -o output.gtf 

from os import sys
import getopt
import heapq
from optparse import OptionParser

# ----------------
## OPTION PARSING
# ----------------

parser = OptionParser()
parser.add_option("-i",  help="MiTranscriptome annotation in gtf")
parser.add_option("-o",  help='Output gtf file')

options, args = parser.parse_args()


# Store input and output file

infile=''
outfile=''

# Read command line args
myopts, args = getopt.getopt(sys.argv[1:],"i:o:")

# Loop over arguments and assign input to infile and output to outfile 
# o == option
# a == argument passed to the o

#if myopts == []:
#    print("Usage: %s -i input -o output" % sys.argv[0])

for o, a in myopts:
    if o == '-i':
        infile=a
    if o == '-o':
        outfile=a

print ("Input file: %s \nOutput file: %s" % (infile,outfile))

#-------------------------------------------------------------------------------------------------
# Defining Functions
#-------------------------------------------------------------------------------------------------

def mitrans():
   
    gtf={}
    tx2gene={}

    gff=open(infile, "r")
    for line in gff:
        if not line.startswith('#'):
            split=line.split("\t")
            if split[2]=="transcript":
                #print 'TX', split
                chrN=split[0]
                add=split[1]
                typ=split[2]
                start=split[3]
                stop=split[4]
                dot1=split[5]
                strand=split[6]
                dot2=split[7]
                
                spliter=split[8].split(";")
                #print spliter

                for item in spliter:
                    if item.startswith('tcat'): 
                        transcript_biotype=item.split('"')[1]
                        #print transcript_biotype
                    if item.startswith(' gene_id'): 
                        gid=item.split('"')[1]
                        gene_id=gid+"_"+gid
                    if item.startswith(' transcript_id'): 
                        tid=item.split('"')[1]
                        transcript_id=tid+"_"+tid

               
                        record=(chrN+"\t"+add+"\t"+typ+"\t"+start+"\t"+stop+"\t"+dot1+"\t"+strand+"\t"+dot2+"\t"+"gene_id"+" "+'"'+gene_id+'"'"; "+"transcript_id"+" "+'"'+transcript_id+'"'"; "+"transcript_biotype"+" "+'"'+transcript_biotype+'"'";")
           
                        if gene_id not in gtf:
                            gtf[gene_id]=[record]
                        else:
                            gtf[gene_id].append(record)  

            if split[2]=="exon":
                chrN=split[0]
                add=split[1]
                typ=split[2]
                start=split[3]
                stop=split[4]
                dot1=split[5]
                strand=split[6]
                dot2=split[7] 

                spliter=split[8].split(";")
                for item in spliter:
                    if item.startswith('exon_number'): 
                        enb=item.split('"')[1]
                        #print enb
                    if item.startswith(' gene_id'): 
                        gid=item.split('"')[1]
                        gene_id=gid+"_"+gid
                    if item.startswith(' transcript_id'): 
                        tid=item.split('"')[1]
                        transcript_id=tid+"_"+tid
                        exon_id=transcript_id+"."+enb

        
                        record=(chrN+"\t"+add+"\t"+typ+"\t"+start+"\t"+stop+"\t"+dot1+"\t"+strand+"\t"+dot2+"\t"+"gene_id"+" "+'"'+gene_id+'"'"; "+"transcript_id"+" "+'"'+transcript_id+'"'"; "+"exon_id"+" "+'"'+exon_id+'"'";")
           
                        if gene_id not in gtf:
                            gtf[gene_id]=[record]
                        else:
                            gtf[gene_id].append(record)  


    f=open(outfile, "w")

    for key in gtf:
        for v in gtf[key]:
            f.write('%s\n' % v)
    f.close() 

mitrans()