'''
Deal with this: a seq has multiple blat loci, so it appears multiple times in
the bed12. This script renames each occurrence of the seq's name as
seq_trans1, seq_trans2, ... (one suffix per locus).

Usage: -i <input bed> -o <output bed>
'''
import sys
import getopt
from collections import defaultdict


def printErrorMsg():
    '''Write the usage hint to stderr.'''
    sys.stderr.write('Please use -i and -o\n')


def parseArgs(argv):
    '''
    Parse the command-line options.

    argv: list of argument strings, without the program name.

    Returns an (inBed, outBed) tuple, or None when the options are invalid
    or either of -i / -o is missing or empty.
    '''
    try:
        opts, _ = getopt.getopt(argv, 'i:o:')
    except getopt.GetoptError:
        # Unknown option or missing option argument: treat as a usage error
        # instead of letting the traceback escape.
        return None

    inBed = ''
    outBed = ''
    for op, value in opts:
        if op == '-i':
            inBed = value
        elif op == '-o':
            outBed = value

    if inBed == '' or outBed == '':
        return None
    return inBed, outBed


def renameLoci(rows):
    '''
    Group tab-separated rows by their 4th column (the seq name) and suffix
    each name with _trans<N>, where N counts that name's rows from 1.

    rows: iterable of text lines (a trailing newline on each is optional).

    Returns a list of newline-terminated output lines. Rows sharing a name
    are emitted together, in the order the rows were first collected per
    name. Every row gets a suffix, including names that occur only once.

    Raises ValueError for a non-blank row with fewer than 4 columns.
    '''
    ddSeqLoci = defaultdict(list)
    for lineNum, row in enumerate(rows, 1):
        row = row.rstrip('\n')
        if not row.strip():
            # Blank lines (e.g. a trailing empty line) carry no record.
            continue
        cols = row.split('\t')
        if len(cols) < 4:
            raise ValueError(
                'line ' + str(lineNum) + ': expected at least 4 '
                'tab-separated columns, found ' + str(len(cols)))
        ddSeqLoci[cols[3]].append(cols)

    renamed = []
    for loci in ddSeqLoci.values():
        for num, locus in enumerate(loci, 1):
            locus[3] = locus[3] + '_trans' + str(num)
            renamed.append('\t'.join(locus) + '\n')
    return renamed


def main(argv=None):
    '''
    Run the script: read the -i file, rename the loci, write the -o file.

    argv: argument list without the program name; defaults to sys.argv[1:].

    Returns the process exit status: 0 on success, 1 on a usage error.
    '''
    parsed = parseArgs(sys.argv[1:] if argv is None else argv)
    if parsed is None:
        printErrorMsg()
        return 1
    inBed, outBed = parsed

    # Read the whole input before opening the output, so a bad input does
    # not leave a truncated output file behind and -i/-o may name the same
    # path.
    with open(inBed) as fBed:
        renamed = renameLoci(fBed)
    with open(outBed, 'w') as wOut:
        wOut.writelines(renamed)
    return 0


if __name__ == '__main__':
    sys.exit(main())