#!/usr/bin/python ''' Called by ucsc_view.sh Tidy and arrange the result of miRNA binding sites. Generate the format for the UCSC genome browser's custom tracks. Convert the transcript coordinates to chromosome coordinates. If a binding sites crossed the junction, use two blocks to present it, just like junction reads. ''' import sys f_exon_bed = open(sys.argv[1]) lCoords = [] # m = 1 exon_counter = 0 dExonMap = {} for row in f_exon_bed.readlines(): exon_counter += 1 cols = row.rstrip('\n').split('\t') chr = cols[0] strand = cols[5] start = int(cols[1]) + 1 end = int(cols[2]) for k in range(start, end + 1): # print k dExonMap[k] = exon_counter # m += 1 lCoords.extend([start, end]) if strand == '-': lCoords = lCoords[::-1] # print lCoords dLocMap = {} n = 1 for i in range(0, len(lCoords), 2): # print i start = lCoords[i] # print 'start', start t = i + 1 end = lCoords[t] # print 'end', end if strand == '+': r = range(start, end + 1) else: r = range(start, end - 1, -1) for j in r: dLocMap[n] = j n += 1 # print dExonMap # print m if strand == '-': lCoords = lCoords[::-1] for row in sys.stdin.readlines(): cols = row.rstrip('\n').split('\t') mir = cols[0] trans_start = int(cols[1]) trans_end = int(cols[2]) clip_support = cols[6] color = '3,112,192' if clip_support == 'Yes': color = '192,0,1' chr_start = dLocMap[trans_start] chr_end = dLocMap[trans_end] if strand == '-': chr_start, chr_end = chr_end, chr_start if dExonMap[chr_start] == dExonMap[chr_end]: block_count = 1 block_size = trans_end - trans_start + 1 block_starts = 0 else: block_count = 2 if strand == '-': trans_start, trans_end = trans_end, trans_start index_of_first_exon_end = 2 * dExonMap[dLocMap[trans_start]] - 1 index_of_second_exon_start = index_of_first_exon_end + 1 first_exon_end = lCoords[index_of_first_exon_end] second_exon_start = lCoords[index_of_second_exon_start] # print first_block_size = first_exon_end - chr_start + 1 second_block_size = chr_end - second_exon_start + 1 block_size = '%s,%s' % (first_block_size, second_block_size) second_start = second_exon_start - chr_start block_starts = '0,%s' % second_start chr_start -= 1 l_output_bed = [chr, chr_start, chr_end, mir, 0, strand, chr_start, chr_end, color, block_count, block_size, block_starts] l_output_bed = [str(s) for s in l_output_bed] print '\t'.join(l_output_bed)