InSilicoControl / missing_to_bed.py
Halper-Stromberg
Support loading MANE transcripts in igv.js, add toggles for CDS/intron/off-target variants, and add MANE transcripts download
710b399
Raw
History Blame Contribute Delete
991 Bytes
import pandas as pd
def _bed_record(variant):
    """Return one BED row ``[chrom, start, end, name, score]`` for a variant.

    ``variant`` is a single row of the input table, exposing the columns
    ``Chromosome``, ``Position``, ``REF``, ``ALT`` and ``Found_Positional``.
    """
    chromosome = variant['Chromosome']
    # The input position is 1-based (VCF convention), whereas BED uses a
    # 0-based start and an exclusive end, so shift the start down by one and
    # extend the interval by the length of the reference allele.
    zero_based_start = variant['Position'] - 1
    exclusive_end = zero_based_start + len(variant['REF'])
    label = f"{variant['REF']}>{variant['ALT']}"
    # The score column doubles as a flag: 1 when the variant was found
    # positionally, 0 otherwise.
    found_flag = int(bool(variant['Found_Positional']))
    return [chromosome, zero_based_start, exclusive_end, label, found_flag]


# Load the table of variants to convert.
df = pd.read_csv('missing_synthetic_variants.csv')

# One BED record per input row.
bed_data = [_bed_record(row) for _, row in df.iterrows()]

# Assemble the records and order them by chromosome, then by start position.
# Chromosomes are compared as plain strings; no natural/karyotypic ordering
# is applied.
bed_df = pd.DataFrame(
    bed_data,
    columns=['chrom', 'start', 'end', 'name', 'score'],
).sort_values(by=['chrom', 'start'])

# BED files are tab-separated and carry neither a header line nor an index.
bed_df.to_csv(
    'missing_synthetic_variants.bed',
    sep='\t',
    header=False,
    index=False,
)
print("Saved missing_synthetic_variants.bed")