import pandas as pd

INPUT_CSV = 'missing_synthetic_variants.csv'
OUTPUT_BED = 'missing_synthetic_variants.bed'
BED_COLUMNS = ['chrom', 'start', 'end', 'name', 'score']


def variants_to_bed(variants: pd.DataFrame) -> pd.DataFrame:
    """Convert a table of variants into sorted BED5 rows.

    Args:
        variants: Frame with the columns ``Chromosome``, ``Position``,
            ``REF``, ``ALT`` and ``Found_Positional``.

    Returns:
        A frame with the columns ``chrom``, ``start``, ``end``, ``name`` and
        ``score``, sorted by ``chrom`` and then ``start``.

    Raises:
        TypeError: If a ``REF`` value has no length (e.g. a missing value
            that was parsed as a float NaN).
    """
    # BED starts are 0-based and ends are exclusive, whereas the input
    # positions are 1-based (VCF convention): shift the start down by one
    # and extend it by the length of the reference allele.
    start = variants['Position'] - 1
    end = start + variants['REF'].map(len)

    # Label every interval as "REF>ALT".
    name = [
        f"{ref}>{alt}"
        for ref, alt in zip(variants['REF'], variants['ALT'])
    ]

    # The score column records whether the variant was found positionally.
    # Plain truthiness is used, so any truthy value (including NaN) scores 1.
    score = [1 if found else 0 for found in variants['Found_Positional']]

    bed = pd.DataFrame(
        {
            'chrom': variants['Chromosome'],
            'start': start,
            'end': end,
            'name': name,
            'score': score,
        },
        columns=BED_COLUMNS,
    )

    # Chromosomes are ordered by their plain values (lexicographically when
    # they are strings, so "chr10" sorts before "chr2"); no karyotypic
    # ordering is applied.
    return bed.sort_values(by=['chrom', 'start'])


def main(input_csv: str = INPUT_CSV, output_bed: str = OUTPUT_BED) -> None:
    """Read the variant CSV, convert it to BED and write it tab-separated.

    Args:
        input_csv: Path of the CSV file to read.
        output_bed: Path of the BED file to write (no header, no index).
    """
    bed = variants_to_bed(pd.read_csv(input_csv))
    bed.to_csv(output_bed, sep='\t', header=False, index=False)
    print(f"Saved {output_bed}")


if __name__ == "__main__":
    main()