# Post-process a combined CSV: drop index noise and duplicate sequences.
import pandas as pd
# Load the input table.
df = pd.read_csv("cleaned_amp_data.csv")

# pandas names a header-less leading column 'Unnamed: 0' (typically an index
# that was written out by an earlier to_csv call). Drop it when present so it
# is not carried into the output.
if 'Unnamed: 0' in df.columns:
    df = df.drop(columns=['Unnamed: 0'])

# Keep only the first row for each distinct value in the 'sequence' column.
df = df.drop_duplicates(subset='sequence')

# Write the result without the index so that reloading the file does not
# produce another 'Unnamed: 0' column.
df.to_csv("2cleaned_amp_data.csv", index=False)
|