File size: 393 Bytes
ea61d54
4466c5e
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Post-process a combined CSV: drop index noise and duplicate sequences.
import pandas as pd

# Pipeline: read the input CSV, discard the 'Unnamed: 0' column when the
# file has one, keep only the first row for each distinct 'sequence' value,
# then write the result to a new CSV without the DataFrame index.
df = (
    pd.read_csv("cleaned_amp_data.csv")
    # errors='ignore' makes this a no-op when the column is absent,
    # so no explicit membership check is needed.
    .drop(columns=['Unnamed: 0'], errors='ignore')
    # Default keep='first': later rows repeating a sequence are removed.
    .drop_duplicates(subset='sequence')
)

# Persist the de-duplicated table; index=False omits the row index column.
df.to_csv("2cleaned_amp_data.csv", index=False)