Spaces:
Runtime error
Runtime error
File size: 1,049 Bytes
import pandas as pd

# Disease labels to keep. Matching against the label column is exact
# (case- and whitespace-sensitive), so each entry must be spelled exactly
# as it appears in the dataset.
CLASS_NAMES = [
    "Bronchiectasis",
    "Bronchiolitis",
    # NOTE(review): the only all-lowercase entry; confirm the dataset uses
    # this exact spelling, otherwise no rows will match it.
    "chronic obstructive pulmonary disease",
    "Healthy",
    "Pneumonia",
]

# Name of the column containing the disease labels. Change it if the dataset
# uses a different header (e.g. 'diagnosis', 'label').
LABEL_COLUMN = 'Disease'

# Input dataset (replace with the actual filename) and output location.
INPUT_CSV = 'respiratory symptoms and treatment.csv'
OUTPUT_CSV = 'filtered_dataset.csv'


def filter_by_classes(df, label_column=None, class_names=None):
    """Return the rows of *df* whose label is one of *class_names*.

    Args:
        df: DataFrame to filter; it is not modified.
        label_column: Column holding the labels. Defaults to LABEL_COLUMN.
        class_names: Labels to keep, compared by exact equality. Defaults
            to CLASS_NAMES.

    Returns:
        A new DataFrame containing only the matching rows, with the index
        reset to 0..n-1.

    Raises:
        KeyError: If *label_column* is not a column of *df*.
    """
    if label_column is None:
        label_column = LABEL_COLUMN
    if class_names is None:
        class_names = CLASS_NAMES

    # Fail with an actionable message instead of a bare KeyError.
    if label_column not in df.columns:
        raise KeyError(
            f"Label column {label_column!r} not found; "
            f"available columns: {list(df.columns)}"
        )

    keep_mask = df[label_column].isin(class_names)
    # Drop the old index so the result is numbered contiguously.
    return df[keep_mask].reset_index(drop=True)


def main():
    """Load INPUT_CSV, keep only the CLASS_NAMES rows, and save OUTPUT_CSV."""
    df = pd.read_csv(INPUT_CSV)
    filtered_df = filter_by_classes(df)
    filtered_df.to_csv(OUTPUT_CSV, index=False)

    print(f"Original rows: {len(df)}")
    print(f"Filtered rows: {len(filtered_df)}")
    print(f"Filtered dataset saved as '{OUTPUT_CSV}'")

    # Matching is exact, so a casing or spacing difference silently drops a
    # whole class; surface any class name that matched nothing.
    present = set(filtered_df[LABEL_COLUMN].unique())
    unmatched = [name for name in CLASS_NAMES if name not in present]
    if unmatched:
        print(f"Warning: no rows matched these class names: {unmatched}")


if __name__ == "__main__":
    main()