# Resp_text / Test.py
# Solomon17705's picture
# Added Files
# 3ef5978
import pandas as pd
# Disease labels to retain. Membership is tested with an exact string match,
# so every entry must be spelled (including letter case) exactly as it
# appears in the label column of the CSV.
CLASS_NAMES = [
    "Bronchiectasis",
    "Bronchiolitis",
    "chronic obstructive pulmonary disease",
    "Healthy",
    "Pneumonia",
]

# Name of the column that holds the disease label. Change it if your CSV uses
# a different header (e.g. 'diagnosis' or 'label').
LABEL_COLUMN = 'Disease'

# Read the full dataset (replace the filename with your actual file).
df = pd.read_csv('respiratory symptoms and treatment.csv')

# Keep only the rows whose label is one of CLASS_NAMES, then renumber the
# index from 0 so the dropped rows leave no gaps.
filtered_df = (
    df.loc[df[LABEL_COLUMN].isin(CLASS_NAMES)]
    .reset_index(drop=True)
)

# Write the surviving rows to disk without the index column.
filtered_df.to_csv('filtered_dataset.csv', index=False)

# Report how many rows were read and how many passed the filter.
print(f"Original rows: {len(df)}")
print(f"Filtered rows: {len(filtered_df)}")
print("Filtered dataset saved as 'filtered_dataset.csv'")