File size: 1,049 Bytes
3ef5978
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import pandas as pd

# Disease labels to keep. Rows are matched against these strings exactly
# (case- and whitespace-sensitive), so each entry must be spelled the same
# way it appears in the CSV.
# NOTE(review): "chronic obstructive pulmonary disease" is lower-case while
# the other labels are capitalized -- confirm this matches the dataset.
CLASS_NAMES = [
    "Bronchiectasis",
    "Bronchiolitis",
    "chronic obstructive pulmonary disease",
    "Healthy",
    "Pneumonia",
]

# Load the source dataset (replace the filename if yours differs).
df = pd.read_csv('respiratory symptoms and treatment.csv')

# Name of the column that holds the disease labels. Update this if your
# column has a different name (e.g. 'diagnosis', 'label').
LABEL_COLUMN = 'Disease'

# Fail early with an actionable message instead of a bare KeyError when the
# label column is absent (same exception type as the plain lookup raises).
if LABEL_COLUMN not in df.columns:
    raise KeyError(
        f"Label column {LABEL_COLUMN!r} not found in the dataset; "
        f"available columns: {list(df.columns)}"
    )

# Keep only the rows whose label is one of CLASS_NAMES, then renumber the
# index from 0 so the filtered frame has a contiguous index.
filtered_df = df[df[LABEL_COLUMN].isin(CLASS_NAMES)].reset_index(drop=True)

# Save the filtered dataset; index=False keeps the row index out of the CSV.
filtered_df.to_csv('filtered_dataset.csv', index=False)

# Report how many rows were read and how many survived the filter.
print(f"Original rows: {len(df)}")
print(f"Filtered rows: {len(filtered_df)}")
print("Filtered dataset saved as 'filtered_dataset.csv'")