Spaces:
Running
Running
| import os | |
| import sys | |
| import pandas as pd | |
# Root of the respiratory sound dataset: "patient_diagnosis.csv" and the
# "audio_and_txt_files" directory are looked up directly beneath this path.
RESP_BASE = r"c:\Users\ASUS\lung_ai_project\data\extracted_cough\Respiratory_Sound_Dataset-main"

# Root of the Coswara data: the "csvs" directory and the
# "coswara_data/kaggle_data" directory are looked up beneath this path.
COS_BASE = r"c:\Users\ASUS\lung_ai_project\data\coswara"
def _binary_label(raw_status):
    """Map a raw diagnosis/status value to "healthy" or "sick".

    Returns None when the value is not a non-empty string (for example a
    missing id, or the NaN pandas produces for an empty CSV cell), so callers
    can skip the sample instead of failing on ``.lower()``.
    """
    if not isinstance(raw_status, str) or not raw_status:
        return None
    return "healthy" if raw_status.lower() == "healthy" else "sick"


def _collect_respiratory_samples(resp_base):
    """Return (wav_path, label) pairs for the respiratory dataset under resp_base."""
    resp_csv = os.path.join(resp_base, "patient_diagnosis.csv")
    if not os.path.exists(resp_csv):
        print(f"Resp csv {resp_csv} not found")
        return []

    resp_df = pd.read_csv(resp_csv)
    resp_map = dict(zip(resp_df['Patient_ID'], resp_df['DIAGNOSIS']))

    resp_dir = os.path.join(resp_base, "audio_and_txt_files")
    if not os.path.isdir(resp_dir):
        print(f"Resp dir {resp_dir} not found")
        return []

    # os.listdir order is arbitrary; sort so the result is reproducible.
    resp_files = sorted(f for f in os.listdir(resp_dir) if f.endswith(".wav"))
    print(f"Found {len(resp_files)} resp files")

    samples = []
    for name in resp_files:
        # The patient id is the integer prefix before the first underscore;
        # files that do not follow that naming are skipped.
        try:
            pid = int(name.split('_')[0])
        except ValueError:
            continue
        label = _binary_label(resp_map.get(pid))
        if label is not None:
            samples.append((os.path.join(resp_dir, name), label))
    return samples


def _load_coswara_status(csv_dir):
    """Build an {id: covid_status} dict from every usable CSV in csv_dir."""
    status_map = {}
    if not os.path.isdir(csv_dir):
        print(f"Coswara csv dir {csv_dir} not found")
        return status_map

    for csv_file in sorted(os.listdir(csv_dir)):
        if not csv_file.endswith(".csv"):
            continue
        csv_path = os.path.join(csv_dir, csv_file)
        try:
            df = pd.read_csv(csv_path)
        except (OSError, ValueError) as err:
            # pandas' EmptyDataError/ParserError and UnicodeDecodeError are
            # ValueError subclasses; loading stays best-effort, but the skip
            # is reported instead of being swallowed silently.
            print(f"Skipping unreadable coswara csv {csv_path}: {err}")
            continue
        if 'id' in df.columns and 'covid_status' in df.columns:
            status_map.update(zip(df['id'], df['covid_status']))

    print(f"Loaded {len(status_map)} coswara status mappings")
    return status_map


def _collect_coswara_samples(cos_base):
    """Return (wav_path, label) pairs for the Coswara data under cos_base."""
    status_map = _load_coswara_status(os.path.join(cos_base, "csvs"))

    cos_data_dir = os.path.join(cos_base, "coswara_data", "kaggle_data")
    if not os.path.isdir(cos_data_dir):
        print(f"Coswara data dir {cos_data_dir} not found")
        return []

    pids = sorted(os.listdir(cos_data_dir))
    print(f"Found {len(pids)} PIDs in coswara data dir")

    samples = []
    for pid in pids:
        label = _binary_label(status_map.get(pid))
        if label is None:
            continue
        pid_dir = os.path.join(cos_data_dir, pid)
        if not os.path.isdir(pid_dir):
            continue
        # Take at most one recording per participant, preferring "cough.wav".
        for audio_name in ("cough.wav", "cough-heavy.wav"):
            path = os.path.join(pid_dir, audio_name)
            if os.path.exists(path):
                samples.append((path, label))
                break
    return samples


def get_all_test_files(resp_base=None, cos_base=None):
    """Collect labelled audio files from the respiratory and Coswara datasets.

    Args:
        resp_base: Root directory of the respiratory dataset. It must contain
            "patient_diagnosis.csv" (columns "Patient_ID" and "DIAGNOSIS") and
            an "audio_and_txt_files" directory. Defaults to RESP_BASE.
        cos_base: Root directory of the Coswara data. It must contain a "csvs"
            directory (CSV files with "id" and "covid_status" columns) and a
            "coswara_data/kaggle_data" directory with one sub-directory per
            id. Defaults to COS_BASE.

    Returns:
        A list of (path, label) tuples, respiratory samples first. The label
        is "healthy" when the lower-cased diagnosis/status equals "healthy"
        and "sick" for any other non-empty value. Entries whose diagnosis or
        status is missing are skipped. Within each dataset the entries are
        ordered by file or directory name.

    Missing files or directories are reported with print() and contribute no
    samples; they do not raise.
    """
    if resp_base is None:
        resp_base = RESP_BASE
    if cos_base is None:
        cos_base = COS_BASE

    all_samples = []
    all_samples.extend(_collect_respiratory_samples(resp_base))
    all_samples.extend(_collect_coswara_samples(cos_base))
    return all_samples
# Run the collection once at import time and print a short summary.
samples = get_all_test_files()
print(f"Total samples collected: {len(samples)}")
if samples:
    # Show a small preview so the (path, label) pairs can be eyeballed.
    first_five = samples[:5]
    print(f"First 5: {first_five}")