# KasaHealth / debug_test_files.py
# 78anand's picture
# Upload folder using huggingface_hub
# f317798 verified
import os
import sys
import pandas as pd
# Root folder of the extracted respiratory sound dataset. get_all_test_files()
# looks here for "patient_diagnosis.csv" and the "audio_and_txt_files" folder.
# NOTE: machine-specific absolute Windows path.
RESP_BASE = r"c:\Users\ASUS\lung_ai_project\data\extracted_cough\Respiratory_Sound_Dataset-main"
# Root folder of the Coswara data. get_all_test_files() looks here for the
# "csvs" metadata folder and the "coswara_data/kaggle_data" recordings folder.
# NOTE: machine-specific absolute Windows path.
COS_BASE = r"c:\Users\ASUS\lung_ai_project\data\coswara"
def _binary_label(raw_status):
    """Map a raw diagnosis/status value to "healthy" or "sick".

    Returns None when the value is not a non-empty string (for example the
    float NaN that pandas produces for an empty CSV cell, or a missing key),
    so the caller can skip the sample instead of crashing on ``.lower()``.
    """
    if not isinstance(raw_status, str) or not raw_status:
        return None
    return "healthy" if raw_status.lower() == "healthy" else "sick"


def _collect_respiratory_samples(resp_base):
    """Return (wav_path, label) pairs for the respiratory dataset under resp_base."""
    resp_csv = os.path.join(resp_base, "patient_diagnosis.csv")
    if not os.path.exists(resp_csv):
        print(f"Resp csv {resp_csv} not found")
        return []

    resp_df = pd.read_csv(resp_csv)
    resp_map = dict(zip(resp_df['Patient_ID'], resp_df['DIAGNOSIS']))

    resp_dir = os.path.join(resp_base, "audio_and_txt_files")
    if not os.path.isdir(resp_dir):
        print(f"Resp dir {resp_dir} not found")
        return []

    resp_files = [f for f in os.listdir(resp_dir) if f.endswith(".wav")]
    print(f"Found {len(resp_files)} resp files")

    samples = []
    for file_name in resp_files:
        # The patient id is the numeric prefix before the first underscore;
        # files that do not follow that naming scheme are skipped.
        try:
            pid = int(file_name.split('_')[0])
        except ValueError:
            continue
        label = _binary_label(resp_map.get(pid))
        if label is not None:
            samples.append((os.path.join(resp_dir, file_name), label))
    return samples


def _load_coswara_status_map(cos_csv_dir):
    """Merge the id -> covid_status columns of every CSV in cos_csv_dir into one dict."""
    status_map = {}
    for csv_file in os.listdir(cos_csv_dir):
        if not csv_file.endswith(".csv"):
            continue
        csv_path = os.path.join(cos_csv_dir, csv_file)
        try:
            df = pd.read_csv(csv_path)
        except (OSError, ValueError) as exc:
            # pandas' parser/empty-data/decoding errors all derive from
            # ValueError; report the file and keep going (best effort).
            print(f"Skipping unreadable coswara csv {csv_path}: {exc}")
            continue
        if 'id' in df.columns and 'covid_status' in df.columns:
            # Later files/rows override earlier ones for the same id.
            status_map.update(zip(df['id'], df['covid_status']))
    return status_map


def _collect_coswara_samples(cos_base):
    """Return (wav_path, label) pairs for the Coswara dataset under cos_base."""
    cos_csv_dir = os.path.join(cos_base, "csvs")
    if os.path.isdir(cos_csv_dir):
        cos_status_map = _load_coswara_status_map(cos_csv_dir)
        print(f"Loaded {len(cos_status_map)} coswara status mappings")
    else:
        cos_status_map = {}
        print(f"Coswara csv dir {cos_csv_dir} not found")

    cos_data_dir = os.path.join(cos_base, "coswara_data", "kaggle_data")
    if not os.path.isdir(cos_data_dir):
        print(f"Coswara data dir {cos_data_dir} not found")
        return []

    pids = os.listdir(cos_data_dir)
    print(f"Found {len(pids)} PIDs in coswara data dir")

    samples = []
    for pid in pids:
        label = _binary_label(cos_status_map.get(pid))
        if label is None:
            continue
        pid_dir = os.path.join(cos_data_dir, pid)
        if not os.path.isdir(pid_dir):
            continue
        # At most one recording per participant: prefer "cough.wav" and
        # fall back to "cough-heavy.wav".
        for audio_name in ("cough.wav", "cough-heavy.wav"):
            path = os.path.join(pid_dir, audio_name)
            if os.path.exists(path):
                samples.append((path, label))
                break
    return samples


def get_all_test_files(resp_base=None, cos_base=None):
    """Collect labelled audio files from the respiratory and Coswara datasets.

    Args:
        resp_base: Root folder of the respiratory dataset. Defaults to the
            module-level ``RESP_BASE`` when None.
        cos_base: Root folder of the Coswara dataset. Defaults to the
            module-level ``COS_BASE`` when None.

    Returns:
        A list of ``(path, label)`` tuples, respiratory samples first, where
        ``label`` is "healthy" when the recorded diagnosis/status equals
        "healthy" (case-insensitive) and "sick" otherwise. Entries whose
        diagnosis/status is missing or empty are skipped.

    Missing folders or files are reported with ``print`` and contribute no
    samples; progress counts are printed along the way.
    """
    resp_base = RESP_BASE if resp_base is None else resp_base
    cos_base = COS_BASE if cos_base is None else cos_base

    all_samples = []
    all_samples.extend(_collect_respiratory_samples(resp_base))
    all_samples.extend(_collect_coswara_samples(cos_base))
    return all_samples
# Debug driver: collect every labelled sample once at import/run time and
# report how many were found.
samples = get_all_test_files()
print(f"Total samples collected: {len(samples)}")
# Show a short preview only when at least one sample was collected.
if len(samples) > 0:
    print(f"First 5: {samples[:5]}")