Spaces:
Running
Running
| import os | |
| import shutil | |
| import pandas as pd | |
# Sort respiratory-sound recordings into "healthy" and "sick" folders.
#
# Each .wav file name starts with a numeric patient id (e.g.
# 101_1b1_Al_sc_Meditron.wav). That id is looked up in the diagnosis CSV;
# files whose diagnosis is exactly 'Healthy' are copied to the healthy
# folder, files with any other known diagnosis go to the sick folder, and
# files whose patient id is not in the CSV are only reported.

# Input and output locations
base_dir = r"c:\Users\ASUS\lung_ai_project"
extracted_dir = os.path.join(
    base_dir, "data", "extracted_cough", "Respiratory_Sound_Dataset-main"
)
audio_txt_dir = os.path.join(extracted_dir, "audio_and_txt_files")
diagnosis_file = os.path.join(extracted_dir, "patient_diagnosis.csv")
output_healthy = os.path.join(base_dir, "data", "cough", "healthy")
output_sick = os.path.join(base_dir, "data", "cough", "sick")

# Make sure both destination folders exist before copying anything
for out_dir in (output_healthy, output_sick):
    os.makedirs(out_dir, exist_ok=True)

# Load the diagnosis table and show its layout for a quick sanity check
df = pd.read_csv(diagnosis_file)
print("Columns:", df.columns)
print(df.head())

# Lookup table: patient id -> diagnosis label.
# NOTE(review): relies on the CSV having 'Patient_ID' and 'DIAGNOSIS'
# header columns -- confirm against the actual file.
diagnosis_map = dict(zip(df['Patient_ID'], df['DIAGNOSIS']))

# Collect the recordings to process
files = os.listdir(audio_txt_dir)
wav_files = [f for f in files if f.endswith('.wav')]
print(f"Found {len(wav_files)} wav files")

count_healthy = 0
count_sick = 0

for wav_file in wav_files:
    # The patient id is everything before the first underscore
    prefix = wav_file.partition('_')[0]
    try:
        patient_id = int(prefix)
    except ValueError:
        # File name does not start with a numeric id
        print(f"Skipping {wav_file}")
        continue

    diagnosis = diagnosis_map.get(patient_id, "Unknown")
    if diagnosis == "Unknown":
        # Patient is not listed in the diagnosis table; nothing is copied
        print(f"Unknown diagnosis for patient {patient_id}")
        continue

    source_path = os.path.join(audio_txt_dir, wav_file)
    if diagnosis == 'Healthy':
        shutil.copy2(source_path, os.path.join(output_healthy, wav_file))
        count_healthy += 1
    else:
        # Any diagnosis other than 'Healthy' is treated as sick
        shutil.copy2(source_path, os.path.join(output_sick, wav_file))
        count_sick += 1

print(f"Result: {count_healthy} healthy files, {count_sick} sick files.")