import logging
import os

import pandas as pd

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
data_logger = logging.getLogger("data_pipeline")
# process_audio_metadata() logs through this name; it was previously never
# defined, so every call raised NameError.
processing_logger = logging.getLogger("audio_processing")

# Column order of the frame produced by process_audio_metadata(); pinned so
# that an empty folder still yields a frame with the expected schema.
_AUDIO_METADATA_COLUMNS = [
    'Patient number',
    'Recording index',
    'Chest location',
    'Acquisition mode',
    'Recording equipment',
]


def load_data(diagnosis_path='.//data//Respiratory_Sound_Database//patient_diagnosis.csv',
              demographic_path='.//data//demographic_info.txt') -> pd.DataFrame:
    """Load patient diagnosis and demographic data and join them.

    Args:
        diagnosis_path: Header-less CSV read as two columns,
            ``Patient number`` and ``Diagnosis``.
        demographic_path: Header-less, single-space-delimited text file read
            as ``Patient number``, ``Age``, ``Sex``, ``Adult BMI (kg/m2)``,
            ``Child Weight (kg)`` and ``Child Height (cm)``.

    Returns:
        The demographic rows left-joined with the diagnosis rows on
        ``Patient number``; patients without a diagnosis row are kept with a
        missing ``Diagnosis``.

    Raises:
        FileNotFoundError: If either path does not exist (raised by
            ``pd.read_csv``).
    """
    data_logger.info("Loading patient diagnosis and demographic data.")

    # Load diagnosis data
    diagnosis_df = pd.read_csv(diagnosis_path, names=['Patient number', 'Diagnosis'])

    # Load demographic data
    patient_df = pd.read_csv(
        demographic_path,
        names=['Patient number', 'Age', 'Sex', 'Adult BMI (kg/m2)',
               'Child Weight (kg)', 'Child Height (cm)'],
        delimiter=' ',
    )

    data_logger.info("Data successfully loaded.")

    # 'Patient number' is the only column the two frames share; naming it
    # explicitly keeps the join key stable if either schema grows.
    return pd.merge(left=patient_df, right=diagnosis_df, how='left', on='Patient number')


def process_audio_metadata(folder_path) -> pd.DataFrame:
    """Extract audio metadata from the ``.txt`` filenames in a folder.

    Each ``.txt`` filename is split on ``_`` and its first five fields are
    read as patient number, recording index, chest location, acquisition
    mode and recording equipment (the equipment field is cut at its first
    ``.``). Files with other extensions are ignored.

    Args:
        folder_path: Directory whose entries are listed with ``os.listdir``.

    Returns:
        A DataFrame with one row per ``.txt`` file and the columns
        ``Patient number`` (int), ``Recording index``, ``Chest location``,
        ``Acquisition mode`` and ``Recording equipment``. Rows follow sorted
        filename order. An empty folder yields an empty frame with the same
        columns.

    Raises:
        IndexError: If a ``.txt`` filename has fewer than five ``_`` fields.
        ValueError: If the first field of a ``.txt`` filename is not an int.
    """
    processing_logger.info("Extracting audio metadata from filenames.")

    records = []
    # os.listdir() order is arbitrary; sorting makes the row order reproducible.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.txt'):
            continue
        parts = filename.split('_')
        records.append({
            'Patient number': int(parts[0]),
            'Recording index': parts[1],
            'Chest location': parts[2],
            'Acquisition mode': parts[3],
            # Drop the file extension from the last field.
            'Recording equipment': parts[4].split('.')[0],
        })

    processing_logger.info("Audio metadata extraction complete.")
    return pd.DataFrame(records, columns=_AUDIO_METADATA_COLUMNS)