Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- app/main.py +1 -1
- test_real.txt +0 -0
- test_real_pipeline.py +121 -0
- utils/augment_phone_mic.py +65 -0
app/main.py
CHANGED
|
@@ -94,7 +94,7 @@ def predict():
|
|
| 94 |
|
| 95 |
try:
|
| 96 |
load_resources()
|
| 97 |
-
y, sr = librosa.load(filepath, sr=16000
|
| 98 |
|
| 99 |
# VAD Lite
|
| 100 |
rms_energy = np.mean(librosa.feature.rms(y=y))
|
|
|
|
| 94 |
|
| 95 |
try:
|
| 96 |
load_resources()
|
| 97 |
+
y, sr = librosa.load(filepath, sr=16000)
|
| 98 |
|
| 99 |
# VAD Lite
|
| 100 |
rms_energy = np.mean(librosa.feature.rms(y=y))
|
test_real.txt
ADDED
|
Binary file (3.78 kB). View file
|
|
|
test_real_pipeline.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import numpy as np
|
| 4 |
+
import librosa
|
| 5 |
+
import tensorflow as tf
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
import random
|
| 8 |
+
|
| 9 |
+
# Force TF logs to error only
|
| 10 |
+
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
| 11 |
+
|
| 12 |
+
PROJECT_ROOT = r"c:\Users\ASUS\lung_ai_project"
|
| 13 |
+
sys.path.insert(0, PROJECT_ROOT)
|
| 14 |
+
|
| 15 |
+
from utils.hear_extractor import HeARExtractor
|
| 16 |
+
from utils.audio_preprocessor import advanced_preprocess
|
| 17 |
+
|
| 18 |
+
# Paths
|
| 19 |
+
AUDIO_ROOT = r"c:\Users\ASUS\lung_ai_project\data\coughvid_public\organized"
|
| 20 |
+
SICK_DIR = os.path.join(AUDIO_ROOT, "sick")
|
| 21 |
+
HEALTHY_DIR = os.path.join(AUDIO_ROOT, "healthy")
|
| 22 |
+
MODEL_PATH = os.path.join(PROJECT_ROOT, "models", "hear_classifier_v8_elite.h5")
|
| 23 |
+
|
| 24 |
+
META_PATH = r"c:\Users\ASUS\lung_ai_project\data\coughvid_public\metadata_compiled.csv"
|
| 25 |
+
import pandas as pd
|
| 26 |
+
|
| 27 |
+
def get_audio_files(directory, count=50, is_sick=True):
    """Select up to `count` audio file paths for one class from the CoughVid metadata.

    Filters the metadata CSV for high-confidence samples:
      - sick:    status in {COVID-19, sick}, cough_detected > 0.8, SNR > 5
      - healthy: status == healthy, cough_detected > 0.95
    then resolves each uuid to a file that actually exists under `directory`
    (names may or may not carry the "cv_" prefix, with several extensions).

    Args:
        directory: Folder containing the organized audio files.
        count: Maximum number of paths to return.
        is_sick: Select the sick cohort when True, the healthy cohort otherwise.

    Returns:
        list[str]: Existing file paths, at most `count` of them.
    """
    df = pd.read_csv(META_PATH)
    if is_sick:
        uuids = df[(df['status'].isin(['COVID-19', 'sick'])) & (df['cough_detected'] > 0.8) & (df['SNR'] > 5)]['uuid'].tolist()
    else:
        uuids = df[(df['status'] == 'healthy') & (df['cough_detected'] > 0.95)]['uuid'].tolist()

    random.seed(42)
    # Over-sample uuids because some files are missing on disk — but never ask
    # for more than exist: random.sample raises ValueError if k > population.
    sample_size = min(count * 3, len(uuids))
    uuids = random.sample(uuids, sample_size)

    files = []
    for u in uuids:
        for ext in ['.webm', '.wav', '.ogg']:
            # Files may be stored with or without the "cv_" prefix; take the
            # first candidate that exists (prefix form wins, as before).
            candidates = (os.path.join(directory, f"cv_{u}{ext}"),
                          os.path.join(directory, f"{u}{ext}"))
            found = next((p for p in candidates if os.path.exists(p)), None)
            if found is not None:
                files.append(found)
                break
        if len(files) == count:
            break
    return files
| 51 |
+
|
| 52 |
+
def test_pipeline():
    """Replay the production API pipeline over ~100 real CoughVid samples.

    Runs each sample through the exact steps the API performs (load -> VAD
    energy gate -> preprocess -> HeAR embedding -> classifier -> threshold)
    and prints per-class accuracy. Samples that fail any stage are skipped
    and counted instead of being silently dropped.
    """
    print("Loading V8 Elite Model & HeAR Extractor...")
    classifier_model = tf.keras.models.load_model(MODEL_PATH, compile=False)
    extractor = HeARExtractor()

    sick_files = get_audio_files(SICK_DIR, 50, True)
    healthy_files = get_audio_files(HEALTHY_DIR, 50, False)

    test_cases = [(f, "sick") for f in sick_files] + [(f, "healthy") for f in healthy_files]

    correct_healthy = 0
    correct_sick = 0
    total_healthy = 0
    total_sick = 0
    skipped = 0  # rejected by VAD, failed extraction, or decode errors

    # Decision threshold used by the API: prob > 0.50 -> "sick".
    # Loop-invariant, so defined once outside the loop.
    THRESHOLD = 0.50

    print("\nStarting Real-World API Pipeline Test (100 Samples)...")
    for file_path, true_label in tqdm(test_cases):
        try:
            # 1. API Load (full duration, resampled to 16 kHz like the API)
            y, sr = librosa.load(file_path, sr=16000)

            # 2. API VAD: reject near-silent recordings via an RMS energy gate
            rms_energy = np.mean(librosa.feature.rms(y=y))
            if rms_energy < 0.005:
                # API rejects quiet audio. For testing, skip.
                skipped += 1
                continue

            # 3. API Preprocess
            y_clean = advanced_preprocess(y, sr)

            # 4. API Extract (HeAR embedding; None signals failure)
            emb = extractor.extract(y_clean)
            if emb is None:
                skipped += 1
                continue

            # 5. API Predict (single-sample batch)
            X = emb[np.newaxis, ...]
            prob = classifier_model.predict(X, verbose=0)[0][0]

            # 6. API Logic: threshold the sickness probability
            final_label = "sick" if prob > THRESHOLD else "healthy"

            if true_label == "healthy":
                total_healthy += 1
                if final_label == "healthy":
                    correct_healthy += 1
            else:
                total_sick += 1
                if final_label == "sick":
                    correct_sick += 1

        except Exception as e:
            # One corrupt file must not abort the benchmark, but don't hide
            # it either — report and count it instead of a silent `continue`.
            tqdm.write(f"Skipping {file_path}: {e}")
            skipped += 1
            continue

    print("\n" + "="*50)
    print(" FINAL API PIPELINE RESULTS")
    print("="*50)

    if total_healthy > 0:
        h_acc = (correct_healthy / total_healthy) * 100
        print(f"✅ HEALTHY Accuracy: {h_acc:.2f}% ({correct_healthy}/{total_healthy})")

    if total_sick > 0:
        s_acc = (correct_sick / total_sick) * 100
        print(f"🦠 SICK Accuracy: {s_acc:.2f}% ({correct_sick}/{total_sick})")

    if skipped:
        print(f"Skipped samples: {skipped}")
+
# Script entry point: run the full real-world pipeline benchmark.
if __name__ == "__main__":
    test_pipeline()
utils/augment_phone_mic.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import numpy as np
|
| 4 |
+
import librosa
|
| 5 |
+
import soundfile as sf
|
| 6 |
+
import warnings
|
| 7 |
+
from tqdm import tqdm
|
| 8 |
+
|
| 9 |
+
"""
|
| 10 |
+
PHASE 9: TELEPHONY & WEBM DATA AUGMENTATION
|
| 11 |
+
To prevent healthy patients from being flagged as "High Risk" when recording from
|
| 12 |
+
cheap laptop mics or browsers (WebM compression artifacts), we must intentionally
|
| 13 |
+
degrade our training dataset to teach the HeAR AI what a "Phone" sounds like.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
def degrade_audio_to_phone_quality(audio, sr):
    """
    Simulates a cheap, compressed smartphone microphone over a web browser.

    1. Bandpass filter to isolate 300Hz - 3400Hz (Standard Telephony Band)
    2. Intentional Downsampling & Quantization (Bit-crushing to simulate compression)
    3. Hiss Injection (Simulating mic hardware noise floor)
    """
    import scipy.signal as signal

    # 1. Restrict the spectrum to the standard telephony band (300-3400 Hz)
    # with a 4th-order Butterworth, applied forward/backward (zero phase).
    nyq = 0.5 * sr
    band_edges = [300.0 / nyq, 3400.0 / nyq]
    b, a = signal.butter(4, band_edges, btype='band')
    filtered = signal.filtfilt(b, a, audio)

    # 2. Inject a randomly scaled white-noise floor (mic hiss).
    hiss_gain = 0.005 * np.random.uniform()
    hissy = filtered + np.random.randn(len(filtered)) * hiss_gain

    # 3. Crush to 8-bit resolution to mimic aggressive WebM-style encoding.
    bounded = np.clip(hissy, -1.0, 1.0)
    return np.round(bounded * 128) / 128.0
| 42 |
+
|
| 43 |
+
def process_directory(source_dir, output_dir):
    """Degrade every .wav in `source_dir` and write results to `output_dir`.

    Each output is the phone-quality-degraded version of the input, loaded at
    16 kHz and saved as "phone_<original name>". The job is best-effort:
    unreadable files are skipped, but reported rather than silently ignored.

    Args:
        source_dir: Folder of clean .wav recordings.
        output_dir: Destination folder (created if missing).
    """
    os.makedirs(output_dir, exist_ok=True)
    # Case-insensitive match so ".WAV" files (common on Windows) are included.
    files = [f for f in os.listdir(source_dir) if f.lower().endswith('.wav')]

    for f in tqdm(files, desc=f"Corrupting {os.path.basename(source_dir)}"):
        try:
            path = os.path.join(source_dir, f)
            # Load at the model's native 16 kHz rate.
            y, sr = librosa.load(path, sr=16000)
            y_degraded = degrade_audio_to_phone_quality(y, sr)

            out_path = os.path.join(output_dir, f"phone_{f}")
            sf.write(out_path, y_degraded, sr)
        except Exception as e:
            # Keep the batch running on bad files, but surface the failure
            # instead of the original bare `except: pass`.
            tqdm.write(f"Skipping {f}: {e}")
| 57 |
+
|
| 58 |
+
if __name__ == "__main__":
    # Source (clean) and destination (degraded) dataset roots.
    base = r"c:\Users\ASUS\lung_ai_project\data\cough"
    target = r"c:\Users\ASUS\lung_ai_project\data\cough_phone_augmented"

    # Corrupt both classes, mirroring the source layout (sick first).
    for label in ("sick", "healthy"):
        process_directory(os.path.join(base, label), os.path.join(target, label))

    print("Phone Simulation Dataset Created. Ready for V9 Elite Training.")
|