trixy194t committed on
Commit
96339a8
·
verified ·
1 Parent(s): d00d112

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +204 -61
app.py CHANGED
@@ -9,6 +9,8 @@ import torch.nn as nn
9
  from torchvision import models
10
  from fastapi import FastAPI, UploadFile, File, HTTPException
11
  from fastapi.middleware.cors import CORSMiddleware
 
 
12
 
13
  app = FastAPI()
14
 
@@ -27,19 +29,21 @@ def load_model():
27
  model = models.efficientnet_b0(weights=None)
28
  num_ftrs = model.classifier[1].in_features
29
  model.classifier[1] = nn.Linear(num_ftrs, 1)
 
30
  try:
31
  state_dict = torch.load(MODEL_PATH, map_location=device)
32
  model.load_state_dict(state_dict, strict=False)
33
- print("✅ PyTorch EfficientNet Loaded Successfully")
34
  except Exception as e:
35
  print(f"❌ Load Error: {e}")
 
36
  model.to(device)
37
  model.eval()
38
  return model
39
 
40
  model = load_model()
41
 
42
- # --- LOGIC FUNCTIONS ---
43
 
44
  def clean_audio_stream(y, sr=16000):
45
  y_denoised = nr.reduce_noise(y=y, sr=sr)
@@ -47,59 +51,185 @@ def clean_audio_stream(y, sr=16000):
47
  y_filtered = signal.filtfilt(b, a, y_denoised)
48
  return y_filtered
49
 
50
def detect_snoring_sliding_window(y_segment, sr):
    """
    Scan a voiced segment with a sliding window and classify each window
    with the module-level EfficientNet ``model``.

    Parameters
    ----------
    y_segment : 1-D audio samples for one voiced interval.
    sr : sample rate of ``y_segment`` in Hz.

    Returns
    -------
    (bool, float)
        ``found_snore`` — True if any window's sigmoid confidence exceeded
        THRESHOLD — plus the best confidence seen, rounded to 2 decimals.
    """
    WINDOW_SIZE = 3.0 # window length in seconds (note: 3 s, not 1 s as an older comment claimed)
    STEP_SIZE = 0.25 # 0.25 second steps for high resolution
    THRESHOLD = 0.62 # Strict sigmoid cut-off for calling a window a snore

    samples_window = int(WINDOW_SIZE * sr)
    samples_step = int(STEP_SIZE * sr)

    # Segment shorter than a single window: nothing to classify.
    if len(y_segment) < samples_window:
        return False, 0.0

    best_conf = 0.0
    found_snore = False

    # Sliding through the segment
    for i in range(0, len(y_segment) - samples_window, samples_step):
        chunk = y_segment[i : i + samples_window]

        # RMS Gate: skip near-silent windows to save inference time.
        if np.sqrt(np.mean(chunk**2)) < 0.002:
            continue

        # Pre-process: pad/trim to 16000 samples (1 s at 16 kHz).
        # NOTE(review): this truncates the 3 s window to 1 s — confirm intended.
        y_fixed = librosa.util.fix_length(chunk, size=16000)
        S = librosa.feature.melspectrogram(y=y_fixed, sr=16000, n_mels=128)
        S_db = librosa.power_to_db(S, ref=np.max)
        # Min-max normalise; epsilon guards against division by zero on flat input.
        S_norm = (S_db - S_db.min()) / (S_db.max() - S_db.min() + 1e-6)

        # Replicate the single-channel spectrogram to 3 channels for EfficientNet.
        input_tensor = torch.tensor(S_norm).float().unsqueeze(0).unsqueeze(0)
        input_tensor = input_tensor.repeat(1, 3, 1, 1).to(device)

        with torch.no_grad():
            output = model(input_tensor)
            conf = torch.sigmoid(output).item()

        if conf > best_conf:
            best_conf = conf

        if conf > THRESHOLD:
            found_snore = True

    return found_snore, round(best_conf, 2)
 
 
 
 
 
96
 
97
  def validate_sleep_recording(y, sr):
98
  duration = len(y) / sr
99
- if duration < 20: return False, "Audio too short"
100
  if np.sqrt(np.mean(y**2)) < 0.001: return False, "Audio is blank"
101
  return True, "Valid"
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  # --- API ENDPOINTS ---
104
 
105
  @app.post("/analyze")
@@ -116,50 +246,63 @@ async def analyze_audio(file: UploadFile = File(...)):
116
  return {"valid_recording": False, "reason": reason}
117
 
118
  y_clean = clean_audio_stream(y_orig, sr)
 
 
 
 
 
 
119
  intervals = librosa.effects.split(y_clean, top_db=25)
120
-
121
  annotations = []
122
  prev_end = 0
123
- snore_count = 0
124
  apnea_count = 0
125
 
126
  for start, end in intervals:
127
- # --- APNEA LOGIC ---
128
  gap_dur = (start - prev_end) / sr
129
  if 10.0 <= gap_dur <= 120.0:
130
  apnea_count += 1
131
- risk = "LOW" if gap_dur < 15.0 else ("MEDIUM" if gap_dur < 20.0 else "HIGH")
 
 
 
 
 
 
 
 
132
  annotations.append({
133
  "label": "APNEA",
134
  "start_sec": round(prev_end/sr, 2),
135
  "end_sec": round(start/sr, 2),
136
  "duration": round(gap_dur, 2),
137
- "risk_level": risk
138
- })
139
-
140
- # --- SNORING LOGIC (Using Sliding Window) ---
141
- seg = y_orig[start:end]
142
- is_snore, conf = detect_snoring_sliding_window(seg, sr)
143
- if is_snore:
144
- snore_count += 1
145
- annotations.append({
146
- "label": "SNORING",
147
- "start_sec": round(start/sr, 2),
148
- "end_sec": round(end/sr, 2),
149
- "duration": round((end-start)/sr, 2),
150
- "confidence": conf
151
  })
 
152
  prev_end = end
 
 
 
 
 
 
 
 
 
 
153
 
154
- # Stats logic
155
  duration_hours = (len(y_orig) / sr) / 3600
156
  ahi = apnea_count / duration_hours if duration_hours > 0 else 0
157
-
 
158
  overall_risk = ""
159
  if ahi >= 20: overall_risk = "HIGH"
160
  elif ahi >= 15: overall_risk = "MEDIUM"
161
  elif ahi >= 10: overall_risk = "LOW"
162
 
 
163
  return {
164
  "valid_recording": True,
165
  "snore_count": snore_count,
 
9
  from torchvision import models
10
  from fastapi import FastAPI, UploadFile, File, HTTPException
11
  from fastapi.middleware.cors import CORSMiddleware
12
+ from scipy.ndimage import gaussian_filter1d
13
+ from scipy.signal import find_peaks
14
 
15
  app = FastAPI()
16
 
 
29
  model = models.efficientnet_b0(weights=None)
30
  num_ftrs = model.classifier[1].in_features
31
  model.classifier[1] = nn.Linear(num_ftrs, 1)
32
+
33
  try:
34
  state_dict = torch.load(MODEL_PATH, map_location=device)
35
  model.load_state_dict(state_dict, strict=False)
36
+ print("✅ PyTorch EfficientNet Loaded")
37
  except Exception as e:
38
  print(f"❌ Load Error: {e}")
39
+
40
  model.to(device)
41
  model.eval()
42
  return model
43
 
44
  model = load_model()
45
 
46
+ # --- ORIGINAL LOGIC FUNCTIONS ---
47
 
48
  def clean_audio_stream(y, sr=16000):
49
  y_denoised = nr.reduce_noise(y=y, sr=sr)
 
51
  y_filtered = signal.filtfilt(b, a, y_denoised)
52
  return y_filtered
53
 
54
def is_snoring_sound_pytorch(y_segment, sr):
    """Classify one audio segment as snore / not-snore with the EfficientNet model.

    Returns a ``(is_snore, confidence)`` tuple. Any failure during
    preprocessing or inference is treated as "not a snore" with confidence 0.0
    so a single bad segment cannot abort the whole analysis.
    """
    try:
        # Loudness gate: segments quieter than this RMS are rejected without
        # running the model (threshold kept low so quiet snores still pass).
        if np.sqrt(np.mean(y_segment**2)) < 0.002:
            return False, 0.0

        # Bring the segment to the model's expected rate/length: 1 s @ 16 kHz.
        if sr != 16000:
            y_segment = librosa.resample(y_segment, orig_sr=sr, target_sr=16000)
        clip = librosa.util.fix_length(y_segment, size=16000)

        # Log-mel spectrogram, min-max normalised to [0, 1].
        # The 1e-6 epsilon prevents division by zero on a flat spectrogram.
        mel = librosa.feature.melspectrogram(y=clip, sr=16000, n_mels=128)
        mel_db = librosa.power_to_db(mel, ref=np.max)
        mel_norm = (mel_db - mel_db.min()) / (mel_db.max() - mel_db.min() + 1e-6)

        # (H, W) -> (1, 3, H, W): EfficientNet expects a 3-channel batch.
        batch = torch.tensor(mel_norm).float().unsqueeze(0).unsqueeze(0)
        batch = batch.repeat(1, 3, 1, 1)

        with torch.no_grad():
            confidence = torch.sigmoid(model(batch.to(device))).item()

        # 0.5 sigmoid cut-off: deliberately permissive compared to the old 0.7.
        return confidence > 0.5, round(confidence, 2)
    except Exception as e:
        print(f"Inference error: {e}")
        return False, 0.0
89
 
90
  def validate_sleep_recording(y, sr):
91
  duration = len(y) / sr
92
+ if duration < 20: return False, "Audio too short (< 20s)"
93
  if np.sqrt(np.mean(y**2)) < 0.001: return False, "Audio is blank"
94
  return True, "Valid"
95
 
96
+ # --- NEW ACCURATE SNORE DETECTION FUNCTIONS ---
97
+
98
def segment_audio(audio, sr, segment_duration=1.5, overlap=0.67):
    """Split ``audio`` into fixed-length, overlapping segments.

    Parameters
    ----------
    audio : 1-D sample sequence (array or list).
    sr : sample rate in Hz.
    segment_duration : length of each segment in seconds.
    overlap : fraction of a segment shared with its successor.

    Returns
    -------
    (segments, timestamps)
        Parallel lists of segments and their start times in seconds.
        Audio shorter than one segment yields two empty lists.
    """
    segment_samples = int(segment_duration * sr)
    # FIX: clamp the hop to at least one sample. Previously overlap >= ~1
    # produced hop_samples == 0 and range() raised
    # "ValueError: range() arg 3 must not be zero".
    hop_samples = max(1, int(segment_samples * (1 - overlap)))

    segments = []
    timestamps = []

    for start in range(0, len(audio) - segment_samples + 1, hop_samples):
        segments.append(audio[start:start + segment_samples])
        timestamps.append(start / sr)

    return segments, timestamps
113
+
114
def calculate_audio_features(segment, sr):
    """Return basic loudness/periodicity features for one audio segment.

    Expects ``segment`` as a 1-D numpy array. ``sr`` is accepted for interface
    symmetry with the other per-segment helpers but is not used by any of the
    current features.
    """
    squared = segment ** 2
    n = len(segment)
    return {
        'energy': np.sum(squared) / n,                               # mean power
        'rms': np.sqrt(np.mean(squared)),                            # RMS amplitude
        'zcr': np.sum(np.abs(np.diff(np.sign(segment)))) / (2 * n),  # zero-crossing rate
    }
125
+
126
def detect_snores_accurate(y_clean, sr):
    """
    Accurate snore detection using audio features + peak detection.

    Segments the cleaned audio, scores each segment by a weighted mix of
    energy, RMS and (inverted) zero-crossing rate, then finds peaks in the
    smoothed score and turns them into snore events.

    Returns a chronologically sorted list of event dicts with keys
    'start_time', 'end_time', 'duration', 'confidence', 'composite_score'.
    """
    # 1.5 s windows with 67% overlap -> ~0.5 s hop between feature frames.
    segments, timestamps = segment_audio(y_clean, sr, segment_duration=1.5, overlap=0.67)

    # Per-segment features, augmented with the CNN's snore probability.
    all_features = []

    for i, (segment, timestamp) in enumerate(zip(segments, timestamps)):
        features = calculate_audio_features(segment, sr)
        features['timestamp'] = timestamp

        # Model prediction used as a confidence feature, not as the detector.
        is_snore, conf = is_snoring_sound_pytorch(segment, sr)
        features['snore_prob'] = conf

        all_features.append(features)

    # Convert to arrays
    energies = np.array([f['energy'] for f in all_features])
    rms_values = np.array([f['rms'] for f in all_features])
    zcr_values = np.array([f['zcr'] for f in all_features])

    # Min-max normalise each feature. ZCR is inverted: snores are
    # low-frequency, so FEW zero crossings should score HIGH.
    energy_norm = (energies - energies.min()) / (energies.max() - energies.min() + 1e-8)
    rms_norm = (rms_values - rms_values.min()) / (rms_values.max() - rms_values.min() + 1e-8)
    zcr_norm = 1 - (zcr_values - zcr_values.min()) / (zcr_values.max() - zcr_values.min() + 1e-8)

    # Composite score: Energy (40%) + RMS (40%) + Low ZCR (20%)
    composite_score = energy_norm * 0.4 + rms_norm * 0.4 + zcr_norm * 0.2

    # Smooth so each snore shows up as a single broad peak.
    smoothed_score = gaussian_filter1d(composite_score, sigma=1.2)

    # Peak-picking parameters (units are feature frames, ~0.5 s apart).
    peak_height = np.percentile(smoothed_score, 50)  # only above-median frames
    peak_distance = int(0.8 / 0.5)  # min frames between peaks (~0.8 s at the ~0.5 s hop)
    peak_prominence = 0.04
    peak_width = (0.5, 8)

    peaks, properties = find_peaks(
        smoothed_score,
        height=peak_height,
        distance=peak_distance,
        prominence=peak_prominence,
        width=peak_width
    )

    # Create snore events from peaks
    snore_events = []

    for peak_idx in peaks:
        feature = all_features[peak_idx]

        # Expand up to 3 frames either side of the peak, stopping where the
        # score drops below half the peak value.
        start_idx = peak_idx
        end_idx = peak_idx

        threshold = smoothed_score[peak_idx] * 0.5

        # Find start (walking backwards from the peak)
        for i in range(peak_idx, max(0, peak_idx - 3), -1):
            if smoothed_score[i] < threshold:
                start_idx = i + 1
                break
            start_idx = i

        # Find end (walking forwards from the peak)
        for i in range(peak_idx, min(len(smoothed_score), peak_idx + 3)):
            if smoothed_score[i] < threshold:
                end_idx = i
                break
            end_idx = i

        # Frame indices -> seconds; +1.0 s approximates the event tail.
        start_time = all_features[start_idx]['timestamp']
        end_time = all_features[end_idx]['timestamp'] + 1.0

        # Merge into an existing event when they overlap by more than 0.3 s.
        should_add = True
        for existing in snore_events:
            if start_time < existing['end_time'] - 0.3:  # Overlaps by more than 0.3s
                # Update existing event instead of adding new one
                existing['end_time'] = max(existing['end_time'], end_time)
                existing['confidence'] = max(existing['confidence'], feature['snore_prob'])
                should_add = False
                break

        if should_add:
            duration = end_time - start_time
            if duration >= 0.5:  # Minimum duration
                snore_events.append({
                    'start_time': start_time,
                    'end_time': end_time,
                    'duration': duration,
                    'confidence': feature['snore_prob'],
                    'composite_score': smoothed_score[peak_idx]
                })

    # Chronological order for the API response.
    snore_events = sorted(snore_events, key=lambda x: x['start_time'])

    return snore_events
232
+
233
  # --- API ENDPOINTS ---
234
 
235
  @app.post("/analyze")
 
246
  return {"valid_recording": False, "reason": reason}
247
 
248
  y_clean = clean_audio_stream(y_orig, sr)
249
+
250
+ # --- NEW: Use accurate snore detection ---
251
+ snore_events = detect_snores_accurate(y_clean, sr)
252
+ snore_count = len(snore_events)
253
+
254
+ # --- ORIGINAL APNEA DETECTION (unchanged) ---
255
  intervals = librosa.effects.split(y_clean, top_db=25)
256
+
257
  annotations = []
258
  prev_end = 0
 
259
  apnea_count = 0
260
 
261
  for start, end in intervals:
262
+ # --- APNEA LOGIC (unchanged) ---
263
  gap_dur = (start - prev_end) / sr
264
  if 10.0 <= gap_dur <= 120.0:
265
  apnea_count += 1
266
+
267
+ # Risk level per event
268
+ if gap_dur < 15.0:
269
+ current_risk = "LOW"
270
+ elif gap_dur < 20.0:
271
+ current_risk = "MEDIUM"
272
+ else:
273
+ current_risk = "HIGH"
274
+
275
  annotations.append({
276
  "label": "APNEA",
277
  "start_sec": round(prev_end/sr, 2),
278
  "end_sec": round(start/sr, 2),
279
  "duration": round(gap_dur, 2),
280
+ "risk_level": current_risk
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  })
282
+
283
  prev_end = end
284
+
285
+ # --- Add detected snores to annotations ---
286
+ for snore in snore_events:
287
+ annotations.append({
288
+ "label": "SNORING",
289
+ "start_sec": round(snore['start_time'], 2),
290
+ "end_sec": round(snore['end_time'], 2),
291
+ "duration": round(snore['duration'], 2),
292
+ "confidence": round(snore['confidence'], 2)
293
+ })
294
 
295
+ # Calculate AHI Metrics (unchanged)
296
  duration_hours = (len(y_orig) / sr) / 3600
297
  ahi = apnea_count / duration_hours if duration_hours > 0 else 0
298
+
299
+ # --- Risk Level based on frequency (unchanged) ---
300
  overall_risk = ""
301
  if ahi >= 20: overall_risk = "HIGH"
302
  elif ahi >= 15: overall_risk = "MEDIUM"
303
  elif ahi >= 10: overall_risk = "LOW"
304
 
305
+ # --- FINAL RESPONSE ---
306
  return {
307
  "valid_recording": True,
308
  "snore_count": snore_count,