Spaces:

MeysamSh
/

SoundClassification

Runtime error

App Files Files Community

MeysamSh committed on Feb 6

Commit

e6f58e4

1 Parent(s): 910595e

update

Browse files

Files changed (1) hide show

app.py +80 -2

app.py CHANGED Viewed

@@ -65,6 +65,81 @@ def hide_mission(audio_data):
         return gr.update(visible=False)
     return gr.update(visible=True)
 def post_process_to_emoji(preds, window_ms, min_silence_ms=200):
     """Processes raw AI output, smooths it, enforces silence gaps, and merges duplicates."""
@@ -289,6 +364,8 @@ def train_video_model(v0, v1, v_bg):
     if X1 is None: return None, f"Class 1 error: {msg1}"
     if Xbg is None: return None, f"Background error: {msgbg}"
     X = np.vstack([X0, X1, Xbg])
     y = np.concatenate([
         np.zeros(len(X0)),
@@ -307,6 +384,7 @@ def train_video_model(v0, v1, v_bg):
     ])
     model.fit(X, y)
     return model, "OK"
@@ -316,8 +394,8 @@ def decode_video_sequence(model, video_path):
         return f"Error: {msg}"
     preds = model.predict(X)
-    import pdb; pdb.set_trace()
-    return post_process_to_emoji(preds, window_ms=100)
 def run_video_decoder(v0, v1, v_bg, test_video):

         return gr.update(visible=False)
     return gr.update(visible=True)
def post_process_video_sequence(
    preds,
    min_segment_frames=10,
    smoothing_window=10,
    background_class=2
):
    """
    Post-process frame-level predictions into a clean symbol sequence.

    Steps:
    1. Temporal smoothing (majority vote over a sliding neighborhood).
    2. Run-length compression of the smoothed labels.
    3. Removal of non-background runs shorter than ``min_segment_frames``.
    4. Merging of equal runs that became adjacent after step 3, then
       removal of background.

    Background acts as a separator: the same symbol seen twice with a
    background run in between is reported twice (``"0_0"``), whereas a symbol
    that was only interrupted by a discarded short glitch is reported once.

    Args:
        preds: sequence of per-frame class predictions; each item is
            converted with ``int()``.
        min_segment_frames: minimum run length (in frames) for a
            non-background class to be accepted as a symbol.
        smoothing_window: neighborhood size for smoothing; the vote for
            frame ``i`` covers ``i - window // 2`` .. ``i + window // 2``
            (clipped to the sequence bounds). Values below 2 disable
            smoothing.
        background_class: class index treated as background; never emitted.

    Returns:
        The accepted class indices joined with ``"_"`` (e.g. ``"0_1_0"``),
        or ``""`` when ``preds`` is empty or no symbol survives filtering.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from itertools import groupby

    # len() rather than truthiness: callers may pass array-like objects
    # whose truth value is ambiguous.
    if len(preds) == 0:
        return ""
    frames = [int(p) for p in preds]

    # -----------------------------------
    # 1. Majority vote smoothing
    # -----------------------------------
    # Clamp at 0 so a negative window cannot produce an empty neighborhood.
    half_w = max(0, smoothing_window // 2)
    smoothed = []
    for i in range(len(frames)):
        window = frames[max(0, i - half_w):i + half_w + 1]
        # sorted() makes ties deterministic: max() keeps the first maximal
        # candidate, so the lowest class index wins a tie.
        smoothed.append(max(sorted(set(window)), key=window.count))

    # -----------------------------------
    # 2. Segment compression
    # -----------------------------------
    segments = [(cls, sum(1 for _ in run)) for cls, run in groupby(smoothed)]

    # -----------------------------------
    # 3. Filter short segments
    # -----------------------------------
    # Background runs are always kept here so they can still separate
    # repeated symbols in step 4.
    kept = [
        cls
        for cls, length in segments
        if cls == background_class or length >= min_segment_frames
    ]

    # -----------------------------------
    # 4. Collapse duplicates
    # -----------------------------------
    # Merge on the integer labels *before* dropping background: removing a
    # short segment can leave two runs of the same class side by side, and
    # those must count as a single symbol.
    merged = [cls for cls, _ in groupby(kept)]
    return "_".join(str(cls) for cls in merged if cls != background_class)
 def post_process_to_emoji(preds, window_ms, min_silence_ms=200):
     """Processes raw AI output, smooths it, enforces silence gaps, and merges duplicates."""
     if X1 is None: return None, f"Class 1 error: {msg1}"
     if Xbg is None: return None, f"Background error: {msgbg}"
+    print(f"Training video model with {len(X0)} frames for Class 0, {len(X1)} frames for Class 1, and {len(Xbg)} frames for Background.")
     X = np.vstack([X0, X1, Xbg])
     y = np.concatenate([
         np.zeros(len(X0)),
     ])
     model.fit(X, y)
+    print("Video model trained successfully!")
     return model, "OK"
         return f"Error: {msg}"
     preds = model.predict(X)
+    print(f"Raw frame-level predictions: {preds}")
+    return post_process_video_sequence(preds)
 def run_video_decoder(v0, v1, v_bg, test_video):