Update app.py
app.py
CHANGED
@@ -303,74 +303,12 @@ def predict_single_sign(video_path):
         confidence, pred_class = torch.max(probs, 1)
 
         predicted_label = id_to_label[pred_class.item()]
-        confidence_value = confidence.item()
 
-        return predicted_label, confidence_value
+        return predicted_label  # Only return the label
 
     except Exception as e:
         print(f"❌ Prediction error: {e}")
-        return "Unknown"
+        return "Unknown"
-
-def predict_multiple_videos(video_files):
-    """
-    MAIN FUNCTION: Predict signs from multiple videos and build a sentence
-
-    Args:
-        video_files: List of video file paths or single video
-
-    Returns:
-        Complete sentence, individual predictions, detailed results
-    """
-    try:
-        # Handle single video or list
-        if not isinstance(video_files, list):
-            video_files = [video_files]
-
-        # Remove None values
-        video_files = [v for v in video_files if v is not None]
-
-        if len(video_files) == 0:
-            return "Please upload at least one video.", "", []
-
-        # Predict each video
-        predictions = []
-        detailed_results = []
-
-        for i, video_path in enumerate(video_files, 1):
-            sign, confidence = predict_single_sign(video_path)
-            predictions.append(sign)
-            detailed_results.append({
-                'video_num': i,
-                'sign': sign,
-                'confidence': confidence
-            })
-
-        # Build sentence
-        sentence = " ".join(predictions)
-
-        # Format detailed results
-        details_md = "### 📊 Individual Sign Analysis\n\n"
-        for result in detailed_results:
-            details_md += f"**Sign {result['video_num']}:** {result['sign']} ({result['confidence']*100:.1f}% confidence)\n\n"
-
-        # Final output
-        final_result = f"""
-## 🎯 Complete Sentence Translation
-
-### Detected Sentence:
-**"{sentence}"**
-
-{details_md}
-
----
-**Total Signs Detected:** {len(predictions)}
-**Model:** X-CLIP Fine-tuned on Ugandan Sign Language
-"""
-
-        return final_result, sentence, detailed_results
-
-    except Exception as e:
-        return f"**Error:** {str(e)}", "", []
 
 def analyze_joined_video(video_path, num_signs, use_auto_detect):
     """
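Note on the hunk above: `predict_single_sign` now returns a bare label string instead of a `(label, confidence)` tuple, which is why the callers in the next hunk change as well. A minimal runnable sketch of the retained prediction tail, using a dummy softmax output and a hypothetical `id_to_label` mapping in place of the app's real model state:

```python
import torch

# Dummy (1, num_classes) softmax output standing in for `probs`.
probs = torch.tensor([[0.1, 0.7, 0.2]])

# Top probability and its class index, as in the hunk above.
confidence, pred_class = torch.max(probs, 1)

id_to_label = {0: "Hello", 1: "How", 2: "Good"}  # hypothetical mapping
predicted_label = id_to_label[pred_class.item()]

print(predicted_label)    # How
print(confidence.item())  # ~0.7 (still computed, but no longer returned)
```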
@@ -411,21 +349,21 @@ def analyze_joined_video(video_path, num_signs, use_auto_detect):
 
         for i, segment_path in enumerate(segment_paths, 1):
             print(f"🔍 Analyzing segment {i}/{actual_segments}...")
-            sign, confidence = predict_single_sign(segment_path)
+            sign = predict_single_sign(segment_path)
+
             predictions.append(sign)
             detailed_results.append({
                 'video_num': i,
-                'sign': sign,
-                'confidence': confidence
+                'sign': sign
             })
 
         # STEP 3: Build sentence
         sentence = " ".join(predictions)
 
         # Format detailed results
-        details_md = "### 📊 Individual Sign Analysis (In Order)\n\n"
+        details_md = "### Individual Sign Analysis (In Order)\n\n"
         for result in detailed_results:
-            details_md += f"**Position {result['video_num']}:** {result['sign']} ({result['confidence']*100:.1f}% confidence)\n\n"
+            details_md += f"**Position {result['video_num']}:** {result['sign']}\n\n"
 
         # Determine split method used
         split_method = "Automatic Motion Detection" if use_auto_detect else "Equal Time Segments"
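With `'confidence'` dropped from `detailed_results`, each entry reduces to a position and a sign. A self-contained sketch of what the updated loop and formatter now produce, with hypothetical predictions standing in for `predict_single_sign` output:

```python
# Hypothetical per-segment predictions; in app.py these come from
# predict_single_sign(segment_path) inside the loop above.
detailed_results = [
    {'video_num': 1, 'sign': 'Hello'},
    {'video_num': 2, 'sign': 'How'},
    {'video_num': 3, 'sign': 'Good'},
]

# Same formatting as the updated code path.
sentence = " ".join(r['sign'] for r in detailed_results)
details_md = "### Individual Sign Analysis (In Order)\n\n"
for result in detailed_results:
    details_md += f"**Position {result['video_num']}:** {result['sign']}\n\n"

print(sentence)     # Hello How Good
print(details_md)
```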
@@ -433,7 +371,7 @@ def analyze_joined_video(video_path, num_signs, use_auto_detect):
 
         # Final output
         final_result = f"""
-## 🎯 Complete Sentence Translation
+## Complete Sentence Translation
 
 ### Detected Sentence:
 **"{sentence}"**
@@ -638,7 +576,49 @@ with gr.Blocks(css=custom_css, title="Sign Language Sentence Builder") as demo:
         outputs=[joined_video, auto_detect, num_signs_input, results_output, current_sentence, current_details, feedback_output]
     )
 
-
+    # Example section
+    gr.Markdown("""
+    ---
+    ### 📝 Complete Example Workflow
+
+    **Goal:** Translate "Hello how good" in sign language
+
+    **Step 1: Record Your Signs**
+    - Sign 1: "Hello" (performer holds sign for 2 seconds)
+    - Sign 2: "How" (performer holds sign for 1 second)
+    - Sign 3: "Good" (performer holds sign for 3 seconds)
+
+    **Step 2: Join in CapCut**
+    - Import all 3 videos
+    - Arrange in order: Hello → How → Good
+    - Export as ONE video (6 seconds total)
+
+    **Step 3: Upload & Analyze**
+    - Upload the 6-second video here
+    - Enable "Automatic Detection" ✅
+    - Set "Expected signs" to 3
+    - Click "Analyze Sentence"
+
+    **Step 4: Result**
+    - 🤖 AI detects 3 segments automatically:
+      - Position 1: "Hello"
+      - Position 2: "How"
+      - Position 3: "Good"
+    - **Final Sentence:** "Hello How Good" ✅
+
+    ---
+
+    ### 🆚 When to Use Each Mode
+
+    | Scenario | Recommended Mode | Why |
+    |----------|-----------------|-----|
+    | Signs have different lengths | 🤖 Automatic | Detects boundaries precisely |
+    | You pause between signs | 🤖 Automatic | Pauses help detection |
+    | All signs exactly same duration | 📏 Manual | Simple equal split works |
+    | Fast, continuous signing | 📏 Manual | Motion detection may struggle |
+    | Professional recording | 🤖 Automatic | Better accuracy |
+    | Quick test/prototype | 📏 Manual | Faster processing |
+    """)
 
 # Launch
 if __name__ == "__main__":
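The new help text is a single `gr.Markdown` call added inside the existing `Blocks` context, so it renders below the components wired up earlier. A minimal sketch of the pattern; the title, text, and layout here are placeholders, not app.py's actual interface:

```python
import gradio as gr

with gr.Blocks(title="Sign Language Sentence Builder") as demo:
    # ...inputs, outputs, and event wiring would go here...

    # A Markdown block declared last renders at the bottom of the page.
    gr.Markdown("""
    ---
    ### 📝 Complete Example Workflow
    Record one clip per sign, join them into a single video,
    upload it, then click "Analyze Sentence".
    """)

if __name__ == "__main__":
    demo.launch()
```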