Spaces:

Bliss-Ruth
/

Ugandan_sign_language_translation_tool

Running

App Files Community

Bliss-Ruth committed on Nov 17, 2025

Commit

5712234

verified ·

1 Parent(s): 36c94b9

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -63

app.py CHANGED Viewed

@@ -372,13 +372,14 @@ def predict_multiple_videos(video_files):
     except Exception as e:
         return f"**Error:** {str(e)}", "", []
-def analyze_joined_video(video_path, num_signs):
     """
     NEW MAIN FUNCTION: Analyze a JOINED video with multiple signs
     Args:
         video_path: Path to the joined video from CapCut
-        num_signs: How many signs are in the video
     Returns:
         Complete sentence, individual predictions, detailed results
@@ -388,23 +389,28 @@ def analyze_joined_video(video_path, num_signs):
             return "Please upload a video.", "", []
         if num_signs is None or num_signs <= 0:
-            return "Please specify how many signs are in the video.", "", []
         # STEP 1: Split the joined video into segments
-        print(f"🔪 Splitting video into {num_signs} segments...")
-        segment_paths = split_video_into_segments(video_path, num_signs)
         if len(segment_paths) == 0:
             return "Failed to split video. Please check your video file.", "", []
-        print(f"✅ Created {len(segment_paths)} segments")
         # STEP 2: Analyze each segment separately
         predictions = []
         detailed_results = []
         for i, segment_path in enumerate(segment_paths, 1):
-            print(f"🔍 Analyzing segment {i}/{num_signs}...")
             sign, confidence = predict_single_sign(segment_path)
             predictions.append(sign)
             detailed_results.append({
@@ -421,6 +427,10 @@ def analyze_joined_video(video_path, num_signs):
         for result in detailed_results:
             details_md += f"**Position {result['video_num']}:** {result['sign']} ({result['confidence']*100:.1f}% confidence)\n\n"
         # Final output
         final_result = f"""
 ## 🎯 Complete Sentence Translation
@@ -431,11 +441,11 @@ def analyze_joined_video(video_path, num_signs):
 {details_md}
 ---
-**Video Duration Split:** {num_signs} equal segments
-**Processing Method:** Sequential sign detection
 **Model:** X-CLIP Fine-tuned on Ugandan Sign Language
-*Each sign was analyzed from its corresponding time segment in your video*
 """
         # Clean up temporary files
@@ -449,7 +459,10 @@ def analyze_joined_video(video_path, num_signs):
         return final_result, sentence, detailed_results
     except Exception as e:
-        return f"**Error analyzing video:** {str(e)}", "", []
 # ============================================================================
 # FEEDBACK SYSTEM
@@ -518,15 +531,11 @@ with gr.Blocks(css=custom_css, title="Sign Language Sentence Builder") as demo:
     gr.Markdown("""
     # 🤟 Ugandan Sign Language Sentence Analyzer
-    *Upload ONE joined video (from CapCut) with multiple signs and get instant sentence translation!*
-    **How it works:**
-    1. **Join videos in CapCat:** Combine your sign videos (e.g., Hello → How → Good)
-    2. **Upload the joined video** below
-    3. **Tell us how many signs** are in the video (e.g., 3)
-    4. **Click "Analyze Sentence"** - we'll automatically split and analyze each sign in order!
-    **Example:** If you joined 3 videos (Hello, How, Good), enter "3" and we'll detect: "Hello How Good"
     """)
     with gr.Row():
@@ -535,25 +544,45 @@ with gr.Blocks(css=custom_css, title="Sign Language Sentence Builder") as demo:
             gr.Markdown("### 📤 Upload Your Joined Video")
             joined_video = gr.Video(
-                label="Joined Video from CapCut",
                 sources=["upload", "webcam"]
             )
             num_signs_input = gr.Slider(
                 minimum=1,
                 maximum=10,
                 value=3,
                 step=1,
-                label="How many signs are in this video?",
-                info="The video will be split into this many equal parts"
             )
-            gr.Markdown("""
-            **💡 Tip:**
-            - Make sure each sign takes roughly the same time in your joined video
-            - Example: 3 signs × 2 seconds each = 6 second video
-            - The video will be split equally into segments
-            """)
             with gr.Row():
                 analyze_btn = gr.Button("🚀 Analyze Sentence", variant="primary", scale=2)
@@ -563,13 +592,13 @@ with gr.Blocks(css=custom_css, title="Sign Language Sentence Builder") as demo:
         with gr.Column(scale=1):
             gr.Markdown("### 🎯 Translation Results")
             results_output = gr.Markdown(
-                value="**Upload your joined video and click 'Analyze Sentence' to see the translation.**"
             )
-            gr.Markdown("### 💡 Sentence Feedback")
-            gr.Markdown("*If the sentence is wrong, type the correct one below:*")
             correct_sentence_input = gr.Textbox(
-                label="Correct Sentence",
                 placeholder="e.g., Hello how are you"
             )
             feedback_btn = gr.Button("📝 Submit Feedback", variant="secondary")
@@ -582,7 +611,7 @@ with gr.Blocks(css=custom_css, title="Sign Language Sentence Builder") as demo:
     # Analyze sentence logic
     analyze_btn.click(
         fn=analyze_joined_video,
-        inputs=[joined_video, num_signs_input],
         outputs=[results_output, current_sentence, current_details]
     )
@@ -602,42 +631,14 @@ with gr.Blocks(css=custom_css, title="Sign Language Sentence Builder") as demo:
     # Clear button
     def clear_all():
-        return None, 3, "**Upload your video and click 'Analyze Sentence'.**", "", [], ""
     clear_btn.click(
         fn=clear_all,
-        outputs=[joined_video, num_signs_input, results_output, current_sentence, current_details, feedback_output]
     )
-    # Example section
-    gr.Markdown("""
-    ---
-    ### 📝 Step-by-Step Example
-    **Goal:** Say "Hello how are you" in sign language
-    **Method 1: Using CapCut (Recommended)**
-    1. Record/film 4 separate videos:
-       - Video 1: Sign for "Hello" (2 seconds)
-       - Video 2: Sign for "How" (2 seconds)
-       - Video 3: Sign for "Are" (2 seconds)
-       - Video 4: Sign for "You" (2 seconds)
-    2. Open CapCut and **join the 4 videos** in order
-    3. Export as ONE video (8 seconds total)
-    4. Upload here and enter "4" for number of signs
-    5. Click "Analyze Sentence"
-    6. **Result:** "Hello How Are You" ✅
-    ---
-    **Method 2: Multiple Videos** *(if you prefer separate uploads)*
-    - Use the "Multi-Video Mode" (see tabs above)
-    """)
 # Launch
 if __name__ == "__main__":

     except Exception as e:
         return f"**Error:** {str(e)}", "", []
+def analyze_joined_video(video_path, num_signs, use_auto_detect):
     """
     NEW MAIN FUNCTION: Analyze a JOINED video with multiple signs
     Args:
         video_path: Path to the joined video from CapCut
+        num_signs: How many signs are in the video (used as hint)
+        use_auto_detect: Whether to use automatic motion detection
     Returns:
         Complete sentence, individual predictions, detailed results
             return "Please upload a video.", "", []
         if num_signs is None or num_signs <= 0:
+            num_signs = 3  # Default
         # STEP 1: Split the joined video into segments
+        if use_auto_detect:
+            print(f"🤖 Using AUTOMATIC motion detection (expected ~{num_signs} signs)...")
+            segment_paths = split_video_smart(video_path, num_signs, use_motion_detection=True)
+        else:
+            print(f"📏 Using MANUAL equal split ({num_signs} segments)...")
+            segment_paths = split_video_smart(video_path, num_signs, use_motion_detection=False)
         if len(segment_paths) == 0:
             return "Failed to split video. Please check your video file.", "", []
+        actual_segments = len(segment_paths)
+        print(f"✅ Created {actual_segments} segments")
         # STEP 2: Analyze each segment separately
         predictions = []
         detailed_results = []
         for i, segment_path in enumerate(segment_paths, 1):
+            print(f"🔍 Analyzing segment {i}/{actual_segments}...")
             sign, confidence = predict_single_sign(segment_path)
             predictions.append(sign)
             detailed_results.append({
         for result in detailed_results:
             details_md += f"**Position {result['video_num']}:** {result['sign']} ({result['confidence']*100:.1f}% confidence)\n\n"
+        # Determine split method used
+        split_method = "Automatic Motion Detection" if use_auto_detect else "Equal Time Segments"
+        segments_info = f"Detected {actual_segments} segments" if use_auto_detect else f"Split into {num_signs} equal segments"
         # Final output
         final_result = f"""
 ## 🎯 Complete Sentence Translation
 {details_md}
 ---
+**Split Method:** {split_method}
+**Segments:** {segments_info}
 **Model:** X-CLIP Fine-tuned on Ugandan Sign Language
+*{'Signs were automatically detected by analyzing motion patterns' if use_auto_detect else 'Each sign was analyzed from equal time segments'}*
 """
         # Clean up temporary files
         return final_result, sentence, detailed_results
     except Exception as e:
+        import traceback
+        error_details = traceback.format_exc()
+        print(f"❌ Error: {error_details}")
+        return f"**Error analyzing video:** {str(e)}\n\nPlease try:\n- Using a different video\n- Toggling automatic detection\n- Adjusting number of signs", "", []
 # ============================================================================
 # FEEDBACK SYSTEM
     gr.Markdown("""
     # 🤟 Ugandan Sign Language Sentence Analyzer
+    *Upload ONE joined video with multiple signs - we'll automatically detect and translate them!*
+    **Two Detection Modes:**
+    1. **🤖 Automatic (Recommended):** AI detects where each sign starts/ends (works with unequal durations!)
+    2. **📏 Manual:** Split video into equal time segments (use if signs have equal duration)
     """)
     with gr.Row():
             gr.Markdown("### 📤 Upload Your Joined Video")
             joined_video = gr.Video(
+                label="Joined Video (from CapCut or any editor)",
                 sources=["upload", "webcam"]
             )
+            gr.Markdown("### ⚙️ Detection Settings")
+            auto_detect = gr.Checkbox(
+                label="🤖 Use Automatic Motion Detection",
+                value=True,
+                info="AI automatically finds sign boundaries (recommended!)"
+            )
             num_signs_input = gr.Slider(
                 minimum=1,
                 maximum=10,
                 value=3,
                 step=1,
+                label="Expected number of signs (approximate)",
+                info="Helps guide the detection algorithm"
             )
+            with gr.Accordion("💡 How It Works", open=False):
+                gr.Markdown("""
+                **Automatic Mode (🤖):**
+                - Analyzes motion patterns in your video
+                - Detects pauses/transitions between signs
+                - Works even if signs have different durations!
+                - Example: 1s + 3s + 2s signs → correctly detected
+                **Manual Mode (📏):**
+                - Splits video into equal time segments
+                - Works best when all signs take equal time
+                - Example: 2s + 2s + 2s signs → perfect split
+                **Tips:**
+                - ✅ Pause briefly between signs for best detection
+                - ✅ Keep camera angle consistent
+                - ✅ Good lighting helps accuracy
+                """)
             with gr.Row():
                 analyze_btn = gr.Button("🚀 Analyze Sentence", variant="primary", scale=2)
         with gr.Column(scale=1):
             gr.Markdown("### 🎯 Translation Results")
             results_output = gr.Markdown(
+                value="**Upload your video, choose detection mode, and click 'Analyze Sentence'**"
             )
+            gr.Markdown("### 💡 Feedback")
+            gr.Markdown("*Help improve accuracy by providing corrections:*")
             correct_sentence_input = gr.Textbox(
+                label="Correct Sentence (if prediction was wrong)",
                 placeholder="e.g., Hello how are you"
             )
             feedback_btn = gr.Button("📝 Submit Feedback", variant="secondary")
     # Analyze sentence logic
     analyze_btn.click(
         fn=analyze_joined_video,
+        inputs=[joined_video, num_signs_input, auto_detect],
         outputs=[results_output, current_sentence, current_details]
     )
     # Clear button
     def clear_all():
+        return None, True, 3, "**Upload your video and click 'Analyze Sentence'.**", "", [], ""
     clear_btn.click(
         fn=clear_all,
+        outputs=[joined_video, auto_detect, num_signs_input, results_output, current_sentence, current_details, feedback_output]
     )
 # Launch
 if __name__ == "__main__":