Bliss-Ruth committed on
Commit
5712234
·
verified ·
1 Parent(s): 36c94b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -63
app.py CHANGED
@@ -372,13 +372,14 @@ def predict_multiple_videos(video_files):
372
  except Exception as e:
373
  return f"**Error:** {str(e)}", "", []
374
 
375
- def analyze_joined_video(video_path, num_signs):
376
  """
377
  NEW MAIN FUNCTION: Analyze a JOINED video with multiple signs
378
 
379
  Args:
380
  video_path: Path to the joined video from CapCut
381
- num_signs: How many signs are in the video
 
382
 
383
  Returns:
384
  Complete sentence, individual predictions, detailed results
@@ -388,23 +389,28 @@ def analyze_joined_video(video_path, num_signs):
388
  return "Please upload a video.", "", []
389
 
390
  if num_signs is None or num_signs <= 0:
391
- return "Please specify how many signs are in the video.", "", []
392
 
393
  # STEP 1: Split the joined video into segments
394
- print(f"🔪 Splitting video into {num_signs} segments...")
395
- segment_paths = split_video_into_segments(video_path, num_signs)
 
 
 
 
396
 
397
  if len(segment_paths) == 0:
398
  return "Failed to split video. Please check your video file.", "", []
399
 
400
- print(f"✅ Created {len(segment_paths)} segments")
 
401
 
402
  # STEP 2: Analyze each segment separately
403
  predictions = []
404
  detailed_results = []
405
 
406
  for i, segment_path in enumerate(segment_paths, 1):
407
- print(f"🔍 Analyzing segment {i}/{num_signs}...")
408
  sign, confidence = predict_single_sign(segment_path)
409
  predictions.append(sign)
410
  detailed_results.append({
@@ -421,6 +427,10 @@ def analyze_joined_video(video_path, num_signs):
421
  for result in detailed_results:
422
  details_md += f"**Position {result['video_num']}:** {result['sign']} ({result['confidence']*100:.1f}% confidence)\n\n"
423
 
 
 
 
 
424
  # Final output
425
  final_result = f"""
426
  ## 🎯 Complete Sentence Translation
@@ -431,11 +441,11 @@ def analyze_joined_video(video_path, num_signs):
431
  {details_md}
432
 
433
  ---
434
- **Video Duration Split:** {num_signs} equal segments
435
- **Processing Method:** Sequential sign detection
436
  **Model:** X-CLIP Fine-tuned on Ugandan Sign Language
437
 
438
- *Each sign was analyzed from its corresponding time segment in your video*
439
  """
440
 
441
  # Clean up temporary files
@@ -449,7 +459,10 @@ def analyze_joined_video(video_path, num_signs):
449
  return final_result, sentence, detailed_results
450
 
451
  except Exception as e:
452
- return f"**Error analyzing video:** {str(e)}", "", []
 
 
 
453
 
454
  # ============================================================================
455
  # FEEDBACK SYSTEM
@@ -518,15 +531,11 @@ with gr.Blocks(css=custom_css, title="Sign Language Sentence Builder") as demo:
518
 
519
  gr.Markdown("""
520
  # 🤟 Ugandan Sign Language Sentence Analyzer
521
- *Upload ONE joined video (from CapCut) with multiple signs and get instant sentence translation!*
522
 
523
- **How it works:**
524
- 1. **Join videos in CapCat:** Combine your sign videos (e.g., Hello How → Good)
525
- 2. **Upload the joined video** below
526
- 3. **Tell us how many signs** are in the video (e.g., 3)
527
- 4. **Click "Analyze Sentence"** - we'll automatically split and analyze each sign in order!
528
-
529
- **Example:** If you joined 3 videos (Hello, How, Good), enter "3" and we'll detect: "Hello How Good"
530
  """)
531
 
532
  with gr.Row():
@@ -535,25 +544,45 @@ with gr.Blocks(css=custom_css, title="Sign Language Sentence Builder") as demo:
535
  gr.Markdown("### 📤 Upload Your Joined Video")
536
 
537
  joined_video = gr.Video(
538
- label="Joined Video from CapCut",
539
  sources=["upload", "webcam"]
540
  )
541
 
 
 
 
 
 
 
 
 
542
  num_signs_input = gr.Slider(
543
  minimum=1,
544
  maximum=10,
545
  value=3,
546
  step=1,
547
- label="How many signs are in this video?",
548
- info="The video will be split into this many equal parts"
549
  )
550
 
551
- gr.Markdown("""
552
- **💡 Tip:**
553
- - Make sure each sign takes roughly the same time in your joined video
554
- - Example: 3 signs × 2 seconds each = 6 second video
555
- - The video will be split equally into segments
556
- """)
 
 
 
 
 
 
 
 
 
 
 
 
557
 
558
  with gr.Row():
559
  analyze_btn = gr.Button("🚀 Analyze Sentence", variant="primary", scale=2)
@@ -563,13 +592,13 @@ with gr.Blocks(css=custom_css, title="Sign Language Sentence Builder") as demo:
563
  with gr.Column(scale=1):
564
  gr.Markdown("### 🎯 Translation Results")
565
  results_output = gr.Markdown(
566
- value="**Upload your joined video and click 'Analyze Sentence' to see the translation.**"
567
  )
568
 
569
- gr.Markdown("### 💡 Sentence Feedback")
570
- gr.Markdown("*If the sentence is wrong, type the correct one below:*")
571
  correct_sentence_input = gr.Textbox(
572
- label="Correct Sentence",
573
  placeholder="e.g., Hello how are you"
574
  )
575
  feedback_btn = gr.Button("📝 Submit Feedback", variant="secondary")
@@ -582,7 +611,7 @@ with gr.Blocks(css=custom_css, title="Sign Language Sentence Builder") as demo:
582
  # Analyze sentence logic
583
  analyze_btn.click(
584
  fn=analyze_joined_video,
585
- inputs=[joined_video, num_signs_input],
586
  outputs=[results_output, current_sentence, current_details]
587
  )
588
 
@@ -602,42 +631,14 @@ with gr.Blocks(css=custom_css, title="Sign Language Sentence Builder") as demo:
602
 
603
  # Clear button
604
  def clear_all():
605
- return None, 3, "**Upload your video and click 'Analyze Sentence'.**", "", [], ""
606
 
607
  clear_btn.click(
608
  fn=clear_all,
609
- outputs=[joined_video, num_signs_input, results_output, current_sentence, current_details, feedback_output]
610
  )
611
 
612
- # Example section
613
- gr.Markdown("""
614
- ---
615
- ### 📝 Step-by-Step Example
616
 
617
- **Goal:** Say "Hello how are you" in sign language
618
-
619
- **Method 1: Using CapCut (Recommended)**
620
- 1. Record/film 4 separate videos:
621
- - Video 1: Sign for "Hello" (2 seconds)
622
- - Video 2: Sign for "How" (2 seconds)
623
- - Video 3: Sign for "Are" (2 seconds)
624
- - Video 4: Sign for "You" (2 seconds)
625
-
626
- 2. Open CapCut and **join the 4 videos** in order
627
-
628
- 3. Export as ONE video (8 seconds total)
629
-
630
- 4. Upload here and enter "4" for number of signs
631
-
632
- 5. Click "Analyze Sentence"
633
-
634
- 6. **Result:** "Hello How Are You" ✅
635
-
636
- ---
637
-
638
- **Method 2: Multiple Videos** *(if you prefer separate uploads)*
639
- - Use the "Multi-Video Mode" (see tabs above)
640
- """)
641
 
642
  # Launch
643
  if __name__ == "__main__":
 
372
  except Exception as e:
373
  return f"**Error:** {str(e)}", "", []
374
 
375
+ def analyze_joined_video(video_path, num_signs, use_auto_detect):
376
  """
377
  NEW MAIN FUNCTION: Analyze a JOINED video with multiple signs
378
 
379
  Args:
380
  video_path: Path to the joined video from CapCut
381
+ num_signs: How many signs are in the video (used as hint)
382
+ use_auto_detect: Whether to use automatic motion detection
383
 
384
  Returns:
385
  Complete sentence, individual predictions, detailed results
 
389
  return "Please upload a video.", "", []
390
 
391
  if num_signs is None or num_signs <= 0:
392
+ num_signs = 3 # Default
393
 
394
  # STEP 1: Split the joined video into segments
395
+ if use_auto_detect:
396
+ print(f"🤖 Using AUTOMATIC motion detection (expected ~{num_signs} signs)...")
397
+ segment_paths = split_video_smart(video_path, num_signs, use_motion_detection=True)
398
+ else:
399
+ print(f"📏 Using MANUAL equal split ({num_signs} segments)...")
400
+ segment_paths = split_video_smart(video_path, num_signs, use_motion_detection=False)
401
 
402
  if len(segment_paths) == 0:
403
  return "Failed to split video. Please check your video file.", "", []
404
 
405
+ actual_segments = len(segment_paths)
406
+ print(f"✅ Created {actual_segments} segments")
407
 
408
  # STEP 2: Analyze each segment separately
409
  predictions = []
410
  detailed_results = []
411
 
412
  for i, segment_path in enumerate(segment_paths, 1):
413
+ print(f"🔍 Analyzing segment {i}/{actual_segments}...")
414
  sign, confidence = predict_single_sign(segment_path)
415
  predictions.append(sign)
416
  detailed_results.append({
 
427
  for result in detailed_results:
428
  details_md += f"**Position {result['video_num']}:** {result['sign']} ({result['confidence']*100:.1f}% confidence)\n\n"
429
 
430
+ # Determine split method used
431
+ split_method = "Automatic Motion Detection" if use_auto_detect else "Equal Time Segments"
432
+ segments_info = f"Detected {actual_segments} segments" if use_auto_detect else f"Split into {num_signs} equal segments"
433
+
434
  # Final output
435
  final_result = f"""
436
  ## 🎯 Complete Sentence Translation
 
441
  {details_md}
442
 
443
  ---
444
+ **Split Method:** {split_method}
445
+ **Segments:** {segments_info}
446
  **Model:** X-CLIP Fine-tuned on Ugandan Sign Language
447
 
448
+ *{'Signs were automatically detected by analyzing motion patterns' if use_auto_detect else 'Each sign was analyzed from equal time segments'}*
449
  """
450
 
451
  # Clean up temporary files
 
459
  return final_result, sentence, detailed_results
460
 
461
  except Exception as e:
462
+ import traceback
463
+ error_details = traceback.format_exc()
464
+ print(f"❌ Error: {error_details}")
465
+ return f"**Error analyzing video:** {str(e)}\n\nPlease try:\n- Using a different video\n- Toggling automatic detection\n- Adjusting number of signs", "", []
466
 
467
  # ============================================================================
468
  # FEEDBACK SYSTEM
 
531
 
532
  gr.Markdown("""
533
  # 🤟 Ugandan Sign Language Sentence Analyzer
534
+ *Upload ONE joined video with multiple signs - we'll automatically detect and translate them!*
535
 
536
+ **Two Detection Modes:**
537
+ 1. **🤖 Automatic (Recommended):** AI detects where each sign starts/ends (works with unequal durations!)
538
+ 2. **📏 Manual:** Split video into equal time segments (use if signs have equal duration)
 
 
 
 
539
  """)
540
 
541
  with gr.Row():
 
544
  gr.Markdown("### 📤 Upload Your Joined Video")
545
 
546
  joined_video = gr.Video(
547
+ label="Joined Video (from CapCut or any editor)",
548
  sources=["upload", "webcam"]
549
  )
550
 
551
+ gr.Markdown("### ⚙️ Detection Settings")
552
+
553
+ auto_detect = gr.Checkbox(
554
+ label="🤖 Use Automatic Motion Detection",
555
+ value=True,
556
+ info="AI automatically finds sign boundaries (recommended!)"
557
+ )
558
+
559
  num_signs_input = gr.Slider(
560
  minimum=1,
561
  maximum=10,
562
  value=3,
563
  step=1,
564
+ label="Expected number of signs (approximate)",
565
+ info="Helps guide the detection algorithm"
566
  )
567
 
568
+ with gr.Accordion("💡 How It Works", open=False):
569
+ gr.Markdown("""
570
+ **Automatic Mode (🤖):**
571
+ - Analyzes motion patterns in your video
572
+ - Detects pauses/transitions between signs
573
+ - Works even if signs have different durations!
574
+ - Example: 1s + 3s + 2s signs → correctly detected
575
+
576
+ **Manual Mode (📏):**
577
+ - Splits video into equal time segments
578
+ - Works best when all signs take equal time
579
+ - Example: 2s + 2s + 2s signs → perfect split
580
+
581
+ **Tips:**
582
+ - ✅ Pause briefly between signs for best detection
583
+ - ✅ Keep camera angle consistent
584
+ - ✅ Good lighting helps accuracy
585
+ """)
586
 
587
  with gr.Row():
588
  analyze_btn = gr.Button("🚀 Analyze Sentence", variant="primary", scale=2)
 
592
  with gr.Column(scale=1):
593
  gr.Markdown("### 🎯 Translation Results")
594
  results_output = gr.Markdown(
595
+ value="**Upload your video, choose detection mode, and click 'Analyze Sentence'**"
596
  )
597
 
598
+ gr.Markdown("### 💡 Feedback")
599
+ gr.Markdown("*Help improve accuracy by providing corrections:*")
600
  correct_sentence_input = gr.Textbox(
601
+ label="Correct Sentence (if prediction was wrong)",
602
  placeholder="e.g., Hello how are you"
603
  )
604
  feedback_btn = gr.Button("📝 Submit Feedback", variant="secondary")
 
611
  # Analyze sentence logic
612
  analyze_btn.click(
613
  fn=analyze_joined_video,
614
+ inputs=[joined_video, num_signs_input, auto_detect],
615
  outputs=[results_output, current_sentence, current_details]
616
  )
617
 
 
631
 
632
  # Clear button
633
  def clear_all():
634
+ return None, True, 3, "**Upload your video and click 'Analyze Sentence'.**", "", [], ""
635
 
636
  clear_btn.click(
637
  fn=clear_all,
638
+ outputs=[joined_video, auto_detect, num_signs_input, results_output, current_sentence, current_details, feedback_output]
639
  )
640
 
 
 
 
 
641
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
642
 
643
  # Launch
644
  if __name__ == "__main__":