Bliss-Ruth committed on
Commit 473d0f3 · verified · 1 Parent(s): b010867

Update app.py

Files changed (1)
  1. app.py +454 -38
app.py CHANGED
@@ -1,4 +1,5 @@
1
- # app.py - MULTI-VIDEO SENTENCE BUILDER
 
2
  import torch
3
  import torch.nn as nn
4
  from transformers import XCLIPProcessor, XCLIPModel
@@ -9,6 +10,7 @@ from PIL import Image
9
  import pandas as pd
10
  from datetime import datetime
11
  import os
 
12
 
13
  print("🚀 Loading Ugandan Sign Language Model...")
14
 
@@ -70,9 +72,178 @@ except Exception as e:
70
  exit(1)
71
 
72
  # ============================================================================
73
- # CORE FUNCTIONS
74
  # ============================================================================
75
 
76
  def extract_frames(video_path, num_frames=8):
77
  """Extract frames from video"""
78
  try:
@@ -180,7 +351,7 @@ def predict_multiple_videos(video_files):
180
  # Format detailed results
181
  details_md = "### 📊 Individual Sign Analysis\n\n"
182
  for result in detailed_results:
183
- details_md += f"**Video {result['video_num']}:** {result['sign']} ({result['confidence']*100:.1f}% confidence)\n\n"
184
 
185
  # Final output
186
  final_result = f"""
@@ -201,6 +372,98 @@ def predict_multiple_videos(video_files):
201
  except Exception as e:
202
  return f"**Error:** {str(e)}", "", []
203
 
204
  # ============================================================================
205
  # FEEDBACK SYSTEM
206
  # ============================================================================
@@ -267,35 +530,180 @@ h1 {
267
  with gr.Blocks(css=custom_css, title="Sign Language Sentence Builder") as demo:
268
 
269
  gr.Markdown("""
270
- # 🤟 Ugandan Sign Language Sentence Builder
271
- *Upload multiple videos (one sign per video) to build complete sentences!*
272
 
273
- **How it works:**
274
- 1. Upload 2-5 videos in sequence (each video = one sign/word)
275
- 2. Click "Build Sentence" to see the complete translation
276
- 3. Example: Video 1 (Hello) + Video 2 (How) + Video 3 (Are) → "Hello How Are"
277
  """)
278
 
279
  with gr.Row():
280
- # Left side - Video uploads
281
  with gr.Column(scale=1):
282
- gr.Markdown("### 📤 Upload Videos (In Order)")
283
 
284
- video1 = gr.Video(label="Video 1 (First Sign)", sources=["upload", "webcam"])
285
- video2 = gr.Video(label="Video 2 (Second Sign)", sources=["upload", "webcam"])
286
- video3 = gr.Video(label="Video 3 (Third Sign)", sources=["upload", "webcam"])
287
- video4 = gr.Video(label="Video 4 (Fourth Sign)", sources=["upload", "webcam"])
288
- video5 = gr.Video(label="Video 5 (Fifth Sign)", sources=["upload", "webcam"])
289
 
290
  with gr.Row():
291
- analyze_btn = gr.Button("🚀 Build Sentence", variant="primary", scale=2)
292
- clear_btn = gr.Button("🗑️ Clear All", variant="secondary", scale=1)
293
 
294
  # Right side - Results
295
  with gr.Column(scale=1):
296
  gr.Markdown("### 🎯 Translation Results")
297
  results_output = gr.Markdown(
298
- value="**Upload your videos and click 'Build Sentence' to see the translation.**"
299
  )
300
 
301
  gr.Markdown("### 💡 Sentence Feedback")
@@ -311,15 +719,10 @@ with gr.Blocks(css=custom_css, title="Sign Language Sentence Builder") as demo:
311
  current_sentence = gr.State()
312
  current_details = gr.State()
313
 
314
- # Build sentence logic
315
- def build_sentence_wrapper(v1, v2, v3, v4, v5):
316
- videos = [v1, v2, v3, v4, v5]
317
- result, sentence, details = predict_multiple_videos(videos)
318
- return result, sentence, details
319
-
320
  analyze_btn.click(
321
- fn=build_sentence_wrapper,
322
- inputs=[video1, video2, video3, video4, video5],
323
  outputs=[results_output, current_sentence, current_details]
324
  )
325
 
@@ -339,28 +742,41 @@ with gr.Blocks(css=custom_css, title="Sign Language Sentence Builder") as demo:
339
 
340
  # Clear button
341
  def clear_all():
342
- return None, None, None, None, None, "**Upload your videos and click 'Build Sentence'.**", "", [], ""
343
 
344
  clear_btn.click(
345
  fn=clear_all,
346
- outputs=[video1, video2, video3, video4, video5, results_output, current_sentence, current_details, feedback_output]
347
  )
348
 
349
  # Example section
350
  gr.Markdown("""
351
  ---
352
- ### 📝 Example Usage
353
 
354
- **Scenario:** You want to say "Hello how are you"
355
 
356
- 1. **Video 1:** Record/upload sign for "Hello"
357
- 2. **Video 2:** Record/upload sign for "How"
358
- 3. **Video 3:** Record/upload sign for "Are"
359
- 4. **Video 4:** Record/upload sign for "You"
360
- 5. Click "Build Sentence"
361
- 6. Result: **"Hello How Are You"**
362
 
363
- *Note: Each video should contain ONE sign only*
 
364
  """)
365
 
366
  # Launch
 
1
+ # app.py - JOINED VIDEO SENTENCE ANALYZER
2
+ # Analyzes ONE long video with multiple signs and builds a sentence
3
  import torch
4
  import torch.nn as nn
5
  from transformers import XCLIPProcessor, XCLIPModel
 
10
  import pandas as pd
11
  from datetime import datetime
12
  import os
13
+ import tempfile
14
 
15
  print("🚀 Loading Ugandan Sign Language Model...")
16
 
 
72
  exit(1)
73
 
74
  # ============================================================================
75
+ # CORE FUNCTIONS - VIDEO SPLITTING & ANALYSIS WITH MOTION DETECTION
76
  # ============================================================================
77
 
78
+ def detect_motion_changes(video_path, threshold=30):
79
+ """
80
+ Detect motion changes in video to find sign boundaries
81
+
82
+ Args:
83
+ video_path: Path to video
84
+ threshold: Motion threshold (higher = less sensitive)
85
+
86
+ Returns:
87
+ List of frame indices where significant motion changes occur
88
+ """
89
+ try:
90
+ cap = cv2.VideoCapture(video_path)
91
+
92
+ # Read first frame
93
+ ret, prev_frame = cap.read()
94
+ if not ret:
95
+ cap.release()
96
+ return []
97
+
98
+ prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
99
+ prev_gray = cv2.GaussianBlur(prev_gray, (21, 21), 0)
100
+
101
+ motion_scores = []
102
+ frame_idx = 0
103
+
104
+ while True:
105
+ ret, frame = cap.read()
106
+ if not ret:
107
+ break
108
+
109
+ # Convert to grayscale and blur
110
+ gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
111
+ gray = cv2.GaussianBlur(gray, (21, 21), 0)
112
+
113
+ # Calculate difference between frames
114
+ frame_delta = cv2.absdiff(prev_gray, gray)
115
+ thresh = cv2.threshold(frame_delta, threshold, 255, cv2.THRESH_BINARY)[1]
116
+
117
+ # Motion score: mean thresholded pixel value (proportional to the fraction of changed pixels)
118
+ motion_score = np.sum(thresh) / (thresh.shape[0] * thresh.shape[1])
119
+ motion_scores.append((frame_idx, motion_score))
120
+
121
+ prev_gray = gray
122
+ frame_idx += 1
123
+
124
+ cap.release()
125
+
126
+ # Find peaks in motion (where motion suddenly increases/decreases)
127
+ # This indicates transitions between signs
128
+ boundaries = [0] # Start with first frame
129
+
130
+ if len(motion_scores) > 10:
131
+ # Smooth motion scores
132
+ window_size = 5
133
+ smoothed = []
134
+ for i in range(len(motion_scores)):
135
+ start = max(0, i - window_size)
136
+ end = min(len(motion_scores), i + window_size + 1)
137
+ avg_score = np.mean([s[1] for s in motion_scores[start:end]])
138
+ smoothed.append((motion_scores[i][0], avg_score))
139
+
140
+ # Find local minima (pauses between signs)
141
+ for i in range(10, len(smoothed) - 10):
142
+ # Check if this is a local minimum
143
+ current_score = smoothed[i][1]
144
+ prev_scores = [smoothed[j][1] for j in range(i-10, i)]
145
+ next_scores = [smoothed[j][1] for j in range(i+1, i+11)]
146
+
147
+ if current_score < np.mean(prev_scores) * 0.3 and current_score < np.mean(next_scores) * 0.3:
148
+ # Significant pause detected
149
+ boundaries.append(smoothed[i][0])
150
+
151
+ return boundaries
152
+
153
+ except Exception as e:
154
+ print(f"❌ Motion detection error: {e}")
155
+ return [0]
156
+
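
The boundary detector can be sanity-checked on its own before wiring it into the app. A minimal sketch, assuming `detect_motion_changes` as defined above; `"sample.mp4"` is a placeholder path, not a file from this repo:

```python
# Standalone check of detect_motion_changes (illustrative usage only).
import cv2

video_path = "sample.mp4"
boundaries = detect_motion_changes(video_path)

cap = cv2.VideoCapture(video_path)
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # fall back if the container reports 0
cap.release()

# Print each detected boundary as a frame index and approximate timestamp
for frame_idx in boundaries:
    print(f"boundary at frame {frame_idx} (~{frame_idx / fps:.2f}s)")
```
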
157
+ def split_video_smart(video_path, num_signs=None, use_motion_detection=True):
158
+ """
159
+ Smart video splitting using motion detection OR equal segments
160
+
161
+ Args:
162
+ video_path: Path to the joined video
163
+ num_signs: Expected number of signs (optional if using motion detection)
164
+ use_motion_detection: Whether to use automatic boundary detection
165
+
166
+ Returns:
167
+ List of segment video paths
168
+ """
169
+ try:
170
+ cap = cv2.VideoCapture(video_path)
171
+
172
+ # Get video properties
173
+ fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30  # some containers report 0 fps; fall back
174
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
175
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
176
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
177
+
178
+ if total_frames == 0:
179
+ cap.release()
180
+ return []
181
+
182
+ # Determine split points
183
+ if use_motion_detection:
184
+ print("🔍 Using motion detection to find sign boundaries...")
185
+ boundaries = detect_motion_changes(video_path)
186
+
187
+ # Filter boundaries to get approximately num_signs segments
188
+ if num_signs and len(boundaries) > num_signs + 1:
189
+ # Too many boundaries were detected; subsample them at an even stride
190
+ # so that roughly num_signs + 1 evenly spaced boundaries remain
191
+ step = len(boundaries) // (num_signs + 1)
192
+ boundaries = [boundaries[i * step] for i in range(num_signs + 1)]
193
+
194
+ boundaries.append(total_frames) # Add end frame
195
+ boundaries = sorted(list(set(boundaries))) # Remove duplicates
196
+
197
+ print(f"✅ Found {len(boundaries)-1} sign segments at frames: {boundaries}")
198
+
199
+ else:
200
+ # Fall back to equal segments
201
+ print(f"📏 Splitting into {num_signs} equal segments...")
202
+ frames_per_segment = total_frames // num_signs
203
+ boundaries = [i * frames_per_segment for i in range(num_signs + 1)]
204
+ boundaries[-1] = total_frames
205
+
206
+ segment_paths = []
207
+ temp_dir = tempfile.mkdtemp()
208
+
209
+ # Create segments based on boundaries
210
+ for segment_idx in range(len(boundaries) - 1):
211
+ start_frame = boundaries[segment_idx]
212
+ end_frame = boundaries[segment_idx + 1]
213
+
214
+ # Skip very short segments (less than 5 frames)
215
+ if end_frame - start_frame < 5:
216
+ continue
217
+
218
+ segment_path = os.path.join(temp_dir, f"segment_{segment_idx}.mp4")
219
+
220
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
221
+ out = cv2.VideoWriter(segment_path, fourcc, fps, (width, height))
222
+
223
+ # Write frames for this segment
224
+ cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
225
+
226
+ for frame_idx in range(start_frame, end_frame):
227
+ ret, frame = cap.read()
228
+ if not ret:
229
+ break
230
+ out.write(frame)
231
+
232
+ out.release()
233
+
234
+ # Only add if file was created successfully
235
+ if os.path.exists(segment_path) and os.path.getsize(segment_path) > 0:
236
+ segment_paths.append(segment_path)
237
+
238
+ cap.release()
239
+ return segment_paths
240
+
241
+ except Exception as e:
242
+ print(f"❌ Error splitting video: {e}")
243
+ import traceback
244
+ traceback.print_exc()
245
+ return []
246
+
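
To verify that the splitter produces sensible segments, it can be run headlessly and the per-segment durations reported. A sketch, assuming `split_video_smart` as defined above; `"joined.mp4"` is a placeholder:

```python
# Smoke test for split_video_smart (illustrative only).
import cv2

segments = split_video_smart("joined.mp4", num_signs=3, use_motion_detection=True)
for path in segments:
    cap = cv2.VideoCapture(path)
    frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    cap.release()
    print(f"{path}: {frames} frames (~{frames / fps:.2f}s)")
```
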
247
  def extract_frames(video_path, num_frames=8):
248
  """Extract frames from video"""
249
  try:
 
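
`extract_frames` is only partially visible in this diff (its body is unchanged by the commit). For readers following along, a minimal uniform-sampling implementation consistent with its signature might look like the sketch below; this is an assumption, not the committed code:

```python
import cv2
import numpy as np
from PIL import Image

def extract_frames_sketch(video_path, num_frames=8):
    """Uniformly sample num_frames RGB frames as PIL images (illustrative only)."""
    cap = cv2.VideoCapture(video_path)
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total <= 0:
        cap.release()
        return []
    indices = np.linspace(0, total - 1, num_frames, dtype=int)
    frames = []
    for idx in indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
    cap.release()
    return frames
```
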
351
  # Format detailed results
352
  details_md = "### 📊 Individual Sign Analysis\n\n"
353
  for result in detailed_results:
354
+ details_md += f"**Sign {result['video_num']}:** {result['sign']} ({result['confidence']*100:.1f}% confidence)\n\n"
355
 
356
  # Final output
357
  final_result = f"""
 
372
  except Exception as e:
373
  return f"**Error:** {str(e)}", "", []
374
 
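
`predict_single_sign`, called by the new function below, is also outside this diff. Given the X-CLIP imports at the top of the file, its shape is plausibly something like this sketch; the `processor`, `model`, `classifier`, and `LABELS` names are assumptions about module-level objects created at startup:

```python
import torch

def predict_single_sign_sketch(video_path):
    """Classify one sign clip with X-CLIP video features (illustrative only)."""
    frames = extract_frames(video_path, num_frames=8)  # list of PIL images
    if not frames:
        return "unknown", 0.0
    inputs = processor(videos=[frames], return_tensors="pt")
    with torch.no_grad():
        video_features = model.get_video_features(**inputs)
        logits = classifier(video_features)  # assumed fine-tuned linear head
        probs = torch.softmax(logits, dim=-1)[0]
    confidence, idx = torch.max(probs, dim=-1)
    return LABELS[idx.item()], confidence.item()
```
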
375
+ def analyze_joined_video(video_path, num_signs, use_auto_detect):
376
+ """
377
+ NEW MAIN FUNCTION: Analyze a JOINED video with multiple signs
378
+
379
+ Args:
380
+ video_path: Path to the joined video from CapCut
381
+ num_signs: How many signs are in the video (used as hint)
382
+ use_auto_detect: Whether to use automatic motion detection
383
+
384
+ Returns:
385
+ Complete sentence, individual predictions, detailed results
386
+ """
387
+ try:
388
+ if video_path is None:
389
+ return "Please upload a video.", "", []
390
+
391
+ if num_signs is None or num_signs <= 0:
392
+ num_signs = 3 # Default
393
+
394
+ # STEP 1: Split the joined video into segments
395
+ if use_auto_detect:
396
+ print(f"🤖 Using AUTOMATIC motion detection (expected ~{num_signs} signs)...")
397
+ segment_paths = split_video_smart(video_path, num_signs, use_motion_detection=True)
398
+ else:
399
+ print(f"📏 Using MANUAL equal split ({num_signs} segments)...")
400
+ segment_paths = split_video_smart(video_path, num_signs, use_motion_detection=False)
401
+
402
+ if len(segment_paths) == 0:
403
+ return "Failed to split video. Please check your video file.", "", []
404
+
405
+ actual_segments = len(segment_paths)
406
+ print(f"✅ Created {actual_segments} segments")
407
+
408
+ # STEP 2: Analyze each segment separately
409
+ predictions = []
410
+ detailed_results = []
411
+
412
+ for i, segment_path in enumerate(segment_paths, 1):
413
+ print(f"🔍 Analyzing segment {i}/{actual_segments}...")
414
+ sign, confidence = predict_single_sign(segment_path)
415
+ predictions.append(sign)
416
+ detailed_results.append({
417
+ 'video_num': i,
418
+ 'sign': sign,
419
+ 'confidence': confidence
420
+ })
421
+
422
+ # STEP 3: Build sentence
423
+ sentence = " ".join(predictions)
424
+
425
+ # Format detailed results
426
+ details_md = "### 📊 Individual Sign Analysis (In Order)\n\n"
427
+ for result in detailed_results:
428
+ details_md += f"**Position {result['video_num']}:** {result['sign']} ({result['confidence']*100:.1f}% confidence)\n\n"
429
+
430
+ # Determine split method used
431
+ split_method = "Automatic Motion Detection" if use_auto_detect else "Equal Time Segments"
432
+ segments_info = f"Detected {actual_segments} segments" if use_auto_detect else f"Split into {num_signs} equal segments"
433
+
434
+ # Final output
435
+ final_result = f"""
436
+ ## 🎯 Complete Sentence Translation
437
+
438
+ ### Detected Sentence:
439
+ **"{sentence}"**
440
+
441
+ {details_md}
442
+
443
+ ---
444
+ **Split Method:** {split_method}
445
+ **Segments:** {segments_info}
446
+ **Model:** X-CLIP Fine-tuned on Ugandan Sign Language
447
+
448
+ *{'Signs were automatically detected by analyzing motion patterns' if use_auto_detect else 'Each sign was analyzed from equal time segments'}*
449
+ """
450
+
451
+ # Clean up temporary files
452
+ try:
453
+ for segment_path in segment_paths:
454
+ if os.path.exists(segment_path):
455
+ os.remove(segment_path)
456
+ except Exception:  # best-effort cleanup; ignore failures
457
+ pass
458
+
459
+ return final_result, sentence, detailed_results
460
+
461
+ except Exception as e:
462
+ import traceback
463
+ error_details = traceback.format_exc()
464
+ print(f"❌ Error: {error_details}")
465
+ return f"**Error analyzing video:** {str(e)}\n\nPlease try:\n- Using a different video\n- Toggling automatic detection\n- Adjusting number of signs", "", []
466
+
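
The whole pipeline can also be exercised without the Gradio UI. A minimal sketch using the function above; `"joined.mp4"` is a placeholder path:

```python
# Headless run of the new analysis pipeline (illustrative only).
report_md, sentence, details = analyze_joined_video(
    "joined.mp4", num_signs=3, use_auto_detect=True
)
print(sentence)  # e.g. "Hello How Good"
for d in details:
    print(d["video_num"], d["sign"], f"{d['confidence']:.2%}")
```
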
467
  # ============================================================================
468
  # FEEDBACK SYSTEM
469
  # ============================================================================
 
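
The feedback hunk itself is unchanged and not shown here, but the UI below calls `save_sentence_feedback(predicted, corrected, num_videos)` and renders its return value as markdown. A minimal CSV-logging sketch consistent with that call site and the file's pandas/datetime imports; the filename and message text are assumptions:

```python
import os
from datetime import datetime
import pandas as pd

FEEDBACK_CSV = "sentence_feedback.csv"  # assumed log file; not shown in this diff

def save_sentence_feedback_sketch(predicted, corrected, num_videos):
    """Append one correction to a CSV log and return a status message (illustrative)."""
    row = {
        "timestamp": datetime.now().isoformat(),
        "predicted": predicted,
        "corrected": corrected,
        "num_videos": num_videos,
    }
    pd.DataFrame([row]).to_csv(
        FEEDBACK_CSV,
        mode="a",
        header=not os.path.exists(FEEDBACK_CSV),
        index=False,
    )
    return "✅ Feedback saved - thank you!"
```
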
530
  with gr.Blocks(css=custom_css, title="Sign Language Sentence Builder") as demo:
531
 
532
  gr.Markdown("""
533
+ # 🤟 Ugandan Sign Language Sentence Analyzer
534
+ *Upload ONE joined video with multiple signs - we'll automatically detect and translate them!*
535
 
536
+ **Two Detection Modes:**
537
+ 1. **🤖 Automatic (Recommended):** AI detects where each sign starts/ends (works with unequal durations!)
538
+ 2. **📏 Manual:** Split video into equal time segments (use if signs have equal duration)
 
539
  """)
540
 
541
  with gr.Row():
542
+ # Left side - Video upload
543
  with gr.Column(scale=1):
544
+ gr.Markdown("### 📤 Upload Your Joined Video")
545
+
546
+ joined_video = gr.Video(
547
+ label="Joined Video (from CapCut or any editor)",
548
+ sources=["upload", "webcam"]
549
+ )
550
 
551
+ gr.Markdown("### ⚙️ Detection Settings")
552
+
553
+ auto_detect = gr.Checkbox(
554
+ label="🤖 Use Automatic Motion Detection",
555
+ value=True,
556
+ info="AI automatically finds sign boundaries (recommended!)"
557
+ )
558
+
559
+ num_signs_input = gr.Slider(
560
+ minimum=1,
561
+ maximum=10,
562
+ value=3,
563
+ step=1,
564
+ label="Expected number of signs (approximate)",
565
+ info="Helps guide the detection algorithm"
566
+ )
567
+
568
+ with gr.Accordion("💡 How It Works", open=False):
569
+ gr.Markdown("""
570
+ **Automatic Mode (🤖):**
571
+ - Analyzes motion patterns in your video
572
+ - Detects pauses/transitions between signs
573
+ - Works even if signs have different durations!
574
+ - Example: 1s + 3s + 2s signs → correctly detected
575
+
576
+ **Manual Mode (📏):**
577
+ - Splits video into equal time segments
578
+ - Works best when all signs take equal time
579
+ - Example: 2s + 2s + 2s signs → perfect split
580
+
581
+ **Tips:**
582
+ - ✅ Pause briefly between signs for best detection
583
+ - ✅ Keep camera angle consistent
584
+ - ✅ Good lighting helps accuracy
585
+ """)
586
 
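
To make the manual mode concrete, here is the equal-split arithmetic from `split_video_smart` worked through for the accordion's 2s + 2s + 2s example, assuming a 30 fps recording:

```python
# Equal-split arithmetic for a 6-second, 30 fps video with 3 signs.
total_frames = 6 * 30                             # 180 frames
num_signs = 3
frames_per_segment = total_frames // num_signs    # 60 frames = 2 s per segment
boundaries = [i * frames_per_segment for i in range(num_signs + 1)]
print(boundaries)                                 # [0, 60, 120, 180] -> 0-2s, 2-4s, 4-6s
```
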
587
  with gr.Row():
588
+ analyze_btn = gr.Button("🚀 Analyze Sentence", variant="primary", scale=2)
589
+ clear_btn = gr.Button("🗑️ Clear", variant="secondary", scale=1)
590
 
591
  # Right side - Results
592
  with gr.Column(scale=1):
593
  gr.Markdown("### 🎯 Translation Results")
594
  results_output = gr.Markdown(
595
+ value="**Upload your video, choose detection mode, and click 'Analyze Sentence'**"
596
+ )
597
+
598
+ gr.Markdown("### 💡 Feedback")
599
+ gr.Markdown("*Help improve accuracy by providing corrections:*")
600
+ correct_sentence_input = gr.Textbox(
601
+ label="Correct Sentence (if prediction was wrong)",
602
+ placeholder="e.g., Hello how are you"
603
+ )
604
+ feedback_btn = gr.Button("📝 Submit Feedback", variant="secondary")
605
+ feedback_output = gr.Markdown()
606
+
607
+ # Hidden states
608
+ current_sentence = gr.State()
609
+ current_details = gr.State()
610
+
611
+ # Analyze sentence logic
612
+ analyze_btn.click(
613
+ fn=analyze_joined_video,
614
+ inputs=[joined_video, num_signs_input, auto_detect],
615
+ outputs=[results_output, current_sentence, current_details]
616
+ )
617
+
618
+ # Feedback logic
619
+ def submit_feedback_wrapper(predicted, corrected, details):
620
+ if not corrected or corrected.strip() == "":
621
+ return "Please enter the correct sentence."
622
+
623
+ num_videos = len(details) if details else 0
624
+ return save_sentence_feedback(predicted, corrected, num_videos)
625
+
626
+ feedback_btn.click(
627
+ fn=submit_feedback_wrapper,
628
+ inputs=[current_sentence, correct_sentence_input, current_details],
629
+ outputs=[feedback_output]
630
+ )
631
+
632
+ # Clear button
633
+ def clear_all():
634
+ return None, True, 3, "**Upload your video and click 'Analyze Sentence'.**", "", [], ""
635
+
636
+ clear_btn.click(
637
+ fn=clear_all,
638
+ outputs=[joined_video, auto_detect, num_signs_input, results_output, current_sentence, current_details, feedback_output]
639
+ )
640
+
641
+ # Example section
642
+ gr.Markdown("""
643
+ ---
644
+ ### 📝 Complete Example Workflow
645
+
646
+ **Goal:** Translate "Hello how good" in sign language
647
+
648
+ **Step 1: Record Your Signs**
649
+ - Sign 1: "Hello" (performer holds sign for 2 seconds)
650
+ - Sign 2: "How" (performer holds sign for 1 second)
651
+ - Sign 3: "Good" (performer holds sign for 3 seconds)
652
+
653
+ **Step 2: Join in CapCut**
654
+ - Import all 3 videos
655
+ - Arrange in order: Hello → How → Good
656
+ - Export as ONE video (6 seconds total)
657
+
658
+ **Step 3: Upload & Analyze**
659
+ - Upload the 6-second video here
660
+ - Enable "Automatic Detection" ✅
661
+ - Set "Expected signs" to 3
662
+ - Click "Analyze Sentence"
663
+
664
+ **Step 4: Result**
665
+ - 🤖 AI detects 3 segments automatically:
666
+ - Position 1: "Hello" (0-2 seconds, 87% confidence)
667
+ - Position 2: "How" (2-3 seconds, 91% confidence)
668
+ - Position 3: "Good" (3-6 seconds, 85% confidence)
669
+ - **Final Sentence:** "Hello How Good" ✅
670
+
671
+ ---
672
+
673
+ ### 🆚 When to Use Each Mode
674
+
675
+ | Scenario | Recommended Mode | Why |
676
+ |----------|-----------------|-----|
677
+ | Signs have different lengths | 🤖 Automatic | Detects boundaries precisely |
678
+ | You pause between signs | 🤖 Automatic | Pauses help detection |
679
+ | All signs exactly same duration | 📏 Manual | Simple equal split works |
680
+ | Fast, continuous signing | 📏 Manual | Motion detection may struggle |
681
+ | Professional recording | 🤖 Automatic | Better accuracy |
682
+ | Quick test/prototype | 📏 Manual | Faster processing |
683
+ """)
684
+
685
+ # Launch
686
+ if __name__ == "__main__":
687
+ demo.launch(share=True)