Bliss-Ruth committed on
Commit
d96d937
·
verified ·
1 Parent(s): 5712234

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -70
app.py CHANGED
@@ -303,74 +303,12 @@ def predict_single_sign(video_path):
303
  confidence, pred_class = torch.max(probs, 1)
304
 
305
  predicted_label = id_to_label[pred_class.item()]
306
- confidence_value = confidence.item()
307
 
308
- return predicted_label, confidence_value
309
 
310
  except Exception as e:
311
  print(f"❌ Prediction error: {e}")
312
- return "Unknown", 0.0
313
-
314
- def predict_multiple_videos(video_files):
315
- """
316
- MAIN FUNCTION: Predict signs from multiple videos and build a sentence
317
-
318
- Args:
319
- video_files: List of video file paths or single video
320
-
321
- Returns:
322
- Complete sentence, individual predictions, detailed results
323
- """
324
- try:
325
- # Handle single video or list
326
- if not isinstance(video_files, list):
327
- video_files = [video_files]
328
-
329
- # Remove None values
330
- video_files = [v for v in video_files if v is not None]
331
-
332
- if len(video_files) == 0:
333
- return "Please upload at least one video.", "", []
334
-
335
- # Predict each video
336
- predictions = []
337
- detailed_results = []
338
-
339
- for i, video_path in enumerate(video_files, 1):
340
- sign, confidence = predict_single_sign(video_path)
341
- predictions.append(sign)
342
- detailed_results.append({
343
- 'video_num': i,
344
- 'sign': sign,
345
- 'confidence': confidence
346
- })
347
-
348
- # Build sentence
349
- sentence = " ".join(predictions)
350
-
351
- # Format detailed results
352
- details_md = "### 📊 Individual Sign Analysis\n\n"
353
- for result in detailed_results:
354
- details_md += f"**Sign {result['video_num']}:** {result['sign']} ({result['confidence']*100:.1f}% confidence)\n\n"
355
-
356
- # Final output
357
- final_result = f"""
358
- ## 🎯 Complete Sentence Translation
359
-
360
- ### Detected Sentence:
361
- **"{sentence}"**
362
-
363
- {details_md}
364
-
365
- ---
366
- **Total Signs Detected:** {len(predictions)}
367
- **Model:** X-CLIP Fine-tuned on Ugandan Sign Language
368
- """
369
-
370
- return final_result, sentence, detailed_results
371
-
372
- except Exception as e:
373
- return f"**Error:** {str(e)}", "", []
374
 
375
  def analyze_joined_video(video_path, num_signs, use_auto_detect):
376
  """
@@ -411,21 +349,21 @@ def analyze_joined_video(video_path, num_signs, use_auto_detect):
411
 
412
  for i, segment_path in enumerate(segment_paths, 1):
413
  print(f"🔍 Analyzing segment {i}/{actual_segments}...")
414
- sign, confidence = predict_single_sign(segment_path)
 
415
  predictions.append(sign)
416
  detailed_results.append({
417
  'video_num': i,
418
- 'sign': sign,
419
- 'confidence': confidence
420
  })
421
 
422
  # STEP 3: Build sentence
423
  sentence = " ".join(predictions)
424
 
425
  # Format detailed results
426
- details_md = "### 📊 Individual Sign Analysis (In Order)\n\n"
427
  for result in detailed_results:
428
- details_md += f"**Position {result['video_num']}:** {result['sign']} ({result['confidence']*100:.1f}% confidence)\n\n"
429
 
430
  # Determine split method used
431
  split_method = "Automatic Motion Detection" if use_auto_detect else "Equal Time Segments"
@@ -433,7 +371,7 @@ def analyze_joined_video(video_path, num_signs, use_auto_detect):
433
 
434
  # Final output
435
  final_result = f"""
436
- ## 🎯 Complete Sentence Translation
437
 
438
  ### Detected Sentence:
439
  **"{sentence}"**
@@ -638,7 +576,49 @@ with gr.Blocks(css=custom_css, title="Sign Language Sentence Builder") as demo:
638
  outputs=[joined_video, auto_detect, num_signs_input, results_output, current_sentence, current_details, feedback_output]
639
  )
640
 
 
 
 
 
 
 
 
 
 
 
 
641
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
642
 
643
  # Launch
644
  if __name__ == "__main__":
 
303
  confidence, pred_class = torch.max(probs, 1)
304
 
305
  predicted_label = id_to_label[pred_class.item()]
 
306
 
307
+ return predicted_label # Only return the label
308
 
309
  except Exception as e:
310
  print(f"❌ Prediction error: {e}")
311
+ return "Unknown"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
 
313
  def analyze_joined_video(video_path, num_signs, use_auto_detect):
314
  """
 
349
 
350
  for i, segment_path in enumerate(segment_paths, 1):
351
  print(f"🔍 Analyzing segment {i}/{actual_segments}...")
352
+ sign = predict_single_sign(segment_path)
353
+
354
  predictions.append(sign)
355
  detailed_results.append({
356
  'video_num': i,
357
+ 'sign': sign
 
358
  })
359
 
360
  # STEP 3: Build sentence
361
  sentence = " ".join(predictions)
362
 
363
  # Format detailed results
364
+ details_md = "### Individual Sign Analysis (In Order)\n\n"
365
  for result in detailed_results:
366
+ details_md += f"**Position {result['video_num']}:** {result['sign']}\n\n"
367
 
368
  # Determine split method used
369
  split_method = "Automatic Motion Detection" if use_auto_detect else "Equal Time Segments"
 
371
 
372
  # Final output
373
  final_result = f"""
374
+ ## Complete Sentence Translation
375
 
376
  ### Detected Sentence:
377
  **"{sentence}"**
 
576
  outputs=[joined_video, auto_detect, num_signs_input, results_output, current_sentence, current_details, feedback_output]
577
  )
578
 
579
+ # Example section
580
+ gr.Markdown("""
581
+ ---
582
+ ### 📝 Complete Example Workflow
583
+
584
+ **Goal:** Translate "Hello how good" in sign language
585
+
586
+ **Step 1: Record Your Signs**
587
+ - Sign 1: "Hello" (performer holds sign for 2 seconds)
588
+ - Sign 2: "How" (performer holds sign for 1 second)
589
+ - Sign 3: "Good" (performer holds sign for 3 seconds)
590
 
591
+ **Step 2: Join in CapCut**
592
+ - Import all 3 videos
593
+ - Arrange in order: Hello → How → Good
594
+ - Export as ONE video (6 seconds total)
595
+
596
+ **Step 3: Upload & Analyze**
597
+ - Upload the 6-second video here
598
+ - Enable "Automatic Detection" ✅
599
+ - Set "Expected signs" to 3
600
+ - Click "Analyze Sentence"
601
+
602
+ **Step 4: Result**
603
+ - 🤖 AI detects 3 segments automatically:
604
+ - Position 1: "Hello"
605
+ - Position 2: "How"
606
+ - Position 3: "Good"
607
+ - **Final Sentence:** "Hello How Good" ✅
608
+
609
+ ---
610
+
611
+ ### 🆚 When to Use Each Mode
612
+
613
+ | Scenario | Recommended Mode | Why |
614
+ |----------|-----------------|-----|
615
+ | Signs have different lengths | 🤖 Automatic | Detects boundaries precisely |
616
+ | You pause between signs | 🤖 Automatic | Pauses help detection |
617
+ | All signs exactly same duration | 📏 Manual | Simple equal split works |
618
+ | Fast, continuous signing | 📏 Manual | Motion detection may struggle |
619
+ | Professional recording | 🤖 Automatic | Better accuracy |
620
+ | Quick test/prototype | 📏 Manual | Faster processing |
621
+ """)
622
 
623
  # Launch
624
  if __name__ == "__main__":