rdz-falcon committed on
Commit
adaffa8
·
verified ·
1 Parent(s): a7a2fc6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -45
app.py CHANGED
@@ -67,18 +67,23 @@ PARAM_NAMES = ["betas", "body_pose", "left_hand_pose", "right_hand_pose",
67
  AVATAR_COLOR = (0.36, 0.78, 0.36, 1.0) # Green color as RGBA
68
  VIDEO_FPS = 15
69
  VIDEO_SLOWDOWN = 2
70
- FRAME_WIDTH = 384 # Must be divisible by 16 for video codec compatibility
71
- FRAME_HEIGHT = 512 # Smaller, more compact video size
72
 
73
  # =====================================================================
74
  # Install/Import Dependencies
75
  # =====================================================================
76
- import os
77
- import sys
 
 
 
78
 
79
- import gradio as gr
80
- import smplx
81
- import imageio
 
 
82
 
83
  # PyRender for high-quality rendering
84
  PYRENDER_AVAILABLE = False
@@ -88,7 +93,13 @@ try:
88
  from PIL import Image, ImageDraw, ImageFont
89
  PYRENDER_AVAILABLE = True
90
  except ImportError:
91
- print("Warning: PyRender dependencies not available. Install trimesh, pyrender, Pillow.")
 
 
 
 
 
 
92
 
93
  from transformers import AutoModelForCausalLM, AutoTokenizer
94
  import torch.nn.functional as F
@@ -498,8 +509,8 @@ def render_single_frame(
498
  label: str = "",
499
  color: tuple = AVATAR_COLOR,
500
  fixed_center: np.ndarray = None,
501
- camera_distance: float = 4.5,
502
- focal_length: float = 1200,
503
  frame_width: int = FRAME_WIDTH,
504
  frame_height: int = FRAME_HEIGHT,
505
  bg_color: tuple = (0.95, 0.95, 0.97, 1.0)
@@ -540,12 +551,21 @@ def render_single_frame(
540
  znear=0.1, zfar=20.0
541
  )
542
 
543
- # Camera pose: positioned BEHIND the subject (at +Z), looking at -Z (toward face)
 
 
544
  camera_pose = np.eye(4)
545
- camera_pose[0, 3] = camera_target[0]
546
- camera_pose[1, 3] = camera_target[1]
547
- camera_pose[2, 3] = camera_target[2] + camera_distance
548
- camera_pose[:3, :3] = np.eye(3)
 
 
 
 
 
 
 
549
 
550
  scene.add(camera, pose=camera_pose)
551
 
@@ -594,8 +614,8 @@ def render_side_by_side_frame(
594
  faces: np.ndarray,
595
  labels: list,
596
  fixed_centers: list = None,
597
- camera_distance: float = 4.5,
598
- focal_length: float = 1200,
599
  frame_width: int = FRAME_WIDTH,
600
  frame_height: int = FRAME_HEIGHT,
601
  bg_color: tuple = (0.95, 0.95, 0.97, 1.0)
@@ -635,8 +655,8 @@ def render_video(
635
  label: str = "",
636
  fps: int = VIDEO_FPS,
637
  slowdown: int = VIDEO_SLOWDOWN,
638
- camera_distance: float = 4.5,
639
- focal_length: float = 1200,
640
  frame_width: int = FRAME_WIDTH,
641
  frame_height: int = FRAME_HEIGHT
642
  ) -> str:
@@ -644,13 +664,9 @@ def render_video(
644
  if not ensure_pyrender():
645
  raise RuntimeError("PyRender not available")
646
 
647
- # =========================================================================
648
- # FIX APPLIED: Removed the manual axis flips.
649
- # The previous code was flipping Y (upside down) and Z (facing away).
650
- # =========================================================================
651
  verts = verts.copy()
652
- # verts[..., 1] *= -1 # <--- COMMENTED OUT (Fixes upside down)
653
- # verts[..., 2] *= -1 # <--- COMMENTED OUT (Fixes facing away)
654
 
655
  # Trim last few frames to remove end-of-sequence artifacts
656
  T_total = verts.shape[0]
@@ -690,8 +706,8 @@ def render_comparison_video(
690
  label2: str = "",
691
  fps: int = VIDEO_FPS,
692
  slowdown: int = VIDEO_SLOWDOWN,
693
- camera_distance: float = 4.5,
694
- focal_length: float = 1200,
695
  frame_width: int = FRAME_WIDTH,
696
  frame_height: int = FRAME_HEIGHT
697
  ) -> str:
@@ -699,19 +715,11 @@ def render_comparison_video(
699
  if not ensure_pyrender():
700
  raise RuntimeError("PyRender not available")
701
 
702
- # =========================================================================
703
- # FIX APPLIED: Removed the manual axis flips.
704
- # =========================================================================
705
  verts1 = verts1.copy()
706
  verts2 = verts2.copy()
707
-
708
- # Fix Avatar 1 - Removed flips
709
- # verts1[..., 1] *= -1
710
- # verts1[..., 2] *= -1
711
-
712
- # Fix Avatar 2 - Removed flips
713
- # verts2[..., 1] *= -1
714
- # verts2[..., 2] *= -1
715
 
716
  # Match lengths and trim
717
  T_total = min(verts1.shape[0], verts2.shape[0])
@@ -878,7 +886,7 @@ def create_gradio_interface():
878
  lines=1, max_lines=1
879
  )
880
 
881
- generate_btn = gr.Button("Generate Motion", variant="primary")
882
 
883
  gr.Markdown("---")
884
  gr.Markdown("### Generated Tokens")
@@ -886,7 +894,8 @@ def create_gradio_interface():
886
  tokens_output = gr.Textbox(
887
  label="Motion Tokens (both variants)",
888
  lines=8,
889
- interactive=False
 
890
  )
891
 
892
  if _word_pid_map:
@@ -897,7 +906,8 @@ def create_gradio_interface():
897
  gr.Markdown("### Motion Comparison (Two Signer Variants)")
898
  video_output = gr.Video(
899
  label="Generated Motion",
900
- autoplay=True
 
901
  )
902
 
903
  if example_list:
@@ -906,16 +916,17 @@ def create_gradio_interface():
906
 
907
  for item in example_list:
908
  word, pid = item['word'], item['pid']
909
- with gr.Row():
910
  with gr.Column(scale=1, min_width=180):
911
  gr.HTML(f'<div class="example-word-label">{word.upper()}</div>')
912
  gr.HTML(f'<div class="example-variant-label">Variant: {pid}</div>')
913
- example_btn = gr.Button("Load Example", variant="secondary")
914
 
915
  with gr.Column(scale=3, min_width=500):
916
  example_video = gr.Video(
917
  label=f"Example: {word}",
918
- autoplay=False
 
919
  )
920
 
921
  example_btn.click(
@@ -970,4 +981,4 @@ if __name__ == "__main__":
970
  server_name="0.0.0.0",
971
  server_port=7860,
972
  share=False
973
- )
 
67
  AVATAR_COLOR = (0.36, 0.78, 0.36, 1.0) # Green color as RGBA
68
  VIDEO_FPS = 15
69
  VIDEO_SLOWDOWN = 2
70
+ FRAME_WIDTH = 544 # Must be divisible by 16 for video codec compatibility
71
+ FRAME_HEIGHT = 720
72
 
73
  # =====================================================================
74
  # Install/Import Dependencies
75
  # =====================================================================
76
+ try:
77
+ import gradio as gr
78
+ except ImportError:
79
+ os.system("pip install -q gradio>=4.0.0")
80
+ import gradio as gr
81
 
82
+ try:
83
+ import smplx
84
+ except ImportError:
85
+ os.system("pip install -q smplx==0.1.28")
86
+ import smplx
87
 
88
  # PyRender for high-quality rendering
89
  PYRENDER_AVAILABLE = False
 
93
  from PIL import Image, ImageDraw, ImageFont
94
  PYRENDER_AVAILABLE = True
95
  except ImportError:
96
+ pass
97
+
98
+ try:
99
+ import imageio
100
+ except ImportError:
101
+ os.system("pip install -q imageio[ffmpeg]")
102
+ import imageio
103
 
104
  from transformers import AutoModelForCausalLM, AutoTokenizer
105
  import torch.nn.functional as F
 
509
  label: str = "",
510
  color: tuple = AVATAR_COLOR,
511
  fixed_center: np.ndarray = None,
512
+ camera_distance: float = 3.5,
513
+ focal_length: float = 2000,
514
  frame_width: int = FRAME_WIDTH,
515
  frame_height: int = FRAME_HEIGHT,
516
  bg_color: tuple = (0.95, 0.95, 0.97, 1.0)
 
551
  znear=0.1, zfar=20.0
552
  )
553
 
554
+ # Camera pose: the vertices are rotated 180 degrees around the X-axis (Y and Z negated) before rendering,
555
+ # so the camera is placed on the negative-Z side of the target (target Z minus camera_distance) with a flipped orientation.
556
+ # This matches visualize.py and ensures proper face visibility
557
  camera_pose = np.eye(4)
558
+ camera_pose[0, 3] = camera_target[0] # Center X
559
+ camera_pose[1, 3] = camera_target[1] # Center Y (body center)
560
+ camera_pose[2, 3] = camera_target[2] - camera_distance # In front (negative Z)
561
+
562
+ # Camera orientation: flip to look at subject (SOKE-style)
563
+ # This rotation makes camera look toward +Z (at the subject)
564
+ camera_pose[:3, :3] = np.array([
565
+ [1, 0, 0],
566
+ [0, -1, 0],
567
+ [0, 0, -1]
568
+ ])
569
 
570
  scene.add(camera, pose=camera_pose)
571
 
 
614
  faces: np.ndarray,
615
  labels: list,
616
  fixed_centers: list = None,
617
+ camera_distance: float = 3.5,
618
+ focal_length: float = 2000,
619
  frame_width: int = FRAME_WIDTH,
620
  frame_height: int = FRAME_HEIGHT,
621
  bg_color: tuple = (0.95, 0.95, 0.97, 1.0)
 
655
  label: str = "",
656
  fps: int = VIDEO_FPS,
657
  slowdown: int = VIDEO_SLOWDOWN,
658
+ camera_distance: float = 3.5,
659
+ focal_length: float = 2000,
660
  frame_width: int = FRAME_WIDTH,
661
  frame_height: int = FRAME_HEIGHT
662
  ) -> str:
 
664
  if not ensure_pyrender():
665
  raise RuntimeError("PyRender not available")
666
 
667
+ # Apply orientation fix: rotate 180 degrees around X-axis
 
 
 
668
  verts = verts.copy()
669
+ verts[..., 1:] *= -1
 
670
 
671
  # Trim last few frames to remove end-of-sequence artifacts
672
  T_total = verts.shape[0]
 
706
  label2: str = "",
707
  fps: int = VIDEO_FPS,
708
  slowdown: int = VIDEO_SLOWDOWN,
709
+ camera_distance: float = 3.5,
710
+ focal_length: float = 2000,
711
  frame_width: int = FRAME_WIDTH,
712
  frame_height: int = FRAME_HEIGHT
713
  ) -> str:
 
715
  if not ensure_pyrender():
716
  raise RuntimeError("PyRender not available")
717
 
718
+ # Apply orientation fix to both avatars: rotate 180 degrees around X-axis
 
 
719
  verts1 = verts1.copy()
720
  verts2 = verts2.copy()
721
+ verts1[..., 1:] *= -1
722
+ verts2[..., 1:] *= -1
 
 
 
 
 
 
723
 
724
  # Match lengths and trim
725
  T_total = min(verts1.shape[0], verts2.shape[0])
 
886
  lines=1, max_lines=1
887
  )
888
 
889
+ generate_btn = gr.Button("Generate Motion", variant="primary", size="lg")
890
 
891
  gr.Markdown("---")
892
  gr.Markdown("### Generated Tokens")
 
894
  tokens_output = gr.Textbox(
895
  label="Motion Tokens (both variants)",
896
  lines=8,
897
+ interactive=False,
898
+ show_copy_button=True
899
  )
900
 
901
  if _word_pid_map:
 
906
  gr.Markdown("### Motion Comparison (Two Signer Variants)")
907
  video_output = gr.Video(
908
  label="Generated Motion",
909
+ autoplay=True,
910
+ show_download_button=True
911
  )
912
 
913
  if example_list:
 
916
 
917
  for item in example_list:
918
  word, pid = item['word'], item['pid']
919
+ with gr.Row(elem_classes="example-row"):
920
  with gr.Column(scale=1, min_width=180):
921
  gr.HTML(f'<div class="example-word-label">{word.upper()}</div>')
922
  gr.HTML(f'<div class="example-variant-label">Variant: {pid}</div>')
923
+ example_btn = gr.Button("Load Example", size="sm", variant="secondary")
924
 
925
  with gr.Column(scale=3, min_width=500):
926
  example_video = gr.Video(
927
  label=f"Example: {word}",
928
+ autoplay=False,
929
+ show_download_button=True
930
  )
931
 
932
  example_btn.click(
 
981
  server_name="0.0.0.0",
982
  server_port=7860,
983
  share=False
984
+ )