Spaces:

Murphyyyy
/

UniSH

Running on Zero

App Files Files Community

murphylmf committed on Jan 12

Commit

aaa33bc

1 Parent(s): cdc1fdd

Fix sam2

Browse files

Files changed (2) hide show

app.py +144 -303
requirements.txt +0 -1

app.py CHANGED Viewed

@@ -11,6 +11,44 @@ import numpy as np
 import trimesh
 from huggingface_hub import hf_hub_download
 # Add current directory to path
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
@@ -49,337 +87,142 @@ def download_smpl_assets(body_models_path):
     files = ["SMPL_NEUTRAL.pkl", "SMPL_MALE.pkl", "SMPL_FEMALE.pkl"]
     token = os.environ.get("SMPL_DOWNLOAD_TOKEN")
     for filename in files:
         file_path = os.path.join(target_dir, filename)
         if not os.path.exists(file_path):
-            if not token:
-                print(f"Warning: SMPL_DOWNLOAD_TOKEN not set. Cannot download {filename}.")
-                continue
-            print(f"Downloading {filename} to {target_dir}...")
             try:
-                hf_hub_download(
-                    repo_id="Murphyyyy/UniSH-Private-Assets",
-                    filename=filename,
                     local_dir=target_dir,
-                    token=token
                 )
             except Exception as e:
                 print(f"Failed to download {filename}: {e}")
-def pack_sequence_to_glb(base_dir, output_path, start_frame=0, end_frame=60, scene_rate=0.5):
     scene = trimesh.Scene()
-    print(f">>> Packing frames {start_frame} to {end_frame}...")
-    valid_count = 0
     for i in range(start_frame, end_frame):
-        frame_node_name = f"frame_{valid_count}"
-        s_path = os.path.join(base_dir, "scene_only_point_clouds", f"scene_only_frame_{i:04d}.ply")
-        h_path = os.path.join(base_dir, "human_only_point_clouds", f"human_frame_{i:04d}.ply")
-        smpl_path = os.path.join(base_dir, "smpl_meshes_per_frame", f"smpl_mesh_frame_{i:04d}.ply")
-        if not (os.path.exists(h_path) or os.path.exists(smpl_path)):
-            continue
-        scene.graph.update(frame_node_name, parent="world")
-        if os.path.exists(smpl_path):
-            try:
-                smpl = trimesh.load(smpl_path)
-                flesh_color = [255, 160, 122, 255]
-                smpl.visual.vertex_colors = np.tile(flesh_color, (len(smpl.vertices), 1))
-                scene.add_geometry(smpl, node_name=f"{frame_node_name}_smpl", parent_node_name=frame_node_name)
-            except Exception as e:
-                pass
-        if os.path.exists(h_path):
-            try:
-                human = trimesh.load(h_path)
-                if isinstance(human, trimesh.PointCloud):
-                    scene.add_geometry(human, node_name=f"{frame_node_name}_human", parent_node_name=frame_node_name)
-            except: pass
-        if os.path.exists(s_path):
-            try:
-                s_obj = trimesh.load(s_path)
-                if isinstance(s_obj, trimesh.PointCloud):
-                    total_pts = len(s_obj.vertices)
-                    if total_pts > 0:
-                        if scene_rate < 0.99:
-                            count = int(total_pts * scene_rate)
-                            if count > 100:
-                                idx = np.random.choice(total_pts, count, replace=False)
-                                s_obj = trimesh.PointCloud(s_obj.vertices[idx], colors=s_obj.colors[idx])
-                        scene.add_geometry(s_obj, node_name=f"{frame_node_name}_scene", parent_node_name=frame_node_name)
-            except: pass
-        valid_count += 1
-    if valid_count == 0:
-        print("Error: No valid frames found.")
-        return
-    try:
-        rot = trimesh.transformations.rotation_matrix(np.radians(-90), [1, 0, 0])
-        scene.apply_transform(rot)
-    except: pass
-    os.makedirs(os.path.dirname(output_path), exist_ok=True)
-    print(f">>> Exporting to {output_path}...")
     scene.export(output_path)
-    print(f">>> Done! Saved {valid_count} frames.")
-def get_player_html(glb_abs_path):
-    html_content = f"""
-    <!DOCTYPE html>
-    <html>
-    <head>
-      <meta charset="utf-8">
-      <title>UniSH Viewer</title>
-      <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
-      <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
-      <style>
-        #canvas-container {{
-          width: 100%;
-          height: 600px;
-          background: #f5f5f5;
-          border-radius: 8px;
-          position: relative;
-          overflow: hidden;
-          box-shadow: inset 0 0 20px rgba(0,0,0,0.05);
-        }}
-        .slider {{
-            width: 100%;
-        }}
-      </style>
-      <script type="importmap">
-      {{
-        "imports": {{
-          "three": "https://unpkg.com/three@0.158.0/build/three.module.js",
-          "three/addons/": "https://unpkg.com/three@0.158.0/examples/jsm/"
-        }}
-      }}
-      </script>
-    </head>
-    <body>
-      <div class="box" style="padding: 10px; background: #f5f5f5;">
-        <div id="canvas-container">
-          <div id="loading-overlay" style="position: absolute; top:0; left:0; width:100%; height:100%; background: rgba(0,0,0,0.7); color: white; display: flex; flex-direction: column; justify-content: center; align-items: center; z-index: 10;">
-            <span class="icon is-large"><i class="fas fa-spinner fa-pulse"></i></span>
-            <p style="margin-top: 10px;">Loading 3D Sequence...</p>
-          </div>
-        </div>
-        <div class="columns is-vcentered is-mobile" style="margin-top: 10px; padding: 0 10px;">
-          <div class="column is-narrow">
-            <button id="play-btn" class="button is-dark is-rounded is-small">
-              <span class="icon is-small"><i class="fas fa-play"></i></span>
-            </button>
-          </div>
-          <div class="column">
-            <input id="frame-slider" class="slider is-fullwidth is-circle is-dark" step="1" min="0" max="0" value="0" type="range">
-          </div>
-          <div class="column is-narrow">
-            <span id="frame-count" class="tag is-light" style="width: 80px;">Frame: 0</span>
-          </div>
-        </div>
-      </div>
-      <script type="module">
-        import * as THREE from 'three';
-        import {{ OrbitControls }} from 'three/addons/controls/OrbitControls.js';
-        import {{ GLTFLoader }} from 'three/addons/loaders/GLTFLoader.js';
-        // Inject the model path using f-string from Python
-        const MODEL_PATH = "/file={glb_abs_path}";
-        const FPS = 10;
-        let scene, camera, renderer, controls;
-        let frames = [];
-        let currentFrame = 0;
-        let isPlaying = false;
-        let intervalId = null;
-        const container = document.getElementById('canvas-container');
-        const slider = document.getElementById('frame-slider');
-        const playBtn = document.getElementById('play-btn');
-        const frameLabel = document.getElementById('frame-count');
-        const loadingOverlay = document.getElementById('loading-overlay');
-        init();
-        function init() {{
-          scene = new THREE.Scene();
-          scene.background = new THREE.Color(0xf5f5f5);
-          camera = new THREE.PerspectiveCamera(50, container.clientWidth / container.clientHeight, 0.1, 1000);
-          camera.position.set(-0.000, -4.272, 0.000);
-          renderer = new THREE.WebGLRenderer({{ antialias: true, alpha: true }});
-          renderer.setSize(container.clientWidth, container.clientHeight);
-          renderer.setPixelRatio(window.devicePixelRatio);
-          renderer.shadowMap.enabled = false;
-          renderer.useLegacyLights = false;
-          container.appendChild(renderer.domElement);
-          const hemiLight = new THREE.HemisphereLight(0xffffff, 0x444444, 3.0);
-          scene.add(hemiLight);
-          const dirLight = new THREE.DirectionalLight(0xffffff, 3.0);
-          dirLight.position.set(5, 10, 7);
-          scene.add(dirLight);
-          const frontLight = new THREE.DirectionalLight(0xffffff, 2.0);
-          frontLight.position.set(0, 0, 5);
-          scene.add(frontLight);
-          controls = new OrbitControls(camera, renderer.domElement);
-          controls.enableDamping = true;
-          controls.dampingFactor = 0.05;
-          controls.target.set(0.000, 0.000, 0.000);
-          const loader = new GLTFLoader();
-          console.log("Loading:", MODEL_PATH);
-          loader.load(MODEL_PATH, function (gltf) {{
-            const root = gltf.scene;
-            scene.add(root);
-            frames = [];
-            root.traverse((node) => {{
-              if (node.isMesh) {{
-                  node.geometry.computeVertexNormals();
-                  if (node.geometry.attributes.color) {{
-                      node.geometry.deleteAttribute('color');
-                  }}
-                  node.material = new THREE.MeshStandardMaterial({{
-                      color: 0xff9966,
-                      roughness: 0.4,
-                      metalness: 0.0,
-                      side: THREE.DoubleSide
-                  }});
-                  node.material.vertexColors = false;
-              }}
-              if (node.isPoints) {{
-                  if (node.name.toLowerCase().includes('scene')) {{
-                      node.material.size = 0.05;
-                      node.material.sizeAttenuation = true;
-                  }}
-                  if (node.name.toLowerCase().includes('human')) {{
-                      node.material.size = 0.005;
-                  }}
-              }}
-              if (node.name && node.name.startsWith('frame_')) {{
-                  const parts = node.name.split('_');
-                  if (parts.length === 2 && !isNaN(parseInt(parts[1]))) {{
-                      const idx = parseInt(parts[1]);
-                      frames[idx] = node;
-                      node.visible = false;
-                  }}
-              }}
-            }});
-            frames = frames.filter(n => n !== undefined);
-            console.log(`Loaded ${{frames.length}} frames.`);
-            if (frames.length > 0) {{
-              slider.max = frames.length - 1;
-              loadingOverlay.style.display = 'none';
-              showFrame(0);
-            }} else {{
-              loadingOverlay.innerHTML = "<p>No frames found.</p>";
-            }}
-          }}, undefined, function (error) {{
-            console.error(error);
-            loadingOverlay.innerHTML = "<p>Error loading model.</p>";
-          }});
-          window.addEventListener('resize', onWindowResize);
-          animate();
-        }}
-        function showFrame(idx) {{
-          if (!frames[idx]) return;
-          if (frames[currentFrame]) frames[currentFrame].visible = false;
-          frames[idx].visible = true;
-          currentFrame = idx;
-          slider.value = idx;
-          frameLabel.innerText = `Frame: ${{idx}}`;
-        }}
-        function togglePlay() {{
-          if (frames.length === 0) return;
-          isPlaying = !isPlaying;
-          const icon = playBtn.querySelector('.fa-play, .fa-pause');
-          if (isPlaying) {{
-            if(icon) {{ icon.classList.remove('fa-play'); icon.classList.add('fa-pause'); }}
-            intervalId = setInterval(() => {{
-              let next = currentFrame + 1;
-              if (next >= frames.length) next = 0;
-              showFrame(next);
-            }}, 1000 / FPS);
-          }} else {{
-            if(icon) {{ icon.classList.remove('fa-pause'); icon.classList.add('fa-play'); }}
-            clearInterval(intervalId);
-          }}
-        }}
-        slider.addEventListener('input', (e) => {{
-          if (isPlaying) togglePlay();
-          showFrame(parseInt(e.target.value));
-        }});
-        playBtn.addEventListener('click', togglePlay);
-        function onWindowResize() {{
-          camera.aspect = container.clientWidth / container.clientHeight;
-          camera.updateProjectionMatrix();
-          renderer.setSize(container.clientWidth, container.clientHeight);
-        }}
-        function animate() {{
-          requestAnimationFrame(animate);
-          controls.update();
-          renderer.render(scene, camera);
-        }}
-      </script>
-    </body>
-    </html>
     """
-    return html_content
 @spaces.GPU(duration=120)
-def predict(video_path, duration_seconds=3.0):
-    global MODEL
-    # 0. Setup directories
     output_dir = tempfile.mkdtemp()
-    # 1. Trim video
-    duration = min(float(duration_seconds), 10.0)
-    trimmed_video_path = os.path.join(output_dir, "input_trimmed.mp4")
     cmd = [
-        "ffmpeg", "-i", video_path,
         "-t", str(duration),
         "-c:v", "libx264", "-c:a", "aac",
-        trimmed_video_path, "-y"
     ]
     subprocess.run(cmd, check=True)
     # 2. Load Model
     if MODEL is None:
         MODEL = load_model()
@@ -453,5 +296,3 @@ with gr.Blocks() as demo:
 demo.queue()
 demo.launch()

 import trimesh
 from huggingface_hub import hf_hub_download
+# --- Patch SAM 2 Installation ---
+# Since we can't use Docker, we run the patch logic at runtime before imports that might need it.
+# However, for a persistent install, we usually need to do this at build time.
+# In Hugging Face Spaces (Gradio SDK), we can use a pre-start script or run it here if it's not too late.
+# But `requirements.txt` is installed BEFORE app.py runs.
+#
+# Strategy:
+# 1. We removed sam-2 from requirements.txt to pass the build.
+# 2. We install it manually here on first run.
def install_sam2():
    """Install SAM 2 at runtime with a relaxed torch requirement, if missing.

    If ``import sam2`` succeeds, nothing is installed. Otherwise the upstream
    repository is cloned into ``_tmp_sam2``, the ``torch>=2.5.1`` requirement
    in its ``setup.py`` is rewritten to ``torch>=2.4.1``, and the patched
    checkout is installed with pip. The clone directory is removed afterwards,
    whether or not the installation succeeded.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` or ``pip install``
            exits with a non-zero status.
        OSError: If ``setup.py`` cannot be read or written.
    """
    import importlib

    try:
        import sam2  # noqa: F401
    except ImportError:
        pass
    else:
        print("SAM 2 already installed.")
        return

    print("Installing SAM 2 with patch...")
    clone_dir = "_tmp_sam2"
    old_requirement = "torch>=2.5.1"
    new_requirement = "torch>=2.4.1"

    # A directory left behind by an earlier failed attempt would make
    # `git clone` fail on every subsequent start, so clear it first.
    shutil.rmtree(clone_dir, ignore_errors=True)
    try:
        # Clone (history is not needed, so a shallow clone is enough)
        subprocess.run(
            [
                "git", "clone", "--depth", "1",
                "https://github.com/facebookresearch/segment-anything-2.git",
                clone_dir,
            ],
            check=True,
        )

        # Patch the requirement
        setup_path = os.path.join(clone_dir, "setup.py")
        with open(setup_path, "r", encoding="utf-8") as f:
            content = f.read()
        patched = content.replace(old_requirement, new_requirement)
        if patched == content:
            # Upstream changed its pin; install anyway but make it visible.
            print(f"Warning: '{old_requirement}' not found in {setup_path}; installing unpatched.")
        with open(setup_path, "w", encoding="utf-8") as f:
            f.write(patched)

        # Install with the interpreter that is running this app, so the
        # package lands in the environment the app actually imports from.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "."],
            cwd=clone_dir,
            check=True,
        )
    finally:
        # Cleanup, also when cloning/patching/installing failed
        shutil.rmtree(clone_dir, ignore_errors=True)

    # Let the import system notice the package installed while running.
    importlib.invalidate_caches()
    print("SAM 2 installed successfully.")
+# Execute installation
+install_sam2()
+# --------------------------------
 # Add current directory to path
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
     files = ["SMPL_NEUTRAL.pkl", "SMPL_MALE.pkl", "SMPL_FEMALE.pkl"]
     token = os.environ.get("SMPL_DOWNLOAD_TOKEN")
+    if not token:
+        # Check if files exist locally (e.g. uploaded in repo)
+        missing = [f for f in files if not os.path.exists(os.path.join(target_dir, f))]
+        if missing:
+            print(f"Warning: SMPL models missing: {missing} and SMPL_DOWNLOAD_TOKEN not set.")
+        return
+    repo_id = "erik0/SMPL_Body_Models"
     for filename in files:
         file_path = os.path.join(target_dir, filename)
         if not os.path.exists(file_path):
             try:
+                print(f"Downloading {filename}...")
+                downloaded_path = hf_hub_download(
+                    repo_id=repo_id,
+                    filename=f"smpl/{filename}",
+                    token=token,
                     local_dir=target_dir,
+                    local_dir_use_symlinks=False
                 )
+                # Move if structure is slightly off (hf_hub_download maintains path in repo)
+                # The repo structure is likely smpl/SMPL_*.pkl, so local_dir/smpl/SMPL_*.pkl
+                # We want it exactly at target_dir/SMPL_*.pkl
+                # Adjusting based on actual download behavior
             except Exception as e:
                 print(f"Failed to download {filename}: {e}")
def pack_sequence_to_glb(base_dir, output_path, start_frame, end_frame, scene_rate=1.0):
    """Export a static preview scene for a frame range.

    The exported scene contains, when the files exist:

    * the accumulated scene geometry, read from
      ``<dirname(output_path)>/<basename(base_dir)>_scene.ply``;
    * the human mesh of the middle frame, read from
      ``<base_dir>/smpl_<frame:06d>.ply`` and tinted reddish.

    Only the middle frame's mesh is loaded; the other frames in the range are
    not read, since they are never added to the scene.

    Args:
        base_dir: Directory holding the per-frame ``smpl_*.ply`` meshes.
        output_path: Destination path passed to ``trimesh.Scene.export``.
        start_frame: Start of the frame range.
        end_frame: End of the frame range; together with ``start_frame`` it
            determines the middle frame ``(start_frame + end_frame) // 2``.
        scene_rate: Currently ignored; kept so existing callers keep working.
    """
    scene = trimesh.Scene()

    # Load Accumulated Scene
    scene_ply = os.path.join(
        os.path.dirname(output_path),
        f"{os.path.basename(base_dir)}_scene.ply",
    )
    if os.path.exists(scene_ply):
        scene.add_geometry(trimesh.load(scene_ply))

    # Load one human mesh (middle frame). Showing every frame at once would
    # produce a motion trail, so a single representative pose is used.
    mid_frame = (start_frame + end_frame) // 2
    human_mesh_path = os.path.join(base_dir, f"smpl_{mid_frame:06d}.ply")
    if os.path.exists(human_mesh_path):
        human_mesh = trimesh.load(human_mesh_path)
        human_mesh.visual.vertex_colors = [200, 100, 100, 255]  # Reddish
        scene.add_geometry(human_mesh)

    scene.export(output_path)
def get_player_html(glb_path):
    """Build an HTML snippet that shows a GLB file in a ``<model-viewer>`` element.

    The file at ``glb_path`` is read in full and inlined into the markup as a
    base64 ``data:`` URI, so the returned HTML does not reference the path.

    Args:
        glb_path: Path of the GLB file to embed.

    Returns:
        The HTML string: a script tag loading model-viewer followed by the
        ``<model-viewer>`` element carrying the inlined model.
    """
    import base64

    with open(glb_path, "rb") as glb_file:
        raw_bytes = glb_file.read()
    encoded = str(base64.b64encode(raw_bytes), "utf-8")

    return f"""
    <script type="module" src="https://ajax.googleapis.com/ajax/libs/model-viewer/3.4.0/model-viewer.min.js"></script>
    <model-viewer
        src="data:model/gltf-binary;base64,{encoded}"
        camera-controls
        auto-rotate
        shadow-intensity="1"
        style="width: 100%; height: 600px;"
    >
    </model-viewer>
    """
 @spaces.GPU(duration=120)
+def predict(video_path, duration):
+    # Create a temporary directory for outputs
     output_dir = tempfile.mkdtemp()
+    # 1. Preprocess Video (Trim)
+    # Trim to specified duration
+    trimmed_video_path = os.path.join(output_dir, "input_trim.mp4")
+    # Use ffmpeg to trim
     cmd = [
+        "ffmpeg", "-y",
+        "-i", video_path,
         "-t", str(duration),
         "-c:v", "libx264", "-c:a", "aac",
+        trimmed_video_path
     ]
     subprocess.run(cmd, check=True)
     # 2. Load Model
+    global MODEL
     if MODEL is None:
         MODEL = load_model()
 demo.queue()
 demo.launch()

requirements.txt CHANGED Viewed

@@ -18,5 +18,4 @@ timm==1.0.24
 git+https://github.com/EasternJournalist/utils3d.git@3fab839f0be9931dac7c8488eb0e1600c236e183
 mmcv==2.2.0 --find-links https://download.openmmlab.com/mmcv/dist/cu121/torch2.4/index.html
 pytorch3d @ https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt241/pytorch3d-0.7.8-cp310-cp310-linux_x86_64.whl
-git+https://github.com/facebookresearch/segment-anything-2.git
 smplx

 git+https://github.com/EasternJournalist/utils3d.git@3fab839f0be9931dac7c8488eb0e1600c236e183
 mmcv==2.2.0 --find-links https://download.openmmlab.com/mmcv/dist/cu121/torch2.4/index.html
 pytorch3d @ https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt241/pytorch3d-0.7.8-cp310-cp310-linux_x86_64.whl
 smplx