chingshuai committed on
Commit
03ddd2c
·
1 Parent(s): 5491c5e

update pre-download

Browse files
gradio_app.py CHANGED
@@ -12,6 +12,44 @@ import gradio as gr
12
  import torch
13
  from huggingface_hub import snapshot_download
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  def try_to_download_model():
16
  repo_id = "tencent/HY-Motion-1.0"
17
  target_folder = "HY-Motion-1.0-Lite"
@@ -923,6 +961,9 @@ def create_demo(final_model_path):
923
 
924
 
925
  # Create demo at module level for Hugging Face Spaces
 
 
 
926
  final_model_path = try_to_download_model()
927
  demo = create_demo(final_model_path)
928
 
 
12
  import torch
13
  from huggingface_hub import snapshot_download
14
 
15
+
16
def try_to_download_text_encoder():
    """
    Pre-download text encoder models (Qwen3-8B and CLIP) to local cache.

    Each repository is fetched with ``snapshot_download`` so that the files
    are already cached locally before they are needed, and later loading
    will not require downloading again. Nothing is loaded into memory here.

    The download is best-effort: a failure for one repository is printed as
    a warning and does not prevent the next repository from being attempted,
    and no exception is propagated to the caller.

    The access token is read from the ``HF_TOKEN`` environment variable.
    """
    # Text encoder model IDs (same as in hymotion/network/text_encoders/text_encoder.py)
    QWEN_REPO_ID = "Qwen/Qwen3-8B"
    CLIP_REPO_ID = "openai/clip-vit-large-patch14"

    # An unset or empty HF_TOKEN is passed on as None rather than "": a string
    # token is used by huggingface_hub as the authentication token as-is, so an
    # empty string would be sent as a (bogus) credential instead of letting the
    # library apply its default token resolution.
    token = os.environ.get("HF_TOKEN") or None

    for repo_id in (QWEN_REPO_ID, CLIP_REPO_ID):
        print(f">>> Pre-downloading text encoder: {repo_id}")
        try:
            snapshot_download(
                repo_id=repo_id,
                token=token,
            )
            print(f">>> Successfully pre-downloaded: {repo_id}")
        except Exception as e:
            # Deliberately broad: pre-download is only a cache warm-up and
            # must never stop the application from starting.
            print(f">>> [WARNING] Failed to pre-download {repo_id}: {e}")

    print(">>> Text encoder pre-download complete.")
51
+
52
+
53
  def try_to_download_model():
54
  repo_id = "tencent/HY-Motion-1.0"
55
  target_folder = "HY-Motion-1.0-Lite"
 
961
 
962
 
963
  # Create demo at module level for Hugging Face Spaces
964
+ # Pre-download text encoder models first (without loading)
965
+ try_to_download_text_encoder()
966
+ # Then download the main model
967
  final_model_path = try_to_download_model()
968
  demo = create_demo(final_model_path)
969
 
scripts/gradio/templates/placeholder_scene.html ADDED
@@ -0,0 +1,331 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Motion Visualization</title>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
8
+ <style>
9
+ html, body {
10
+ background: #424242 !important;
11
+ color: #e2e8f0;
12
+ margin: 0;
13
+ padding: 0;
14
+ width: 100%;
15
+ height: 100%;
16
+ overflow: hidden;
17
+ }
18
+ * {
19
+ margin: 0;
20
+ padding: 0;
21
+ box-sizing: border-box;
22
+ }
23
+ .fullscreen-container {
24
+ position: fixed;
25
+ top: 0;
26
+ left: 0;
27
+ width: 100vw;
28
+ height: 100vh;
29
+ background: #424242;
30
+ overflow: hidden;
31
+ }
32
+ #vis3d {
33
+ position: absolute;
34
+ top: 0;
35
+ left: 0;
36
+ width: 100%;
37
+ height: 100%;
38
+ background: #424242;
39
+ }
40
+ #vis3d canvas {
41
+ display: block;
42
+ width: 100% !important;
43
+ height: 100% !important;
44
+ }
45
+ .welcome-overlay {
46
+ position: absolute;
47
+ top: 50%;
48
+ left: 50%;
49
+ transform: translate(-50%, -50%);
50
+ background: rgba(0, 0, 0, 0.6);
51
+ backdrop-filter: blur(10px);
52
+ -webkit-backdrop-filter: blur(10px);
53
+ color: white;
54
+ padding: 30px 50px;
55
+ border-radius: 16px;
56
+ font-size: 16px;
57
+ z-index: 200;
58
+ text-align: center;
59
+ box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3);
60
+ }
61
+ .welcome-overlay h2 {
62
+ font-size: 20px;
63
+ font-weight: 600;
64
+ margin-bottom: 12px;
65
+ color: #4a9eff;
66
+ }
67
+ .welcome-overlay p {
68
+ color: #a0aec0;
69
+ font-size: 14px;
70
+ line-height: 1.6;
71
+ }
72
+ .control-overlay {
73
+ position: absolute;
74
+ bottom: 30px;
75
+ left: 50%;
76
+ transform: translateX(-50%);
77
+ width: 80%;
78
+ max-width: 600px;
79
+ z-index: 100;
80
+ background: rgba(0, 0, 0, 0.4);
81
+ backdrop-filter: blur(8px);
82
+ -webkit-backdrop-filter: blur(8px);
83
+ padding: 15px 20px;
84
+ border-radius: 12px;
85
+ }
86
+ .control-row-minimal {
87
+ display: flex;
88
+ align-items: center;
89
+ gap: 20px;
90
+ }
91
+ .progress-container {
92
+ flex: 1;
93
+ }
94
+ .progress-slider-minimal {
95
+ width: 100%;
96
+ height: 8px;
97
+ border-radius: 4px;
98
+ background: rgba(255, 255, 255, 0.3);
99
+ outline: none;
100
+ cursor: not-allowed;
101
+ -webkit-appearance: none;
102
+ appearance: none;
103
+ opacity: 0.5;
104
+ }
105
+ .progress-slider-minimal::-webkit-slider-runnable-track {
106
+ width: 100%;
107
+ height: 8px;
108
+ border-radius: 4px;
109
+ background: rgba(255, 255, 255, 0.3);
110
+ }
111
+ .progress-slider-minimal::-webkit-slider-thumb {
112
+ -webkit-appearance: none;
113
+ appearance: none;
114
+ width: 20px;
115
+ height: 20px;
116
+ border-radius: 50%;
117
+ background: #4a9eff;
118
+ cursor: not-allowed;
119
+ border: 2px solid white;
120
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.4);
121
+ margin-top: -6px;
122
+ }
123
+ .progress-slider-minimal::-moz-range-track {
124
+ width: 100%;
125
+ height: 8px;
126
+ border-radius: 4px;
127
+ background: rgba(255, 255, 255, 0.3);
128
+ }
129
+ .progress-slider-minimal::-moz-range-thumb {
130
+ width: 20px;
131
+ height: 20px;
132
+ border-radius: 50%;
133
+ background: #4a9eff;
134
+ cursor: not-allowed;
135
+ border: 2px solid white;
136
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.4);
137
+ }
138
+ .frame-counter {
139
+ font-family: 'SF Mono', 'Consolas', monospace;
140
+ font-size: 14px;
141
+ font-weight: 500;
142
+ color: rgba(255, 255, 255, 0.5);
143
+ text-shadow: 0 1px 3px rgba(0, 0, 0, 0.5);
144
+ white-space: nowrap;
145
+ min-width: 80px;
146
+ text-align: right;
147
+ }
148
+ </style>
149
+ </head>
150
+ <body>
151
+ <div class="fullscreen-container">
152
+ <div id="vis3d"></div>
153
+ <div class="welcome-overlay">
154
+ <h2>Welcome to HY-Motion-1.0!</h2>
155
+ <p>Enter a text description and generate motion<br>to see the 3D visualization here.</p>
156
+ </div>
157
+ <div class="control-overlay">
158
+ <div class="control-row-minimal">
159
+ <div class="progress-container">
160
+ <input type="range" class="progress-slider-minimal" min="0" max="100" value="0" disabled>
161
+ </div>
162
+ <div class="frame-counter">
163
+ <span>0</span> / <span>0</span>
164
+ </div>
165
+ </div>
166
+ </div>
167
+ </div>
168
+
169
+ <script type="importmap">
170
+ {
171
+ "imports": {
172
+ "three": "https://cdn.jsdelivr.net/npm/three@0.160.0/build/three.module.js",
173
+ "three/addons/": "https://cdn.jsdelivr.net/npm/three@0.160.0/examples/jsm/"
174
+ }
175
+ }
176
+ </script>
177
+
178
+ <script type="module">
179
+ import * as THREE from 'three';
180
+ import { OrbitControls } from 'three/addons/controls/OrbitControls.js';
181
+
182
/**
 * Build a square checkerboard plane whose texture is painted on an
 * off-screen canvas.
 *
 * @param {number} grid_size    Side length of the square plane geometry.
 * @param {number} divisions    Number of checker cells along each side.
 * @param {string} white        Fill style for cells where (column + row) is even.
 * @param {string} black        Fill style for cells where (column + row) is odd.
 * @param {number} texture_size Requested canvas edge in pixels; rounded down
 *                              to a multiple of `divisions`.
 * @returns {THREE.Mesh} Plane mesh with `receiveShadow` enabled (not yet rotated).
 */
function createBaseChessboard(
    grid_size = 50,
    divisions = 50,
    white = "#ffffff",
    black = "#3a3a3a",
    texture_size = 1024
) {
    // Snap the canvas edge down to a multiple of `divisions` so the cells
    // tile the canvas without a leftover strip.
    const edgePx = Math.floor(texture_size / divisions) * divisions;
    const cellPx = edgePx / divisions;

    const board = document.createElement("canvas");
    board.height = edgePx;
    board.width = edgePx;

    const ctx = board.getContext("2d");
    ctx.imageSmoothingEnabled = false;

    // Paint column by column, alternating colours on (column + row) parity.
    for (let col = 0; col < divisions; col++) {
        for (let row = 0; row < divisions; row++) {
            const isEven = (col + row) % 2 === 0;
            ctx.fillStyle = isEven ? white : black;
            ctx.fillRect(col * cellPx, row * cellPx, cellPx, cellPx);
        }
    }

    // Nearest-neighbour sampling without mipmaps.
    const checkerTexture = new THREE.CanvasTexture(board);
    Object.assign(checkerTexture, {
        wrapS: THREE.RepeatWrapping,
        wrapT: THREE.RepeatWrapping,
        magFilter: THREE.NearestFilter,
        minFilter: THREE.NearestFilter,
        generateMipmaps: false,
    });

    // NOTE(review): emissiveIntensity is set but no emissive colour is given
    // here - confirm whether a glow was intended.
    const materialOptions = {
        map: checkerTexture,
        side: THREE.DoubleSide,
        transparent: true,
        opacity: 0.85,
        roughness: 0.9,
        metalness: 0.1,
        emissiveIntensity: 0.05,
    };

    const board3d = new THREE.Mesh(
        new THREE.PlaneGeometry(grid_size, grid_size),
        new THREE.MeshStandardMaterial(materialOptions)
    );
    board3d.receiveShadow = true;
    return board3d;
}
227
+
228
/**
 * Create the default checkerboard, rotated -90 degrees about the X axis,
 * named 'ground' and flagged to receive shadows.
 *
 * @returns {THREE.Mesh} The ground plane mesh.
 */
function getChessboardXZ() {
    const ground = createBaseChessboard();
    ground.name = 'ground';
    ground.rotation.x = -Math.PI / 2;
    ground.receiveShadow = true;
    return ground;
}
235
+
236
+ let scene, camera, renderer, controls;
237
+
238
/**
 * Set up the placeholder scene: scene, camera, renderer, lights, ground
 * plane and orbit controls. Assigns the module-level `scene`, `camera`,
 * `renderer` and `controls`, mounts the canvas inside `#vis3d`, registers
 * the window resize handler and starts the render loop.
 */
function init() {
    const viewW = window.innerWidth;
    const viewH = window.innerHeight;

    // Scene: background and fog share the same grey.
    scene = new THREE.Scene();
    scene.background = new THREE.Color(0x424242);
    scene.fog = new THREE.FogExp2(0x424242, 0.06);

    // Camera
    camera = new THREE.PerspectiveCamera(45, viewW / viewH, 0.1, 50);
    camera.up.set(0, 1, 0);
    camera.position.set(3, 2.5, 5);
    camera.lookAt(new THREE.Vector3(0, 1, 0));

    // Renderer
    renderer = new THREE.WebGLRenderer({ antialias: true, logarithmicDepthBuffer: true });
    renderer.shadowMap.enabled = true;
    renderer.shadowMap.type = THREE.PCFSoftShadowMap;
    renderer.toneMapping = THREE.ACESFilmicToneMapping;
    renderer.toneMappingExposure = 1.0;
    renderer.outputColorSpace = THREE.SRGBColorSpace;
    renderer.setPixelRatio(window.devicePixelRatio);
    renderer.setSize(viewW, viewH);

    // Lights (added in the order: hemisphere, key, fill, rim)
    const skyLight = new THREE.HemisphereLight(0xffffff, 0x444444, 1.2);
    skyLight.position.set(0, 2, 0);
    scene.add(skyLight);

    // Key light: the only shadow-casting light in the scene.
    const keyLight = new THREE.DirectionalLight(0xffffff, 1.5);
    keyLight.position.set(3, 5, 4);
    keyLight.castShadow = true;
    keyLight.shadow.mapSize.set(2048, 2048);
    Object.assign(keyLight.shadow.camera, {
        near: 0.5,
        far: 50,
        left: -10,
        right: 10,
        top: 10,
        bottom: -10,
    });
    keyLight.shadow.bias = -0.0001;
    scene.add(keyLight);

    const fillLight = new THREE.DirectionalLight(0xaaccff, 0.5);
    fillLight.position.set(-3, 3, -2);
    scene.add(fillLight);

    const rimLight = new THREE.DirectionalLight(0xffeedd, 0.4);
    rimLight.position.set(0, 4, -5);
    scene.add(rimLight);

    // Ground
    scene.add(getChessboardXZ());

    // Mount the canvas in the page.
    document.getElementById('vis3d').appendChild(renderer.domElement);

    // Orbit controls.
    // NOTE(review): the controls target (0, 0.5, 0) differs from the point
    // passed to camera.lookAt above (0, 1, 0) - confirm which is intended.
    controls = new OrbitControls(camera, renderer.domElement);
    Object.assign(controls, {
        minDistance: 1,
        maxDistance: 15,
        enableDamping: true,
        dampingFactor: 0.05,
    });
    controls.target.set(0, 0.5, 0);
    controls.update();

    window.addEventListener('resize', onWindowResize);
    animate();
}
310
+
311
/**
 * Render loop: schedules itself for the next animation frame, updates the
 * orbit controls when they exist and have damping enabled, then draws the
 * scene.
 */
function animate() {
    requestAnimationFrame(animate);

    const dampingActive = controls?.enableDamping;
    if (dampingActive) {
        controls.update();
    }

    renderer.render(scene, camera);
}
318
+
319
/**
 * Window resize handler: resizes the renderer to the window's inner size
 * and updates the camera aspect ratio and projection matrix to match.
 */
function onWindowResize() {
    const { innerWidth: viewW, innerHeight: viewH } = window;

    renderer.setSize(viewW, viewH);

    camera.aspect = viewW / viewH;
    camera.updateProjectionMatrix();
}
326
+
327
+ init();
328
+ </script>
329
+ </body>
330
+ </html>
331
+