Daye-Lee18 commited on
Commit
a30a67a
·
1 Parent(s): f59528b

app.py modified version

Browse files
Files changed (3) hide show
  1. app.py +240 -132
  2. old_app.py +154 -0
  3. requirements.txt +8 -0
app.py CHANGED
@@ -1,154 +1,262 @@
 
 
 
 
 
1
  import gradio as gr
2
  import numpy as np
3
- import random
4
-
5
- # import spaces #[uncomment to use ZeroGPU]
6
- from diffusers import DiffusionPipeline
7
  import torch
 
 
 
 
8
 
9
- device = "cuda" if torch.cuda.is_available() else "cpu"
10
- model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
 
 
 
11
 
12
- if torch.cuda.is_available():
13
- torch_dtype = torch.float16
14
- else:
15
- torch_dtype = torch.float32
16
 
17
- pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
18
- pipe = pipe.to(device)
19
 
20
- MAX_SEED = np.iinfo(np.int32).max
21
- MAX_IMAGE_SIZE = 1024
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # @spaces.GPU #[uncomment to use ZeroGPU]
25
- def infer(
26
- prompt,
27
- negative_prompt,
28
- seed,
29
- randomize_seed,
30
- width,
31
- height,
32
- guidance_scale,
33
- num_inference_steps,
34
- progress=gr.Progress(track_tqdm=True),
 
 
 
 
 
 
 
35
  ):
36
- if randomize_seed:
37
- seed = random.randint(0, MAX_SEED)
 
 
 
38
 
39
- generator = torch.Generator().manual_seed(seed)
 
 
 
40
 
41
- image = pipe(
42
- prompt=prompt,
43
- negative_prompt=negative_prompt,
44
- guidance_scale=guidance_scale,
45
- num_inference_steps=num_inference_steps,
46
- width=width,
47
- height=height,
48
- generator=generator,
49
- ).images[0]
50
 
51
- return image, seed
 
 
 
 
52
 
 
 
 
 
53
 
54
- examples = [
55
- "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
56
- "An astronaut riding a green horse",
57
- "A delicious ceviche cheesecake slice",
58
- ]
59
 
60
- css = """
61
- #col-container {
62
- margin: 0 auto;
63
- max-width: 640px;
64
- }
65
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
- with gr.Blocks(css=css) as demo:
68
- with gr.Column(elem_id="col-container"):
69
- gr.Markdown(" # Text-to-Image Gradio Template")
70
 
71
- with gr.Row():
72
- prompt = gr.Text(
73
- label="Prompt",
74
- show_label=False,
75
- max_lines=1,
76
- placeholder="Enter your prompt",
77
- container=False,
78
- )
79
-
80
- run_button = gr.Button("Run", scale=0, variant="primary")
81
-
82
- result = gr.Image(label="Result", show_label=False)
83
-
84
- with gr.Accordion("Advanced Settings", open=False):
85
- negative_prompt = gr.Text(
86
- label="Negative prompt",
87
- max_lines=1,
88
- placeholder="Enter a negative prompt",
89
- visible=False,
90
- )
91
-
92
- seed = gr.Slider(
93
- label="Seed",
94
- minimum=0,
95
- maximum=MAX_SEED,
96
- step=1,
97
- value=0,
98
- )
99
-
100
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
101
-
102
- with gr.Row():
103
- width = gr.Slider(
104
- label="Width",
105
- minimum=256,
106
- maximum=MAX_IMAGE_SIZE,
107
- step=32,
108
- value=1024, # Replace with defaults that work for your model
109
- )
110
-
111
- height = gr.Slider(
112
- label="Height",
113
- minimum=256,
114
- maximum=MAX_IMAGE_SIZE,
115
- step=32,
116
- value=1024, # Replace with defaults that work for your model
117
- )
118
-
119
- with gr.Row():
120
- guidance_scale = gr.Slider(
121
- label="Guidance scale",
122
- minimum=0.0,
123
- maximum=10.0,
124
- step=0.1,
125
- value=0.0, # Replace with defaults that work for your model
126
- )
127
-
128
- num_inference_steps = gr.Slider(
129
- label="Number of inference steps",
130
- minimum=1,
131
- maximum=50,
132
- step=1,
133
- value=2, # Replace with defaults that work for your model
134
- )
135
-
136
- gr.Examples(examples=examples, inputs=[prompt])
137
- gr.on(
138
- triggers=[run_button.click, prompt.submit],
139
- fn=infer,
140
- inputs=[
141
- prompt,
142
- negative_prompt,
143
- seed,
144
- randomize_seed,
145
- width,
146
- height,
147
- guidance_scale,
148
- num_inference_steps,
149
- ],
150
- outputs=[result, seed],
151
  )
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  if __name__ == "__main__":
 
 
154
  demo.launch()
 
1
+ import os
2
+ import tempfile
3
+ from pathlib import Path
4
+ from typing import Tuple, Optional
5
+
6
  import gradio as gr
7
  import numpy as np
 
 
 
 
8
  import torch
9
+ import soundfile as sf
10
+ import librosa
11
+
12
+ from huggingface_hub import hf_hub_download
13
 
14
+ # -----------------------------
15
+ # Config
16
+ # -----------------------------
17
+ DEFAULT_WEIGHTS_REPO = os.environ.get("WEIGHTS_REPO", "isYes/HuMoGen-X-weights") # private model repo
18
+ WEIGHTS_FILENAME = os.environ.get("WEIGHTS_FILENAME", "train-0090.pt") # in the private repo
19
 
20
+ # Space는 CPU일 수도 있고 GPU일 수도 있음
21
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
 
22
 
 
 
23
 
24
# -----------------------------
# Secure download + load
# -----------------------------
# Module-level memo so the checkpoint is downloaded and loaded only once per
# Space runtime. NOTE: Gradio has no `cache_resource` decorator — that API is
# Streamlit's `st.cache_resource`. The original `@gr.cache_resource` raises
# AttributeError at import time, so we cache explicitly instead.
_MODEL_CACHE = None


def load_model():
    """Load model weights from a PRIVATE HF repo using HF_TOKEN (Space Secret).

    The loaded model is memoized in ``_MODEL_CACHE`` so repeated calls after
    the first successful load are free.

    Returns:
        The deserialized checkpoint object (moved to ``DEVICE`` and switched
        to eval mode when it exposes ``to`` / ``eval``).

    Raises:
        RuntimeError: if the ``HF_TOKEN`` secret is not configured.
    """
    global _MODEL_CACHE
    if _MODEL_CACHE is not None:
        return _MODEL_CACHE

    token = os.environ.get("HF_TOKEN")
    if not token:
        raise RuntimeError(
            "HF_TOKEN secret is missing. Set it in Space Settings -> Secrets."
        )

    ckpt_path = hf_hub_download(
        repo_id=DEFAULT_WEIGHTS_REPO,
        filename=WEIGHTS_FILENAME,
        token=token,
    )

    # TODO: replace this with your actual model class init + load_state_dict
    # Example patterns:
    #   model = HuMoGenX(...)
    #   state = torch.load(ckpt_path, map_location="cpu")
    #   model.load_state_dict(state["state_dict"] if "state_dict" in state else state)
    #   model.to(DEVICE).eval()
    #
    # NOTE(review): `torch.load` unpickles arbitrary objects; this is only
    # acceptable because the checkpoint comes from our own private repo.
    # Prefer torch.load(..., weights_only=True) once a plain state_dict is used.
    model = torch.load(ckpt_path, map_location="cpu")
    if hasattr(model, "to"):
        model = model.to(DEVICE)
    if hasattr(model, "eval"):
        model.eval()

    _MODEL_CACHE = model
    return _MODEL_CACHE
 
60
+
61
# -----------------------------
# Utilities
# -----------------------------
def load_audio_mono_16k(audio_path: str, target_sr: int = 16000) -> Tuple[np.ndarray, int]:
    """Decode *audio_path* into a mono float32 waveform resampled to *target_sr*.

    Returns:
        (waveform, sample_rate) where sample_rate == target_sr.
    """
    # librosa handles resampling and down-mixing in a single call.
    waveform, _ = librosa.load(audio_path, sr=target_sr, mono=True)
    return waveform.astype(np.float32), target_sr
71
+
72
+
73
+ def render_motion_to_mp4(
74
+ motion: np.ndarray,
75
+ out_mp4_path: str,
76
+ fps: int = 30,
77
+ resolution: int = 512,
78
  ):
79
+ """
80
+ TODO: Replace this with your real renderer.
81
+ This function should create an mp4 from the generated motion.
82
+ - motion: (T, D) or (T, J, 3) etc.
83
+ - out_mp4_path: path to save mp4
84
 
85
+ Options:
86
+ 1) lightweight: matplotlib stick figure -> imageio mp4
87
+ 2) medium: pyrender / trimesh
88
+ 3) heavy: Blender (보통 Space에선 비추)
89
 
90
+ For now, we'll create a dummy black video so the UI pipeline is complete.
91
+ """
92
+ import imageio.v2 as imageio
 
 
 
 
 
 
93
 
94
+ T = int(motion.shape[0]) if motion is not None else 60
95
+ frames = []
96
+ for _ in range(T):
97
+ frame = np.zeros((resolution, resolution, 3), dtype=np.uint8)
98
+ frames.append(frame)
99
 
100
+ writer = imageio.get_writer(out_mp4_path, fps=fps)
101
+ for f in frames:
102
+ writer.append_data(f)
103
+ writer.close()
104
 
 
 
 
 
 
105
 
106
# -----------------------------
# Inference stub (connect your code here)
# -----------------------------
@torch.inference_mode()
def run_inference(
    audio_path: str,
    genre: str,
    cfg_genre: float,
    cfg_music: float,
    seed: int,
    num_frames: int,
    fps: int,
) -> np.ndarray:
    """Return generated motion as a numpy array of shape (num_frames, D).

    Replace the placeholder body with the HuMoGen-X sampling logic.

    Args:
        audio_path: path to the uploaded music file.
        genre: dance genre label chosen in the UI.
        cfg_genre / cfg_music: classifier-free guidance weights.
        seed: RNG seed; identical inputs + seed must reproduce the output.
        num_frames: number of motion frames to generate.
        fps: target frame rate (forwarded to the sampler when wired up).
    """
    # Load model (memoized — cheap after the first call)
    model = load_model()

    # Prepare audio
    audio, sr = load_audio_mono_16k(audio_path, target_sr=16000)

    # Torch generator for the real sampler below.
    g = torch.Generator(device=DEVICE)
    g.manual_seed(int(seed))

    # -----------------------
    # TODO: your actual inference
    # Example pseudo:
    #   cond = {
    #       "music": torch.tensor(audio)[None, ...].to(DEVICE),
    #       "genre": genre_to_id(genre),
    #   }
    #   motion = model.sample(
    #       cond=cond,
    #       guidance={"genre": cfg_genre, "music": cfg_music},
    #       num_frames=num_frames,
    #       generator=g,
    #   )
    #   motion_np = motion.detach().cpu().numpy()[0]
    # -----------------------

    # Placeholder motion (T, D). BUGFIX: the original drew from the *global*
    # unseeded numpy RNG, so the UI "Seed" control had no effect. Use a local
    # generator seeded with `seed` so the placeholder is reproducible too.
    rng = np.random.default_rng(int(seed))
    T = int(num_frames)
    D = 151  # adjust to your representation
    motion_np = rng.standard_normal((T, D)).astype(np.float32)
    return motion_np
154
+
155
+
156
def generate_demo(
    audio_file,
    genre: str,
    cfg_genre: float,
    cfg_music: float,
    seed: int,
    seconds: float,
    fps: int,
    resolution: int,
):
    """Gradio handler: validate inputs, run inference, render, return mp4 path."""
    if audio_file is None:
        raise gr.Error("음악 파일을 업로드해줘!")

    # Gradio may hand over either a plain path string or a file-like object.
    src_path = audio_file if isinstance(audio_file, str) else audio_file.name

    # Clip length in frames; never fewer than one frame.
    frame_count = int(max(1, round(seconds * fps)))

    motion = run_inference(
        audio_path=src_path,
        genre=genre,
        cfg_genre=float(cfg_genre),
        cfg_music=float(cfg_music),
        seed=int(seed),
        num_frames=frame_count,
        fps=int(fps),
    )

    # Render into a fresh temp directory so concurrent requests never collide.
    out_mp4 = str(Path(tempfile.mkdtemp()) / "humogenx_result.mp4")
    render_motion_to_mp4(
        motion=motion,
        out_mp4_path=out_mp4,
        fps=int(fps),
        resolution=int(resolution),
    )
    return out_mp4
199
+
200
+
201
# -----------------------------
# Gradio UI
# -----------------------------
def build_ui():
    """Assemble the Gradio Blocks interface and return it (caller launches it)."""
    genres = [
        "HipHop", "Breaking", "Popping", "Locking",
        "House", "Waacking", "Shuffle", "Disco",
        "Jazz", "Kpop", "Ballet", "Contemporary",
    ]  # swap in the thesis genre set if desired

    with gr.Blocks(title="HuMoGen-X Demo", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # HuMoGen-X Demo (Inference-only)
            - **Upload music** → choose **dance genre** → adjust **CFG** → get **MP4**.
            - Model weights are stored in a **private repo** and loaded at runtime.
            """.strip()
        )

        with gr.Row():
            # Left column: all inputs and the trigger button.
            with gr.Column(scale=1):
                audio_in = gr.Audio(label="Music Upload", type="filepath")
                genre_dd = gr.Dropdown(choices=genres, value=genres[0], label="Dance Genre")

                gr.Markdown("### CFG (Classifier-Free Guidance)")
                genre_cfg = gr.Slider(0.0, 8.0, value=3.0, step=0.1, label="CFG: Genre")
                music_cfg = gr.Slider(0.0, 8.0, value=3.0, step=0.1, label="CFG: Music")

                with gr.Row():
                    seed_in = gr.Number(value=0, precision=0, label="Seed (int)")
                    length_s = gr.Slider(1.0, 12.0, value=6.0, step=0.5, label="Length (sec)")

                with gr.Row():
                    fps_dd = gr.Dropdown(choices=[20, 24, 30, 60], value=30, label="FPS")
                    res_dd = gr.Dropdown(choices=[256, 512, 720], value=512, label="Render Resolution")

                generate_btn = gr.Button("Generate", variant="primary")

            # Right column: rendered result.
            with gr.Column(scale=1):
                result_video = gr.Video(label="Result (MP4)", autoplay=True)

        generate_btn.click(
            fn=generate_demo,
            inputs=[audio_in, genre_dd, genre_cfg, music_cfg, seed_in, length_s, fps_dd, res_dd],
            outputs=[result_video],
        )

        gr.Markdown(
            """
            ### Notes
            - This Space is **inference-only**; weights are not downloadable here.
            - If you want higher quality rendering, replace `render_motion_to_mp4()` with your renderer.
            """.strip()
        )

    return demo
257
+
258
+
259
if __name__ == "__main__":
    # Queue serializes requests — important on shared Space hardware.
    app = build_ui()
    app.queue()
    app.launch()
old_app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import random
4
+
5
+ # import spaces #[uncomment to use ZeroGPU]
6
+ from diffusers import DiffusionPipeline
7
+ import torch
8
+
9
+ device = "cuda" if torch.cuda.is_available() else "cpu"
10
+ model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
11
+
12
+ if torch.cuda.is_available():
13
+ torch_dtype = torch.float16
14
+ else:
15
+ torch_dtype = torch.float32
16
+
17
+ pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
18
+ pipe = pipe.to(device)
19
+
20
+ MAX_SEED = np.iinfo(np.int32).max
21
+ MAX_IMAGE_SIZE = 1024
22
+
23
+
24
+ # @spaces.GPU #[uncomment to use ZeroGPU]
25
+ def infer(
26
+ prompt,
27
+ negative_prompt,
28
+ seed,
29
+ randomize_seed,
30
+ width,
31
+ height,
32
+ guidance_scale,
33
+ num_inference_steps,
34
+ progress=gr.Progress(track_tqdm=True),
35
+ ):
36
+ if randomize_seed:
37
+ seed = random.randint(0, MAX_SEED)
38
+
39
+ generator = torch.Generator().manual_seed(seed)
40
+
41
+ image = pipe(
42
+ prompt=prompt,
43
+ negative_prompt=negative_prompt,
44
+ guidance_scale=guidance_scale,
45
+ num_inference_steps=num_inference_steps,
46
+ width=width,
47
+ height=height,
48
+ generator=generator,
49
+ ).images[0]
50
+
51
+ return image, seed
52
+
53
+
54
+ examples = [
55
+ "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
56
+ "An astronaut riding a green horse",
57
+ "A delicious ceviche cheesecake slice",
58
+ ]
59
+
60
+ css = """
61
+ #col-container {
62
+ margin: 0 auto;
63
+ max-width: 640px;
64
+ }
65
+ """
66
+
67
+ with gr.Blocks(css=css) as demo:
68
+ with gr.Column(elem_id="col-container"):
69
+ gr.Markdown(" # Text-to-Image Gradio Template")
70
+
71
+ with gr.Row():
72
+ prompt = gr.Text(
73
+ label="Prompt",
74
+ show_label=False,
75
+ max_lines=1,
76
+ placeholder="Enter your prompt",
77
+ container=False,
78
+ )
79
+
80
+ run_button = gr.Button("Run", scale=0, variant="primary")
81
+
82
+ result = gr.Image(label="Result", show_label=False)
83
+
84
+ with gr.Accordion("Advanced Settings", open=False):
85
+ negative_prompt = gr.Text(
86
+ label="Negative prompt",
87
+ max_lines=1,
88
+ placeholder="Enter a negative prompt",
89
+ visible=False,
90
+ )
91
+
92
+ seed = gr.Slider(
93
+ label="Seed",
94
+ minimum=0,
95
+ maximum=MAX_SEED,
96
+ step=1,
97
+ value=0,
98
+ )
99
+
100
+ randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
101
+
102
+ with gr.Row():
103
+ width = gr.Slider(
104
+ label="Width",
105
+ minimum=256,
106
+ maximum=MAX_IMAGE_SIZE,
107
+ step=32,
108
+ value=1024, # Replace with defaults that work for your model
109
+ )
110
+
111
+ height = gr.Slider(
112
+ label="Height",
113
+ minimum=256,
114
+ maximum=MAX_IMAGE_SIZE,
115
+ step=32,
116
+ value=1024, # Replace with defaults that work for your model
117
+ )
118
+
119
+ with gr.Row():
120
+ guidance_scale = gr.Slider(
121
+ label="Guidance scale",
122
+ minimum=0.0,
123
+ maximum=10.0,
124
+ step=0.1,
125
+ value=0.0, # Replace with defaults that work for your model
126
+ )
127
+
128
+ num_inference_steps = gr.Slider(
129
+ label="Number of inference steps",
130
+ minimum=1,
131
+ maximum=50,
132
+ step=1,
133
+ value=2, # Replace with defaults that work for your model
134
+ )
135
+
136
+ gr.Examples(examples=examples, inputs=[prompt])
137
+ gr.on(
138
+ triggers=[run_button.click, prompt.submit],
139
+ fn=infer,
140
+ inputs=[
141
+ prompt,
142
+ negative_prompt,
143
+ seed,
144
+ randomize_seed,
145
+ width,
146
+ height,
147
+ guidance_scale,
148
+ num_inference_steps,
149
+ ],
150
+ outputs=[result, seed],
151
+ )
152
+
153
+ if __name__ == "__main__":
154
+ demo.launch()
requirements.txt CHANGED
@@ -1,3 +1,11 @@
 
 
 
 
 
 
 
 
1
  accelerate
2
  diffusers
3
  invisible_watermark
 
1
+ gradio>=4.0.0
2
+ huggingface_hub>=0.20.0
3
+ torch
4
+ numpy
5
+ soundfile
6
+ librosa
7
+ imageio
8
+ imageio-ffmpeg
9
  accelerate
10
  diffusers
11
  invisible_watermark