kines9661 committed on
Commit
b8a90d2
·
verified ·
1 Parent(s): b2e2b9c

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +0 -4
  2. app.py +123 -291
  3. requirements.txt +1 -0
README.md CHANGED
@@ -12,7 +12,6 @@ hardware: zero-gpu
12
  license: apache-2.0
13
  ---
14
 
15
-
16
  # 🎵 ACE-Step v1.5 AI 音樂生成器
17
 
18
  基於 ACE-Step v1.5 模型的音樂生成 API(完全相容 OpenAI 格式)與 Web 使用者介面。
@@ -21,6 +20,3 @@ license: apache-2.0
21
  - `POST /v1/chat/completions` - 音樂生成
22
  - `GET /v1/models` - 獲取可用模型列表
23
  - `GET /health` - 伺服器健康檢查
24
-
25
- ## 認證設定
26
- 請在 Hugging Face Space 的 Settings -> Secrets 中新增 `API_KEY` 變數,即可啟用 Bearer Token 認證。若未設定則為開放存取。
 
12
  license: apache-2.0
13
  ---
14
 
 
15
  # 🎵 ACE-Step v1.5 AI 音樂生成器
16
 
17
  基於 ACE-Step v1.5 模型的音樂生成 API(完全相容 OpenAI 格式)與 Web 使用者介面。
 
20
  - `POST /v1/chat/completions` - 音樂生成
21
  - `GET /v1/models` - 獲取可用模型列表
22
  - `GET /health` - 伺服器健康檢查
 
 
 
app.py CHANGED
@@ -3,22 +3,31 @@ import time
3
  import base64
4
  import json
5
  import uuid
6
- import io
 
7
  import torch
8
  import spaces
9
  import gradio as gr
10
  import numpy as np
11
- import soundfile as sf
12
  from fastapi import FastAPI, HTTPException, Request
13
  from fastapi.responses import StreamingResponse, JSONResponse
14
  from fastapi.middleware.cors import CORSMiddleware
15
  from pydantic import BaseModel
16
  from typing import Optional, List, Union, Any
 
 
 
 
 
 
 
 
 
 
17
 
18
  # ─── 環境變數 ───────────────────────────────────────────
19
  API_KEY = os.environ.get("API_KEY", None)
20
  MODEL_ID = "acemusic/acestep-v15-turbo"
21
- HF_MODEL_REPO = "ACE-Step/ACE-Step-v1-3.5B"
22
 
23
  # ─── 模型全局實例 ────────────────────────────────────────
24
  pipeline = None
@@ -26,12 +35,12 @@ pipeline = None
26
  def get_pipeline():
27
  global pipeline
28
  if pipeline is None:
 
29
  from acestep.pipeline_ace_step import ACEStepPipeline
30
- # 自動從 Hugging Face Hub 下載模型到 ~/.cache
31
  pipeline = ACEStepPipeline(
32
  checkpoint_dir=None,
33
  dtype="bfloat16",
34
- device="cuda",
35
  )
36
  return pipeline
37
 
@@ -82,14 +91,11 @@ class ChatCompletionRequest(BaseModel):
82
  repainting_end: Optional[float] = None
83
  audio_cover_strength: float = 1.0
84
 
85
- # ─── 輸入解析 ────────────────────────────────────────────
86
  def parse_input(req: ChatCompletionRequest):
87
- """解析 messages,提取出音樂描述 (prompt) 和歌詞 (lyrics)"""
88
  last_user_msg = ""
89
  for msg in reversed(req.messages):
90
  if msg.role == "user":
91
- if isinstance(msg.content, str):
92
- last_user_msg = msg.content
93
  elif isinstance(msg.content, list):
94
  for item in msg.content:
95
  if isinstance(item, dict) and item.get("type") == "text":
@@ -100,345 +106,171 @@ def parse_input(req: ChatCompletionRequest):
100
  prompt = ""
101
  lyrics = req.lyrics or ""
102
 
103
- # 標籤模式:包含 <prompt>...</prompt> 或 <lyrics>...</lyrics>
104
  if "<prompt>" in last_user_msg:
105
  import re
106
  p_match = re.search(r"<prompt>(.*?)</prompt>", last_user_msg, re.DOTALL)
107
  l_match = re.search(r"<lyrics>(.*?)</lyrics>", last_user_msg, re.DOTALL)
108
  prompt = p_match.group(1).strip() if p_match else ""
109
- if not lyrics and l_match:
110
- lyrics = l_match.group(1).strip()
111
- elif lyrics:
112
- # 歌詞與描述分離模式
113
- prompt = last_user_msg
114
- elif req.sample_mode:
115
- # 讓 LLM 自動生成模式
116
  prompt = last_user_msg
117
  else:
118
- # 自動偵測模式:如果包含 [Verse] 或 [Chorus] 等結構,視為純歌詞模式
119
  if any(tag in last_user_msg for tag in ["[Verse", "[verse", "[Chorus", "[chorus", "[Bridge"]):
120
  lyrics = last_user_msg
121
  else:
122
  prompt = last_user_msg
123
-
124
  return prompt, lyrics
125
 
126
- # ─── 音頻轉 Base64 ───────────────────────────────────────
127
- def audio_to_base64(audio_data: np.ndarray, sample_rate: int, fmt: str = "mp3") -> str:
128
- """將生成的音頻陣列轉換為 Base64 Data URL"""
129
- buf = io.BytesIO()
130
- sf.write(buf, audio_data, sample_rate, format="WAV")
131
- buf.seek(0)
132
- audio_bytes = buf.read()
133
- b64 = base64.b64encode(audio_bytes).decode("utf-8")
134
- return f"data:audio/wav;base64,{b64}"
135
-
136
- # ─── 核心生成函數(使用 ZeroGPU 裝飾器)─────────────────
137
  @spaces.GPU(duration=120)
138
- def generate_music(
139
- prompt: str,
140
- lyrics: str,
141
- duration: Optional[float],
142
- bpm: Optional[int],
143
- vocal_language: str,
144
- instrumental: Optional[bool],
145
- guidance_scale: float,
146
- seed: Optional[int],
147
- task_type: str,
148
- repainting_start: float,
149
- repainting_end: Optional[float],
150
- ):
151
- pipe = get_pipeline()
152
-
153
- gen_kwargs = dict(
154
- prompt=prompt if prompt else "instrumental music",
155
- lyrics=lyrics,
156
- audio_duration=duration or 30.0,
157
- guidance_scale=guidance_scale,
158
- infer_steps=27,
159
- scheduler_type="euler",
160
- )
161
-
162
- if bpm is not None:
163
- gen_kwargs["bpm"] = bpm
164
- if vocal_language:
165
- gen_kwargs["vocal_language"] = vocal_language
166
- if instrumental is not None:
167
- gen_kwargs["instrumental"] = instrumental
168
- if seed is not None:
169
- gen_kwargs["seed"] = int(seed)
170
-
171
- result = pipe(**gen_kwargs)
172
-
173
- # 提取音頻陣列數據
174
- if hasattr(result, "audio"):
175
- audio_data = result.audio
176
- elif isinstance(result, tuple):
177
- audio_data = result[0]
178
- else:
179
- audio_data = result
180
 
181
- sample_rate = getattr(result, "sample_rate", 44100)
182
- if isinstance(audio_data, torch.Tensor):
183
- audio_data = audio_data.cpu().numpy()
184
- if audio_data.ndim > 1:
185
- audio_data = audio_data.squeeze()
186
 
187
- return audio_data, sample_rate
 
 
 
188
 
189
  # ─── FastAPI 應用程式 ────────────────────────────────────
190
- fastapi_app = FastAPI(title="ACE-Step OpenRouter API", version="1.0")
191
- fastapi_app.add_middleware(
192
- CORSMiddleware,
193
- allow_origins=["*"],
194
- allow_methods=["*"],
195
- allow_headers=["*"],
196
- )
197
 
198
  @fastapi_app.get("/health")
199
- async def health():
200
- return {"status": "ok", "service": "ACE-Step OpenRouter API", "version": "1.0"}
201
 
202
  @fastapi_app.get("/v1/models")
203
  async def list_models(request: Request):
204
  check_auth(request)
205
- return {
206
- "data": [{
207
- "id": MODEL_ID,
208
- "name": "ACE-Step v1.5",
209
- "created": 1706688000,
210
- "description": "高效能文字轉音樂生成模型",
211
- "input_modalities": ["text", "audio"],
212
- "output_modalities": ["audio", "text"],
213
- "context_length": 4096,
214
- "pricing": {"prompt": "0", "completion": "0", "request": "0"},
215
- "supported_sampling_parameters": ["temperature", "top_p"]
216
- }]
217
- }
218
 
219
  @fastapi_app.post("/v1/chat/completions")
220
  async def chat_completions(req: ChatCompletionRequest, request: Request):
221
  check_auth(request)
222
-
223
  completion_id = f"chatcmpl-{uuid.uuid4().hex[:16]}"
224
  created_ts = int(time.time())
225
 
226
  try:
227
  prompt, lyrics = parse_input(req)
228
  audio_cfg = req.audio_config or AudioConfig()
229
-
230
- seed_val = None
231
- if req.seed is not None:
232
- seed_str = str(req.seed)
233
- seed_val = int(seed_str.split(",")[0].strip())
234
-
235
- audio_data, sample_rate = generate_music(
236
- prompt=prompt,
237
- lyrics=lyrics,
238
- duration=audio_cfg.duration,
239
- bpm=audio_cfg.bpm,
240
- vocal_language=audio_cfg.vocal_language,
241
- instrumental=audio_cfg.instrumental,
242
- guidance_scale=req.guidance_scale,
243
- seed=seed_val,
244
- task_type=req.task_type,
245
- repainting_start=req.repainting_start,
246
- repainting_end=req.repainting_end,
247
- )
248
-
249
- audio_url = audio_to_base64(audio_data, sample_rate, audio_cfg.format)
250
-
251
- content_text = f"## 生成中繼資料 (Metadata)\n**風格描述:** {prompt}\n**時長:** {audio_cfg.duration or 30}秒\n"
252
- if audio_cfg.bpm:
253
- content_text += f"**BPM:** {audio_cfg.bpm}\n"
254
- if lyrics:
255
- content_text += f"\n## 歌詞\n{lyrics}"
 
 
 
256
 
257
  response = {
258
- "id": completion_id,
259
- "object": "chat.completion",
260
- "created": created_ts,
261
- "model": MODEL_ID,
262
- "choices": [{
263
- "index": 0,
264
- "message": {
265
- "role": "assistant",
266
- "content": content_text,
267
- "audio": [{
268
- "type": "audio_url",
269
- "audio_url": {"url": audio_url}
270
- }]
271
- },
272
- "finish_reason": "stop"
273
- }],
274
- "usage": {
275
- "prompt_tokens": len(prompt.split()),
276
- "completion_tokens": 100,
277
- "total_tokens": len(prompt.split()) + 100
278
- }
279
  }
280
 
281
- # 處理 SSE 串流回應
282
  if req.stream:
283
  async def event_stream():
284
- # 階段 1: 初始化
285
- init_chunk = {"id": completion_id, "object": "chat.completion.chunk", "created": created_ts, "model": MODEL_ID, "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}, "finish_reason": None}]}
286
- yield f"data: {json.dumps(init_chunk)}\n\n"
287
- # 階段 2: 傳送文字資訊
288
- content_chunk = {"id": completion_id, "object": "chat.completion.chunk", "created": created_ts, "model": MODEL_ID, "choices": [{"index": 0, "delta": {"content": content_text}, "finish_reason": None}]}
289
- yield f"data: {json.dumps(content_chunk)}\n\n"
290
- # 階段 3: 傳送音頻數據
291
- audio_chunk = {"id": completion_id, "object": "chat.completion.chunk", "created": created_ts, "model": MODEL_ID, "choices": [{"index": 0, "delta": {"audio": [{"type": "audio_url", "audio_url": {"url": audio_url}}]}, "finish_reason": None}]}
292
- yield f"data: {json.dumps(audio_chunk)}\n\n"
293
- # 階段 4: 完成標記
294
- finish_chunk = {"id": completion_id, "object": "chat.completion.chunk", "created": created_ts, "model": MODEL_ID, "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]}
295
- yield f"data: {json.dumps(finish_chunk)}\n\n"
296
  yield "data: [DONE]\n\n"
297
-
298
  return StreamingResponse(event_stream(), media_type="text/event-stream")
299
 
300
  return JSONResponse(response)
301
 
302
  except Exception as e:
303
- raise HTTPException(status_code=500, detail=f"伺服器內部錯誤: {str(e)}")
304
-
305
- # ─── Gradio Web UI 介面 ──────────────────────────────────
306
- @spaces.GPU(duration=120)
307
- def gradio_generate(prompt, lyrics, duration, bpm, vocal_language, instrumental, guidance_scale, seed):
308
- try:
309
- seed_val = int(seed) if seed else None
310
- bpm_val = int(bpm) if bpm else None
311
- dur_val = float(duration) if duration else 30.0
312
- instr = True if instrumental == "是" else (False if instrumental == "否" else None)
313
- # 對齊 API 需要的語言代碼
314
- lang_map = {"英文 (en)": "en", "中文 (zh)": "zh", "日文 (ja)": "ja", "韓文 (ko)": "ko", "自動判定": "en"}
315
- lang = lang_map.get(vocal_language, "en")
316
-
317
- audio_data, sample_rate = generate_music(
318
- prompt=prompt,
319
- lyrics=lyrics,
320
- duration=dur_val,
321
- bpm=bpm_val,
322
- vocal_language=lang,
323
- instrumental=instr,
324
- guidance_scale=float(guidance_scale),
325
- seed=seed_val,
326
- task_type="text2music",
327
- repainting_start=0.0,
328
- repainting_end=None,
329
- )
330
- return (sample_rate, audio_data)
331
- except Exception as e:
332
- raise gr.Error(f"生成失敗: {str(e)}")
333
-
334
- with gr.Blocks(
335
- title="🎵 ACE-Step v1.5 音樂生成器",
336
- theme=gr.themes.Soft(),
337
- css="""
338
- .header { text-align: center; padding: 20px; }
339
- .api-info { background: #f0f4ff; padding: 15px; border-radius: 8px; margin-top: 15px; }
340
- """
341
- ) as demo:
342
- gr.HTML("""
343
- <div class="header">
344
- <h1>🎵 ACE-Step v1.5 音樂生成器</h1>
345
- <p>強大的 AI 音樂生成 · 完全相容 OpenAI API · 由 ZeroGPU 免費算力驅動</p>
346
- </div>
347
- """)
348
 
 
 
 
349
  with gr.Tab("🎼 生成音樂"):
350
  with gr.Row():
351
  with gr.Column(scale=1):
352
- prompt_input = gr.Textbox(
353
- label="🏷️ 音樂風格描述 (Prompt)",
354
- placeholder="例如:節奏強烈的 EDM、包含重低音與合成器主旋律",
355
- lines=3
356
- )
357
- lyrics_input = gr.Textbox(
358
- label="📜 歌詞 (Lyrics,可選填)",
359
- placeholder="[Verse 1]\n這是一個美麗的早晨\n\n[Chorus]\n讓我們跟著節奏跳舞",
360
- lines=6
361
- )
362
  with gr.Row():
363
- duration_input = gr.Number(label="⏱️ 生成時長 (秒)", value=30, minimum=5, maximum=240)
364
- bpm_input = gr.Number(label="🥁 BPM 節拍數 (可選)", value=None)
365
  with gr.Row():
366
- lang_input = gr.Dropdown(
367
- label="🌍 人聲語言",
368
- choices=["英文 (en)", "中文 (zh)", "日文 (ja)", "韓文 (ko)", "自動判定"],
369
- value="英文 (en)"
370
- )
371
- instr_input = gr.Dropdown(
372
- label="🎸 純伴奏 (無人聲)",
373
- choices=["自動判定", "是", "否"],
374
- value="自動判定"
375
- )
376
  with gr.Row():
377
- cfg_input = gr.Slider(label="🎚️ 提示詞引導強度 (Guidance Scale)", minimum=1, maximum=15, value=7.0, step=0.5)
378
- seed_input = gr.Number(label="🎲 隨機種子 (Seed,可選)", value=None)
379
-
380
- generate_btn = gr.Button("🚀 開始生成音樂", variant="primary", size="lg")
381
-
382
  with gr.Column(scale=1):
383
  audio_output = gr.Audio(label="🎵 生成結果", type="numpy")
384
- gr.HTML("""
385
- <div class="api-info">
386
- <h3>📡 API 連線資訊</h3>
387
- <p>您可以使用與 OpenAI 完全相容的程式碼連接到此服務:</p>
388
- <code>POST /v1/chat/completions</code><br>
389
- <code>GET /v1/models</code><br>
390
- <code>GET /health</code>
391
- </div>
392
- """)
393
-
394
- generate_btn.click(
395
- fn=gradio_generate,
396
- inputs=[prompt_input, lyrics_input, duration_input, bpm_input,
397
- lang_input, instr_input, cfg_input, seed_input],
398
- outputs=audio_output
399
- )
400
 
401
- with gr.Tab("📖 API 使用教學"):
402
- gr.Markdown("""
403
- ## 快速整合指南
404
-
405
- 此 Space 提供完全相容 OpenAI 的 API 接口。
406
-
407
- ### cURL 請求範例
408
- ```bash
409
- curl -X POST https://你的-SPACE-URL.hf.space/v1/chat/completions \\
410
- -H "Content-Type: application/json" \\
411
- -d '{
412
- "messages": [{"role": "user", "content": "<prompt>Lo-fi hip hop beat</prompt>"}],
413
- "audio_config": {"instrumental": true, "duration": 30}
414
- }'
415
- ```
416
-
417
- ### JavaScript (前端呼叫) 範例
418
- ```javascript
419
- const response = await fetch("https://你的-SPACE-URL.hf.space/v1/chat/completions", {
420
- method: "POST",
421
- headers: { "Content-Type": "application/json" },
422
- body: JSON.stringify({
423
- messages: [{ role: "user", content: "一首關於回憶的溫柔民謠" }],
424
- sample_mode: true,
425
- audio_config: { vocal_language: "zh", duration: 30 }
426
- })
427
- });
428
- const data = await response.json();
429
-
430
- // 提取 Base64 音頻字串
431
- const audioUrl = data.choices[0].message.audio[0].audio_url.url;
432
-
433
- // 可以在網頁中直接播放
434
- const audio = new Audio(audioUrl);
435
- audio.play();
436
- ```
437
- """)
438
-
439
- # ─── 將 FastAPI 掛載至 Gradio ────────────────────────────
440
- app = gr.mount_gradio_app(fastapi_app, demo, path="/")
441
 
 
 
 
 
 
 
442
  if __name__ == "__main__":
443
  import uvicorn
444
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
3
  import base64
4
  import json
5
  import uuid
6
+ import asyncio
7
+ import traceback
8
  import torch
9
  import spaces
10
  import gradio as gr
11
  import numpy as np
 
12
  from fastapi import FastAPI, HTTPException, Request
13
  from fastapi.responses import StreamingResponse, JSONResponse
14
  from fastapi.middleware.cors import CORSMiddleware
15
  from pydantic import BaseModel
16
  from typing import Optional, List, Union, Any
17
+ from gradio_client import Client
18
+
19
+ # ─── 1. 在啟動時預先下載模型 ────────────────────────────
20
+ from huggingface_hub import snapshot_download
21
+ print("==== 正在預先下載模型到快取 (避免第一次請求超時) ====")
22
+ try:
23
+ snapshot_download("ACE-Step/ACE-Step-v1-3.5B")
24
+ print("==== 模型下載完成! ====")
25
+ except Exception as e:
26
+ print(f"模型下載失敗: {e}")
27
 
28
  # ─── 環境變數 ───────────────────────────────────────────
29
  API_KEY = os.environ.get("API_KEY", None)
30
  MODEL_ID = "acemusic/acestep-v15-turbo"
 
31
 
32
  # ─── 模型全局實例 ────────────────────────────────────────
33
  pipeline = None
 
35
  def get_pipeline():
36
  global pipeline
37
  if pipeline is None:
38
+ print("初始化 ACE-Step Pipeline...")
39
  from acestep.pipeline_ace_step import ACEStepPipeline
 
40
  pipeline = ACEStepPipeline(
41
  checkpoint_dir=None,
42
  dtype="bfloat16",
43
+ device="cuda", # 在 ZeroGPU 的 wrapper 內呼叫時才安全
44
  )
45
  return pipeline
46
 
 
91
  repainting_end: Optional[float] = None
92
  audio_cover_strength: float = 1.0
93
 
 
94
  def parse_input(req: ChatCompletionRequest):
 
95
  last_user_msg = ""
96
  for msg in reversed(req.messages):
97
  if msg.role == "user":
98
+ if isinstance(msg.content, str): last_user_msg = msg.content
 
99
  elif isinstance(msg.content, list):
100
  for item in msg.content:
101
  if isinstance(item, dict) and item.get("type") == "text":
 
106
  prompt = ""
107
  lyrics = req.lyrics or ""
108
 
 
109
  if "<prompt>" in last_user_msg:
110
  import re
111
  p_match = re.search(r"<prompt>(.*?)</prompt>", last_user_msg, re.DOTALL)
112
  l_match = re.search(r"<lyrics>(.*?)</lyrics>", last_user_msg, re.DOTALL)
113
  prompt = p_match.group(1).strip() if p_match else ""
114
+ if not lyrics and l_match: lyrics = l_match.group(1).strip()
115
+ elif lyrics or req.sample_mode:
 
 
 
 
 
116
  prompt = last_user_msg
117
  else:
 
118
  if any(tag in last_user_msg for tag in ["[Verse", "[verse", "[Chorus", "[chorus", "[Bridge"]):
119
  lyrics = last_user_msg
120
  else:
121
  prompt = last_user_msg
 
122
  return prompt, lyrics
123
 
124
+ # ─── Gradio 核心生成函數 (支援 ZeroGPU) ───────────────────
 
 
 
 
 
 
 
 
 
 
125
  @spaces.GPU(duration=120)
126
+ def gradio_generate(prompt, lyrics, duration, bpm, vocal_language, instrumental, guidance_scale, seed):
127
+ try:
128
+ seed_val = int(seed) if seed is not None and str(seed).strip() != "" else None
129
+ bpm_val = int(bpm) if bpm is not None and str(bpm).strip() != "" else None
130
+ dur_val = float(duration) if duration is not None else 30.0
131
+
132
+ lang_map = {"英文 (en)": "en", "中文 (zh)": "zh", "日文 (ja)": "ja", "韓文 (ko)": "ko", "自動判定": "en"}
133
+ lang = lang_map.get(vocal_language, "en") if vocal_language else "en"
134
+ instr = True if instrumental in ["是", "Yes", True] else (False if instrumental in ["否", "No", False] else None)
135
+
136
+ pipe = get_pipeline()
137
+
138
+ gen_kwargs = dict(
139
+ prompt=prompt if prompt else "instrumental music",
140
+ lyrics=lyrics,
141
+ audio_duration=dur_val,
142
+ guidance_scale=float(guidance_scale) if guidance_scale else 7.0,
143
+ infer_steps=27,
144
+ scheduler_type="euler",
145
+ )
146
+ if bpm_val: gen_kwargs["bpm"] = bpm_val
147
+ if lang: gen_kwargs["vocal_language"] = lang
148
+ if instr is not None: gen_kwargs["instrumental"] = instr
149
+ if seed_val is not None: gen_kwargs["seed"] = seed_val
150
+
151
+ result = pipe(**gen_kwargs)
152
+
153
+ if hasattr(result, "audio"): audio_data = result.audio
154
+ elif isinstance(result, tuple): audio_data = result[0]
155
+ else: audio_data = result
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
+ sample_rate = getattr(result, "sample_rate", 44100)
158
+ if isinstance(audio_data, torch.Tensor): audio_data = audio_data.cpu().numpy()
159
+ if audio_data.ndim > 1: audio_data = audio_data.squeeze()
 
 
160
 
161
+ return (sample_rate, audio_data)
162
+ except Exception as e:
163
+ traceback.print_exc()
164
+ raise gr.Error(f"生成失敗: {str(e)}")
165
 
166
  # ─── FastAPI 應用程式 ────────────────────────────────────
167
+ fastapi_app = FastAPI(title="ACE-Step OpenRouter API")
168
+ fastapi_app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
 
 
 
 
 
169
 
170
  @fastapi_app.get("/health")
171
+ async def health(): return {"status": "ok"}
 
172
 
173
  @fastapi_app.get("/v1/models")
174
  async def list_models(request: Request):
175
  check_auth(request)
176
+ return {"data": [{"id": MODEL_ID, "name": "ACE-Step v1.5", "created": 1706688000, "pricing": {"prompt": "0", "completion": "0", "request": "0"}}]}
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
  @fastapi_app.post("/v1/chat/completions")
179
  async def chat_completions(req: ChatCompletionRequest, request: Request):
180
  check_auth(request)
 
181
  completion_id = f"chatcmpl-{uuid.uuid4().hex[:16]}"
182
  created_ts = int(time.time())
183
 
184
  try:
185
  prompt, lyrics = parse_input(req)
186
  audio_cfg = req.audio_config or AudioConfig()
187
+ seed_val = int(str(req.seed).split(",")[0].strip()) if req.seed is not None else None
188
+
189
+ # 透過 Gradio Client 橋接到 ZeroGPU 佇列
190
+ def _call_gradio():
191
+ client = Client("http://127.0.0.1:7860/")
192
+ return client.predict(
193
+ prompt=prompt,
194
+ lyrics=lyrics,
195
+ duration=audio_cfg.duration,
196
+ bpm=audio_cfg.bpm,
197
+ vocal_language=audio_cfg.vocal_language,
198
+ instrumental=audio_cfg.instrumental,
199
+ guidance_scale=req.guidance_scale,
200
+ seed=seed_val,
201
+ api_name="/generate_music"
202
+ )
203
+
204
+ # 非同步執行以防止 Uvicorn 死鎖
205
+ result_audio_path = await asyncio.to_thread(_call_gradio)
206
+
207
+ # 讀取生成的 .wav 轉換為 base64
208
+ with open(result_audio_path, "rb") as f:
209
+ audio_bytes = f.read()
210
+ b64 = base64.b64encode(audio_bytes).decode("utf-8")
211
+ audio_url = f"data:audio/wav;base64,{b64}"
212
+ try: os.remove(result_audio_path)
213
+ except: pass
214
+
215
+ content_text = f"## 生成中繼資料\n**風格:** {prompt}\n**時長:** {audio_cfg.duration or 30}s\n"
216
+ if lyrics: content_text += f"\n## 歌詞\n{lyrics}"
217
 
218
  response = {
219
+ "id": completion_id, "object": "chat.completion", "created": created_ts, "model": MODEL_ID,
220
+ "choices": [{"index": 0, "message": {"role": "assistant", "content": content_text, "audio": [{"type": "audio_url", "audio_url": {"url": audio_url}}]}, "finish_reason": "stop"}],
221
+ "usage": {"prompt_tokens": len(prompt.split()), "completion_tokens": 100, "total_tokens": len(prompt.split()) + 100}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  }
223
 
 
224
  if req.stream:
225
  async def event_stream():
226
+ chunks = [
227
+ {"delta": {"role": "assistant", "content": ""}},
228
+ {"delta": {"content": content_text}},
229
+ {"delta": {"audio": [{"type": "audio_url", "audio_url": {"url": audio_url}}]}},
230
+ {"delta": {}, "finish_reason": "stop"}
231
+ ]
232
+ for chunk in chunks:
233
+ chunk_data = {"id": completion_id, "object": "chat.completion.chunk", "created": created_ts, "model": MODEL_ID, "choices": [{"index": 0, **chunk}]}
234
+ yield f"data: {json.dumps(chunk_data)}\n\n"
 
 
 
235
  yield "data: [DONE]\n\n"
 
236
  return StreamingResponse(event_stream(), media_type="text/event-stream")
237
 
238
  return JSONResponse(response)
239
 
240
  except Exception as e:
241
+ traceback.print_exc()
242
+ raise HTTPException(status_code=500, detail=f"內部伺服器錯誤: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
+ # ─── Gradio Web UI ───────────────────────────────────────
245
+ with gr.Blocks(title="🎵 ACE-Step v1.5 音樂生成器", theme=gr.themes.Soft()) as demo:
246
+ gr.HTML("<h1 style='text-align: center;'>🎵 ACE-Step v1.5 音樂生成器</h1>")
247
  with gr.Tab("🎼 生成音樂"):
248
  with gr.Row():
249
  with gr.Column(scale=1):
250
+ prompt_input = gr.Textbox(label="🏷️ 音樂風格描述 (Prompt)")
251
+ lyrics_input = gr.Textbox(label="📜 歌詞 (Lyrics,可選填)", lines=4)
 
 
 
 
 
 
 
 
252
  with gr.Row():
253
+ duration_input = gr.Number(label="⏱️ 時長(秒)", value=30)
254
+ bpm_input = gr.Number(label="🥁 BPM", value=None)
255
  with gr.Row():
256
+ lang_input = gr.Dropdown(label="🌍 語言", choices=["英文 (en)", "中文 (zh)", "日文 (ja)", "韓文 (ko)"], value="英文 (en)")
257
+ instr_input = gr.Dropdown(label="🎸 純伴奏", choices=["自動判定", "是", "否"], value="自動判定")
 
 
 
 
 
 
 
 
258
  with gr.Row():
259
+ cfg_input = gr.Slider(label="🎚️ Guidance Scale", minimum=1, maximum=15, value=7.0)
260
+ seed_input = gr.Number(label="🎲 Seed", value=None)
261
+ generate_btn = gr.Button("🚀 開始生成音樂", variant="primary")
 
 
262
  with gr.Column(scale=1):
263
  audio_output = gr.Audio(label="🎵 生成結果", type="numpy")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
 
265
+ # UI 按鈕綁定
266
+ generate_btn.click(fn=gradio_generate, inputs=[prompt_input, lyrics_input, duration_input, bpm_input, lang_input, instr_input, cfg_input, seed_input], outputs=audio_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
268
+ # API 專用隱藏按鈕 (供 FastAPI 呼叫)
269
+ api_btn = gr.Button("API", visible=False)
270
+ api_btn.click(fn=gradio_generate, inputs=[prompt_input, lyrics_input, duration_input, bpm_input, lang_input, instr_input, cfg_input, seed_input], outputs=audio_output, api_name="generate_music")
271
+
272
+ # ─── 掛載並執行 ──────────────────────────────────────────
273
+ app = gr.mount_gradio_app(fastapi_app, demo, path="/")
274
  if __name__ == "__main__":
275
  import uvicorn
276
  uvicorn.run(app, host="0.0.0.0", port=7860)
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  gradio>=4.44.0
 
2
  fastapi>=0.111.0
3
  uvicorn>=0.30.0
4
  httpx>=0.27.0
 
1
  gradio>=4.44.0
2
+ gradio_client>=1.3.0
3
  fastapi>=0.111.0
4
  uvicorn>=0.30.0
5
  httpx>=0.27.0