ken123777 committed on
Commit
7fc5540
·
verified ·
1 Parent(s): 7b7fddd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -50
app.py CHANGED
@@ -4,9 +4,8 @@ import torch
4
  import librosa
5
  import numpy as np
6
  import scipy.io.wavfile
7
- import os
8
 
9
- # 1. 모델 설정
10
  MODEL_ID = "facebook/musicgen-melody"
11
  print(f"Loading Model: {MODEL_ID}...")
12
 
@@ -21,66 +20,47 @@ model.to(device)
21
  print(f"Model loaded on {device}")
22
 
23
  def generate(text, audio_path, duration, guidance_scale, top_k):
24
- print(f"\n--- [DEBUG] Generate Start ---")
25
-
26
  # ์˜ค๋””์˜ค ๋กœ๋“œ (Librosa ์‚ฌ์šฉ)
27
- # Processor์— ํŒŒ์ผ ๊ฒฝ๋กœ๋ฅผ ๋ฐ”๋กœ ์ค˜๋„ ๋˜์ง€๋งŒ, Librosa๋กœ ์ฝ์–ด์„œ ๋„˜๊ธฐ๋Š” ๊ฒŒ ๋” ์•ˆ์ „ํ•จ (ํฌ๋งท ์ด์Šˆ ๋ฐฉ์ง€)
28
  audio = None
29
  sampling_rate = 32000 # MusicGen 기본 SR
30
 
31
  if audio_path:
32
  try:
33
- # 1. 로드
34
  y, sr = librosa.load(audio_path, sr=sampling_rate, mono=True)
35
- print(f"[DEBUG] Audio Loaded: Shape={y.shape}, SR={sr}")
36
  audio = y
37
  except Exception as e:
38
- print(f"[ERROR] Audio Load Failed: {e}")
 
39
 
40
- # 2. ํ†ตํ•ฉ ์ „์ฒ˜๋ฆฌ (Processor์—๊ฒŒ ์œ„์ž„)
41
- # 4.40.2์—์„œ๋Š” text์™€ audio๋ฅผ ๋™์‹œ์— ๋„ฃ์–ด๋„ ์ž˜ ์ฒ˜๋ฆฌํ•ฉ๋‹ˆ๋‹ค.
42
- try:
43
- if audio is not None:
44
- print("[DEBUG] Processing Text + Audio...")
45
- inputs = processor(
46
- text=[text],
47
- audio=[audio], # 리스트로 감싸서 전달
48
- sampling_rate=sampling_rate,
49
- padding=True,
50
- return_tensors="pt",
51
- ).to(device)
52
- print(f"[DEBUG] Inputs keys: {inputs.keys()}") # input_ids, attention_mask, input_features๊ฐ€ ์žˆ์–ด์•ผ ํ•จ
53
- else:
54
- print("[DEBUG] Processing Text Only...")
55
- inputs = processor(
56
- text=[text],
57
- padding=True,
58
- return_tensors="pt",
59
- ).to(device)
60
-
61
- except Exception as e:
62
- print(f"[FATAL ERROR] Processor Failed: {e}")
63
- raise e
64
 
65
  max_new_tokens = int(duration * 50)
66
 
67
- # 3. 생성
68
- try:
69
- print("[DEBUG] Generating...")
70
- # inputs ๋”•์…”๋„ˆ๋ฆฌ๋ฅผ ํ†ต์งธ๋กœ ๋„˜๊น€ (**inputs)
71
- # ์ด์ œ ์ธ์ž ์ด๋ฆ„ ๊ฑฑ์ •ํ•  ํ•„์š” ์—†์Œ (Processor๊ฐ€ ์•Œ์•„์„œ ๋งž์ถฐ์คŒ)
72
- audio_values = model.generate(
73
- **inputs,
74
- max_new_tokens=max_new_tokens,
75
- guidance_scale=guidance_scale,
76
- do_sample=True,
77
- top_k=top_k,
78
- )
79
- except Exception as e:
80
- print(f"[FATAL ERROR] Generation Failed: {e}")
81
- raise e
82
 
83
- # 4. 저장
84
  sampling_rate = model.config.audio_encoder.sampling_rate
85
  audio_data = audio_values[0, 0].cpu().numpy()
86
 
@@ -91,10 +71,9 @@ def generate(text, audio_path, duration, guidance_scale, top_k):
91
  output_path = "output.wav"
92
  scipy.io.wavfile.write(output_path, rate=sampling_rate, data=audio_data)
93
 
94
- print(f"--- [DEBUG] Generate Complete ---")
95
  return output_path
96
 
97
- # UI 구성 (그대로)
98
  with gr.Blocks(title="나만의 MusicGen 서버") as demo:
99
  gr.Markdown("# 🎵 나만의 AI 작곡가 (MusicGen - Melody Mode)")
100
 
 
4
  import librosa
5
  import numpy as np
6
  import scipy.io.wavfile
 
7
 
8
+ # 모델 설정
9
  MODEL_ID = "facebook/musicgen-melody"
10
  print(f"Loading Model: {MODEL_ID}...")
11
 
 
20
  print(f"Model loaded on {device}")
21
 
22
  def generate(text, audio_path, duration, guidance_scale, top_k):
 
 
23
  # 오디오 로드 (Librosa 사용)
 
24
  audio = None
25
  sampling_rate = 32000 # MusicGen 기본 SR
26
 
27
  if audio_path:
28
  try:
29
+ # 로드
30
  y, sr = librosa.load(audio_path, sr=sampling_rate, mono=True)
 
31
  audio = y
32
  except Exception as e:
33
+ print(f"Audio Load Failed: {e}")
34
+ pass
35
 
36
+ # ํ†ตํ•ฉ ์ „์ฒ˜๋ฆฌ (Processor์—๊ฒŒ ์œ„์ž„)
37
+ if audio is not None:
38
+ inputs = processor(
39
+ text=[text],
40
+ audio=[audio],
41
+ sampling_rate=sampling_rate,
42
+ padding=True,
43
+ return_tensors="pt",
44
+ ).to(device)
45
+ else:
46
+ inputs = processor(
47
+ text=[text],
48
+ padding=True,
49
+ return_tensors="pt",
50
+ ).to(device)
 
 
 
 
 
 
 
 
 
51
 
52
  max_new_tokens = int(duration * 50)
53
 
54
+ # 생성
55
+ audio_values = model.generate(
56
+ **inputs,
57
+ max_new_tokens=max_new_tokens,
58
+ guidance_scale=guidance_scale,
59
+ do_sample=True,
60
+ top_k=top_k,
61
+ )
 
 
 
 
 
 
 
62
 
63
+ # 저장
64
  sampling_rate = model.config.audio_encoder.sampling_rate
65
  audio_data = audio_values[0, 0].cpu().numpy()
66
 
 
71
  output_path = "output.wav"
72
  scipy.io.wavfile.write(output_path, rate=sampling_rate, data=audio_data)
73
 
 
74
  return output_path
75
 
76
+ # UI 구성
77
  with gr.Blocks(title="나만의 MusicGen 서버") as demo:
78
  gr.Markdown("# 🎵 나만의 AI 작곡가 (MusicGen - Melody Mode)")
79