import os
import json
import gradio as gr
import requests
from dotenv import load_dotenv
from datetime import datetime
from pathlib import Path
from basic_pitch.inference import predict_and_save
from basic_pitch import ICASSP_2022_MODEL_PATH
from music21 import converter
import base64
# === 1. Environment Configuration & OpenAI Client ===
# Read API credentials/endpoints from a local .env file.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# The two service URLs are optional here; the tool functions check for
# them at call time and return an error string when unset.
MUSICGEN_API_URL = os.getenv("MUSICGEN_API_URL")
VEROVIO_API_URL = os.getenv("VEROVIO_API_URL")
# NOTE(review): assert is stripped under `python -O`; an explicit raise
# would be more robust for required configuration.
assert OPENAI_API_KEY, "β Please set OPENAI_API_KEY in your .env file"
# Use OpenAI v1 client
from openai import OpenAI
openai_client = OpenAI(api_key=OPENAI_API_KEY)
# Create output directory if it doesn't exist
Path("output").mkdir(exist_ok=True)
# === 2. Tool Functions ===
def generate_music_from_hum(melody_file: str, prompt: str) -> str:
    """Generate music from a hummed melody via the external MusicGen API.

    Uploads the user's humming plus a style prompt and saves the WAV the
    service returns under ``output/``.

    Args:
        melody_file: Path to the user's hummed WAV file.
        prompt: Free-text style description for the generation.

    Returns:
        Path of the generated WAV on success, or an error string on failure.
    """
    if not MUSICGEN_API_URL:
        return "β MUSICGEN_API_URL is not configured"
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    target_wav = f"output/generated_{stamp}.wav"
    try:
        with open(melody_file, "rb") as melody:
            resp = requests.post(
                MUSICGEN_API_URL,
                files={"melody": ("hum.wav", melody, "audio/wav")},
                data={"text": prompt},
                timeout=180,
            )
        if resp.status_code != 200:
            return f"β MusicGen API error {resp.status_code}: {resp.text}"
        with open(target_wav, "wb") as sink:
            sink.write(resp.content)
        return target_wav
    except Exception as e:
        return f"β Music generation failed: {e}"
def wav_to_musicxml(wav_path: str, timestamp: str = None) -> str:
    """Transcribe a WAV file to MusicXML.

    Runs basic-pitch (WAV -> MIDI) and then music21 (MIDI -> MusicXML).

    Args:
        wav_path: Path to the input WAV file.
        timestamp: Optional timestamp string used in the output filename;
            defaults to the current local time.

    Returns:
        Path of the written ``.musicxml`` file, or an error string when
        no MIDI was produced.
    """
    ts = timestamp or datetime.now().strftime("%Y%m%d_%H%M%S")
    # Remove stale basic-pitch MIDI output from previous runs so we
    # never transcribe an old file below.
    for midi_file in Path("output").glob("*_basic_pitch.mid"):
        midi_file.unlink()
    # basic-pitch writes "<stem>_basic_pitch.mid" into the output dir.
    predict_and_save(
        audio_path_list=[wav_path],
        output_directory="output",
        save_midi=True,
        sonify_midi=False,
        save_model_outputs=False,
        save_notes=False,
        model_or_model_path=ICASSP_2022_MODEL_PATH
    )
    # Glob the SAME pattern we cleaned above ("*.mid" could match
    # unrelated leftovers), and sort for a deterministic pick.
    midi_files = sorted(Path("output").glob("*_basic_pitch.mid"))
    if not midi_files:
        return "β Failed to generate MIDI file"
    score = converter.parse(str(midi_files[0]))
    xml_path = f"output/generated_{ts}.musicxml"
    score.write("musicxml", fp=xml_path)
    return xml_path
def render_musicxml_via_verovio_api(musicxml_path: str) -> str:
    """Render a MusicXML file to an SVG preview via the Verovio API.

    Args:
        musicxml_path: Path to the MusicXML file to render.

    Returns:
        HTML embedding the rendered SVG as a base64 <img>, or an error
        string on failure.
    """
    if not VEROVIO_API_URL:
        return "β VEROVIO_API_URL is not configured"
    try:
        with open(musicxml_path, "rb") as xml_file:
            resp = requests.post(VEROVIO_API_URL, files={"file": xml_file}, timeout=120)
        if resp.status_code != 200:
            return f"β Verovio API error {resp.status_code}: {resp.text}"
        svg_markup = resp.json().get("svg", "")
        encoded = base64.b64encode(svg_markup.encode("utf-8")).decode("utf-8")
        # Embed as a data-URI image on a white card so the score is
        # readable regardless of the page theme.
        pieces = [
            '<div style="background:white;padding:10px;border-radius:8px;">',
            f'<img src="data:image/svg+xml;base64,{encoded}" style="width:100%;" />',
            '</div>',
        ]
        return "".join(pieces)
    except Exception as e:
        return f"β SVG rendering failed: {e}"
def generate_score_from_audio(wav_file: str) -> str:
    """Extract a MusicXML score from a generated music WAV file.

    Thin wrapper around :func:`wav_to_musicxml` that converts any
    exception into an error string for display.
    """
    try:
        result = wav_to_musicxml(wav_file)
    except Exception as e:
        return f"β Score extraction failed: {e}"
    return result
# Map of tool names to functions
# GPT returns one of these names in its JSON plan; handle_request looks
# the callable up here and invokes it with the plan's "args" dict, so
# the keys must match the tool list in gpt_decide_tool's prompt.
TOOL_MAP = {
    "generate_music_from_hum": generate_music_from_hum,
    "wav_to_musicxml": wav_to_musicxml,
    "render_musicxml_via_verovio_api": render_musicxml_via_verovio_api,
    "generate_score_from_audio": generate_score_from_audio,
}
# === 3. GPT Tool Selection ===
def gpt_decide_tool(message: str, audio_path: str) -> dict:
    """Ask GPT which tool to run for the user's request.

    Sends the request text and audio path to the chat-completions API
    and expects a strict-JSON plan naming the tool, its args, and an
    explanation.

    Args:
        message: The user's free-text request.
        audio_path: Filesystem path of the uploaded audio.

    Returns:
        The parsed plan dict, or ``{"error": ...}`` when the model's
        reply is not valid JSON.
    """
    system_prompt = """
You are an AI music assistant. The user uploads an audio file and provides a request.
Choose the most appropriate tool from the list below and respond with strict JSON:
- generate_music_from_hum(melody_file, prompt)
- wav_to_musicxml(wav_file)
- render_musicxml_via_verovio_api(musicxml_file)
- generate_score_from_audio(wav_file)
JSON format:
{
"tool_name": "...",
"args": { ... },
"explanation": "Reasoning explanation"
}
"""
    user_prompt = f"User request: {message}\nAudio file path: {audio_path}"
    response = openai_client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=0.2
    )
    # content may be None (e.g. refusals); normalize so the parse path
    # below handles it uniformly.
    text = response.choices[0].message.content or ""
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Narrow except: only a malformed reply should reach this branch.
        return {"error": f"Failed to parse JSON from GPT response:\n{text}"}
# === 4. Main Logic & Dynamic Output Display ===
def handle_request(audio_file, user_prompt):
    """Run one assistant turn end to end.

    Asks GPT to choose a tool for *user_prompt*, executes it against
    *audio_file*, and routes the result to the matching Gradio output
    component (audio player, SVG preview, or plain text).

    Returns:
        A 6-tuple: (status, explanation, log,
                    audio_update, svg_update, text_update).
    """
    # Input validation: both an audio file and a text request are required.
    if not audio_file or not user_prompt:
        return (
            "β Please upload an audio file and enter a request",
            "", "",
            gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
        )
    plan = gpt_decide_tool(user_prompt, audio_file)
    if "error" in plan:
        return (plan["error"], "", "") + (gr.update(visible=False),) * 3
    # .get() instead of [] so a malformed plan falls through to the
    # "Unknown tool" branch rather than raising KeyError.
    tool_name = plan.get("tool_name")
    args = plan.get("args", {})
    explanation = plan.get("explanation", "")
    log = f"π§ GPT chose: {tool_name}\nπ¦ Args: {json.dumps(args, ensure_ascii=False, indent=2)}"
    fn = TOOL_MAP.get(tool_name)
    if not fn:
        return (f"β Unknown tool: {tool_name}", explanation, log) + (gr.update(visible=False),) * 3
    output = fn(**args)
    # Determine output type and update components accordingly.
    if isinstance(output, str) and output.endswith(".wav") and os.path.isfile(output):
        return (
            "β Success", explanation, log,
            gr.update(value=output, visible=True),   # audio player
            gr.update(visible=False),                # SVG preview
            gr.update(visible=False)                 # text box
        )
    if isinstance(output, str) and output.endswith(".musicxml") and os.path.isfile(output):
        # Automatically render the MusicXML to an SVG preview.
        svg_html = render_musicxml_via_verovio_api(output)
        return (
            "β Success", explanation, log,
            gr.update(visible=False),
            gr.update(value=svg_html, visible=True),
            gr.update(visible=False)
        )
    if isinstance(output, str) and output.strip().startswith("<div"):
        # The tool already produced embeddable HTML (SVG wrapper).
        return (
            "β Success", explanation, log,
            gr.update(visible=False),
            gr.update(value=output, visible=True),
            gr.update(visible=False)
        )
    # Fallback: show whatever came back as plain text.
    # NOTE(review): tool error strings also land here with a "Success"
    # status — consider detecting the error prefix and surfacing it.
    return (
        "β Success", explanation, log,
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(value=str(output), visible=True)
    )
# === 5. Gradio Interface ===
# Build the Gradio UI: inputs on top, status/log boxes, and three
# mutually exclusive output components that handle_request toggles.
with gr.Blocks(title="πΆ Vibe Jamming β Your Music Assistant") as demo:
    gr.Markdown("## π΅ Vibe Jamming β Your Music Assistant")
    with gr.Row():
        audio_input = gr.Audio(label="Upload Audio (.wav)", type="filepath")
        text_input = gr.Textbox(label="Your Request", placeholder="e.g., Generate jazz music from my humming")
    run_button = gr.Button("π Run")
    status_box = gr.Textbox(label="Status")
    explanation_box = gr.Textbox(label="Explanation")
    log_box = gr.Textbox(label="Tool Log", lines=6)
    # Hidden until handle_request decides which result type to show.
    audio_output = gr.Audio(label="π§ Audio Output", visible=False, type="filepath")
    svg_output = gr.HTML(label="πΌοΈ Score Preview (SVG)", visible=False)
    text_output = gr.Textbox(label="π Text Output", visible=False, lines=4)
    # Output order must match handle_request's 6-tuple return.
    run_button.click(
        fn=handle_request,
        inputs=[audio_input, text_input],
        outputs=[status_box, explanation_box, log_box, audio_output, svg_output, text_output]
    )
if __name__ == "__main__":
    demo.launch()
|