File size: 8,499 Bytes
6706ed2
57f494a
54018eb
8426b78
 
 
 
 
 
 
 
 
e903099
8426b78
64196f7
e903099
 
 
64196f7
e903099
64196f7
 
8426b78
e903099
64196f7
8426b78
e903099
8426b78
 
e903099
 
 
 
8426b78
e903099
 
 
8426b78
 
 
e903099
86eddd4
e903099
 
 
 
 
8426b78
e903099
8426b78
 
e903099
 
 
8426b78
e903099
 
 
 
8426b78
 
 
64196f7
 
 
 
8426b78
 
e903099
 
 
 
 
8426b78
 
 
 
e903099
 
 
 
8426b78
e903099
8426b78
 
86eddd4
e903099
 
 
 
64196f7
 
e903099
64196f7
 
8426b78
e903099
8114539
8426b78
e903099
 
 
8426b78
 
 
e903099
8426b78
e903099
8426b78
 
e903099
8426b78
e903099
8426b78
57f494a
e903099
285657c
57f494a
e903099
 
 
 
64196f7
 
 
 
 
e903099
64196f7
 
 
e903099
64196f7
57f494a
e903099
 
8426b78
64196f7
e903099
 
64196f7
8426b78
 
e903099
57f494a
8426b78
64196f7
e903099
8426b78
e903099
57f494a
05a84d9
e903099
57f494a
8426b78
e903099
8426b78
 
 
57f494a
 
64196f7
8426b78
e903099
 
 
 
8426b78
e903099
8426b78
e903099
8426b78
e903099
8426b78
e903099
 
64196f7
e903099
 
 
 
64196f7
e903099
 
 
64196f7
e903099
64196f7
 
 
 
e903099
 
64196f7
e903099
64196f7
e903099
64196f7
 
e903099
64196f7
e903099
64196f7
 
e903099
64196f7
 
e903099
8426b78
83ada9b
 
93db536
54018eb
e903099
 
4b66c9b
e903099
 
 
 
4b66c9b
e903099
 
 
05a84d9
e903099
05a84d9
e903099
 
54018eb
285657c
64196f7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
import os
import json
import gradio as gr
import requests
from dotenv import load_dotenv
from datetime import datetime
from pathlib import Path
from basic_pitch.inference import predict_and_save
from basic_pitch import ICASSP_2022_MODEL_PATH
from music21 import converter
import base64

# === 1. Environment Configuration & OpenAI Client ===
# Read settings via python-dotenv, then look them up in the process environment.
load_dotenv()
OPENAI_API_KEY    = os.getenv("OPENAI_API_KEY")
MUSICGEN_API_URL  = os.getenv("MUSICGEN_API_URL")
VEROVIO_API_URL   = os.getenv("VEROVIO_API_URL")
# Explicit check rather than `assert`: assert statements are removed when
# Python runs with -O, which would let a missing key slip through to the
# client constructor below.
if not OPENAI_API_KEY:
    raise RuntimeError("❌ Please set OPENAI_API_KEY in your .env file")

# Use OpenAI v1 client
from openai import OpenAI
openai_client = OpenAI(api_key=OPENAI_API_KEY)

# Create output directory if it doesn't exist; the tool functions below write
# their generated files under "output/".
Path("output").mkdir(exist_ok=True)

# === 2. Tool Functions ===

def generate_music_from_hum(melody_file: str, prompt: str) -> str:
    """
    Send a hummed melody plus a style prompt to the external MusicGen API.

    The audio is uploaded as the multipart field ``melody`` and the prompt as
    the form field ``text``. On HTTP 200 the response body is written to
    ``output/generated_<timestamp>.wav`` and that path is returned. Every
    failure (missing configuration, non-200 reply, any raised exception) is
    reported as an error message string instead of being raised.
    """
    if not MUSICGEN_API_URL:
        return "❌ MUSICGEN_API_URL is not configured"

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    target = f"output/generated_{stamp}.wav"
    try:
        # The upload handle only needs to stay open for the duration of the POST.
        with open(melody_file, "rb") as hum:
            reply = requests.post(
                MUSICGEN_API_URL,
                files={"melody": ("hum.wav", hum, "audio/wav")},
                data={"text": prompt},
                timeout=180,
            )
        if reply.status_code == 200:
            Path(target).write_bytes(reply.content)
            return target
        return f"❌ MusicGen API error {reply.status_code}: {reply.text}"
    except Exception as exc:
        return f"❌ Music generation failed: {exc}"

def wav_to_musicxml(wav_path: str, timestamp: str=None) -> str:
    """
    Convert a WAV audio file to MusicXML using basic-pitch for pitch detection.

    basic-pitch transcribes the audio to a MIDI file inside ``output/``;
    music21 then parses that MIDI and writes
    ``output/generated_<timestamp>.musicxml``.

    Args:
        wav_path: Path of the audio file to transcribe.
        timestamp: Optional suffix for the output file name; defaults to the
            current time formatted as ``YYYYMMDD_HHMMSS``.

    Returns:
        Path of the written MusicXML file, or an error message string when no
        MIDI file was produced. Exceptions raised by basic-pitch or music21
        propagate to the caller.
    """
    ts = timestamp or datetime.now().strftime("%Y%m%d_%H%M%S")
    out_dir = Path("output")
    midi_pattern = "*_basic_pitch.mid"

    # Remove any old MIDI files from earlier transcriptions.
    # NOTE(review): presumably also needed because basic-pitch will not
    # overwrite an existing output file - confirm against its docs.
    for stale_midi in out_dir.glob(midi_pattern):
        stale_midi.unlink()

    # Generate MIDI from the WAV
    predict_and_save(
        audio_path_list=[wav_path],
        output_directory="output",
        save_midi=True,
        sonify_midi=False,
        save_model_outputs=False,
        save_notes=False,
        model_or_model_path=ICASSP_2022_MODEL_PATH
    )

    # Look up the result with the SAME pattern that was just cleaned, so an
    # unrelated or stale "*.mid" lying in output/ can never be picked up in
    # place of the fresh transcription. Sorting makes the choice deterministic.
    midi_files = sorted(out_dir.glob(midi_pattern))
    if not midi_files:
        return "❌ Failed to generate MIDI file"

    score = converter.parse(str(midi_files[0]))
    xml_path = f"output/generated_{ts}.musicxml"
    score.write("musicxml", fp=xml_path)
    return xml_path

def render_musicxml_via_verovio_api(musicxml_path: str) -> str:
    """
    Render a MusicXML file to an SVG preview using the Verovio API.

    The file is uploaded as the multipart field ``file``; the JSON reply is
    read for an ``"svg"`` entry.

    Args:
        musicxml_path: Path of the MusicXML file to upload.

    Returns:
        An HTML ``<div>`` embedding the SVG as a base64 data URI, or an error
        message string when the API is not configured, replies with a non-200
        status, returns no SVG, or any exception occurs. Nothing is raised.
    """
    if not VEROVIO_API_URL:
        return "❌ VEROVIO_API_URL is not configured"
    try:
        with open(musicxml_path, "rb") as f:
            response = requests.post(VEROVIO_API_URL, files={"file": f}, timeout=120)
        if response.status_code != 200:
            return f"❌ Verovio API error {response.status_code}: {response.text}"
        svg = response.json().get("svg", "")
        # A 200 reply without SVG data would otherwise be embedded as a blank
        # image and look like a successful render.
        if not svg:
            return "❌ Verovio API response did not contain an SVG"
        b64_svg = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
        return (
            '<div style="background:white;padding:10px;border-radius:8px;">'
            f'<img src="data:image/svg+xml;base64,{b64_svg}" style="width:100%;" />'
            '</div>'
        )
    except Exception as e:
        return f"❌ SVG rendering failed: {e}"

def generate_score_from_audio(wav_file: str) -> str:
    """
    Produce a MusicXML score for a WAV file via ``wav_to_musicxml``.

    Any exception raised during conversion is turned into an error message
    string, so this wrapper itself never raises.
    """
    try:
        score_path = wav_to_musicxml(wav_file)
    except Exception as exc:
        return f"❌ Score extraction failed: {exc}"
    return score_path

# Tool registry: every callable is exposed under its own function name, which
# is the identifier the planner is expected to return as "tool_name".
TOOL_MAP = {
    tool.__name__: tool
    for tool in (
        generate_music_from_hum,
        wav_to_musicxml,
        render_musicxml_via_verovio_api,
        generate_score_from_audio,
    )
}

# === 3. GPT Tool Selection ===

def gpt_decide_tool(message: str, audio_path: str) -> dict:
    """
    Ask the chat model which tool to run for the user's request.

    Args:
        message: The user's free-text request.
        audio_path: Path of the uploaded audio file, passed to the model so it
            can fill in the tool arguments.

    Returns:
        On success, the parsed plan: a dict with ``"tool_name"`` and, as
        requested from the model, ``"args"`` and ``"explanation"``.
        On any failure (API error, unparsable reply, reply that is not an
        object or names no tool), a dict with a single ``"error"`` message.
        Nothing is raised.
    """
    # The parameter names listed here must match the real function signatures:
    # the caller invokes the chosen tool with **args, so a mismatched key
    # would fail with "unexpected keyword argument".
    system_prompt = """
You are an AI music assistant. The user uploads an audio file and provides a request.
Choose the most appropriate tool from the list below and respond with strict JSON:

- generate_music_from_hum(melody_file, prompt)
- wav_to_musicxml(wav_path)
- render_musicxml_via_verovio_api(musicxml_path)
- generate_score_from_audio(wav_file)

The keys of "args" must be exactly the parameter names shown above.

JSON format:
{
  "tool_name": "...",
  "args": { ... },
  "explanation": "Reasoning explanation"
}
"""
    user_prompt = f"User request: {message}\nAudio file path: {audio_path}"
    try:
        response = openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user",   "content": user_prompt}
            ],
            temperature=0.2
        )
        text = response.choices[0].message.content or ""
    except Exception as e:
        # Network/auth/quota problems are reported like any other planning error.
        return {"error": f"OpenAI request failed: {e}"}

    # Models sometimes wrap the JSON in a Markdown code fence; unwrap it
    # before parsing instead of rejecting an otherwise valid answer.
    payload = text.strip()
    if payload.startswith("```"):
        payload = payload.strip("`").strip()
        if payload[:4].lower() == "json":
            payload = payload[4:]

    try:
        plan = json.loads(payload)
    except Exception:
        return {"error": f"Failed to parse JSON from GPT response:\n{text}"}

    if not isinstance(plan, dict) or not plan.get("tool_name"):
        return {"error": f"GPT response does not name a tool:\n{text}"}
    return plan

# === 4. Main Logic & Dynamic Output Display ===

# Prefixes that mark a tool's return value as an error message. The first is
# the marker exactly as the tool functions in this file spell it; the second
# is the cross-mark character U+274C, accepted as well for robustness.
_ERROR_MARKS = ("❌", "\u274c")


def _ui_result(status, explanation="", log="", *, audio=None, html=None, text=None):
    """Build the 6-tuple for the UI; a pane is shown only if it gets a value."""
    def pane(value):
        if value is None:
            return gr.update(visible=False)
        return gr.update(value=value, visible=True)

    return (status, explanation, log, pane(audio), pane(html), pane(text))


def handle_request(audio_file, user_prompt):
    """
    Run one user request end to end: plan with GPT, call the tool, show output.

    Args:
        audio_file: Path of the uploaded audio file (falsy when none given).
        user_prompt: The user's free-text request.

    Returns:
        A 6-tuple ``(status, explanation, log, audio_update, svg_update,
        text_update)`` in the order of the click handler's outputs. Exactly
        one of the three output panes is made visible on success; on failure
        the status carries the error message. Nothing is raised.
    """
    # Input validation
    if not audio_file or not user_prompt:
        return _ui_result("❗ Please upload an audio file and enter a request")

    try:
        plan = gpt_decide_tool(user_prompt, audio_file)
    except Exception as e:
        return _ui_result(f"❌ Tool selection failed: {e}")
    if not isinstance(plan, dict):
        return _ui_result(f"❌ Unexpected plan from GPT: {plan!r}")
    if "error" in plan:
        return _ui_result(plan["error"])

    # .get() instead of indexing: the plan comes from a model and may omit keys.
    tool_name   = plan.get("tool_name")
    args        = plan.get("args") or {}
    explanation = plan.get("explanation", "")
    log         = f"🧠 GPT chose: {tool_name}\nπŸ“¦ Args: {json.dumps(args, ensure_ascii=False, indent=2)}"

    fn = TOOL_MAP.get(tool_name) if isinstance(tool_name, str) else None
    if not fn:
        return _ui_result(f"❌ Unknown tool: {tool_name}", explanation, log)
    if not isinstance(args, dict):
        return _ui_result(f"❌ Tool arguments must be a JSON object, got: {args!r}", explanation, log)

    # The argument names are chosen by the model, and not every tool catches
    # its own exceptions, so guard the call itself.
    try:
        output = fn(**args)
    except Exception as e:
        return _ui_result(f"❌ Tool {tool_name} failed: {e}", explanation, log)

    # Determine output type and update components accordingly
    if isinstance(output, str):
        # Tools report failures as marked strings; surface them as failures
        # rather than under a success status.
        if output.lstrip().startswith(_ERROR_MARKS):
            return _ui_result(output, explanation, log)

        if output.endswith(".wav") and os.path.isfile(output):
            return _ui_result("βœ… Success", explanation, log, audio=output)

        if output.endswith(".musicxml") and os.path.isfile(output):
            # Automatically render MusicXML to SVG
            svg_html = render_musicxml_via_verovio_api(output)
            if svg_html.strip().startswith("<div"):
                return _ui_result("βœ… Success", explanation, log, html=svg_html)
            # Rendering failed: report why, but still show where the score
            # file was written.
            return _ui_result(svg_html, explanation, log, text=output)

        if output.strip().startswith("<div"):
            # Already HTML SVG
            return _ui_result("βœ… Success", explanation, log, html=output)

    # Otherwise treat as plain text
    return _ui_result("βœ… Success", explanation, log, text=str(output))

# === 5. Gradio Interface ===
# Builds the Blocks UI: an audio upload and a request box feed handle_request,
# whose six return values fill the status/explanation/log boxes and toggle the
# three output panes.

with gr.Blocks(title="🎢 Vibe Jamming – Your Music Assistant") as demo:
    gr.Markdown("## 🎡 Vibe Jamming – Your Music Assistant")

    # Inputs, side by side.
    with gr.Row():
        audio_input = gr.Audio(label="Upload Audio (.wav)", type="filepath")
        text_input  = gr.Textbox(label="Your Request", placeholder="e.g., Generate jazz music from my humming")

    run_button = gr.Button("πŸš€ Run")
    status_box  = gr.Textbox(label="Status")
    explanation_box = gr.Textbox(label="Explanation")
    log_box     = gr.Textbox(label="Tool Log", lines=6)

    # Output panes start hidden; handle_request makes at most one of them
    # visible through the gr.update(...) values it returns.
    audio_output = gr.Audio(label="🎧 Audio Output", visible=False, type="filepath")
    svg_output   = gr.HTML(label="πŸ–ΌοΈ Score Preview (SVG)", visible=False)
    text_output  = gr.Textbox(label="πŸ“„ Text Output", visible=False, lines=4)

    # The order of `outputs` must match the order of the tuple returned by
    # handle_request: status, explanation, log, audio, svg, text.
    run_button.click(
        fn=handle_request,
        inputs=[audio_input, text_input],
        outputs=[status_box, explanation_box, log_box, audio_output, svg_output, text_output]
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()