Spaces:
Update app.py

app.py CHANGED
@@ -1,58 +1,71 @@
-import gradio as gr
-from transformers import pipeline
-import torch
-from transformers import AutoProcessor, MusicgenForConditionalGeneration
-…
-# Gradio
-…
+import gradio as gr
+from transformers import pipeline
+import torch
+from transformers import AutoProcessor, MusicgenForConditionalGeneration
+import numpy as np  # Import numpy
+
+# Load emotion classifier
+emotion_classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", return_all_scores=True)
+
+# Load music generator (small for CPU)
+music_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
+processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
+
+# Map emotion to style/genre prompts
+EMOTION_TO_MUSIC = {
+    "joy": "happy upbeat piano melody",
+    "anger": "intense aggressive drums",
+    "sadness": "slow emotional violin",
+    "fear": "dark ambient synth",
+    "love": "soft romantic acoustic guitar",
+    "surprise": "quirky playful tune",
+    "neutral": "chill background lofi beat"
+}
+
+# Main generation function
+def generate_music(user_input):
+    # Step 1: Detect emotion
+    emotion_scores = emotion_classifier(user_input)[0]
+    top_emotion = max(emotion_scores, key=lambda x: x["score"])["label"]
+
+    # Step 2: Build the music prompt (fall back to a generic one for unmapped labels)
+    music_prompt = EMOTION_TO_MUSIC.get(top_emotion.lower(), "ambient melody")
+
+    # Step 3: Generate music (~20 s: MusicGen decodes roughly 50 tokens per second)
+    inputs = processor(text=[music_prompt], return_tensors="pt")
+    audio_values = music_model.generate(**inputs, max_new_tokens=1024)
+
+    # generate() returns a (batch, channels, samples) tensor; take the mono
+    # waveform of the first batch item as a 1-D numpy array
+    audio_array = audio_values[0, 0].cpu().numpy()
+
+    # --- FIX START ---
+    # Normalize to [-1, 1] so Gradio can convert the float waveform to
+    # 16-bit PCM without clipping; guard against an all-zero output.
+    peak = np.max(np.abs(audio_array))
+    if peak > 0:
+        audio_array = audio_array / peak
+    # --- FIX END ---
+
+    # Return result: MusicGen outputs audio at its audio encoder's sampling
+    # rate (32 kHz for facebook/musicgen-small, not 16 kHz), so read it from
+    # the config instead of hard-coding a value.
+    sampling_rate = music_model.config.audio_encoder.sampling_rate
+    return f"Top Emotion: {top_emotion}", (sampling_rate, audio_array)
+
+# Gradio UI
+with gr.Blocks() as demo:
+    gr.Markdown("# Emotion-to-Music AI")
+    gr.Markdown("Describe how you feel and get a unique music track matching your mood!")
+
+    with gr.Row():
+        text_input = gr.Textbox(label="How are you feeling?")
+        generate_btn = gr.Button("Generate Music")
+
+    with gr.Row():
+        emotion_output = gr.Textbox(label="Detected Emotion")
+        audio_output = gr.Audio(label="Generated Music", type="numpy")  # type="numpy" is correct here
+
+    generate_btn.click(fn=generate_music, inputs=text_input, outputs=[emotion_output, audio_output])
+
+demo.launch()
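A note on the emotion map: the j-hartmann/emotion-english-distilroberta-base checkpoint predicts anger, disgust, fear, joy, neutral, sadness, and surprise, so the "love" entry never fires and "disgust" falls through to the "ambient melody" default. A quick way to confirm the label set (same checkpoint as above; the test sentence is just an illustration):

```python
from transformers import pipeline

clf = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    return_all_scores=True,
)

# A single forward pass returns a score for every label
scores = clf("I can't believe this happened!")[0]
print(sorted(s["label"] for s in scores))
# ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']
```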
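On the sampling rate: MusicGen decodes through a 32 kHz EnCodec audio encoder, so labeling the waveform as 16000 Hz would make playback sound an octave low at half speed. Reading the rate from the model config avoids the magic number; a minimal check, assuming the same facebook/musicgen-small checkpoint:

```python
from transformers import MusicgenForConditionalGeneration

model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
# The output rate lives on the audio encoder sub-config
print(model.config.audio_encoder.sampling_rate)  # 32000
```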