jfforero committed
Commit a1c3a87 · verified · 1 Parent(s): d628f83

Update app.py

Files changed (1):
  1. app.py +66 -5
app.py CHANGED
@@ -8,7 +8,11 @@ import os
 from tensorflow.keras.models import load_model
 from faster_whisper import WhisperModel
 import random
-from textblob import TextBlob # Added for sentiment analysis
+from textblob import TextBlob
+import torch
+import scipy.io.wavfile
+from transformers import AutoProcessor, MusicgenForConditionalGeneration
+import tempfile
 
 # Load the emotion prediction model
 def load_emotion_model(model_path):
@@ -27,6 +31,21 @@ model = load_emotion_model(model_path)
 model_size = "small"
 model2 = WhisperModel(model_size, device="cpu", compute_type="int8")
 
+# Load MusicGen model
+def load_musicgen_model():
+    try:
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
+        music_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
+        music_model.to(device)
+        print("MusicGen model loaded successfully")
+        return processor, music_model, device
+    except Exception as e:
+        print("Error loading MusicGen model:", e)
+        return None, None, None
+
+processor, music_model, device = load_musicgen_model()
+
 # Function to transcribe audio
 def transcribe(wav_filepath):
     try:
@@ -88,6 +107,44 @@ def analyze_sentiment(text):
         print("Error analyzing sentiment:", e)
         return "sentiment analysis error", 0.0
 
+# Function to generate music with MusicGen
+def generate_music(transcribed_text, emotion_prediction):
+    try:
+        if processor is None or music_model is None:
+            return None
+
+        # Create a prompt that combines the emotion and transcription
+        prompt = f"Background music that is {emotion_prediction} and represents: {transcribed_text}"
+
+        # Limit prompt length to avoid model issues
+        if len(prompt) > 200:
+            prompt = prompt[:200] + "..."
+
+        inputs = processor(
+            text=[prompt],
+            padding=True,
+            return_tensors="pt",
+        ).to(device)
+
+        # Generate audio
+        audio_values = music_model.generate(**inputs, max_new_tokens=512)
+
+        # Convert to numpy array and sample rate
+        sampling_rate = music_model.config.audio_encoder.sampling_rate
+        audio_data = audio_values[0, 0].cpu().numpy()
+
+        # Normalize audio data
+        audio_data = audio_data / np.max(np.abs(audio_data))
+
+        # Create a temporary file to save the audio
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+            scipy.io.wavfile.write(tmp_file.name, rate=sampling_rate, data=audio_data)
+            return tmp_file.name
+
+    except Exception as e:
+        print("Error generating music:", e)
+        return None
+
 api_key = os.getenv("DeepAI_api_key")
 
 # Function to generate an image using DeepAI Text to Image API
@@ -146,7 +203,10 @@ def get_predictions(audio_input):
 
     image = generate_image(emotion_prediction, transcribed_text)
 
-    return emotion_prediction, transcribed_text, f"Sentiment: {sentiment} (Polarity: {polarity:.2f})", image
+    # Generate music based on transcription and emotion
+    music_path = generate_music(transcribed_text, emotion_prediction)
+
+    return emotion_prediction, transcribed_text, f"Sentiment: {sentiment} (Polarity: {polarity:.2f})", image, music_path
 
 # Create the Gradio interface
 interface = gr.Interface(
@@ -155,11 +215,12 @@ interface = gr.Interface(
     outputs=[
         gr.Label(label="Acoustic Prediction"),
         gr.Label(label="Transcribed Text"),
-        gr.Label(label="Sentiment Analysis"), # Added sentiment analysis output
-        gr.Image(type='pil', label="Generated Image")
+        gr.Label(label="Sentiment Analysis"),
+        gr.Image(type='pil', label="Generated Image"),
+        gr.Audio(label="Generated Music", type="filepath") # Added music output
     ],
     title="Affective Virtual Environments",
-    description="Create an AVE using your voice. Get emotion prediction, transcription, sentiment analysis, and a generated image."
+    description="Create an AVE using your voice. Get emotion prediction, transcription, sentiment analysis, a generated image, and music."
 )
 
 interface.launch()
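
For reference, a minimal standalone sketch of the MusicGen flow this commit wires in. The model id, generation call, and sampling-rate lookup mirror the diff above; the prompt text and output filename are illustrative placeholders, not part of app.py.

import scipy.io.wavfile
import torch
from transformers import AutoProcessor, MusicgenForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small").to(device)

# Illustrative prompt in the same "emotion + transcription" shape used by generate_music()
inputs = processor(
    text=["Background music that is happy and represents: a walk in the park"],
    padding=True,
    return_tensors="pt",
).to(device)

# max_new_tokens=512 corresponds to roughly 10 seconds of audio for musicgen-small
audio = model.generate(**inputs, max_new_tokens=512)

# The MusicGen checkpoints decode audio at the audio encoder's sampling rate (32 kHz)
rate = model.config.audio_encoder.sampling_rate
scipy.io.wavfile.write("musicgen_sample.wav", rate=rate, data=audio[0, 0].cpu().numpy())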