Update app.py

app.py CHANGED
@@ -8,13 +8,7 @@ import os
 from tensorflow.keras.models import load_model
 from faster_whisper import WhisperModel
 import random
-from textblob import TextBlob
-import torch
-import scipy.io.wavfile
-from transformers import AutoProcessor, MusicgenForConditionalGeneration
-import tempfile
-from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
-import torch
+from textblob import TextBlob  # Added for sentiment analysis
 
 # Load the emotion prediction model
 def load_emotion_model(model_path):
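Note that the rewritten `generate_image` below relies on `requests`, `PIL.Image`, and `BytesIO`, none of which are added in this import hunk. They may already be imported in the first seven lines of app.py, which the diff does not show; if not, something like the following would be needed near the top of the file (a minimal sketch, assuming those imports are in fact missing):

```python
# Assumed additions near the top of app.py; skip any that are already present.
import requests           # POST the source image and prompt to the DeepAI endpoint
from io import BytesIO    # wrap the downloaded bytes so PIL can decode them
from PIL import Image     # convert the API's output into a PIL image for Gradio
```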
@@ -33,51 +27,6 @@ model = load_emotion_model(model_path)
 model_size = "small"
 model2 = WhisperModel(model_size, device="cpu", compute_type="int8")
 
-# Load MusicGen model
-def load_musicgen_model():
-    try:
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
-        music_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
-        music_model.to(device)
-        print("MusicGen model loaded successfully")
-        return processor, music_model, device
-    except Exception as e:
-        print("Error loading MusicGen model:", e)
-        return None, None, None
-
-processor, music_model, device = load_musicgen_model()
-
-# Load Stable Diffusion model
-def load_stable_diffusion_model():
-    try:
-        # Use GPU if available, otherwise CPU
-        sd_device = "cuda" if torch.cuda.is_available() else "cpu"
-
-        # Load Stable Diffusion 2.1
-        model_id = "stabilityai/stable-diffusion-2-1"
-
-        # Use the DPMSolverMultistepScheduler for faster inference
-        pipe = StableDiffusionPipeline.from_pretrained(
-            model_id,
-            torch_dtype=torch.float16 if sd_device == "cuda" else torch.float32,
-            safety_checker=None  # Disable safety checker for more creative generations
-        )
-        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-        pipe = pipe.to(sd_device)
-
-        # Optimize for CPU if needed
-        if sd_device == "cpu":
-            pipe.enable_attention_slicing()
-
-        print("Stable Diffusion model loaded successfully")
-        return pipe, sd_device
-    except Exception as e:
-        print("Error loading Stable Diffusion model:", e)
-        return None, None
-
-sd_pipe, sd_device = load_stable_diffusion_model()
-
 # Function to transcribe audio
 def transcribe(wav_filepath):
     try:
@@ -139,72 +88,48 @@ def analyze_sentiment(text):
         print("Error analyzing sentiment:", e)
         return "sentiment analysis error", 0.0
 
-# Function to generate music using MusicGen
-def generate_music(transcribed_text, emotion_prediction):
+api_key = os.getenv("DeepAI_api_key")
+
+# Function to generate an image using DeepAI Text to Image API
+def generate_image(emotion_prediction, transcribed_text, output_resolution=(1024, 1024)):
     try:
-        if processor is None or music_model is None:
-            return None
+        if not api_key:
+            return "API key not found"
 
-
-
+        url = "https://api.deepai.org/api/image-editor"
+        headers = {
+            'api-key': api_key
+        }
 
-        # Create a prompt and prepare the inputs for MusicGen
-        prompt = f"{emotion_prediction}: {transcribed_text}"
-
-
-        inputs = processor(
-            text=[prompt],
-            padding=True,
-            return_tensors="pt",
-        ).to(device)
+        # Select a random image file from TerraIncognita0.jpg to TerraIncognita9.jpg
+        random_index = random.randint(0, 9)
+        image_file_path = f'TAI_Images/TerraIncognita{random_index}.jpg'
 
-        # Generate the audio
-        audio_values = music_model.generate(
-            **inputs,
-            max_new_tokens=256,
-        )
-        audio_data = audio_values[0, 0].cpu().numpy()
-
-        # Normalize audio data
-        audio_data = audio_data / np.max(np.abs(audio_data))
+        # Check if the file exists
+        if not os.path.exists(image_file_path):
+            return f"Image file not found: {image_file_path}"
+
+        prompt_text = f"Generate Patagonian Monsters' with a {emotion_prediction} attitude, representing the idea of: [ {transcribed_text} ]. Illustrate this using asemic writings in an old map style."
 
-        sampling_rate = music_model.config.audio_encoder.sampling_rate
-        temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-        scipy.io.wavfile.write(temp_file.name, sampling_rate, audio_data)
-        return temp_file.name
+        with open(image_file_path, 'rb') as image_file:
+            files = {
+                'image': image_file,
+            }
+            data = {
+                'text': prompt_text
+            }
+            response = requests.post(url, headers=headers, files=files, data=data)
 
-    except Exception as e:
-        print("Error generating music:", e)
-        return None
-
-# Function to generate an image with Stable Diffusion
-def generate_image(emotion_prediction, transcribed_text):
-    try:
-        if sd_pipe is None:
+        response_data = response.json()
+        if 'output_url' in response_data:
+            # Download the image and return it as a PIL Image
+            image_response = requests.get(response_data['output_url'])
+            return Image.open(BytesIO(image_response.content))
+        else:
+            print("Error in DeepAI response:", response_data)
             return None
-
-        # Create a detailed prompt for image generation
-        prompt = f"Patagonian Monsters with a {emotion_prediction} attitude, representing: {transcribed_text}. " \
-                 f"Asemic writings in an old map style, vintage illustration, detailed, high quality, 4k resolution"
-
-        # Negative prompt to avoid unwanted elements
-        negative_prompt = "blurry, low quality, distorted, ugly, bad anatomy, text, watermark, signature"
-
-        # Generate image
-        with torch.autocast("cuda" if sd_device == "cuda" else "cpu"):
-            image = sd_pipe(
-                prompt=prompt,
-                negative_prompt=negative_prompt,
-                height=1024,
-                width=512,
-                num_inference_steps=25,
-                guidance_scale=7.5
-            ).images[0]
-
-        return image
-
     except Exception as e:
-        print("Error generating image:", e)
+        print("Error generating image:", e)
         return None
 
 # Function to get predictions
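The request/response shape of the new code can be exercised outside Gradio. A minimal sketch, assuming the `DeepAI_api_key` environment variable is set and `TAI_Images/TerraIncognita0.jpg` exists (the endpoint, field names, and file paths are taken from the hunk above; nothing else here is part of app.py):

```python
import os
import requests

# Same endpoint and field names as the generate_image function above.
url = "https://api.deepai.org/api/image-editor"
headers = {"api-key": os.environ["DeepAI_api_key"]}

with open("TAI_Images/TerraIncognita0.jpg", "rb") as image_file:
    response = requests.post(
        url,
        headers=headers,
        files={"image": image_file},
        data={"text": "Asemic writings in an old map style"},
    )

# A successful call returns JSON with an 'output_url' pointing at the result.
print(response.json().get("output_url"))
```

Note also that the new signature's `output_resolution=(1024, 1024)` parameter is never used in the function body.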
@@ -219,13 +144,9 @@ def get_predictions(audio_input):
     # Analyze sentiment of transcribed text
     sentiment, polarity = analyze_sentiment(transcribed_text)
 
-    # Generate image with Stable Diffusion
     image = generate_image(emotion_prediction, transcribed_text)
 
-
-    music_path = generate_music(transcribed_text, emotion_prediction)
-
-    return emotion_prediction, transcribed_text, f"Sentiment: {sentiment} (Polarity: {polarity:.2f})", image, music_path
+    return emotion_prediction, transcribed_text, f"Sentiment: {sentiment} (Polarity: {polarity:.2f})", image
 
 # Create the Gradio interface
 interface = gr.Interface(
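One caveat with the new flow: on failure `generate_image` can return a string ("API key not found", "Image file not found: ..."), which the `gr.Image(type='pil')` output below cannot render. A hedged option, not part of this commit, is a small guard in `get_predictions` (the helper name is hypothetical):

```python
from PIL import Image

def ensure_pil_or_none(result):
    # gr.Image accepts a PIL image or None; map error strings (or anything
    # else) to None so the interface degrades gracefully.
    return result if isinstance(result, Image.Image) else None

# Usage inside get_predictions:
# image = ensure_pil_or_none(generate_image(emotion_prediction, transcribed_text))
```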
@@ -234,12 +155,11 @@ interface = gr.Interface(
     outputs=[
         gr.Label(label="Acoustic Prediction"),
         gr.Label(label="Transcribed Text"),
-        gr.Label(label="Sentiment Analysis"),
-        gr.Image(type='pil', label="Generated Image")
-        gr.Audio(label="Generated Music", type="filepath")
+        gr.Label(label="Sentiment Analysis"),  # Added sentiment analysis output
+        gr.Image(type='pil', label="Generated Image")
     ],
     title="Affective Virtual Environments",
-    description="Create an AVE using your voice. Get emotion prediction, transcription, sentiment analysis, a generated image, and generated music."
+    description="Create an AVE using your voice. Get emotion prediction, transcription, sentiment analysis, and a generated image."
 )
 
 interface.launch()