Spaces:

arshad1234321
/

Text_to_Multimedia

Running

App Files Community

arshad1234321 committed on Apr 15, 2025

Commit

5b84497

verified ·

1 Parent(s): a6324d1

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -6

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
 import torch
 from audiocraft.models import MusicGen
 from transformers import GPT2LMHeadModel, GPT2Tokenizer
@@ -17,6 +16,7 @@ import os
 # Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # MusicGen
 music_model = MusicGen.get_pretrained("small", device=device)
@@ -24,8 +24,12 @@ music_model = MusicGen.get_pretrained("small", device=device)
 tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
 gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
-# Stable Diffusion for image generation
-pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
 pipe = pipe.to(device)
 # Emotion detection for Text-to-Audio
@@ -55,7 +59,7 @@ def text_to_audio(text):
     emotion = get_emotion_tone(text)
     engine = pyttsx3.init()
     engine.setProperty('rate', 150 if emotion == "neutral" else 180 if emotion == "happy" else 100 if emotion == "sad" else 200)
-    engine.setProperty('volume', 0.8 if emotion == "neutral" else 1.0 if emotion == "happy" or emotion == "angry" else 0.5)
     temp_file = NamedTemporaryFile(delete=False, suffix=".mp3")
     engine.save_to_file(text, temp_file.name)
@@ -105,9 +109,11 @@ def chat_with_ai(user_input):
 # Simulate Video Generation using a Sequence of Images
 def generate_video(prompt):
     frames = []
-    for i in range(5): # Generate 5 frames as a sequence
         frame_prompt = f"{prompt} frame {i+1}"
         frame_path = generate_image(frame_prompt)
         frames.append(Image.open(frame_path))
     temp_video = NamedTemporaryFile(delete=False, suffix=".gif")
@@ -124,6 +130,8 @@ def main_interface(input_text, task_type, style):
         elif task_type == "Music":
             audio_path = generate_music(input_text)
             spectrogram_path = generate_spectrogram(audio_path)
             return "Music Generated", audio_path, spectrogram_path
@@ -134,6 +142,8 @@ def main_interface(input_text, task_type, style):
         elif task_type == "Video Generation":
             video_path = generate_video(input_text)
             audio_path = generate_music(input_text)
             return "Video Generated", audio_path, video_path
     except Exception as e:
@@ -155,4 +165,4 @@ interface = gr.Interface(
     live=False,
 )
-interface.launch()

 import torch
 from audiocraft.models import MusicGen
 from transformers import GPT2LMHeadModel, GPT2Tokenizer
 # Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # MusicGen
 music_model = MusicGen.get_pretrained("small", device=device)
 tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
 gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
+# Stable Diffusion for image generation with dtype fix
+dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+pipe = StableDiffusionPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5",
+    torch_dtype=dtype
+)
 pipe = pipe.to(device)
 # Emotion detection for Text-to-Audio
     emotion = get_emotion_tone(text)
     engine = pyttsx3.init()
     engine.setProperty('rate', 150 if emotion == "neutral" else 180 if emotion == "happy" else 100 if emotion == "sad" else 200)
+    engine.setProperty('volume', 0.8 if emotion == "neutral" else 1.0 if emotion in ["happy", "angry"] else 0.5)
     temp_file = NamedTemporaryFile(delete=False, suffix=".mp3")
     engine.save_to_file(text, temp_file.name)
 # Simulate Video Generation using a Sequence of Images
 def generate_video(prompt):
     frames = []
+    for i in range(5):  # Generate 5 frames as a sequence
         frame_prompt = f"{prompt} frame {i+1}"
         frame_path = generate_image(frame_prompt)
+        if "Error" in frame_path:
+            return frame_path
         frames.append(Image.open(frame_path))
     temp_video = NamedTemporaryFile(delete=False, suffix=".gif")
         elif task_type == "Music":
             audio_path = generate_music(input_text)
+            if "Error" in audio_path:
+                return audio_path, None, None
             spectrogram_path = generate_spectrogram(audio_path)
             return "Music Generated", audio_path, spectrogram_path
         elif task_type == "Video Generation":
             video_path = generate_video(input_text)
+            if "Error" in video_path:
+                return video_path, None, None
             audio_path = generate_music(input_text)
             return "Video Generated", audio_path, video_path
     except Exception as e:
     live=False,
 )
+interface.launch()