Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
|
| 2 |
import torch
|
| 3 |
from audiocraft.models import MusicGen
|
| 4 |
from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
|
@@ -17,6 +16,7 @@ import os
|
|
| 17 |
|
| 18 |
# Set device
|
| 19 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
| 20 |
# MusicGen
|
| 21 |
music_model = MusicGen.get_pretrained("small", device=device)
|
| 22 |
|
|
@@ -24,8 +24,12 @@ music_model = MusicGen.get_pretrained("small", device=device)
|
|
| 24 |
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
| 25 |
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
|
| 26 |
|
| 27 |
-
# Stable Diffusion for image generation
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
pipe = pipe.to(device)
|
| 30 |
|
| 31 |
# Emotion detection for Text-to-Audio
|
|
@@ -55,7 +59,7 @@ def text_to_audio(text):
|
|
| 55 |
emotion = get_emotion_tone(text)
|
| 56 |
engine = pyttsx3.init()
|
| 57 |
engine.setProperty('rate', 150 if emotion == "neutral" else 180 if emotion == "happy" else 100 if emotion == "sad" else 200)
|
| 58 |
-
engine.setProperty('volume', 0.8 if emotion == "neutral" else 1.0 if emotion
|
| 59 |
|
| 60 |
temp_file = NamedTemporaryFile(delete=False, suffix=".mp3")
|
| 61 |
engine.save_to_file(text, temp_file.name)
|
|
@@ -105,9 +109,11 @@ def chat_with_ai(user_input):
|
|
| 105 |
# Simulate Video Generation using a Sequence of Images
|
| 106 |
def generate_video(prompt):
|
| 107 |
frames = []
|
| 108 |
-
for i in range(5):
|
| 109 |
frame_prompt = f"{prompt} frame {i+1}"
|
| 110 |
frame_path = generate_image(frame_prompt)
|
|
|
|
|
|
|
| 111 |
frames.append(Image.open(frame_path))
|
| 112 |
|
| 113 |
temp_video = NamedTemporaryFile(delete=False, suffix=".gif")
|
|
@@ -124,6 +130,8 @@ def main_interface(input_text, task_type, style):
|
|
| 124 |
|
| 125 |
elif task_type == "Music":
|
| 126 |
audio_path = generate_music(input_text)
|
|
|
|
|
|
|
| 127 |
spectrogram_path = generate_spectrogram(audio_path)
|
| 128 |
return "Music Generated", audio_path, spectrogram_path
|
| 129 |
|
|
@@ -134,6 +142,8 @@ def main_interface(input_text, task_type, style):
|
|
| 134 |
|
| 135 |
elif task_type == "Video Generation":
|
| 136 |
video_path = generate_video(input_text)
|
|
|
|
|
|
|
| 137 |
audio_path = generate_music(input_text)
|
| 138 |
return "Video Generated", audio_path, video_path
|
| 139 |
except Exception as e:
|
|
@@ -155,4 +165,4 @@ interface = gr.Interface(
|
|
| 155 |
live=False,
|
| 156 |
)
|
| 157 |
|
| 158 |
-
interface.launch()
|
|
|
|
|
|
|
| 1 |
import torch
|
| 2 |
from audiocraft.models import MusicGen
|
| 3 |
from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
|
|
|
| 16 |
|
| 17 |
# Set device
|
| 18 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 19 |
+
|
| 20 |
# MusicGen
|
| 21 |
music_model = MusicGen.get_pretrained("small", device=device)
|
| 22 |
|
|
|
|
| 24 |
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
| 25 |
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
|
| 26 |
|
| 27 |
+
# Stable Diffusion for image generation with dtype fix
|
| 28 |
+
dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
| 29 |
+
pipe = StableDiffusionPipeline.from_pretrained(
|
| 30 |
+
"runwayml/stable-diffusion-v1-5",
|
| 31 |
+
torch_dtype=dtype
|
| 32 |
+
)
|
| 33 |
pipe = pipe.to(device)
|
| 34 |
|
| 35 |
# Emotion detection for Text-to-Audio
|
|
|
|
| 59 |
emotion = get_emotion_tone(text)
|
| 60 |
engine = pyttsx3.init()
|
| 61 |
engine.setProperty('rate', 150 if emotion == "neutral" else 180 if emotion == "happy" else 100 if emotion == "sad" else 200)
|
| 62 |
+
engine.setProperty('volume', 0.8 if emotion == "neutral" else 1.0 if emotion in ["happy", "angry"] else 0.5)
|
| 63 |
|
| 64 |
temp_file = NamedTemporaryFile(delete=False, suffix=".mp3")
|
| 65 |
engine.save_to_file(text, temp_file.name)
|
|
|
|
| 109 |
# Simulate Video Generation using a Sequence of Images
|
| 110 |
def generate_video(prompt):
|
| 111 |
frames = []
|
| 112 |
+
for i in range(5): # Generate 5 frames as a sequence
|
| 113 |
frame_prompt = f"{prompt} frame {i+1}"
|
| 114 |
frame_path = generate_image(frame_prompt)
|
| 115 |
+
if "Error" in frame_path:
|
| 116 |
+
return frame_path
|
| 117 |
frames.append(Image.open(frame_path))
|
| 118 |
|
| 119 |
temp_video = NamedTemporaryFile(delete=False, suffix=".gif")
|
|
|
|
| 130 |
|
| 131 |
elif task_type == "Music":
|
| 132 |
audio_path = generate_music(input_text)
|
| 133 |
+
if "Error" in audio_path:
|
| 134 |
+
return audio_path, None, None
|
| 135 |
spectrogram_path = generate_spectrogram(audio_path)
|
| 136 |
return "Music Generated", audio_path, spectrogram_path
|
| 137 |
|
|
|
|
| 142 |
|
| 143 |
elif task_type == "Video Generation":
|
| 144 |
video_path = generate_video(input_text)
|
| 145 |
+
if "Error" in video_path:
|
| 146 |
+
return video_path, None, None
|
| 147 |
audio_path = generate_music(input_text)
|
| 148 |
return "Video Generated", audio_path, video_path
|
| 149 |
except Exception as e:
|
|
|
|
| 165 |
live=False,
|
| 166 |
)
|
| 167 |
|
| 168 |
+
interface.launch()
|