arshad1234321 committed
Commit 5b84497 · verified · 1 Parent(s): a6324d1

Update app.py

Files changed (1):
  1. app.py +16 -6
app.py CHANGED
@@ -1,4 +1,3 @@
-
 import torch
 from audiocraft.models import MusicGen
 from transformers import GPT2LMHeadModel, GPT2Tokenizer
@@ -17,6 +16,7 @@ import os
 
 # Set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
 # MusicGen
 music_model = MusicGen.get_pretrained("small", device=device)
 
@@ -24,8 +24,12 @@ music_model = MusicGen.get_pretrained("small", device=device)
 tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
 gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
 
-# Stable Diffusion for image generation
-pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
+# Stable Diffusion for image generation with dtype fix
+dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+pipe = StableDiffusionPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5",
+    torch_dtype=dtype
+)
 pipe = pipe.to(device)
 
 # Emotion detection for Text-to-Audio
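Note: the old single-line load forced torch.float16 even on CPU-only hosts, where half-precision kernels are generally unavailable and the pipeline errors out at inference time; pairing the dtype with the device avoids that. A minimal standalone sketch of the same pattern (assuming `diffusers` is installed and the model weights are accessible; the prompt is illustrative):

```python
# Sketch: match the pipeline dtype to the device. float16 is only
# reliably supported on CUDA; CPU inference generally needs float32.
import torch
from diffusers import StableDiffusionPipeline

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.float16 if torch.cuda.is_available() else torch.float32

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=dtype
).to(device)

image = pipe("a lighthouse at dawn", num_inference_steps=20).images[0]
image.save("preview.png")  # image is a PIL.Image.Image
```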
@@ -55,7 +59,7 @@ def text_to_audio(text):
     emotion = get_emotion_tone(text)
     engine = pyttsx3.init()
     engine.setProperty('rate', 150 if emotion == "neutral" else 180 if emotion == "happy" else 100 if emotion == "sad" else 200)
-    engine.setProperty('volume', 0.8 if emotion == "neutral" else 1.0 if emotion == "happy" or emotion == "angry" else 0.5)
+    engine.setProperty('volume', 0.8 if emotion == "neutral" else 1.0 if emotion in ["happy", "angry"] else 0.5)
 
     temp_file = NamedTemporaryFile(delete=False, suffix=".mp3")
     engine.save_to_file(text, temp_file.name)
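Note: the chained conditional expressions on the rate and volume lines work, but every new emotion means editing two one-liners. A sketch of a table-driven alternative (the `speak_to_file` helper and the lookup tables are illustrative, not part of the commit):

```python
# Sketch: lookup tables instead of chained ternaries. Unknown
# emotions fall back to the same defaults the diff encodes
# (rate 200, volume 0.5).
import pyttsx3

RATE = {"neutral": 150, "happy": 180, "sad": 100}
VOLUME = {"neutral": 0.8, "happy": 1.0, "angry": 1.0}

def speak_to_file(text: str, emotion: str, path: str) -> None:
    engine = pyttsx3.init()
    engine.setProperty("rate", RATE.get(emotion, 200))
    engine.setProperty("volume", VOLUME.get(emotion, 0.5))
    engine.save_to_file(text, path)
    engine.runAndWait()  # blocks until the file has been written
```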
@@ -105,9 +109,11 @@ def chat_with_ai(user_input):
 # Simulate Video Generation using a Sequence of Images
 def generate_video(prompt):
     frames = []
-    for i in range(5): # Generate 5 frames as a sequence
+    for i in range(5): # Generate 5 frames as a sequence
         frame_prompt = f"{prompt} frame {i+1}"
         frame_path = generate_image(frame_prompt)
+        if "Error" in frame_path:
+            return frame_path
         frames.append(Image.open(frame_path))
 
     temp_video = NamedTemporaryFile(delete=False, suffix=".gif")
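Note: the hunk ends right after the GIF tempfile is created, so the commit does not show how the collected frames are written. For context, a sketch of the usual Pillow approach (`frames_to_gif` is a hypothetical helper, not code from the diff):

```python
# Sketch: write a list of PIL frames out as an animated GIF.
from PIL import Image

def frames_to_gif(frames: list[Image.Image], path: str, fps: int = 2) -> str:
    frames[0].save(
        path,
        save_all=True,             # emit a multi-frame (animated) GIF
        append_images=frames[1:],  # the remaining frames, in order
        duration=int(1000 / fps),  # display time per frame, in ms
        loop=0,                    # 0 = loop forever
    )
    return path
```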
@@ -124,6 +130,8 @@ def main_interface(input_text, task_type, style):
 
         elif task_type == "Music":
             audio_path = generate_music(input_text)
+            if "Error" in audio_path:
+                return audio_path, None, None
             spectrogram_path = generate_spectrogram(audio_path)
             return "Music Generated", audio_path, spectrogram_path
 
@@ -134,6 +142,8 @@ def main_interface(input_text, task_type, style):
 
         elif task_type == "Video Generation":
             video_path = generate_video(input_text)
+            if "Error" in video_path:
+                return video_path, None, None
             audio_path = generate_music(input_text)
             return "Video Generated", audio_path, video_path
     except Exception as e:
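Note: the new guards assume a failing helper returns a string containing "Error" rather than a real path, so any legitimate path that happens to contain "Error" would be misreported. Exceptions sidestep that ambiguity; a self-contained sketch under that assumption (`GenerationError` and the stub helper are hypothetical, not the commit's code):

```python
# Sketch: signal failure with an exception instead of an "Error"
# substring, so file paths can never be mistaken for error text.
class GenerationError(Exception):
    """Raised when a generation step fails."""

def generate_music_stub(prompt: str) -> str:
    # Stand-in for the app's generate_music; returns a path or raises.
    if not prompt.strip():
        raise GenerationError("empty prompt")
    return "/tmp/music.wav"

def run_music_task(prompt: str):
    try:
        audio_path = generate_music_stub(prompt)
        return "Music Generated", audio_path
    except GenerationError as e:
        return f"Error: {e}", None
```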
@@ -155,4 +165,4 @@ interface = gr.Interface(
     live=False,
 )
 
-interface.launch()
+interface.launch()