arshad1234321 committed on
Commit
5d1605e
·
verified ·
1 Parent(s): ae1e3e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -36
app.py CHANGED
@@ -14,25 +14,22 @@ import soundfile as sf
14
  from PIL import Image
15
  import os
16
 
17
- # Set device
18
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
19
 
20
- # MusicGen
21
  music_model = MusicGen.get_pretrained("small", device=device)
22
 
23
- # GPT-2 for conversation
24
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
25
  gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
26
 
27
- # Stable Diffusion for image generation with dtype fix
28
- dtype = torch.float16 if torch.cuda.is_available() else torch.float32
29
  pipe = StableDiffusionPipeline.from_pretrained(
30
  "runwayml/stable-diffusion-v1-5",
31
- torch_dtype=torch.float32 # Required for CPU
32
  ).to("cpu")
33
- pipe = pipe.to(device)
34
 
35
- # Emotion detection for Text-to-Audio
36
  def get_emotion_tone(text):
37
  if any(word in text.lower() for word in ["happy", "joy", "excited"]):
38
  return "happy"
@@ -43,7 +40,6 @@ def get_emotion_tone(text):
43
  else:
44
  return "neutral"
45
 
46
- # Image generation using Stable Diffusion
47
  def generate_image(prompt, style="realistic"):
48
  styled_prompt = f"{style} style {prompt}"
49
  try:
@@ -52,9 +48,9 @@ def generate_image(prompt, style="realistic"):
52
  image.save(temp_image.name)
53
  return temp_image.name
54
  except Exception as e:
55
- return f"Error generating image: {e}"
 
56
 
57
- # Convert Text to Audio with Emotion
58
  def text_to_audio(text):
59
  emotion = get_emotion_tone(text)
60
  engine = pyttsx3.init()
@@ -66,19 +62,17 @@ def text_to_audio(text):
66
  engine.runAndWait()
67
  return temp_file.name
68
 
69
- # Music generation using MusicGen
70
  def generate_music(prompt):
71
  try:
72
- descriptions = [prompt]
73
- wav = music_model.generate(descriptions)
74
  temp_file = NamedTemporaryFile(delete=False, suffix=".wav")
75
  audio_data = wav.cpu().numpy()
76
  wavfile.write(temp_file.name, music_model.sample_rate, audio_data[0, 0])
77
  return temp_file.name
78
  except Exception as e:
79
- return f"Error generating music: {e}"
 
80
 
81
- # Spectrogram generation from audio
82
  def generate_spectrogram(audio_path):
83
  try:
84
  y, sr = librosa.load(audio_path, sr=None)
@@ -94,9 +88,9 @@ def generate_spectrogram(audio_path):
94
  plt.close()
95
  return temp_image.name
96
  except Exception as e:
97
- return f"Error generating spectrogram: {e}"
 
98
 
99
- # Chat with AI (GPT-2)
100
  def chat_with_ai(user_input):
101
  try:
102
  inputs = tokenizer.encode(user_input, return_tensors="pt").to(device)
@@ -104,52 +98,48 @@ def chat_with_ai(user_input):
104
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
105
  return response
106
  except Exception as e:
107
- return f"Error in chat generation: {e}"
 
108
 
109
- # Simulate Video Generation using a Sequence of Images
110
  def generate_video(prompt):
111
  frames = []
112
- for i in range(5): # Generate 5 frames as a sequence
113
  frame_prompt = f"{prompt} frame {i+1}"
114
  frame_path = generate_image(frame_prompt)
115
- if "Error" in frame_path:
116
- return frame_path
117
- frames.append(Image.open(frame_path))
 
 
118
 
119
  temp_video = NamedTemporaryFile(delete=False, suffix=".gif")
120
  frames[0].save(temp_video.name, save_all=True, append_images=frames[1:], duration=500, loop=0)
121
  return temp_video.name
122
 
123
- # Main interface logic
124
  def main_interface(input_text, task_type, style):
125
  try:
126
  if task_type == "Conversation":
127
  response = chat_with_ai(input_text)
128
  image_path = generate_image(f"conversation about {input_text}", style)
129
- return response, None, image_path
130
 
131
  elif task_type == "Music":
132
  audio_path = generate_music(input_text)
133
- if "Error" in audio_path:
134
- return audio_path, None, None
135
- spectrogram_path = generate_spectrogram(audio_path)
136
- return "Music Generated", audio_path, spectrogram_path
137
 
138
  elif task_type == "Text to Audio":
139
  audio_path = text_to_audio(input_text)
140
  image_path = generate_image(f"text-to-audio conversion for {input_text}", style)
141
- return "Audio Generated", audio_path, image_path
142
 
143
  elif task_type == "Video Generation":
144
  video_path = generate_video(input_text)
145
- if "Error" in video_path:
146
- return video_path, None, None
147
  audio_path = generate_music(input_text)
148
- return "Video Generated", audio_path, video_path
149
  except Exception as e:
150
  return f"Error: {e}", None, None
151
 
152
- # Gradio interface setup
153
  interface = gr.Interface(
154
  fn=main_interface,
155
  inputs=[
 
14
  from PIL import Image
15
  import os
16
 
17
+ # CPU device
18
+ device = torch.device("cpu")
19
 
20
+ # Load MusicGen (CPU)
21
  music_model = MusicGen.get_pretrained("small", device=device)
22
 
23
+ # GPT-2 (CPU)
24
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
25
  gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
26
 
27
+ # Stable Diffusion (CPU-safe config)
 
28
  pipe = StableDiffusionPipeline.from_pretrained(
29
  "runwayml/stable-diffusion-v1-5",
30
+ torch_dtype=torch.float32 # Must be float32 for CPU
31
  ).to("cpu")
 
32
 
 
33
  def get_emotion_tone(text):
34
  if any(word in text.lower() for word in ["happy", "joy", "excited"]):
35
  return "happy"
 
40
  else:
41
  return "neutral"
42
 
 
43
  def generate_image(prompt, style="realistic"):
44
  styled_prompt = f"{style} style {prompt}"
45
  try:
 
48
  image.save(temp_image.name)
49
  return temp_image.name
50
  except Exception as e:
51
+ print("Image generation error:", e)
52
+ return None
53
 
 
54
  def text_to_audio(text):
55
  emotion = get_emotion_tone(text)
56
  engine = pyttsx3.init()
 
62
  engine.runAndWait()
63
  return temp_file.name
64
 
 
65
  def generate_music(prompt):
66
  try:
67
+ wav = music_model.generate([prompt])
 
68
  temp_file = NamedTemporaryFile(delete=False, suffix=".wav")
69
  audio_data = wav.cpu().numpy()
70
  wavfile.write(temp_file.name, music_model.sample_rate, audio_data[0, 0])
71
  return temp_file.name
72
  except Exception as e:
73
+ print("Music generation error:", e)
74
+ return None
75
 
 
76
  def generate_spectrogram(audio_path):
77
  try:
78
  y, sr = librosa.load(audio_path, sr=None)
 
88
  plt.close()
89
  return temp_image.name
90
  except Exception as e:
91
+ print("Spectrogram generation error:", e)
92
+ return None
93
 
 
94
  def chat_with_ai(user_input):
95
  try:
96
  inputs = tokenizer.encode(user_input, return_tensors="pt").to(device)
 
98
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
99
  return response
100
  except Exception as e:
101
+ print("Chat error:", e)
102
+ return "Error in chat generation."
103
 
 
104
  def generate_video(prompt):
105
  frames = []
106
+ for i in range(5):
107
  frame_prompt = f"{prompt} frame {i+1}"
108
  frame_path = generate_image(frame_prompt)
109
+ if frame_path:
110
+ frames.append(Image.open(frame_path))
111
+
112
+ if not frames:
113
+ return None
114
 
115
  temp_video = NamedTemporaryFile(delete=False, suffix=".gif")
116
  frames[0].save(temp_video.name, save_all=True, append_images=frames[1:], duration=500, loop=0)
117
  return temp_video.name
118
 
 
119
  def main_interface(input_text, task_type, style):
120
  try:
121
  if task_type == "Conversation":
122
  response = chat_with_ai(input_text)
123
  image_path = generate_image(f"conversation about {input_text}", style)
124
+ return response, None, image_path if os.path.exists(image_path) else None
125
 
126
  elif task_type == "Music":
127
  audio_path = generate_music(input_text)
128
+ spectrogram_path = generate_spectrogram(audio_path) if audio_path else None
129
+ return "Music Generated", audio_path if os.path.exists(audio_path) else None, spectrogram_path
 
 
130
 
131
  elif task_type == "Text to Audio":
132
  audio_path = text_to_audio(input_text)
133
  image_path = generate_image(f"text-to-audio conversion for {input_text}", style)
134
+ return "Audio Generated", audio_path if os.path.exists(audio_path) else None, image_path
135
 
136
  elif task_type == "Video Generation":
137
  video_path = generate_video(input_text)
 
 
138
  audio_path = generate_music(input_text)
139
+ return "Video Generated", audio_path if os.path.exists(audio_path) else None, video_path
140
  except Exception as e:
141
  return f"Error: {e}", None, None
142
 
 
143
  interface = gr.Interface(
144
  fn=main_interface,
145
  inputs=[