arshad1234321 committed on
Commit
f1b1b95
·
verified ·
1 Parent(s): e9bad54

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -32
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import torch
2
  from audiocraft.models import MusicGen
3
  from transformers import GPT2LMHeadModel, GPT2Tokenizer
4
- import pyttsx3
5
  import gradio as gr
6
  from tempfile import NamedTemporaryFile
7
  import numpy as np
@@ -14,22 +14,22 @@ import soundfile as sf
14
  from PIL import Image
15
  import os
16
 
17
- # CPU device
18
- device = torch.device("cpu")
19
 
20
- # Load MusicGen (CPU)
21
  music_model = MusicGen.get_pretrained("small", device=device)
22
 
23
- # GPT-2 (CPU)
24
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
25
  gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
26
 
27
- # Stable Diffusion (CPU-safe config)
28
  pipe = StableDiffusionPipeline.from_pretrained(
29
- "runwayml/stable-diffusion-v1-5",
30
- torch_dtype=torch.float32 # Must be float32 for CPU
31
- ).to("cpu")
32
 
 
 
 
33
  def get_emotion_tone(text):
34
  if any(word in text.lower() for word in ["happy", "joy", "excited"]):
35
  return "happy"
@@ -40,6 +40,7 @@ def get_emotion_tone(text):
40
  else:
41
  return "neutral"
42
 
 
43
  def generate_image(prompt, style="realistic"):
44
  styled_prompt = f"{style} style {prompt}"
45
  try:
@@ -48,20 +49,17 @@ def generate_image(prompt, style="realistic"):
48
  image.save(temp_image.name)
49
  return temp_image.name
50
  except Exception as e:
51
- print("Image generation error:", e)
52
  return None
53
 
 
54
  def text_to_audio(text):
55
  emotion = get_emotion_tone(text)
56
- engine = pyttsx3.init()
57
- engine.setProperty('rate', 150 if emotion == "neutral" else 180 if emotion == "happy" else 100 if emotion == "sad" else 200)
58
- engine.setProperty('volume', 0.8 if emotion == "neutral" else 1.0 if emotion in ["happy", "angry"] else 0.5)
59
-
60
- temp_file = NamedTemporaryFile(delete=False, suffix=".mp3")
61
- engine.save_to_file(text, temp_file.name)
62
- engine.runAndWait()
63
  return temp_file.name
64
 
 
65
  def generate_music(prompt):
66
  try:
67
  wav = music_model.generate([prompt])
@@ -70,9 +68,10 @@ def generate_music(prompt):
70
  wavfile.write(temp_file.name, music_model.sample_rate, audio_data[0, 0])
71
  return temp_file.name
72
  except Exception as e:
73
- print("Music generation error:", e)
74
  return None
75
 
 
76
  def generate_spectrogram(audio_path):
77
  try:
78
  y, sr = librosa.load(audio_path, sr=None)
@@ -88,19 +87,21 @@ def generate_spectrogram(audio_path):
88
  plt.close()
89
  return temp_image.name
90
  except Exception as e:
91
- print("Spectrogram generation error:", e)
92
  return None
93
 
 
94
  def chat_with_ai(user_input):
95
  try:
96
  inputs = tokenizer.encode(user_input, return_tensors="pt").to(device)
97
- outputs = gpt2_model.generate(inputs, max_length=50, num_return_sequences=1)
98
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
99
  return response
100
  except Exception as e:
101
  print("Chat error:", e)
102
- return "Error in chat generation."
103
 
 
104
  def generate_video(prompt):
105
  frames = []
106
  for i in range(5):
@@ -109,37 +110,40 @@ def generate_video(prompt):
109
  if frame_path:
110
  frames.append(Image.open(frame_path))
111
 
112
- if not frames:
113
- return None
114
-
115
- temp_video = NamedTemporaryFile(delete=False, suffix=".gif")
116
- frames[0].save(temp_video.name, save_all=True, append_images=frames[1:], duration=500, loop=0)
117
- return temp_video.name
118
 
 
119
  def main_interface(input_text, task_type, style):
120
  try:
121
  if task_type == "Conversation":
122
  response = chat_with_ai(input_text)
123
  image_path = generate_image(f"conversation about {input_text}", style)
124
- return response, None, image_path if os.path.exists(image_path) else None
125
 
126
  elif task_type == "Music":
127
  audio_path = generate_music(input_text)
128
- spectrogram_path = generate_spectrogram(audio_path) if audio_path else None
129
- return "Music Generated", audio_path if os.path.exists(audio_path) else None, spectrogram_path
130
 
131
  elif task_type == "Text to Audio":
132
  audio_path = text_to_audio(input_text)
133
  image_path = generate_image(f"text-to-audio conversion for {input_text}", style)
134
- return "Audio Generated", audio_path if os.path.exists(audio_path) else None, image_path
135
 
136
  elif task_type == "Video Generation":
137
  video_path = generate_video(input_text)
138
  audio_path = generate_music(input_text)
139
- return "Video Generated", audio_path if os.path.exists(audio_path) else None, video_path
 
140
  except Exception as e:
 
141
  return f"Error: {e}", None, None
142
 
 
143
  interface = gr.Interface(
144
  fn=main_interface,
145
  inputs=[
 
1
  import torch
2
  from audiocraft.models import MusicGen
3
  from transformers import GPT2LMHeadModel, GPT2Tokenizer
4
+ from TTS.api import TTS
5
  import gradio as gr
6
  from tempfile import NamedTemporaryFile
7
  import numpy as np
 
14
  from PIL import Image
15
  import os
16
 
17
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
18
 
19
+ # Load models
20
  music_model = MusicGen.get_pretrained("small", device=device)
21
 
 
22
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
23
  gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
24
 
25
+ # Set torch_dtype to float32 for compatibility on CPU
26
  pipe = StableDiffusionPipeline.from_pretrained(
27
+ "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float32
28
+ ).to(device)
 
29
 
30
+ tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False).to(device)
31
+
32
+ # Emotion detection
33
  def get_emotion_tone(text):
34
  if any(word in text.lower() for word in ["happy", "joy", "excited"]):
35
  return "happy"
 
40
  else:
41
  return "neutral"
42
 
43
+ # Generate image
44
  def generate_image(prompt, style="realistic"):
45
  styled_prompt = f"{style} style {prompt}"
46
  try:
 
49
  image.save(temp_image.name)
50
  return temp_image.name
51
  except Exception as e:
52
+ print("Image error:", e)
53
  return None
54
 
55
+ # Convert text to audio using TTS
56
  def text_to_audio(text):
57
  emotion = get_emotion_tone(text)
58
+ temp_file = NamedTemporaryFile(delete=False, suffix=".wav")
59
+ tts.tts_to_file(text=text, file_path=temp_file.name)
 
 
 
 
 
60
  return temp_file.name
61
 
62
+ # Generate music
63
  def generate_music(prompt):
64
  try:
65
  wav = music_model.generate([prompt])
 
68
  wavfile.write(temp_file.name, music_model.sample_rate, audio_data[0, 0])
69
  return temp_file.name
70
  except Exception as e:
71
+ print("Music error:", e)
72
  return None
73
 
74
+ # Generate spectrogram
75
  def generate_spectrogram(audio_path):
76
  try:
77
  y, sr = librosa.load(audio_path, sr=None)
 
87
  plt.close()
88
  return temp_image.name
89
  except Exception as e:
90
+ print("Spectrogram error:", e)
91
  return None
92
 
93
+ # GPT-2 chatbot
94
  def chat_with_ai(user_input):
95
  try:
96
  inputs = tokenizer.encode(user_input, return_tensors="pt").to(device)
97
+ outputs = gpt2_model.generate(inputs, max_length=60, num_return_sequences=1)
98
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
99
  return response
100
  except Exception as e:
101
  print("Chat error:", e)
102
+ return "Sorry, I couldn't respond."
103
 
104
+ # Generate gif video
105
  def generate_video(prompt):
106
  frames = []
107
  for i in range(5):
 
110
  if frame_path:
111
  frames.append(Image.open(frame_path))
112
 
113
+ if frames:
114
+ temp_video = NamedTemporaryFile(delete=False, suffix=".gif")
115
+ frames[0].save(temp_video.name, save_all=True, append_images=frames[1:], duration=500, loop=0)
116
+ return temp_video.name
117
+ return None
 
118
 
119
+ # Main interface
120
  def main_interface(input_text, task_type, style):
121
  try:
122
  if task_type == "Conversation":
123
  response = chat_with_ai(input_text)
124
  image_path = generate_image(f"conversation about {input_text}", style)
125
+ return response, None, image_path
126
 
127
  elif task_type == "Music":
128
  audio_path = generate_music(input_text)
129
+ spectrogram_path = generate_spectrogram(audio_path)
130
+ return "Music Generated", audio_path, spectrogram_path
131
 
132
  elif task_type == "Text to Audio":
133
  audio_path = text_to_audio(input_text)
134
  image_path = generate_image(f"text-to-audio conversion for {input_text}", style)
135
+ return "Audio Generated", audio_path, image_path
136
 
137
  elif task_type == "Video Generation":
138
  video_path = generate_video(input_text)
139
  audio_path = generate_music(input_text)
140
+ return "Video Generated", audio_path, video_path
141
+
142
  except Exception as e:
143
+ print("Main interface error:", e)
144
  return f"Error: {e}", None, None
145
 
146
+ # Gradio app
147
  interface = gr.Interface(
148
  fn=main_interface,
149
  inputs=[