arshad1234321 committed on
Commit
bdeeafd
·
verified ·
1 Parent(s): 2e95955

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -0
app.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Working code.
# NOTE: "!pip install ..." is an IPython/Jupyter shell magic and a SyntaxError
# in a plain app.py, so the install commands are kept here as comments only.
# Run them from a shell (or list the packages in requirements.txt) beforehand:
#   pip install transformers diffusers gradio librosa audiocraft pyttsx3
#   pip install --upgrade torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
4
+ import torch
5
+ from audiocraft.models import MusicGen
6
+ from transformers import GPT2LMHeadModel, GPT2Tokenizer
7
+ import pyttsx3
8
+ import gradio as gr
9
+ from tempfile import NamedTemporaryFile
10
+ import numpy as np
11
+ import scipy.io.wavfile as wavfile
12
+ from diffusers import StableDiffusionPipeline
13
+ import matplotlib.pyplot as plt
14
+ import librosa.display
15
+ import librosa
16
+ import soundfile as sf
17
+ from PIL import Image
18
+ import os
19
+
20
# Set device: use CUDA when torch reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# MusicGen
music_model = MusicGen.get_pretrained("small", device=device)

# GPT-2 for conversation
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)

# Stable Diffusion for image generation.
# Half precision is only requested on CUDA; on the CPU fallback the pipeline
# is loaded in float32 because float16 inference is not usable there.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
)
pipe = pipe.to(device)
32
+
33
+ # Emotion detection for Text-to-Audio
34
def get_emotion_tone(text):
    """Classify *text* as "happy", "sad", "angry" or "neutral" by keyword.

    The text is lower-cased and split into alphabetic words, and the keyword
    lists are matched against whole words only, so that e.g. "download" is
    not mistaken for the sad keyword "down".

    Args:
        text: The text to inspect; ``None`` or an empty string is accepted.

    Returns:
        The first tone, checked in the order happy, sad, angry, that has a
        keyword present in the text; "neutral" when none matches.
    """
    import re

    words = set(re.findall(r"[a-z]+", (text or "").lower()))
    tone_keywords = (
        ("happy", ("happy", "joy", "excited")),
        ("sad", ("sad", "down", "melancholy")),
        ("angry", ("angry", "frustrated")),
    )
    for tone, keywords in tone_keywords:
        if not words.isdisjoint(keywords):
            return tone
    return "neutral"
43
+
44
+ # Image generation using Stable Diffusion
45
def generate_image(prompt, style="realistic"):
    """Render *prompt* with the Stable Diffusion pipeline into a temporary PNG.

    Args:
        prompt: Text description of the image.
        style: Style keyword; the pipeline receives "<style> style <prompt>".

    Returns:
        The path of the saved PNG on success. On any failure the exception is
        swallowed and the string "Error generating image: <reason>" is
        returned instead, so callers must check the result before using it
        as a file path.
    """
    styled_prompt = f"{style} style {prompt}"
    try:
        image = pipe(styled_prompt).images[0]
        # Only the unique file name is needed: close the handle immediately so
        # it is not leaked and the path can be reopened for writing on
        # platforms that forbid opening a still-open temporary file twice.
        with NamedTemporaryFile(delete=False, suffix=".png") as temp_image:
            image_path = temp_image.name
        image.save(image_path)
        return image_path
    except Exception as e:
        return f"Error generating image: {e}"
54
+
55
+ # Convert Text to Audio with Emotion
56
def text_to_audio(text):
    """Speak *text* into a temporary audio file with an emotion-dependent voice.

    The tone reported by ``get_emotion_tone`` selects the speech rate and
    volume set on the pyttsx3 engine.

    Args:
        text: The text to synthesise.

    Returns:
        The path of the temporary file handed to ``engine.save_to_file``.
    """
    # (rate, volume) for each tone.
    voice_settings = {
        "neutral": (150, 0.8),
        "happy": (180, 1.0),
        "sad": (100, 0.5),
        "angry": (200, 1.0),
    }
    emotion = get_emotion_tone(text)
    rate, volume = voice_settings.get(emotion, (200, 0.5))

    engine = pyttsx3.init()
    engine.setProperty('rate', rate)
    engine.setProperty('volume', volume)

    # Reserve a unique path and close the handle straight away so it is not
    # leaked while the engine writes to the same file.
    # NOTE(review): the container actually written depends on the pyttsx3
    # driver in use; confirm the output really is MP3 despite the suffix.
    with NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        audio_path = temp_file.name
    engine.save_to_file(text, audio_path)
    engine.runAndWait()
    return audio_path
66
+
67
+ # Music generation using MusicGen
68
def generate_music(prompt):
    """Generate music for *prompt* with MusicGen and write it to a temporary WAV.

    Args:
        prompt: Text description passed to the model as a one-item batch.

    Returns:
        The path of the WAV file on success. On any failure the exception is
        swallowed and the string "Error generating music: <reason>" is
        returned instead, so callers must check the result before using it
        as a file path.
    """
    try:
        wav = music_model.generate([prompt])
        # Element [0, 0] of the output is written; presumably the layout is
        # (batch, channel, samples) -- TODO confirm against audiocraft.
        audio_data = wav.cpu().numpy()
        # Reserve a unique path and close the handle before writing so the
        # descriptor is not leaked.
        with NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            music_path = temp_file.name
        wavfile.write(music_path, music_model.sample_rate, audio_data[0, 0])
        return music_path
    except Exception as e:
        return f"Error generating music: {e}"
78
+
79
+ # Spectrogram generation from audio
80
def generate_spectrogram(audio_path):
    """Plot the mel spectrogram of an audio file into a temporary PNG.

    Args:
        audio_path: Path of the audio file; it is loaded at its native
            sampling rate (``sr=None``).

    Returns:
        The path of the saved PNG on success. On any failure the exception is
        swallowed and the string "Error generating spectrogram: <reason>" is
        returned instead.
    """
    try:
        y, sr = librosa.load(audio_path, sr=None)
        # The signal must be passed by keyword: current librosa releases
        # reject positional audio arguments to the feature extractors.
        S = librosa.feature.melspectrogram(y=y, sr=sr)
        S_dB = librosa.power_to_db(S, ref=np.max)

        # Reserve a unique path and close the handle before matplotlib writes
        # to it so the descriptor is not leaked.
        with NamedTemporaryFile(delete=False, suffix=".png") as temp_image:
            image_path = temp_image.name

        fig = plt.figure(figsize=(10, 4))
        try:
            librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', cmap='coolwarm')
            plt.colorbar(format='%+2.0f dB')
            plt.title('Mel-frequency spectrogram')
            plt.savefig(image_path)
        finally:
            # Always release the figure, even when plotting or saving fails.
            plt.close(fig)
        return image_path
    except Exception as e:
        return f"Error generating spectrogram: {e}"
96
+
97
+ # Chat with AI (GPT-2)
98
def chat_with_ai(user_input):
    """Continue *user_input* with GPT-2 and return the decoded text.

    Args:
        user_input: The prompt to continue.

    Returns:
        The decoded first output sequence of ``generate`` with special tokens
        skipped. On any failure the exception is swallowed and the string
        "Error in chat generation: <reason>" is returned instead.
    """
    try:
        inputs = tokenizer.encode(user_input, return_tensors="pt").to(device)
        outputs = gpt2_model.generate(
            inputs,
            # Single unpadded prompt: every position is a real token.
            attention_mask=torch.ones_like(inputs),
            # Bound the number of *generated* tokens. A total-length cap
            # (max_length) also counts the prompt, so a prompt of 50+ tokens
            # would leave no room for a reply.
            max_new_tokens=50,
            num_return_sequences=1,
            # GPT-2 defines no pad token; reuse EOS so generate() does not
            # have to guess one.
            pad_token_id=tokenizer.eos_token_id,
        )
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return response
    except Exception as e:
        return f"Error in chat generation: {e}"
106
+
107
+ # Simulate Video Generation using a Sequence of Images
108
def generate_video(prompt):
    """Simulate a video by assembling five generated images into a looping GIF.

    Each frame is produced by ``generate_image`` from "<prompt> frame <n>"
    and shown for 500 ms.

    Args:
        prompt: Text description shared by all frames.

    Returns:
        The path of the temporary GIF file.

    Raises:
        RuntimeError: If ``generate_image`` did not return an existing file
            for a frame (it reports failures as a message string, not a path).
    """
    frames = []
    try:
        for i in range(5):  # Generate 5 frames as a sequence
            frame_prompt = f"{prompt} frame {i+1}"
            frame_path = generate_image(frame_prompt)
            # Fail with the helper's own message rather than letting
            # Image.open choke on an error string.
            if not os.path.isfile(frame_path):
                raise RuntimeError(frame_path)
            frames.append(Image.open(frame_path))

        # Reserve a unique path and close the handle before PIL writes to it.
        with NamedTemporaryFile(delete=False, suffix=".gif") as temp_video:
            video_path = temp_video.name
        frames[0].save(video_path, save_all=True, append_images=frames[1:], duration=500, loop=0)
        return video_path
    finally:
        # Release the file handles held by the opened frame images.
        for frame in frames:
            frame.close()
118
+
119
+ # Main interface logic
120
class _GenerationFailed(Exception):
    """Raised when a generator helper returned an error message, not a path."""


def _require_media(result):
    """Return *result* unchanged unless it is a helper's "Error ..." message."""
    # The generator helpers report failures as "Error ..." strings instead of
    # raising; such a string must never reach Gradio as a file path.
    if isinstance(result, str) and result.startswith("Error"):
        raise _GenerationFailed(result)
    return result


def main_interface(input_text, task_type, style):
    """Dispatch one request from the UI to the matching generator.

    Args:
        input_text: The text or prompt entered by the user.
        task_type: One of "Conversation", "Music", "Text to Audio" or
            "Video Generation".
        style: Image style keyword; a falsy value falls back to "realistic".

    Returns:
        A ``(text, audio_path, image_path)`` tuple. On failure, or for an
        unsupported ``task_type``, the text carries an error message and
        both paths are ``None``.
    """
    # No style selected arrives as None; do not interpolate that into prompts.
    style = style or "realistic"
    try:
        if task_type == "Conversation":
            response = chat_with_ai(input_text)
            image_path = _require_media(generate_image(f"conversation about {input_text}", style))
            return response, None, image_path

        elif task_type == "Music":
            audio_path = _require_media(generate_music(input_text))
            spectrogram_path = _require_media(generate_spectrogram(audio_path))
            return "Music Generated", audio_path, spectrogram_path

        elif task_type == "Text to Audio":
            audio_path = text_to_audio(input_text)
            image_path = _require_media(
                generate_image(f"text-to-audio conversion for {input_text}", style)
            )
            return "Audio Generated", audio_path, image_path

        elif task_type == "Video Generation":
            video_path = generate_video(input_text)
            audio_path = _require_media(generate_music(input_text))
            return "Video Generated", audio_path, video_path

        # Always return three outputs, including when no task was selected.
        return f"Error: unsupported task type: {task_type!r}", None, None
    except _GenerationFailed as e:
        # The helper's message already starts with "Error".
        return str(e), None, None
    except Exception as e:
        return f"Error: {e}", None, None
143
+
144
+ # Gradio interface setup
145
# Three inputs (prompt, task, style) feed main_interface, which returns the
# three outputs (text, audio file path, image file path) in this order.
interface = gr.Interface(
    fn=main_interface,
    inputs=[
        gr.Textbox(label="Enter Text or Prompt"),
        # Pre-select a task and a style so the callback never receives None
        # when the user submits without touching these controls.
        gr.Radio(
            ["Conversation", "Music", "Text to Audio", "Video Generation"],
            value="Conversation",
            label="Select Task",
        ),
        gr.Dropdown(["realistic", "abstract", "comic"], value="realistic", label="Select Style"),
    ],
    outputs=[
        gr.Textbox(label="Generated Output"),
        gr.Audio(label="Generated Audio", type="filepath"),
        gr.Image(label="Generated Image", type="filepath"),
    ],
    live=False,
)

interface.launch()