Spaces:
Sleeping
Sleeping
File size: 5,837 Bytes
bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 bdeeafd 1678182 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import torch
from audiocraft.models import MusicGen
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import pyttsx3
import gradio as gr
from tempfile import NamedTemporaryFile
import numpy as np
import scipy.io.wavfile as wavfile
from diffusers import StableDiffusionPipeline
import matplotlib.pyplot as plt
import librosa.display
import librosa
import soundfile as sf
from PIL import Image
import os
# Select GPU when available; everything falls back to CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# MusicGen (text-to-music), "small" checkpoint.
music_model = MusicGen.get_pretrained("small", device=device)

# GPT-2 tokenizer + model for the conversation task.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)

# Stable Diffusion for image generation. Half precision is only usable
# on CUDA — running float16 weights on CPU fails at inference time —
# so pick the dtype from the selected device.
sd_dtype = torch.float16 if device.type == "cuda" else torch.float32
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=sd_dtype
)
pipe = pipe.to(device)
def get_emotion_tone(text):
    """Classify the emotional tone of *text* by keyword matching.

    Args:
        text: Input string; matching is case-insensitive and substring-based
            (e.g. "down" also matches inside longer words).

    Returns:
        One of ``"happy"``, ``"sad"``, ``"angry"``, or ``"neutral"``.
    """
    lowered = text.lower()  # lowercase once instead of once per branch
    # Checked in priority order; the first emotion with any keyword hit wins.
    keyword_table = (
        ("happy", ("happy", "joy", "excited")),
        ("sad", ("sad", "down", "melancholy")),
        ("angry", ("angry", "frustrated")),
    )
    for emotion, keywords in keyword_table:
        if any(word in lowered for word in keywords):
            return emotion
    return "neutral"
def generate_image(prompt, style="realistic"):
    """Generate an image with Stable Diffusion, prefixing a style tag.

    Args:
        prompt: Text description of the desired image.
        style: Style word prepended to the prompt (e.g. "realistic").

    Returns:
        Path to a temporary PNG on success, or an
        ``"Error generating image: ..."`` string on failure —
        callers must check for the error form.
    """
    styled_prompt = f"{style} style {prompt}"
    try:
        image = pipe(styled_prompt).images[0]
        # delete=False keeps the file around for Gradio to serve; close
        # the handle immediately so the descriptor is not leaked.
        with NamedTemporaryFile(delete=False, suffix=".png") as temp_image:
            image_path = temp_image.name
        image.save(image_path)
        return image_path
    except Exception as e:
        return f"Error generating image: {e}"
def text_to_audio(text):
    """Speak *text* with pyttsx3, tuning rate/volume by detected emotion.

    Args:
        text: Text to synthesize.

    Returns:
        Path to the generated audio file.
    """
    emotion = get_emotion_tone(text)
    # Per-emotion speech parameters (rate in words/min, volume 0.0-1.0);
    # a lookup table replaces the original hard-to-read nested ternaries.
    rates = {"neutral": 150, "happy": 180, "sad": 100, "angry": 200}
    volumes = {"neutral": 0.8, "happy": 1.0, "angry": 1.0, "sad": 0.5}
    engine = pyttsx3.init()
    engine.setProperty('rate', rates[emotion])
    engine.setProperty('volume', volumes[emotion])
    # pyttsx3 writes raw WAV/AIFF data and performs no MP3 encoding, so
    # label the file .wav (the original .mp3 suffix mislabeled the data).
    # Close the delete=False handle right away to avoid leaking it.
    with NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        audio_path = temp_file.name
    engine.save_to_file(text, audio_path)
    engine.runAndWait()
    return audio_path
def generate_music(prompt):
    """Generate a music clip from a text prompt with MusicGen.

    Args:
        prompt: Text description of the desired music.

    Returns:
        Path to a temporary WAV file on success, or an
        ``"Error generating music: ..."`` string on failure.
    """
    try:
        wav = music_model.generate([prompt])
        # delete=False keeps the file for Gradio; close the handle
        # immediately so the descriptor is not leaked.
        with NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            wav_path = temp_file.name
        audio_data = wav.cpu().numpy()
        # First batch item, first channel — assumes (batch, channel, samples)
        # layout from MusicGen; TODO confirm against audiocraft docs.
        wavfile.write(wav_path, music_model.sample_rate, audio_data[0, 0])
        return wav_path
    except Exception as e:
        return f"Error generating music: {e}"
def generate_spectrogram(audio_path):
    """Render a mel-frequency spectrogram PNG for an audio file.

    Args:
        audio_path: Path to an audio file readable by librosa.

    Returns:
        Path to a temporary PNG on success, or an
        ``"Error generating spectrogram: ..."`` string on failure.
    """
    try:
        y, sr = librosa.load(audio_path, sr=None)
        # librosa >= 0.10 made these arguments keyword-only; the original
        # positional `y` raises a TypeError on modern librosa.
        S = librosa.feature.melspectrogram(y=y, sr=sr)
        S_dB = librosa.power_to_db(S, ref=np.max)
        plt.figure(figsize=(10, 4))
        librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', cmap='coolwarm')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Mel-frequency spectrogram')
        # Close the delete=False handle right away to avoid leaking it.
        with NamedTemporaryFile(delete=False, suffix=".png") as temp_image:
            image_path = temp_image.name
        plt.savefig(image_path)
        plt.close()
        return image_path
    except Exception as e:
        return f"Error generating spectrogram: {e}"
def chat_with_ai(user_input):
    """Produce a conversational reply to *user_input* using GPT-2.

    Args:
        user_input: The user's message.

    Returns:
        The decoded model output (which echoes the prompt, per GPT-2's
        `generate`), or an ``"Error in chat generation: ..."`` string.
    """
    try:
        encoded = tokenizer.encode(user_input, return_tensors="pt").to(device)
        generated = gpt2_model.generate(
            encoded,
            max_length=50,
            num_return_sequences=1,
        )
        return tokenizer.decode(generated[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error in chat generation: {e}"
def generate_video(prompt):
    """Simulate video generation by assembling 5 SD frames into a GIF.

    Args:
        prompt: Base text prompt; each frame is prompted as "<prompt> frame N".

    Returns:
        Path to a temporary GIF on success, or an error string if any
        frame failed to generate.
    """
    frames = []
    for i in range(5):  # five frames stand in for a short clip
        frame_path = generate_image(f"{prompt} frame {i + 1}")
        # generate_image reports failures as "Error ..." strings rather
        # than raising; propagate that instead of crashing in Image.open.
        if frame_path.startswith("Error"):
            return frame_path
        frames.append(Image.open(frame_path))
    # Close the delete=False handle right away to avoid leaking it.
    with NamedTemporaryFile(delete=False, suffix=".gif") as temp_video:
        video_path = temp_video.name
    frames[0].save(video_path, save_all=True, append_images=frames[1:], duration=500, loop=0)
    return video_path
def main_interface(input_text, task_type, style):
    """Dispatch a Gradio request to the matching generation pipeline.

    Args:
        input_text: The user's prompt or message.
        task_type: One of "Conversation", "Music", "Text to Audio",
            "Video Generation".
        style: Image style for Stable Diffusion prompts.

    Returns:
        A 3-tuple ``(text, audio_path_or_None, image_path_or_None)``
        matching the interface's three output components.
    """
    try:
        if task_type == "Conversation":
            response = chat_with_ai(input_text)
            image_path = generate_image(f"conversation about {input_text}", style)
            return response, None, image_path
        elif task_type == "Music":
            audio_path = generate_music(input_text)
            spectrogram_path = generate_spectrogram(audio_path)
            return "Music Generated", audio_path, spectrogram_path
        elif task_type == "Text to Audio":
            audio_path = text_to_audio(input_text)
            image_path = generate_image(f"text-to-audio conversion for {input_text}", style)
            return "Audio Generated", audio_path, image_path
        elif task_type == "Video Generation":
            video_path = generate_video(input_text)
            audio_path = generate_music(input_text)
            return "Video Generated", audio_path, video_path
        else:
            # Original code fell through here, implicitly returning a bare
            # None where Gradio expects three outputs; return explicitly.
            return "Invalid task type selected.", None, None
    except Exception as e:
        return f"Error: {e}", None, None
# Gradio interface: text prompt + task selector + style dropdown in;
# (text, audio, image) out. live=False so generation only runs on submit.
# NOTE: the stray trailing "|" after interface.launch() in the original
# (file-viewer scrape artifact) was a syntax error and has been removed.
interface = gr.Interface(
    fn=main_interface,
    inputs=[
        gr.Textbox(label="Enter Text or Prompt"),
        gr.Radio(["Conversation", "Music", "Text to Audio", "Video Generation"], label="Select Task"),
        gr.Dropdown(["realistic", "abstract", "comic"], label="Select Style"),
    ],
    outputs=[
        gr.Textbox(label="Generated Output"),
        gr.Audio(label="Generated Audio", type="filepath"),
        gr.Image(label="Generated Image", type="filepath"),
    ],
    live=False,
)

interface.launch()