import gradio as gr
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
from diffusers import StableDiffusionPipeline
import torch
import librosa

# Step 1: Description-to-prompt generation using BART (or any LLM other than GPT or DeepSeek)
prompt_generator = pipeline("text2text-generation", model="facebook/bart-large-cnn")

def generate_prompt(description: str) -> str:
    # Expand a short description into a detailed image-generation prompt
    prompt = prompt_generator(f"Expand this description into a detailed prompt for an image: {description}", max_length=150)[0]["generated_text"]
    return prompt

# Step 2: Prompt-to-image generation using Stable Diffusion 2.1 base (running on CPU)
stable_diffusion = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base")
stable_diffusion.to("cpu")  # Use CPU instead of GPU

def generate_image(prompt: str, creativity: float = 0.7):
    # Map the creativity slider (0-1) onto guidance_scale: lower guidance
    # lets Stable Diffusion drift further from the literal prompt
    guidance_scale = 12.0 - 8.0 * creativity
    image = stable_diffusion(prompt, guidance_scale=guidance_scale).images[0]
    return image

# Step 3: Voice input integration using Whisper for speech-to-text
processor = WhisperProcessor.from_pretrained("openai/whisper-large")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")

def transcribe_audio(audio_path):
    # Load the recording as 16 kHz mono audio, the format Whisper expects
    speech, _ = librosa.load(audio_path, sr=16000, mono=True)
    input_features = processor(speech, sampling_rate=16000, return_tensors="pt").input_features
    predicted_ids = model.generate(input_features)
    transcription = processor.decode(predicted_ids[0], skip_special_tokens=True)
    return transcription

# Step 4: Gradio interface with multiple controllers (Textbox, Slider, Checkbox, Audio)
def process_input(description: str, creativity: float, include_background: bool):
    # Generate a detailed prompt
    prompt = generate_prompt(description)
    # Optionally extend the prompt based on the checkbox (background inclusion)
    if include_background:
        prompt += " with a detailed, vibrant background."
    # Generate an image from the prompt, using the creativity setting
    image = generate_image(prompt, creativity)
    return prompt, image

def process_audio_input(audio):
    # Convert audio to text
    description = transcribe_audio(audio)
    # Generate a prompt and image from the transcribed text
    prompt = generate_prompt(description)
    image = generate_image(prompt)
    return prompt, image

# Define Gradio components
text_input = gr.Textbox(label="Enter Description", placeholder="E.g., A magical treehouse in the sky")
creativity_slider = gr.Slider(minimum=0, maximum=1, step=0.1, label="Creativity (0 to 1)", value=0.7)
background_checkbox = gr.Checkbox(label="Include Background", value=True)
audio_input = gr.Audio(type="filepath", label="Speak your Description")

# Create the text-based interface
interface = gr.Interface(
    fn=process_input,
    inputs=[
        text_input,
        creativity_slider,
        background_checkbox,
    ],
    outputs=[
        gr.Textbox(label="Generated Prompt"),
        gr.Image(label="Generated Image"),
    ],
    title="Magical Image Generator",
    description="Enter a short description or speak it to generate a magical image! Adjust creativity and background options.",
    theme="huggingface",
)

# Add audio input for voice interaction
interface_with_audio = gr.Interface(
    fn=process_audio_input,
    inputs=[audio_input],
    outputs=[gr.Textbox(label="Generated Prompt"), gr.Image(label="Generated Image")],
    title="Magical Image Generator with Voice Input",
    description="Speak a short description and generate a magical image!",
)

# Launch the interface with multiple tabs for text and voice input
gr.TabbedInterface([interface, interface_with_audio], tab_names=["Text Input", "Voice Input"]).launch()
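
# To try this app outside the Space, a minimal local setup might look like the
# following (the package list is an assumption based on the imports above; pin
# versions as needed, and note whisper-large plus Stable Diffusion on CPU is slow):
#   pip install gradio transformers diffusers torch librosa
#   python app.py
# On a Hugging Face Space, the same packages would go in requirements.txt.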