# Alpha108's picture
# Update app.py
# 4cdf0cd verified
import os
import tempfile
import textwrap

import speech_recognition as sr
import streamlit as st
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image, ImageDraw, ImageFont
# Load Stable Diffusion
@st.cache_resource
def load_model():
    """Load the Stable Diffusion v1.5 pipeline and move it to the best device.

    Decorated with ``st.cache_resource`` so the pipeline object is built once
    and reused across Streamlit script reruns instead of being reloaded.

    Returns:
        The ``StableDiffusionPipeline`` placed on CUDA when a GPU is
        available, otherwise on the CPU.
    """
    model_id = "runwayml/stable-diffusion-v1-5"
    use_cuda = torch.cuda.is_available()
    # Half precision is only safe on the GPU: several CPU kernels are not
    # implemented for float16, so fall back to float32 when running on CPU.
    dtype = torch.float16 if use_cuda else torch.float32
    pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype)
    return pipe.to("cuda" if use_cuda else "cpu")


# Module-level pipeline shared by every call to generate_meme().
sd_pipe = load_model()
# Meme generator function
def _text_size(draw, text, font):
    """Return the (width, height) in pixels that *text* occupies in *font*."""
    try:
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    except (AttributeError, ValueError):
        # Older Pillow releases either lack textbbox() or reject bitmap
        # fonts in it; those releases still provide textsize().
        return draw.textsize(text, font=font)
    return right - left, bottom - top


def _wrap_caption(draw, text, font, max_width, max_chars=25):
    """Wrap *text* so that every line is at most *max_width* pixels wide.

    Starts from *max_chars* characters per line and narrows the wrap one
    character at a time until all lines fit, so captions that already fit
    at *max_chars* are wrapped the same way as before.
    """
    lines = textwrap.wrap(text, width=max_chars)
    for chars in range(max_chars, 0, -1):
        lines = textwrap.wrap(text, width=chars)
        if all(_text_size(draw, line, font)[0] <= max_width for line in lines):
            break
    return lines


def generate_meme(prompt, caption=""):
    """Generate an image from *prompt* and optionally draw a caption on it.

    Args:
        prompt: Text prompt passed to the module-level ``sd_pipe`` pipeline.
        caption: Optional caption. It is upper-cased, wrapped, and drawn
            centered near the top of the image in white with a black
            outline. When empty, the generated image is returned untouched.

    Returns:
        The first image produced by the pipeline, with the caption drawn
        onto it in place when one was given.
    """
    # Step 1: Generate base image
    image = sd_pipe(prompt, num_inference_steps=30).images[0]
    if not caption:
        return image

    # Step 2: Add caption text
    draw = ImageDraw.Draw(image)
    try:
        font = ImageFont.truetype("DejaVuSans-Bold.ttf", 48)
    except OSError:
        # The font file is not installed; use Pillow's built-in font.
        font = ImageFont.load_default()

    image_width, _ = image.size
    margin = 20  # top offset of the first line and side padding for wrapping
    y_text = margin
    for line in _wrap_caption(draw, caption.upper(), font, image_width - 2 * margin):
        line_width, line_height = _text_size(draw, line, font)
        draw.text(
            ((image_width - line_width) / 2, y_text),
            line,
            font=font,
            fill="white",
            stroke_width=3,
            stroke_fill="black",
        )
        y_text += line_height + 10
    return image
# Speech-to-text
def speech_to_text(audio_file):
    """Transcribe an audio file using ``Recognizer.recognize_google``.

    Args:
        audio_file: Value passed directly to ``sr.AudioFile``; the UI in this
            file passes the path of a temporary ``.wav`` file.

    Returns:
        The recognized text, or the literal string
        ``"Could not recognize speech."`` when recognition fails.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio)
    except (sr.UnknownValueError, sr.RequestError):
        # UnknownValueError: the speech was unintelligible.
        # RequestError: the recognition service could not be reached or
        # rejected the request.
        # Anything else (including Streamlit's script-control exceptions and
        # KeyboardInterrupt) is deliberately allowed to propagate.
        return "Could not recognize speech."
# Streamlit UI
# Prompt used for every meme background; the user's text only becomes the
# caption drawn on top of the generated image.
BACKGROUND_PROMPT = "funny cartoon meme background"
# Exact string speech_to_text() returns when transcription fails. It is
# non-empty, so it must be checked explicitly or it would be used as a caption.
RECOGNITION_FAILED = "Could not recognize speech."

st.title("🎭 Meme Generator (Text & Voice)")
mode = st.radio("Choose Input Mode:", ["Text", "Voice"])

if mode == "Text":
    text_input = st.text_area("Enter meme text:")
    if st.button("Generate Meme"):
        if text_input.strip():
            img = generate_meme(BACKGROUND_PROMPT, caption=text_input)
            st.image(img, caption="Generated Meme", use_column_width=True)
        else:
            st.warning("Please enter some text.")
else:
    audio_file = st.file_uploader("Upload voice file (.wav)", type=["wav"])
    if audio_file:
        # delete=False lets the file be reopened by path after this handle is
        # closed; it is removed explicitly in the finally clause below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            # getvalue() returns the whole upload regardless of the current
            # read position of the uploaded-file buffer.
            tmp_file.write(audio_file.getvalue())
            tmp_path = tmp_file.name
        try:
            text = speech_to_text(tmp_path)
        except ValueError:
            # Raised when the upload cannot be parsed as audio.
            text = None
            st.error("Could not read the uploaded file as WAV audio.")
        finally:
            os.remove(tmp_path)

        if text is not None:
            st.write(f"📝 Transcribed Text: {text}")
            if st.button("Generate Meme from Voice"):
                if text.strip() and text != RECOGNITION_FAILED:
                    img = generate_meme(BACKGROUND_PROMPT, caption=text)
                    st.image(img, caption="Generated Meme", use_column_width=True)
                else:
                    st.warning("Speech not recognized.")