Alpha108 committed on
Commit
4cdf0cd
·
verified ·
1 Parent(s): 7ca1854

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -51
app.py CHANGED
@@ -1,65 +1,86 @@
1
  import streamlit as st
2
- from audiorecorder import audiorecorder
3
- from transformers import pipeline
4
- from diffusers import StableDiffusionPipeline, DDIMScheduler
5
  import torch
6
- from tempfile import NamedTemporaryFile
 
 
7
 
8
- st.set_page_config(page_title="AI Meme Generator", page_icon="🎭")
9
- st.title("🎭 AI Meme Generator (Voice + Text)")
10
-
11
- # Load Whisper
12
- @st.cache_resource
13
- def load_asr():
14
- return pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
15
-
16
- # Load Stable Diffusion with safe scheduler
17
  @st.cache_resource
18
- def load_sd():
19
- device = "cuda" if torch.cuda.is_available() else "cpu"
20
- pipe = StableDiffusionPipeline.from_pretrained(
21
- "runwayml/stable-diffusion-v1-5", # βœ… Hugging Face model URL
22
- scheduler=DDIMScheduler.from_pretrained(
23
- "runwayml/stable-diffusion-v1-5", subfolder="scheduler"
24
- ),
25
- torch_dtype=torch.float16 if device == "cuda" else torch.float32
26
- ).to(device)
27
  return pipe
28
 
29
- asr = load_asr()
30
- sd_pipe = load_sd()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
- # Generate meme (with safe inference steps)
33
- def generate_meme(prompt):
34
- return sd_pipe(prompt, num_inference_steps=30).images[0]
35
 
36
- # Tabs
37
- tab1, tab2 = st.tabs(["πŸ“ Text to Meme", "🎀 Voice to Meme"])
38
 
39
- # ---------------- Text-to-Meme ----------------
40
- with tab1:
41
- text_input = st.text_area("Enter your meme idea")
42
- if st.button("Generate Meme", key="text_meme"):
43
  if text_input.strip():
44
- with st.spinner("Generating meme..."):
45
- img = generate_meme(f"Meme style funny cartoon with text: {text_input}")
46
- st.image(img, caption="Generated Meme")
47
  else:
48
- st.warning("Please enter some text!")
49
 
50
- # ---------------- Voice-to-Meme ----------------
51
- with tab2:
52
- st.write("🎀 Record your voice below and create a meme!")
53
- audio = audiorecorder("Click to record", "Click to stop recording")
 
 
54
 
55
- if len(audio) > 0:
56
- # Save temp audio file
57
- with NamedTemporaryFile(suffix=".wav", delete=False) as f:
58
- audio.export(f.name, format="wav")
59
- text = asr(f.name)["text"]
60
 
61
- st.success(f"Recognized Text: {text}")
62
- if st.button("Generate Meme from Voice", key="voice_meme"):
63
- with st.spinner("Transcribing and generating meme..."):
64
- img = generate_meme(f"Meme style funny cartoon with text: {text}")
65
- st.image(img, caption="Generated Meme")
 
 
1
  import streamlit as st
2
+ from diffusers import StableDiffusionPipeline
 
 
3
  import torch
4
+ from PIL import Image, ImageDraw, ImageFont
5
+ import tempfile
6
+ import speech_recognition as sr
7
 
8
+ # Load Stable Diffusion
 
 
 
 
 
 
 
 
9
  @st.cache_resource
10
def load_model():
    """Load the Stable Diffusion v1.5 pipeline on the best available device.

    The pipeline is placed on "cuda" when ``torch.cuda.is_available()``
    reports a GPU and on "cpu" otherwise. The weight dtype follows the
    device: float16 on GPU, float32 on CPU.

    Returns:
        The ``StableDiffusionPipeline`` instance, already moved to the
        selected device.
    """
    model_id = "runwayml/stable-diffusion-v1-5"
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Half precision is only requested on GPU: running float16 weights on
    # CPU is poorly supported by PyTorch and can fail or be very slow.
    dtype = torch.float16 if device == "cuda" else torch.float32
    pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype)
    return pipe.to(device)
15
 
16
+ sd_pipe = load_model()
17
+
18
+ # Meme generator function
19
def _measure_text(draw, text, font, stroke_width=0):
    """Return the ``(width, height)`` of ``text`` in pixels.

    Uses ``ImageDraw.textbbox`` and falls back to the legacy
    ``ImageDraw.textsize`` on Pillow versions where ``textbbox`` is missing
    or rejects the given font.
    """
    try:
        left, top, right, bottom = draw.textbbox(
            (0, 0), text, font=font, stroke_width=stroke_width
        )
    except (AttributeError, ValueError):
        # Older Pillow: textbbox is absent or unsupported for this font.
        return draw.textsize(text, font=font)
    return right - left, bottom - top


def generate_meme(prompt, caption=""):
    """Generate an image from ``prompt`` and draw ``caption`` on top of it.

    Args:
        prompt: Text prompt passed to the module-level ``sd_pipe`` pipeline.
        caption: Optional caption. When non-empty it is upper-cased, wrapped
            at 25 characters per line, and drawn horizontally centered from
            20 px below the top edge, in white with a black outline.

    Returns:
        The first image returned by the pipeline. The caption is drawn onto
        that same image object (it is modified in place).
    """
    import textwrap

    # Step 1: Generate base image
    image = sd_pipe(prompt, num_inference_steps=30).images[0]
    if not caption:
        return image

    # Step 2: Add caption text
    draw = ImageDraw.Draw(image)
    try:
        font = ImageFont.truetype("DejaVuSans-Bold.ttf", 48)
    except (OSError, ImportError):
        # Font file not found or FreeType support unavailable: use the
        # built-in default font instead of failing.
        font = ImageFont.load_default()

    stroke = 3
    line_gap = 10
    image_width = image.size[0]
    y_text = 20
    for line in textwrap.wrap(caption.upper(), width=25):
        line_width, line_height = _measure_text(draw, line, font, stroke)
        draw.text(
            ((image_width - line_width) / 2, y_text),
            line,
            font=font,
            fill="white",
            stroke_width=stroke,
            stroke_fill="black",
        )
        y_text += line_height + line_gap

    return image
46
+
47
+ # Speech-to-text
48
def speech_to_text(audio_file):
    """Transcribe an audio file using ``Recognizer.recognize_google``.

    Args:
        audio_file: Value passed straight to ``sr.AudioFile``; the caller in
            this file passes the path of a temporary ``.wav`` file.

    Returns:
        The recognized text, or the string "Could not recognize speech."
        when the recognizer cannot understand the audio or the recognition
        request fails.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio)
    except (sr.UnknownValueError, sr.RequestError):
        # Only the recognizer's own failure modes map to the fallback
        # message. A bare ``except`` would also swallow BaseException
        # subclasses such as KeyboardInterrupt.
        return "Could not recognize speech."
56
 
57
# Streamlit UI
# Two input modes: typed caption text, or an uploaded .wav file that is
# transcribed and used as the caption.
import os

st.title("🎭 Meme Generator (Text & Voice)")

mode = st.radio("Choose Input Mode:", ["Text", "Voice"])

if mode == "Text":
    text_input = st.text_area("Enter meme text:")
    if st.button("Generate Meme"):
        if text_input.strip():
            img = generate_meme("funny cartoon meme background", caption=text_input)
            st.image(img, caption="Generated Meme", use_column_width=True)
        else:
            st.warning("Please enter some text.")

else:
    audio_file = st.file_uploader("Upload voice file (.wav)", type=["wav"])
    if audio_file:
        # speech_to_text is given a file path, so the upload is written to
        # a temporary file first. getvalue() returns the full contents
        # regardless of the current read position of the upload.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            tmp_file.write(audio_file.getvalue())
            tmp_path = tmp_file.name

        try:
            text = speech_to_text(tmp_path)
        finally:
            # delete=False leaves the file on disk; remove it once the
            # transcription attempt is over so reruns do not pile up files.
            os.remove(tmp_path)

        st.write(f"📝 Transcribed Text: {text}")

        if st.button("Generate Meme from Voice"):
            # speech_to_text reports failure with a fixed, non-empty message;
            # treat it like an empty result rather than drawing it as a caption.
            recognized = bool(text.strip()) and text != "Could not recognize speech."
            if recognized:
                img = generate_meme("funny cartoon meme background", caption=text)
                st.image(img, caption="Generated Meme", use_column_width=True)
            else:
                st.warning("Speech not recognized.")