hskwon7 committed on
Commit
3718f37
·
verified ·
1 Parent(s): ea03981

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -0
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ from PIL import Image
4
+ import io
5
+ from gtts import gTTS
6
+ import tempfile
7
+
8
# Page header: the app title followed by a one-line description of what the
# demo does with an uploaded image.
APP_TITLE = "🖼️ → 📖 Image-to-Story Demo"
APP_INTRO = "Upload an image and watch as it’s captioned, turned into a short story, and even read aloud!"

st.title(APP_TITLE)
st.write(APP_INTRO)
10
+
11
@st.cache_resource
def load_captioner():
    """Build the image-to-text pipeline used to caption uploaded images.

    Decorated with ``st.cache_resource`` so the pipeline object is created
    once and reused instead of being rebuilt on every script rerun.
    """
    captioning_pipeline = pipeline(
        "image-to-text",
        model="unography/blip-large-long-cap",
    )
    return captioning_pipeline
14
+
15
@st.cache_resource
def load_story_gen():
    """Build the GPT-2 text-generation pipeline used to write the story.

    Decorated with ``st.cache_resource`` so the pipeline object is created
    once and reused instead of being rebuilt on every script rerun.
    """
    story_pipeline = pipeline(
        "text-generation",
        model="gpt2",
        tokenizer="gpt2",
    )
    return story_pipeline
18
+
19
def _caption_text(result):
    """Return the caption string from the captioning pipeline's output.

    The pipeline output is expected to be a list of result dicts whose text
    lives under the ``"generated_text"`` key; a single dict or a plain string
    is tolerated as well. An empty or unrecognised result yields ``""``.
    """
    if isinstance(result, list):
        result = result[0] if result else ""
    if isinstance(result, dict):
        result = result.get("generated_text", "")
    return str(result).strip()


def _reset_results_if_new_upload(upload):
    """Drop cached caption/story/audio when a different file is uploaded.

    The derived results are kept in ``st.session_state`` so that reruns
    (e.g. pressing the play button) do not recompute them. Without this reset
    they would also survive a *new* upload and show results for the old image.
    """
    fingerprint = (upload.name, upload.size)
    if st.session_state.get("source_image") == fingerprint:
        return
    for stale_key in ("caption", "story", "audio_bytes"):
        if stale_key in st.session_state:
            del st.session_state[stale_key]
    st.session_state["source_image"] = fingerprint


captioner = load_captioner()
story_gen = load_story_gen()

uploaded = st.file_uploader(
    "Upload an image", type=["png", "jpg", "jpeg"], key="image"
)
if uploaded:
    _reset_results_if_new_upload(uploaded)

    img = Image.open(uploaded)
    st.image(img, use_column_width=True)

    # Caption: computed once per uploaded image, then reused across reruns.
    if "caption" not in st.session_state:
        with st.spinner("Generating caption…"):
            st.session_state.caption = _caption_text(captioner(img))
    st.write("**Caption:**", st.session_state.caption)

    # Story: the caption is used as the prompt, so the generated text
    # starts with the caption itself.
    if "story" not in st.session_state:
        with st.spinner("Spinning up a story…"):
            out = story_gen(
                st.session_state.caption,
                max_length=200,
                num_return_sequences=1,
                do_sample=True,
                top_p=0.9,
            )
        st.session_state.story = out[0]["generated_text"]
    st.write("**Story:**", st.session_state.story)

    # Audio: synthesise once and keep the encoded bytes in session state.
    # Skipped when there is no text to speak.
    if "audio_bytes" not in st.session_state and st.session_state.story.strip():
        with st.spinner("Generating audio…"):
            tts = gTTS(text=st.session_state.story, lang="en")
            buf = io.BytesIO()
            tts.write_to_fp(buf)
            st.session_state.audio_bytes = buf.getvalue()

    # Play button: hand the in-memory bytes straight to the audio widget.
    # No temporary file is written, so nothing is left behind on disk.
    if "audio_bytes" in st.session_state and st.button("🔊 Play Story Audio"):
        st.audio(st.session_state.audio_bytes, format="audio/mp3")