File size: 2,292 Bytes
38da107 598ecb7 b6eef41 2d5370d 598ecb7 38da107 2d5370d 38da107 2d5370d bc98b06 38da107 2d5370d 598ecb7 38da107 76bead5 38da107 2d5370d bc98b06 38da107 76bead5 2d5370d 598ecb7 38da107 598ecb7 38da107 bc98b06 76bead5 598ecb7 b6eef41 e698bdd bc98b06 e698bdd b6eef41 e698bdd bc98b06 e698bdd b6eef41 2d5370d 38da107 bc98b06 86cd9e8 e698bdd 38da107 bc98b06 86cd9e8 bc98b06 e698bdd 38da107 bc98b06 86cd9e8 38da107 bc98b06 e698bdd 38da107 b6eef41 598ecb7 bc98b06 b6eef41 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import streamlit as st
from transformers import pipeline
from gtts import gTTS
import tempfile
import os
# function part
# img2text
@st.cache_resource
def _load_captioner():
    """Build the image-captioning pipeline once and share it across reruns.

    Streamlit re-executes the script on every widget interaction, so
    constructing the pipeline inside ``img2text`` would reload the model each
    time. ``st.cache_resource`` keeps a single instance alive instead.
    """
    return pipeline(
        "image-to-text",
        # This model is relatively fast and accurate
        model="nlpconnect/vit-gpt2-image-captioning",
    )


def img2text(img_path):
    """Return a text caption for the image stored at ``img_path``.

    Args:
        img_path: Path of the image file handed to the captioning pipeline.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    captioner = _load_captioner()
    result = captioner(img_path)
    return result[0]["generated_text"]
# text2story
@st.cache_resource
def _load_story_generator():
    """Build the GPT-2 text-generation pipeline once and share it across reruns.

    Streamlit re-executes the script on every widget interaction; caching the
    pipeline avoids reloading the model for each generated story.
    """
    return pipeline(
        "text-generation",
        model="gpt2",  # Relatively small but fast
        max_length=200,  # Maximum story length
        num_return_sequences=1,  # Number of variants to generate
    )


def text2story(scenario):
    """Generate a short children's story from an image caption.

    Args:
        scenario: Caption text that is embedded into the generation prompt.

    Returns:
        The ``"generated_text"`` field of the first generated sequence.
        NOTE(review): with the pipeline's default settings this text is
        expected to start with the prompt itself -- confirm whether the
        prompt should be stripped before display/speech.
    """
    prompt = f"Create a children's story based on: {scenario}"
    generator = _load_story_generator()
    story = generator(prompt)[0]["generated_text"]
    return story
# text2audio
def text2audio(story_text):
    """Synthesize ``story_text`` to an MP3 file and return the file's path.

    Args:
        story_text: English text to convert to speech.

    Returns:
        Path of a newly created ``.mp3`` temporary file. The file is created
        with ``delete=False``, so the caller is responsible for removing it.

    Raises:
        Whatever ``gTTS`` raises while building or saving the audio; the
        temporary file is removed before the exception propagates.
    """
    tts = gTTS(text=story_text, lang="en")

    # Only the unique file name is needed here. Close our own handle right
    # away so no file descriptor is leaked; gTTS writes to the path itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as audio_file:
        audio_path = audio_file.name

    try:
        tts.save(audio_path)
    except Exception:
        # Do not leave an empty or partial file behind when synthesis fails.
        os.unlink(audio_path)
        raise
    return audio_path
def _build_story_assets(image_bytes, suffix):
    """Run caption -> story -> speech for one image and collect the results.

    Args:
        image_bytes: Raw bytes of the uploaded image.
        suffix: File extension (including the dot) for the temporary image.

    Returns:
        A dict with the keys ``"scenario"`` (caption), ``"story"`` (generated
        text) and ``"audio"`` (MP3 bytes).
    """
    temp_img = None
    audio_path = None
    try:
        # Let tempfile choose a unique name instead of reusing the uploaded
        # file name: that name is user-controlled and could collide with
        # another session's file in the shared temp directory.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as img_file:
            temp_img = img_file.name
            img_file.write(image_bytes)

        # Stage 1: Image to Text
        with st.status("🖼️ Processing image..."):
            scenario = img2text(temp_img)

        # Stage 2: Text to Story
        with st.status("📖 Generating story..."):
            story = text2story(scenario)

        # Stage 3: Story to Audio data
        with st.status("🔊 Converting audio..."):
            audio_path = text2audio(story)
            # Keep the audio in memory so the temp file can be removed now
            # and playback still works on later reruns.
            with open(audio_path, "rb") as audio_file:
                audio_bytes = audio_file.read()
    finally:
        # Cleanup runs even when a stage fails, so no temp files are leaked.
        for path in (temp_img, audio_path):
            if path and os.path.exists(path):
                os.unlink(path)

    return {"scenario": scenario, "story": story, "audio": audio_bytes}


def main():
    """Render the Streamlit page: upload an image, show its story, play audio."""
    st.set_page_config(
        page_title="Image to Story",
        page_icon="📖",
    )
    st.header("Upload Your Image")
    uploaded_file = st.file_uploader(
        "Choose Image",
        type=["jpg", "png", "jpeg"],
    )
    if not uploaded_file:
        return

    st.image(uploaded_file)
    image_bytes = uploaded_file.getvalue()

    # Streamlit reruns this script on every interaction, including a click on
    # the play button. Cache the results per uploaded image in the session so
    # a click replays the existing story instead of generating a new one.
    cache_key = (uploaded_file.name, hash(image_bytes))
    assets = st.session_state.get("story_assets")
    if assets is None or assets["key"] != cache_key:
        suffix = os.path.splitext(uploaded_file.name)[1]
        assets = _build_story_assets(image_bytes, suffix)
        assets["key"] = cache_key
        st.session_state["story_assets"] = assets

    st.write("Image Caption:", assets["scenario"])
    st.subheader("Story")
    st.write(assets["story"])

    # Play button
    if st.button("▶️ Play Audio Story"):
        st.audio(assets["audio"], format="audio/mp3")
# Script entry point.
if __name__ == "__main__":
    main()