# app.py — Streamlit app: image → caption → children's story → spoken audio
import streamlit as st
from transformers import pipeline
from gtts import gTTS
import tempfile
import os
# function part
# img2text
def img2text(img_path):
    """Generate a one-sentence caption for the image at *img_path*.

    Args:
        img_path: Filesystem path to an image file.

    Returns:
        The caption string produced by the ViT-GPT2 captioning model.
    """
    # Build the pipeline once and cache it on the function object —
    # constructing it per call re-loads the model weights every time.
    if not hasattr(img2text, "_captioner"):
        img2text._captioner = pipeline(
            "image-to-text",
            model="nlpconnect/vit-gpt2-image-captioning"  # relatively fast and accurate
        )
    result = img2text._captioner(img_path)
    return result[0]["generated_text"]
# text2story
def text2story(scenario):
    """Generate a short children's story from a scene description.

    Args:
        scenario: A textual description (e.g. an image caption).

    Returns:
        The generated story text, with the instruction prompt removed.
    """
    # Build the pipeline once and cache it on the function object —
    # constructing it per call re-loads the model weights every time.
    if not hasattr(text2story, "_generator"):
        text2story._generator = pipeline(
            "text-generation",
            model="gpt2",            # relatively small but fast
            max_length=200,          # maximum story length (in tokens, prompt included)
            num_return_sequences=1   # number of variants to generate
        )
    prompt = f"Create a children's story based on: {scenario}"
    story = text2story._generator(prompt)[0]["generated_text"]
    # GPT-2 text-generation returns the prompt concatenated with the
    # continuation; strip the prompt so callers get only the story.
    if story.startswith(prompt):
        story = story[len(prompt):].lstrip()
    return story
# text2audio
def text2audio(story_text):
    """Convert *story_text* to spoken English and save it as an MP3.

    Args:
        story_text: The text to synthesize.

    Returns:
        Path to the generated MP3 file; the caller is responsible for
        deleting it when done.
    """
    tts = gTTS(text=story_text, lang="en")
    # mkstemp + close instead of NamedTemporaryFile(delete=False): the
    # original leaked the open file handle, and writing to a still-open
    # temp file fails on Windows.
    fd, audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    tts.save(audio_path)
    return audio_path
def main():
    """Streamlit entry point: upload an image, caption it, spin the caption
    into a children's story, and play the story as audio."""
    st.set_page_config(
        page_title="Image to Story",
        page_icon="📖"
    )
    st.header("Upload Your Image")
    uploaded_file = st.file_uploader(
        "Choose Image",
        type=["jpg", "png", "jpeg"]
    )
    # Guard clause: nothing to do until a file is uploaded.
    if not uploaded_file:
        return

    # Persist the upload to a temp path — the captioning pipeline wants a
    # filesystem path, not an in-memory buffer.
    temp_img = os.path.join(tempfile.gettempdir(), uploaded_file.name)
    with open(temp_img, "wb") as f:
        f.write(uploaded_file.getvalue())
    st.image(uploaded_file)

    # Stage 1: Image to Text
    with st.status("🖼️ Processing image..."):
        scenario = img2text(temp_img)
        st.write("Image Caption:", scenario)

    # Stage 2: Text to Story
    with st.status("📖 Generating story..."):
        story = text2story(scenario)
        st.subheader("Story")
        st.write(story)

    # Stage 3: Story to Audio
    with st.status("🔊 Converting audio..."):
        audio_path = text2audio(story)

    # Read the MP3 into memory and play it directly. The original gated
    # playback behind a button, but clicking a button reruns the whole
    # script — re-running the entire caption/story/TTS pipeline — and the
    # temp files were unlinked at the end of each run, so the file the
    # button referred to no longer existed.
    with open(audio_path, "rb") as audio_file:
        audio_bytes = audio_file.read()
    st.audio(audio_bytes, format="audio/mp3")

    # Cleanup: the audio is already in memory, so deleting is safe now.
    os.unlink(temp_img)
    os.unlink(audio_path)
if __name__ == "__main__":
main()