# indivassign / app.py — commit 9efb485 ("Update app.py") by AndrewLi403, verified
import streamlit as st
from transformers import pipeline
from gtts import gTTS
import os
from io import BytesIO
from PIL import Image
import torch
# Configure the browser tab title/icon; must be the first Streamlit call in the script.
st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
@st.cache_resource
def _load_captioner():
    # Loading BLIP is expensive; cache it so Streamlit reruns (every widget
    # interaction re-executes this script) don't re-download/re-instantiate it.
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")

def img2text(image_path):
    """Generate an English caption for an image.

    Args:
        image_path: A file path or file-like object accepted by ``PIL.Image.open``
            (here, the Streamlit ``UploadedFile``).

    Returns:
        The caption string produced by the BLIP image-captioning model.
    """
    captioner = _load_captioner()
    image = Image.open(image_path)
    # The pipeline returns a list of dicts; take the first candidate's text.
    caption = captioner(image)[0]['generated_text']
    return caption
@st.cache_resource
def _load_story_generator():
    # Cache the GPT-2 pipeline across Streamlit reruns instead of reloading
    # the model on every call.
    return pipeline("text-generation", model="gpt2")

def text2story(text):
    """Expand a short caption into a longer story using GPT-2.

    Args:
        text: Prompt text (the generated image caption).

    Returns:
        The generated story, which includes the prompt as its prefix
        (GPT-2 text-generation returns prompt + continuation).
    """
    generator = _load_story_generator()
    # Use max_new_tokens rather than max_length: max_length counts the prompt
    # tokens too, so a long caption would leave little or no room to generate.
    story = generator(text, max_new_tokens=120, num_return_sequences=1)[0]['generated_text']
    return story
def text2audio(text):
    """Synthesize *text* as spoken English and return an in-memory MP3 buffer.

    Args:
        text: The story text to read aloud.

    Returns:
        A ``BytesIO`` positioned at the start of the MP3 data, ready for
        ``st.audio`` to consume.
    """
    speech = gTTS(text=text, lang='en')
    buffer = BytesIO()
    # Write straight into memory rather than a temp file on disk.
    speech.write_to_fp(buffer)
    buffer.seek(0)
    return buffer
# --- Main app flow: upload an image, caption it, expand to a story, speak it ---
st.header("Turn Your Image to Audio Story")
uploaded_file = st.file_uploader("Select an Image...", type=["png", "jpg", "jpeg"])
if uploaded_file is not None:
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Process Image to Text
    st.text('Processing image to text...')
    scenario = img2text(uploaded_file)
    st.write("**Generated Caption:**", scenario)

    # Generate Story
    st.text('Generating a story...')
    story = text2story(scenario)
    st.write("**Generated Story:**", story)

    # Convert Story to Audio
    st.text('Generating audio...')
    audio_data = text2audio(story)

    # Play button. NOTE: clicking any button reruns the whole script, so the
    # caption/story/audio are regenerated before the player appears.
    if st.button("Play Audio"):
        # gTTS produces MP3, so declare the matching MIME type
        # (the original "audio/wav" mislabels the stream).
        st.audio(audio_data, format="audio/mp3")