Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from transformers import pipeline | |
| from gtts import gTTS | |
| import os | |
| from io import BytesIO | |
| from PIL import Image | |
| import torch | |
# Browser-tab title and icon for the app.
st.set_page_config(
    page_title="Your Image to Audio Story",
    page_icon="🦜",
)
@st.cache_resource(show_spinner=False)
def _load_captioner():
    """Build the BLIP image-captioning pipeline once and reuse it afterwards."""
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


def img2text(image_path):
    """Return a text caption describing an image.

    Args:
        image_path: Anything ``PIL.Image.open`` accepts. Despite the name,
            the caller in this file passes the Streamlit upload object
            (a file-like object) rather than a filesystem path.

    Returns:
        The ``generated_text`` of the first caption the pipeline produces.
    """
    # Streamlit re-executes the whole script on every interaction, so the
    # pipeline is fetched from a cached loader instead of being rebuilt
    # on each call.
    captioner = _load_captioner()
    image = Image.open(image_path)
    return captioner(image)[0]['generated_text']
@st.cache_resource(show_spinner=False)
def _load_story_generator():
    """Build the GPT-2 text-generation pipeline once and reuse it afterwards."""
    return pipeline("text-generation", model="gpt2")


def text2story(text):
    """Extend a short prompt into a longer story.

    Args:
        text: Prompt to continue; the caller in this file passes the
            caption returned by ``img2text``.

    Returns:
        The ``generated_text`` of the single sequence requested from the
        pipeline.
    """
    # Streamlit re-executes the whole script on every interaction, so the
    # pipeline is fetched from a cached loader instead of being rebuilt
    # on each call.
    generator = _load_story_generator()
    sequences = generator(text, max_length=150, num_return_sequences=1)
    return sequences[0]['generated_text']
def text2audio(text):
    """Synthesize English speech for ``text`` with gTTS.

    Args:
        text: The text to speak.

    Returns:
        An in-memory ``BytesIO`` holding the audio gTTS wrote, rewound to
        the start so it can be read immediately.
    """
    speech = gTTS(text=text, lang='en')
    buffer = BytesIO()
    speech.write_to_fp(buffer)
    # Rewind: the write above leaves the position at the end of the data.
    buffer.seek(0)
    return buffer
# Page flow: upload an image -> caption it -> expand the caption into a
# story -> synthesize speech for the story -> offer playback.
st.header("Turn Your Image to Audio Story")
uploaded_file = st.file_uploader("Select an Image...", type=["png", "jpg", "jpeg"])

if uploaded_file is not None:
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Process Image to Text
    st.text('Processing image to text...')
    scenario = img2text(uploaded_file)
    st.write("**Generated Caption:**", scenario)

    # Generate Story
    st.text('Generating a story...')
    story = text2story(scenario)
    st.write("**Generated Story:**", story)

    # Convert Story to Audio
    st.text('Generating audio...')
    audio_data = text2audio(story)

    # Play button
    # NOTE(review): pressing the button reruns this script, so the caption,
    # story and audio above are all recomputed before the player appears —
    # consider keeping the results in st.session_state.
    if st.button("Play Audio"):
        # gTTS produces MP3 data, so advertise it as MP3 rather than WAV.
        st.audio(audio_data, format="audio/mp3")