Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from PIL import Image | |
| from transformers import pipeline | |
| import io | |
| import base64 | |
# Configure the page (title and wide layout), then render the main heading.
st.set_page_config(
    page_title="Image to Speech App",
    layout="wide",
)

st.title("Image to Text to Speech Converter")
# Initialize the pipelines
@st.cache_resource
def load_models():
    """Build the image-to-text and text-to-speech pipelines.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching both models would be constructed again on each upload
    or button click. ``st.cache_resource`` keeps the returned pipeline
    objects alive and hands the same instances back on later reruns.

    Returns:
        A ``(image_to_text, text_to_speech)`` tuple of ``transformers``
        pipelines.
    """
    image_to_text = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
    )
    text_to_speech = pipeline(
        "text-to-speech",
        model="microsoft/speecht5_tts",
    )
    return image_to_text, text_to_speech
# Obtain the two pipelines used by the rest of the script.
image_to_text, text_to_speech = load_models()

# Image upload
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Display the uploaded image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # NOTE(review): the configured model is an image *captioning* model, so
    # the "extracted text" shown below is a generated caption rather than
    # text read out of the picture -- confirm the UI wording matches intent.
    with st.spinner("Extracting text from image..."):
        result = image_to_text(image)
        extracted_text = result[0]["generated_text"]

    st.write("Extracted Text:")
    st.write(extracted_text)

    # Generate speech from text
    if st.button("Convert to Speech"):
        with st.spinner("Generating speech..."):
            speech = text_to_speech(extracted_text)

        # The pipeline output holds raw samples under "audio" plus their
        # "sampling_rate"; it is not an encoded WAV file. Calling bytes() on
        # the samples and labelling the result audio/wav produces a stream
        # with no header that browsers cannot play, so pass the samples and
        # the rate to st.audio and let Streamlit do the encoding.
        st.audio(speech["audio"], sample_rate=speech["sampling_rate"])

# Add instructions
with st.expander("How to use"):
    st.write("""
    1. Upload an image containing text using the file uploader
    2. Wait for the text to be extracted from the image
    3. Click 'Convert to Speech' to generate and play the audio
    """)