Spaces:
Sleeping
Sleeping
# Streamlit app: caption an uploaded image with an image-to-text model, then
# synthesize that caption as speech and offer it for playback.

import streamlit as st
from transformers import pipeline
from PIL import Image


@st.cache_resource
def _load_image_to_text():
    """Return the image-captioning pipeline, built once and then reused.

    Streamlit re-executes this script on every rerun; caching the pipeline
    as a resource avoids constructing the model again each time.
    """
    return pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")


@st.cache_resource
def _load_text_to_speech():
    """Return the text-to-speech pipeline, built once and then reused."""
    return pipeline("text-to-speech", model="facebook/mms-tts-eng")


# Load pipelines (served from Streamlit's resource cache after the first run)
image_to_text = _load_image_to_text()
text_to_speech = _load_text_to_speech()

st.title("Image-to-Text and Text-to-Speech App")

# Image uploader
uploaded_image = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

# Nothing below runs until a file has been uploaded.
if uploaded_image is not None:
    image = Image.open(uploaded_image)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Convert image to text: use the 'generated_text' of the first result.
    text_output = image_to_text(image)[0]['generated_text']
    st.write("### Extracted Text:")
    st.write(text_output)

    # Convert text to speech: the result is read as a mapping with the
    # samples under 'audio' and their rate under 'sampling_rate'.
    speech_output = text_to_speech(text_output)
    st.write("### Listen to Speech Output:")
    st.audio(
        speech_output['audio'],
        format="audio/wav",
        start_time=0,
        # Pass the model's own rate alongside the raw samples for playback.
        sample_rate=speech_output['sampling_rate'],
    )