Ex_img2audio / src /streamlit_app.py
isom5240's picture
Update src/streamlit_app.py
f1362e5 verified
raw
history blame contribute delete
902 Bytes
import os
import streamlit as st
from transformers import pipeline
from PIL import Image
st.title("Image-to-Text and Text-to-Speech App")

# Hugging Face access token, read from the environment. `.get` is used so a
# missing variable yields None instead of crashing the app with a KeyError at
# startup; `token=None` is forwarded unchanged to the pipeline factory.
# NOTE(review): both checkpoints below look like public models, so the token
# is treated as optional here — confirm before relying on this for gated models.
HF_TOKEN = os.environ.get("HF_TOKEN")


@st.cache_resource
def _load_pipelines(token):
    """Build the captioning and speech pipelines, cached across reruns.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, both models would be re-instantiated each
    time a user uploads a file. ``st.cache_resource`` keeps one shared
    instance per distinct ``token`` value.

    Args:
        token: Hugging Face access token, or ``None`` for anonymous access.

    Returns:
        A ``(captioner, synthesizer)`` tuple: the ``"image-to-text"`` pipeline
        and the ``"text-to-speech"`` pipeline, in that order.
    """
    captioner = pipeline(
        "image-to-text",
        model="nlpconnect/vit-gpt2-image-captioning",
        token=token,
    )
    synthesizer = pipeline(
        "text-to-speech",
        model="facebook/mms-tts-eng",
        token=token,
    )
    return captioner, synthesizer


# Module-level names kept as-is so the rest of the script can keep calling
# `image_to_text(...)` and `text_to_speech(...)` directly.
image_to_text, text_to_speech = _load_pipelines(HF_TOKEN)
# Let the user pick an image; the widget returns None until a file is chosen.
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Show the uploaded picture back to the user.
    image = Image.open(uploaded_file)
    st.image(image)

    # The captioning pipeline returns a list of results; the caption text of
    # the first one is stored under "generated_text".
    caption = image_to_text(image)[0]["generated_text"]
    st.write("Caption:", caption)

    # Synthesize speech for the caption. The result is a mapping holding the
    # waveform under "audio" and its rate under "sampling_rate".
    speech = text_to_speech(caption)

    # Hand the waveform and its sample rate straight to the audio widget.
    # Dumping speech["audio"] into a ".wav" file with a plain binary write
    # produces headerless sample bytes rather than a playable WAV, and a fixed
    # on-disk path would be shared by every concurrent session.
    # NOTE(review): assumes "audio" is a NumPy array shaped (samples,) or
    # (channels, samples), which st.audio accepts — confirm for this model.
    st.audio(speech["audio"], sample_rate=speech["sampling_rate"])