Spaces:

isom5240
/

Ex_img2audio

Sleeping

Ex_img2audio / src /streamlit_app.py

Update src/streamlit_app.py

f1362e5 verified 7 months ago

902 Bytes

	import os
	import streamlit as st
	from transformers import pipeline
	from PIL import Image

	st.title("Image-to-Text and Text-to-Speech App")

	# Hugging Face access token taken from the environment (e.g. a Space secret).
	# It is optional: when the variable is unset or empty, None is passed and the
	# models are fetched anonymously instead of the app crashing with KeyError.
	HF_TOKEN = os.environ.get("HF_TOKEN") or None


	@st.cache_resource
	def _load_pipeline(task, model_id):
	    """Build a transformers pipeline once and reuse it across reruns.

	    Streamlit re-executes this script on every user interaction; without
	    caching, each rerun would construct (and reload the weights of) both
	    models again.

	    Args:
	        task: Pipeline task name, e.g. "image-to-text".
	        model_id: Hugging Face Hub model identifier.

	    Returns:
	        The pipeline object created by ``transformers.pipeline``.
	    """
	    return pipeline(task, model=model_id, token=HF_TOKEN)


	# Module-level pipelines used by the rest of the script.
	image_to_text = _load_pipeline(
	    "image-to-text",
	    "nlpconnect/vit-gpt2-image-captioning",
	)

	text_to_speech = _load_pipeline(
	    "text-to-speech",
	    "facebook/mms-tts-eng",
	)

	uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

	# Nothing to do until the user has actually provided a file.
	if uploaded_file is not None:
	    image = Image.open(uploaded_file)
	    st.image(image)

	    # The captioning pipeline returns a list of results; use the first one.
	    caption = image_to_text(image)[0]["generated_text"]
	    st.write("Caption:", caption)

	    # The text-to-speech pipeline returns the raw waveform as a NumPy array
	    # under "audio" together with its "sampling_rate". Dumping that array
	    # into a file does not produce a valid WAV (there is no header), so hand
	    # the samples and rate straight to st.audio, which encodes them itself.
	    # This also avoids a fixed on-disk path shared by concurrent sessions.
	    speech = text_to_speech(caption)
	    st.audio(speech["audio"], sample_rate=speech["sampling_rate"])