Spaces:

arteeguz
/

CTP_AI

Runtime error

CTP_AI / app.py

fixed audio?

764cb13 almost 2 years ago

1.68 kB

	from dotenv import find_dotenv, load_dotenv
	from transformers import pipeline
	import os
	import requests
	import streamlit as st

	# Load variables from the .env file located by find_dotenv() into the environment.
	load_dotenv(find_dotenv())
	# Token sent as the Bearer credential by text_to_speech(); None when unset.
	HUGGINGFACE_API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")

	# Image-captioning pipeline, created once at import time and used by img_to_text().
	pipe = pipeline(task="image-to-text", model="Salesforce/blip-image-captioning-base")

	# Image -> text
	def img_to_text(url):
	    """Caption an image using the module-level ``pipe`` pipeline.

	    Args:
	        url: Image reference handed straight to ``pipe`` (``main`` passes a
	            local file path).

	    Returns:
	        The ``generated_text`` value of the first pipeline result.
	    """
	    results = pipe(url)
	    caption = results[0]["generated_text"]
	    print(caption)  # echo the caption to stdout
	    return caption

	def text_to_speech(message):
	    """Synthesize speech for ``message`` and save it as ``audio.flac``.

	    Posts the text to the Hugging Face Inference API and writes the response
	    body to ``audio.flac`` in the current working directory.

	    Args:
	        message: Text to convert to speech.

	    Raises:
	        requests.HTTPError: If the API replies with a 4xx/5xx status.
	        requests.RequestException: On connection failures or timeout.
	    """
	    API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
	    headers = {"Authorization": f"Bearer {HUGGINGFACE_API_TOKEN}"}
	    payload = {"inputs": message}

	    # Without a timeout a stalled connection would block the app indefinitely.
	    response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
	    # An error reply carries a non-audio body; fail here instead of saving
	    # that body to audio.flac as if it were playable audio.
	    response.raise_for_status()

	    with open("audio.flac", "wb") as file:
	        file.write(response.content)

	def main():
	    """Render the page: caption an uploaded image and play the caption as speech.

	    Shows the banner image and a file uploader. Once an image is uploaded it
	    is displayed, captioned with ``img_to_text``, converted to audio with
	    ``text_to_speech``, and the caption and ``audio.flac`` are shown.
	    """
	    import tempfile  # local import: only this function needs it

	    st.set_page_config(page_title="Image to Text", page_icon="🎙️")

	    st.header("Image to Text")
	    # Banner image, placed in the middle of three columns.
	    image = "narrator.jpeg"
	    _, cent_co, _ = st.columns(3)
	    with cent_co:
	        st.image(image=image)
	    uploaded_file = st.file_uploader("Choose an image: ", type=["jpg", "jpeg", "png"])

	    if uploaded_file is None:
	        return

	    print(uploaded_file)
	    bytes_data = uploaded_file.getvalue()
	    st.image(uploaded_file, caption='Uploaded image', use_column_width=True)

	    # Write the upload to a temporary file rather than to the client-supplied
	    # name in the working directory: a file called e.g. "narrator.jpeg" would
	    # otherwise overwrite the app's own assets, and uploads would pile up.
	    suffix = os.path.splitext(uploaded_file.name)[1]
	    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
	        tmp.write(bytes_data)
	        image_path = tmp.name
	    try:
	        scenario = img_to_text(image_path)
	    finally:
	        # Remove the temporary copy whether or not captioning succeeded.
	        os.remove(image_path)
	    text_to_speech(scenario)

	    with st.expander("scenatio"):
	        st.write(scenario)

	    st.audio("audio.flac")


	# Script entry point: build the page when this file is executed directly.
	if __name__ == "__main__":
	    main()