# Hugging Face Spaces page-header residue ("Spaces · Running on Zero"),
# kept as a comment so the file remains valid Python.
import gradio as gr
from huggingface_hub.utils import get_token
import requests
import base64
# Project-local module: exposes the checkpoint id and a local (in-process)
# transcription function used as an alternative to the Inference API.
from model import model_id, transcribe_audio_local
# HF API token resolved at import time from the environment / cached CLI
# login; used to authenticate Inference API calls in transcribe_audio.
token = get_token()
def read_file_as_base64(file_path: str) -> str:
    """Return the contents of the file at *file_path* as a base64 ASCII string."""
    with open(file_path, "rb") as handle:
        raw = handle.read()
    return base64.b64encode(raw).decode()
def transcribe_audio(audio: str) -> str:
    """Transcribe an audio file using the Hugging Face Inference API.

    Args:
        audio: Local filesystem path to the uploaded/recorded clip, or
            None when Gradio has not finished uploading it yet.

    Returns:
        The transcription text returned by the API.

    Raises:
        gr.Error: If no audio is available yet, or the API reports an error.
    """
    print(f"{audio=}")
    if audio is None:
        raise gr.Error(
            "Please wait a moment for the audio to be uploaded, then click the button again."
        )
    b64 = read_file_as_base64(audio)
    url = f"https://api-inference.huggingface.co/models/{model_id}"
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        # Ask the API to queue the request until the model is loaded
        # instead of failing immediately with a 503 on cold start.
        "x-wait-for-model": "true",
    }
    data = {
        "inputs": b64,
        "parameters": {
            "generate_kwargs": {
                "num_beams": 5,
            }
        },
    }
    # Timeout prevents the UI thread from hanging forever if the API stalls.
    response = requests.post(url, headers=headers, json=data, timeout=120)
    out = response.json()
    print(f"{out=}")
    # On failure the API returns a JSON body like {"error": ...} rather than
    # {"text": ...}; surface that to the user instead of crashing with a
    # bare KeyError.
    if "text" not in out:
        raise gr.Error(f"Inference API error: {out.get('error', out)}")
    return out["text"]
# UI layout: an audio input next to a text output, two transcribe buttons
# (local Transformers pipeline vs. remote Inference API), and cached examples.
with gr.Blocks() as demo:
    gr.Markdown("# TWASR: Chinese (Taiwan) Automatic Speech Recognition.")
    gr.Markdown("Upload an audio file or record your voice to transcribe it to text.")
    gr.Markdown(
        "First load may take a while to initialize the model, following requests will be faster."
    )
    with gr.Row():
        # type="filepath" makes both transcribe functions receive a path string.
        audio_input = gr.Audio(
            label="Audio", type="filepath", show_download_button=True
        )
        text_output = gr.Textbox(label="Transcription")
    # Local in-process transcription (from the project's model module).
    transcribe_local_button = gr.Button(
        "Transcribe with Transformers", variant="primary"
    )
    # Remote transcription via the hosted Inference API.
    transcribe_button = gr.Button("Transcribe with Inference API", variant="secondary")
    transcribe_local_button.click(
        fn=transcribe_audio_local, inputs=[audio_input], outputs=[text_output]
    )
    transcribe_button.click(
        fn=transcribe_audio, inputs=[audio_input], outputs=[text_output]
    )
    # Example clips; outputs are computed on first click and cached lazily
    # (cache_mode="lazy") rather than precomputed at startup.
    gr.Examples(
        [
            ["./examples/audio1.mp3"],
            ["./examples/audio2.mp3"],
        ],
        inputs=[audio_input],
        outputs=[text_output],
        fn=transcribe_audio_local,
        cache_examples=True,
        cache_mode="lazy",
        run_on_click=True,
    )
    gr.Markdown(
        f"Current model: {model_id}. For more information, visit the [model hub](https://huggingface.co/{model_id})."
    )
if __name__ == "__main__":
    demo.launch()