# Hugging Face Spaces page-header residue ("Spaces · Running on Zero"),
# kept as a comment so the file remains valid Python.
import gradio as gr
from huggingface_hub.utils import get_token
import requests
import base64
# Project-local module: exposes the checkpoint id and a local (in-process)
# transcription function used as an alternative to the Inference API.
from model import model_id, transcribe_audio_local
# HF API token resolved at import time from the environment / cached CLI
# login; used to authenticate Inference API calls in transcribe_audio.
token = get_token()
def read_file_as_base64(file_path: str) -> str:
    """Return the contents of the file at *file_path* as a base64 ASCII string."""
    with open(file_path, "rb") as handle:
        raw = handle.read()
    return base64.b64encode(raw).decode()
def transcribe_audio(audio: str) -> str:
    """Transcribe an audio file using the Hugging Face Inference API.

    Args:
        audio: Local filesystem path to the uploaded/recorded clip, or
            None when Gradio has not finished uploading it yet.

    Returns:
        The transcription text returned by the API.

    Raises:
        gr.Error: If no audio is available yet, or the API reports an error.
    """
    print(f"{audio=}")
    if audio is None:
        raise gr.Error(
            "Please wait a moment for the audio to be uploaded, then click the button again."
        )
    b64 = read_file_as_base64(audio)
    url = f"https://api-inference.huggingface.co/models/{model_id}"
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        # Ask the API to queue the request until the model is loaded
        # instead of failing immediately with a 503 on cold start.
        "x-wait-for-model": "true",
    }
    data = {
        "inputs": b64,
        "parameters": {
            "generate_kwargs": {
                "num_beams": 5,
            }
        },
    }
    # Timeout prevents the UI thread from hanging forever if the API stalls.
    response = requests.post(url, headers=headers, json=data, timeout=120)
    out = response.json()
    print(f"{out=}")
    # On failure the API returns a JSON body like {"error": ...} rather than
    # {"text": ...}; surface that to the user instead of crashing with a
    # bare KeyError.
    if "text" not in out:
        raise gr.Error(f"Inference API error: {out.get('error', out)}")
    return out["text"]
# UI layout: an audio input next to a text output, two transcribe buttons
# (local Transformers pipeline vs. remote Inference API), and cached examples.
with gr.Blocks() as demo:
    gr.Markdown("# TWASR: Chinese (Taiwan) Automatic Speech Recognition.")
    gr.Markdown("Upload an audio file or record your voice to transcribe it to text.")
    gr.Markdown(
        "First load may take a while to initialize the model, following requests will be faster."
    )
    with gr.Row():
        # type="filepath" makes both transcribe functions receive a path string.
        audio_input = gr.Audio(
            label="Audio", type="filepath", show_download_button=True
        )
        text_output = gr.Textbox(label="Transcription")
    # Local in-process transcription (from the project's model module).
    transcribe_local_button = gr.Button(
        "Transcribe with Transformers", variant="primary"
    )
    # Remote transcription via the hosted Inference API.
    transcribe_button = gr.Button("Transcribe with Inference API", variant="secondary")
    transcribe_local_button.click(
        fn=transcribe_audio_local, inputs=[audio_input], outputs=[text_output]
    )
    transcribe_button.click(
        fn=transcribe_audio, inputs=[audio_input], outputs=[text_output]
    )
    # Example clips; outputs are computed on first click and cached lazily
    # (cache_mode="lazy") rather than precomputed at startup.
    gr.Examples(
        [
            ["./examples/audio1.mp3"],
            ["./examples/audio2.mp3"],
        ],
        inputs=[audio_input],
        outputs=[text_output],
        fn=transcribe_audio_local,
        cache_examples=True,
        cache_mode="lazy",
        run_on_click=True,
    )
    gr.Markdown(
        f"Current model: {model_id}. For more information, visit the [model hub](https://huggingface.co/{model_id})."
    )
if __name__ == "__main__":
    demo.launch()