"""Gradio microphone demo that transcribes speech with Whisper large-v3.

A recording captured in the browser is encoded as WAV in memory and posted to
the Hugging Face Inference API; the text returned by the API is displayed in
the interface.
"""

import io
import os

import gradio as gr
import numpy as np
import requests
import soundfile as sf

# Read eagerly so a missing token fails at startup (KeyError) rather than on
# the first transcription request.
HF_ACCESS_TOKEN = os.environ['HF_ACCESS_TOKEN']
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
headers = {"Authorization": f"Bearer {HF_ACCESS_TOKEN}"}

# Upper bound (seconds) on a single inference call, so a stalled connection
# cannot block a worker forever. The value is a judgement call, not an API
# requirement.
REQUEST_TIMEOUT_SECONDS = 120


def query(audio_data):
    """Send a recording to the inference API and return the transcribed text.

    Args:
        audio_data: A ``(sample_rate, samples)`` pair. Element 0 is passed to
            ``soundfile.write`` as the sample rate and element 1 as the data.

    Returns:
        The value of the ``"text"`` field of the API's JSON response.

    Raises:
        requests.RequestException: On a network failure or timeout.
        RuntimeError: If the response body is not JSON or has no ``"text"``
            field (for example an ``{"error": ...}`` payload).
    """
    sample_rate, samples = audio_data[0], audio_data[1]

    # Encode to WAV entirely in memory; nothing is written to disk.
    with io.BytesIO() as buffer:
        sf.write(buffer, samples, sample_rate, format='wav')
        payload = buffer.getvalue()

    response = requests.post(
        API_URL,
        headers=headers,
        data=payload,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )

    try:
        result = response.json()
    except ValueError as err:
        raise RuntimeError(
            f"Inference API returned a non-JSON response "
            f"(HTTP {response.status_code})"
        ) from err

    # Error responses carry no "text" key; report what the API said instead
    # of failing with an opaque KeyError.
    if not isinstance(result, dict) or 'text' not in result:
        detail = result.get('error', result) if isinstance(result, dict) else result
        raise RuntimeError(
            f"Inference API error (HTTP {response.status_code}): {detail}"
        )
    return result['text']


def transcribe(audio_data):
    """Normalize a microphone recording and return its transcription.

    Args:
        audio_data: The value delivered by the microphone input: either
            ``None`` or a ``(sample_rate, samples)`` pair where ``samples``
            is a NumPy array.

    Returns:
        The transcribed text, or ``None`` when no audio was received.

    Raises:
        gr.Error: If the inference request fails, so the reason is shown in
            the interface.
    """
    print("Received audio data:", audio_data)

    if audio_data is None:
        print("Audio data is None. Check the microphone and input configuration.")
        return None

    sr, y = audio_data
    if y.size == 0:
        # An empty capture carries nothing to transcribe; treat it like a
        # missing recording instead of letting np.max fail on an empty array.
        print("Audio data is None. Check the microphone and input configuration.")
        return None

    # Peak-normalize to [-1, 1]. Skip the division for pure silence, where
    # the peak is 0 and dividing would fill the signal with NaNs.
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    try:
        # Send the normalized samples; previously they were computed and
        # then discarded in favour of the raw input.
        return query((sr, y))
    except (requests.RequestException, RuntimeError) as err:
        raise gr.Error(f"Transcription failed: {err}") from err


dark_minimalist = gr.Theme.from_hub("Taithrah/Minimal")

iface = gr.Interface(
    theme=dark_minimalist,
    fn=transcribe,
    inputs=gr.Microphone(label="Speak into the microphone"),
    outputs="text",
    allow_flagging="never",
    # Hide the Gradio footer and scrollbars for a cleaner embedded look.
    css="""
    footer {
        visibility: hidden;
    }
    .content-container::-webkit-scrollbar {
        display: none;
    }
    body {
        overflow: hidden !important;
    }
    """,
)

iface.launch()