"""Gradio front-end for CrispASR's OpenAI-compatible transcription server.

Designed to run as a Hugging Face Space app ("hf-space-app" in the logs);
all configuration comes from CRISPASR_* environment variables.
"""

import json
import os
import time
from pathlib import Path

import gradio as gr
import requests

SERVER_URL = os.environ.get("CRISPASR_SERVER_URL", "http://127.0.0.1:8080").rstrip("/")
SPACE_TITLE = os.environ.get("CRISPASR_SPACE_TITLE", "CrispASR")
DEFAULT_LANGUAGE = os.environ.get("CRISPASR_LANGUAGE", "en")
DEFAULT_MODEL = os.environ.get("CRISPASR_MODEL", "auto")
# First non-empty key from the comma-separated CRISPASR_API_KEYS list, if any.
API_KEY = next((key.strip() for key in os.environ.get("CRISPASR_API_KEYS", "").split(",") if key.strip()), "")

# Dropdown label -> (backend, model, default language).
MODEL_CHOICES = {
    "Whisper base multilingual (~147 MB)": ("whisper", "auto", "en"),
    "Parakeet TDT 0.6B v3 Q4_K (~467 MB)": ("parakeet", "auto", "en"),
    "Qwen3 ASR 0.6B Q4_K (~500 MB)": ("qwen3", "auto", "en"),
    "Cohere Transcribe Q4_K (~550 MB)": ("cohere", "auto", "en"),
}


def log(message: str):
    print(f"[{time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}] hf-space-app: {message}", flush=True)


def _request(method: str, path: str, **kwargs):
    """Send a request to the CrispASR server, attaching bearer auth when configured."""
    if API_KEY:
        headers = dict(kwargs.pop("headers", {}) or {})
        headers.setdefault("Authorization", f"Bearer {API_KEY}")
        kwargs["headers"] = headers
    return requests.request(method, f"{SERVER_URL}{path}", timeout=300, **kwargs)


def fetch_status():
    """Probe /health and /v1/models; return (status, health JSON, model list) strings."""
    try:
        log("fetch_status: probing /health and /v1/models")
        health = _request("GET", "/health")
        health.raise_for_status()
        models = _request("GET", "/v1/models")
        models.raise_for_status()
        health_json = health.json()
        models_json = models.json()
        model_names = [item.get("id", "") for item in models_json.get("data", [])]
        log(f"fetch_status: ready models={model_names if model_names else ['(none)']}")
        return (
            "ready",
            json.dumps(health_json, indent=2, ensure_ascii=False),
            "\n".join(model_names) if model_names else "(no models reported)",
        )
    except Exception as exc:
        log(f"fetch_status: waiting error={type(exc).__name__}: {exc}")
        return "starting", f"{type(exc).__name__}: {exc}", DEFAULT_MODEL


def wait_for_server():
    """Poll the server for up to ~5 minutes (300 one-second probes) until it is ready."""
    log("wait_for_server: start")
    last_status = "starting"
    last_health = ""
    last_models = DEFAULT_MODEL
    for i in range(300):
        last_status, last_health, last_models = fetch_status()
        if last_status == "ready":
            log(f"wait_for_server: ready after {i + 1} probe(s)")
            break
        time.sleep(1)
    if last_status != "ready":
        log("wait_for_server: timeout, app staying up in starting state")
    return last_status, last_health, last_models


def transcribe(audio_path: str, language: str, prompt: str, temperature: float, response_format: str):
    if not audio_path:
        raise gr.Error("Upload or record audio first.")
    file_path = Path(audio_path)
    if not file_path.exists():
        raise gr.Error("Audio file is not available anymore.")
    log(
        f"transcribe: file={file_path.name} language={language or 'default'} "
        f"response_format={response_format} temperature={temperature:.2f} prompt={'yes' if prompt else 'no'}"
    )
    data = {
        "model": "loaded-model",
        "response_format": response_format,
        "temperature": f"{temperature:.2f}",
    }
    if language and language != "auto":
        data["language"] = language
    if prompt:
        data["prompt"] = prompt
    with file_path.open("rb") as f:
        response = _request(
            "POST",
            "/v1/audio/transcriptions",
            files={"file": (file_path.name, f, "application/octet-stream")},
            data=data,
        )
    if response.status_code >= 400:
        log(f"transcribe: error status={response.status_code} body={response.text[:400]}")
        raise gr.Error(f"{response.status_code}: {response.text}")
    content_type = response.headers.get("content-type", "")
    log(f"transcribe: ok status={response.status_code} content_type={content_type}")
    if response_format == "verbose_json" or "application/json" in content_type:
        payload = response.json()
        text = payload.get("text", "") if isinstance(payload, dict) else ""
        log(f"transcribe: json text_len={len(text)}")
        return text, json.dumps(payload, indent=2, ensure_ascii=False)
    text = response.text.strip()
    log(f"transcribe: text text_len={len(text)}")
    return text, text


def load_model(choice: str, language: str):
    backend, model, default_language = MODEL_CHOICES.get(choice, MODEL_CHOICES["Whisper base multilingual (~147 MB)"])
    language = language or default_language
    log(f"load_model: choice={choice} backend={backend} model={model} language={language}")
    # /load takes multipart form fields; a (None, value) tuple sends a plain
    # form part rather than a file upload.
    response = _request(
        "POST",
        "/load",
        files={
            "backend": (None, backend),
            "model": (None, model),
            "language": (None, language),
        },
    )
    if response.status_code >= 400:
        log(f"load_model: error status={response.status_code} body={response.text[:400]}")
        raise gr.Error(f"{response.status_code}: {response.text}")
    status, health, models = fetch_status()
    log(f"load_model: ok backend={backend}")
    return status, health, models, language


with gr.Blocks(title=SPACE_TITLE) as demo:
    gr.Markdown(
        f"""# {SPACE_TITLE}

Offline speech transcription via CrispASR's OpenAI-compatible server.

- Server URL: `{SERVER_URL}`
- Default model: `{DEFAULT_MODEL}`
"""
    )
    with gr.Row():
        status = gr.Textbox(label="Server status", interactive=False)
        models = gr.Textbox(label="Loaded model(s)", interactive=False)
    health = gr.Code(label="/health", language="json", interactive=False)
    refresh = gr.Button("Refresh server status")
    with gr.Row():
        model_choice = gr.Dropdown(list(MODEL_CHOICES.keys()), value="Whisper base multilingual (~147 MB)", label="Model")
        load = gr.Button("Load selected model")
    with gr.Row():
        audio = gr.Audio(label="Audio", type="filepath", sources=["upload", "microphone"])
        with gr.Column():
            language = gr.Textbox(value=DEFAULT_LANGUAGE, label="Language", placeholder="auto or ISO-639-1 code")
            response_format = gr.Dropdown(
                ["text", "verbose_json"], value="verbose_json", label="Response format"
            )
            temperature = gr.Slider(0.0, 1.0, value=0.0, step=0.1, label="Temperature")
            prompt = gr.Textbox(label="Prompt", placeholder="Optional prompt or context")
    submit = gr.Button("Transcribe", variant="primary")
    transcript = gr.Textbox(label="Transcript", lines=12)
    raw = gr.Code(label="Raw response", language="json")

    refresh.click(fetch_status, outputs=[status, health, models])
    load.click(load_model, inputs=[model_choice, language], outputs=[status, health, models, language])
    submit.click(
        transcribe,
        inputs=[audio, language, prompt, temperature, response_format],
        outputs=[transcript, raw],
    )
    # Run the readiness poll when the UI loads so the status widgets start populated.
    demo.load(wait_for_server, outputs=[status, health, models])


if __name__ == "__main__":
    log(f"launch: server_url={SERVER_URL} default_model={DEFAULT_MODEL} default_language={DEFAULT_LANGUAGE}")
    demo.launch(
        server_name=os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0"),
        server_port=int(os.environ.get("GRADIO_SERVER_PORT", "7860")),
    )
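
# ---------------------------------------------------------------------------
# Direct API usage, for reference (a sketch, not executed by this app). The
# handlers above only assume the OpenAI-compatible multipart route
# /v1/audio/transcriptions; "sample.wav" below is a placeholder file name:
#
#   import requests
#   with open("sample.wav", "rb") as f:
#       r = requests.post(
#           "http://127.0.0.1:8080/v1/audio/transcriptions",
#           files={"file": ("sample.wav", f, "application/octet-stream")},
#           data={"model": "loaded-model", "response_format": "verbose_json"},
#       )
#   print(r.json().get("text", ""))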