import json
import os
import time
from pathlib import Path
import gradio as gr
import requests
SERVER_URL = os.environ.get("CRISPASR_SERVER_URL", "http://127.0.0.1:8080").rstrip("/")
SPACE_TITLE = os.environ.get("CRISPASR_SPACE_TITLE", "CrispASR")
DEFAULT_LANGUAGE = os.environ.get("CRISPASR_LANGUAGE", "en")
DEFAULT_MODEL = os.environ.get("CRISPASR_MODEL", "auto")
API_KEY = next((key.strip() for key in os.environ.get("CRISPASR_API_KEYS", "").split(",") if key.strip()), "")
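# Illustrative environment for a private deployment (example values, not defaults):
#   CRISPASR_SERVER_URL=http://crispasr:8080
#   CRISPASR_API_KEYS=key-one,key-two   # only the first non-empty key is used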
# Display label -> (backend, model, default language).
MODEL_CHOICES = {
    "Whisper base multilingual (~147 MB)": ("whisper", "auto", "en"),
    "Parakeet TDT 0.6B v3 Q4_K (~467 MB)": ("parakeet", "auto", "en"),
    "Qwen3 ASR 0.6B Q4_K (~500 MB)": ("qwen3", "auto", "en"),
    "Cohere Transcribe Q4_K (~550 MB)": ("cohere", "auto", "en"),
}
def log(message: str):
    print(f"[{time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}] hf-space-app: {message}", flush=True)
def _request(method: str, path: str, **kwargs):
    """Send an HTTP request to the CrispASR server, attaching the API key if set."""
    if API_KEY:
        headers = dict(kwargs.pop("headers", {}) or {})
        headers.setdefault("Authorization", f"Bearer {API_KEY}")
        kwargs["headers"] = headers
    return requests.request(method, f"{SERVER_URL}{path}", timeout=300, **kwargs)
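# Usage sketch: _request("GET", "/health") issues GET {SERVER_URL}/health with a
# 300-second timeout, presumably to accommodate cold starts and long uploads.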
def fetch_status():
    """Probe the server and return (status, health JSON, loaded model names)."""
    try:
        log("fetch_status: probing /health and /v1/models")
        health = _request("GET", "/health")
        health.raise_for_status()
        models = _request("GET", "/v1/models")
        models.raise_for_status()
        health_json = health.json()
        models_json = models.json()
        model_names = [item.get("id", "") for item in models_json.get("data", [])]
        log(f"fetch_status: ready models={model_names if model_names else ['(none)']}")
        return (
            "ready",
            json.dumps(health_json, indent=2, ensure_ascii=False),
            "\n".join(model_names) if model_names else "(no models reported)",
        )
    except Exception as exc:
        log(f"fetch_status: waiting error={type(exc).__name__}: {exc}")
        return "starting", f"{type(exc).__name__}: {exc}", DEFAULT_MODEL
def wait_for_server():
    """Poll the server until it reports ready, returning the last observed state."""
    log("wait_for_server: start")
    last_status = "starting"
    last_health = ""
    last_models = DEFAULT_MODEL
    for i in range(300):
        last_status, last_health, last_models = fetch_status()
        if last_status == "ready":
            log(f"wait_for_server: ready after {i + 1} probe(s)")
            break
        time.sleep(1)
    if last_status != "ready":
        log("wait_for_server: timeout, app staying up in starting state")
    return last_status, last_health, last_models
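# 300 probes at one-second intervals give the backing server roughly five
# minutes to come up before the UI is left in the "starting" state.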
def transcribe(audio_path: str, language: str, prompt: str, temperature: float, response_format: str):
    """Upload audio to /v1/audio/transcriptions and return (text, raw response)."""
    if not audio_path:
        raise gr.Error("Upload or record audio first.")
    file_path = Path(audio_path)
    if not file_path.exists():
        raise gr.Error("Audio file is no longer available.")
    log(
        f"transcribe: file={file_path.name} language={language or 'default'} "
        f"response_format={response_format} temperature={temperature:.2f} prompt={'yes' if prompt else 'no'}"
    )
    data = {
        "model": "loaded-model",  # placeholder id; the server presumably uses whichever model is loaded
        "response_format": response_format,
        "temperature": f"{temperature:.2f}",
    }
    if language and language != "auto":
        data["language"] = language
    if prompt:
        data["prompt"] = prompt
    with file_path.open("rb") as f:
        response = _request(
            "POST",
            "/v1/audio/transcriptions",
            files={"file": (file_path.name, f, "application/octet-stream")},
            data=data,
        )
    if response.status_code >= 400:
        log(f"transcribe: error status={response.status_code} body={response.text[:400]}")
        raise gr.Error(f"{response.status_code}: {response.text}")
    content_type = response.headers.get("content-type", "")
    log(f"transcribe: ok status={response.status_code} content_type={content_type}")
    if response_format == "verbose_json" or "application/json" in content_type:
        payload = response.json()
        text = payload.get("text", "") if isinstance(payload, dict) else ""
        log(f"transcribe: json text_len={len(text)}")
        return text, json.dumps(payload, indent=2, ensure_ascii=False)
    text = response.text.strip()
    log(f"transcribe: text text_len={len(text)}")
    return text, text
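# A minimal curl sketch of the same request (assumed: default local server URL,
# an audio file named sample.wav, and <api-key> only when CRISPASR_API_KEYS is set):
#
#   curl -X POST http://127.0.0.1:8080/v1/audio/transcriptions \
#        -H "Authorization: Bearer <api-key>" \
#        -F file=@sample.wav \
#        -F model=loaded-model \
#        -F response_format=verbose_json \
#        -F temperature=0.00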
def load_model(choice: str, language: str):
    """Ask the server to load the selected backend/model, then refresh status."""
    backend, model, default_language = MODEL_CHOICES.get(choice, MODEL_CHOICES["Whisper base multilingual (~147 MB)"])
    language = language or default_language
    log(f"load_model: choice={choice} backend={backend} model={model} language={language}")
    response = _request(
        "POST",
        "/load",
        files={
            "backend": (None, backend),
            "model": (None, model),
            "language": (None, language),
        },
    )
    if response.status_code >= 400:
        log(f"load_model: error status={response.status_code} body={response.text[:400]}")
        raise gr.Error(f"{response.status_code}: {response.text}")
    status, health, models = fetch_status()
    log(f"load_model: ok backend={backend}")
    return status, health, models, language
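# The (None, value) tuples above make requests emit plain multipart form fields
# rather than file attachments; an equivalent curl sketch:
#
#   curl -X POST http://127.0.0.1:8080/load \
#        -F backend=whisper -F model=auto -F language=en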
with gr.Blocks(title=SPACE_TITLE) as demo:
    gr.Markdown(
        f"""# {SPACE_TITLE}
Offline speech transcription via CrispASR's OpenAI-compatible server.
- Server URL: `{SERVER_URL}`
- Default model: `{DEFAULT_MODEL}`
"""
    )
    with gr.Row():
        status = gr.Textbox(label="Server status", interactive=False)
        models = gr.Textbox(label="Loaded model(s)", interactive=False)
    health = gr.Code(label="/health", language="json", interactive=False)
    refresh = gr.Button("Refresh server status")
    with gr.Row():
        model_choice = gr.Dropdown(list(MODEL_CHOICES.keys()), value="Whisper base multilingual (~147 MB)", label="Model")
        load = gr.Button("Load selected model")
    with gr.Row():
        audio = gr.Audio(label="Audio", type="filepath", sources=["upload", "microphone"])
        with gr.Column():
            language = gr.Textbox(value=DEFAULT_LANGUAGE, label="Language", placeholder="auto or ISO-639-1 code")
            response_format = gr.Dropdown(
                ["text", "verbose_json"], value="verbose_json", label="Response format"
            )
            temperature = gr.Slider(0.0, 1.0, value=0.0, step=0.1, label="Temperature")
            prompt = gr.Textbox(label="Prompt", placeholder="Optional prompt or context")
    submit = gr.Button("Transcribe", variant="primary")
    transcript = gr.Textbox(label="Transcript", lines=12)
    raw = gr.Code(label="Raw response", language="json")
    refresh.click(fetch_status, outputs=[status, health, models])
    load.click(load_model, inputs=[model_choice, language], outputs=[status, health, models, language])
    submit.click(
        transcribe,
        inputs=[audio, language, prompt, temperature, response_format],
        outputs=[transcript, raw],
    )
    demo.load(wait_for_server, outputs=[status, health, models])
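# Note: demo.load runs wait_for_server on every page visit, so the first render
# may block until the backing server is ready (or the poll loop times out).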
if __name__ == "__main__":
    log(f"launch: server_url={SERVER_URL} default_model={DEFAULT_MODEL} default_language={DEFAULT_LANGUAGE}")
    demo.launch(
        server_name=os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0"),
        server_port=int(os.environ.get("GRADIO_SERVER_PORT", "7860")),
    )
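# Quick start (assumed layout): launch the CrispASR server on port 8080 first,
# then run this script and open http://localhost:7860 in a browser.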