# Page-export header (not part of the module): Spaces status "Sleeping",
# file size 2,598 bytes, commit d3a7a1c. Line-number gutter removed.
import os
from pathlib import Path
# URL that _modal_transcribe() POSTs the raw audio bytes to. Taken from the
# MODAL_TRANSCRIBE_URL environment variable, falling back to the hard-coded
# endpoint below; transcribe() skips the Modal step when this is empty.
MODAL_TRANSCRIBE_URL = os.environ.get(
    "MODAL_TRANSCRIBE_URL",
    "https://mitvho09--tinyworld-inference-transcribe-endpoint.modal.run",
)
# Canned transcripts: transcribe() returns one of these at random when it is
# given no audio path or when mock mode is enabled.
MOCK_SENTENCES = [
    "A street parade just showed up out of nowhere.",
    "Someone left a mysterious package on the sidewalk.",
    "The neighbors are arguing about something loud again.",
    "There's a strange light coming from the old building.",
    "I just saw something really weird down the block.",
]
def _is_mock() -> bool:
    """Report whether mock mode is switched on.

    Mock mode is on only when the ``TINYWORLD_MOCK`` environment variable is
    exactly ``"1"``; an unset variable is treated as ``"0"``.
    """
    flag = os.environ.get("TINYWORLD_MOCK", "0")
    return flag == "1"
def _backend() -> str:
    """Return the configured inference backend name in lower case.

    The name is read from the ``TINYWORLD_INFER`` environment variable and
    defaults to ``"modal"`` when the variable is unset.
    """
    configured = os.environ.get("TINYWORLD_INFER", "modal")
    return configured.lower()
def transcribe(audio_path: str | None) -> str:
    """Turn an audio file into text, trying several backends in order.

    When ``audio_path`` is empty/``None`` or ``_is_mock()`` is true, a random
    entry of ``MOCK_SENTENCES`` is returned and no backend is contacted.

    Otherwise the backends are tried in this order, each one only being
    accepted if it yields non-empty text:

    1. the local ``inference`` module, only when ``_backend()`` is ``"local"``;
    2. the Modal endpoint, only when ``MODAL_TRANSCRIBE_URL`` is non-empty;
    3. Cohere, whose result is returned as-is (even if empty).

    Any exception from a backend is printed and the next backend is tried;
    if the Cohere step raises, ``""`` is returned.
    """
    if not audio_path or _is_mock():
        import random

        return random.choice(MOCK_SENTENCES)

    if _backend() == "local":
        try:
            import inference  # ZeroGPU Whisper, imported lazily

            if local_text := inference.transcribe_audio(audio_path):
                return local_text
        except Exception as err:
            # Best-effort: report and fall through to the next backend.
            print(f"[transcribe] local failed: {err}")

    if MODAL_TRANSCRIBE_URL:
        try:
            if modal_text := _modal_transcribe(audio_path):
                return modal_text
        except Exception as err:
            print(f"[transcribe] Modal failed: {err}")

    # Last resort: whatever Cohere gives back is the final answer.
    try:
        final_text = _cohere_transcribe(audio_path)
    except Exception as err:
        print(f"[transcribe] failed: {err}")
        final_text = ""
    return final_text
def _modal_transcribe(audio_path: str) -> str:
    """POST the audio file's bytes to ``MODAL_TRANSCRIBE_URL`` and return the text.

    The file's extension (or ``".wav"`` when it has none) is sent in the
    ``x-audio-suffix`` header. The JSON reply's ``"text"`` field is returned
    stripped of surrounding whitespace; a missing or falsy field gives ``""``.

    Raises whatever the file read, the HTTP request, ``raise_for_status()``
    or JSON decoding raise; the caller is expected to handle those.
    """
    import httpx

    audio_file = Path(audio_path)
    payload = audio_file.read_bytes()
    request_headers = {"x-audio-suffix": audio_file.suffix or ".wav"}

    with httpx.Client(timeout=300.0, follow_redirects=True) as http:
        reply = http.post(
            MODAL_TRANSCRIBE_URL,
            content=payload,
            headers=request_headers,
        )
        reply.raise_for_status()
        body = reply.json()

    transcript = body.get("text") or ""
    return transcript.strip()
def _cohere_transcribe(audio_path: str) -> str:
    """Transcribe ``audio_path`` through the Cohere client.

    Returns ``""`` (after printing a notice) when the ``COHERE_API_KEY``
    environment variable is unset or empty; in that case the ``cohere``
    package is never imported and the file is never opened. Otherwise the
    file is opened in binary mode and passed to the transcription call, and
    the reply's text is returned stripped, or ``""`` if it is empty.
    """
    key = os.environ.get("COHERE_API_KEY", "")
    if not key:
        print("[transcribe] no COHERE_API_KEY set")
        return ""

    # Imported lazily so the dependency is only needed when a key is present.
    import cohere

    client = cohere.ClientV2(api_key=key)
    with open(audio_path, "rb") as audio_file:
        result = client.audio.transcriptions.create(
            model="cohere-transcribe-03-2026",
            language="en",
            file=audio_file,
        )

    if not result.text:
        return ""
    return result.text.strip()
if __name__ == "__main__":
    # Smoke check: with no audio path, transcribe() answers with a mock sentence.
    sample = transcribe(None)
    print("Mock transcribe:", sample)
|