Spaces:

build-small-hackathon
/

tinyworld

Sleeping

App Files Files Community

tinyworld / transcribe.py

sush0401

TinyWorld + Crisis Mode, ZeroGPU in-process inference

d3a7a1c verified 15 days ago

Raw

History Blame Contribute Delete

2.6 kB

	import os
	from pathlib import Path

	# URL that _modal_transcribe() POSTs raw audio bytes to. Can be overridden
	# with the MODAL_TRANSCRIBE_URL environment variable; setting that variable
	# to an empty string makes transcribe() skip the Modal backend entirely.
	MODAL_TRANSCRIBE_URL = os.environ.get(
	"MODAL_TRANSCRIBE_URL",
	"https://mitvho09--tinyworld-inference-transcribe-endpoint.modal.run",
	)

	# Canned transcripts. transcribe() returns one of these at random when it is
	# called without an audio path or when mock mode (TINYWORLD_MOCK=1) is on.
	MOCK_SENTENCES = [
	"A street parade just showed up out of nowhere.",
	"Someone left a mysterious package on the sidewalk.",
	"The neighbors are arguing about something loud again.",
	"There's a strange light coming from the old building.",
	"I just saw something really weird down the block.",
	]


	def _is_mock() -> bool:
	    """Report whether mock mode is enabled.

	    Mock mode is on only when the ``TINYWORLD_MOCK`` environment variable is
	    exactly ``"1"``; an unset variable or any other value means off. The
	    variable is read on every call, so changes take effect immediately.
	    """
	    flag = os.environ.get("TINYWORLD_MOCK", "0")
	    return flag == "1"


	def _backend() -> str:
	    """Return the configured inference backend name, lower-cased.

	    The name comes from the ``TINYWORLD_INFER`` environment variable and
	    defaults to ``"modal"`` when the variable is unset. It is read on every
	    call rather than cached.
	    """
	    configured = os.environ.get("TINYWORLD_INFER", "modal")
	    return configured.lower()


	def transcribe(audio_path: str \| None) -> str:
	if not audio_path or _is_mock():
	import random
	return random.choice(MOCK_SENTENCES)

	if _backend() == "local":
	try:
	import inference # ZeroGPU Whisper, imported lazily
	text = inference.transcribe_audio(audio_path)
	if text:
	return text
	except Exception as e:
	print(f"[transcribe] local failed: {e}")

	if MODAL_TRANSCRIBE_URL:
	try:
	text = _modal_transcribe(audio_path)
	if text:
	return text
	except Exception as e:
	print(f"[transcribe] Modal failed: {e}")

	try:
	return _cohere_transcribe(audio_path)
	except Exception as e:
	print(f"[transcribe] failed: {e}")
	return ""


	def _modal_transcribe(audio_path: str) -> str:
	    """Send the audio file to the Modal endpoint and return the transcript.

	    The file's raw bytes are POSTed to ``MODAL_TRANSCRIBE_URL`` with an
	    ``x-audio-suffix`` header carrying the file extension (``.wav`` when the
	    path has none). The JSON reply's ``"text"`` field is returned stripped;
	    a missing or empty field yields ``""``.

	    Args:
	        audio_path: Path of the audio file to upload.

	    Returns:
	        The stripped transcript, possibly empty.

	    Raises:
	        Whatever the file read or ``httpx`` raises, including the error from
	        ``raise_for_status()`` on a non-success response. The caller is
	        expected to handle these.
	    """
	    import httpx  # imported lazily, only when this backend is actually used

	    source = Path(audio_path)
	    payload = source.read_bytes()
	    suffix_header = {"x-audio-suffix": source.suffix or ".wav"}

	    # Long timeout (300 s) and redirect following are kept from the original.
	    with httpx.Client(timeout=300.0, follow_redirects=True) as http:
	        reply = http.post(
	            MODAL_TRANSCRIBE_URL,
	            content=payload,
	            headers=suffix_header,
	        )
	        reply.raise_for_status()

	    transcript = reply.json().get("text")
	    return (transcript or "").strip()


	def _cohere_transcribe(audio_path: str) -> str:
	    """Transcribe the audio file through the Cohere client.

	    Reads the API key from the ``COHERE_API_KEY`` environment variable. When
	    the key is unset or empty, a notice is printed and ``""`` is returned
	    without importing ``cohere`` or touching the file.

	    Args:
	        audio_path: Path of the audio file to upload.

	    Returns:
	        The stripped transcript text, or ``""`` when the key is missing or
	        the response carries no text.
	    """
	    key = os.environ.get("COHERE_API_KEY", "")
	    if not key:
	        print("[transcribe] no COHERE_API_KEY set")
	        return ""

	    import cohere  # imported lazily so the dependency is optional

	    client = cohere.ClientV2(api_key=key)

	    with open(audio_path, "rb") as audio_file:
	        result = client.audio.transcriptions.create(
	            model="cohere-transcribe-03-2026",
	            language="en",
	            file=audio_file,
	        )

	    if not result.text:
	        return ""
	    return result.text.strip()


	if __name__ == "__main__":
	    # Smoke test: with no audio path, transcribe() returns a mock sentence
	    # without contacting any backend.
	    sample = transcribe(None)
	    print("Mock transcribe:", sample)