"""Speech-to-text HTTP service that proxies audio uploads to a hosted Space.

Exposes ``POST /speech2text``: the uploaded audio file is forwarded to the
endpoint in ``CRISPER_SPACE_API`` and the returned transcript text is parsed
into per-entry chunks with start/end timestamps.
"""
import math
import os
import tempfile
from typing import Any, Dict, List, Optional

import requests
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse

app = FastAPI()

CRISPER_SPACE_API = "https://rafaaa2105-crisper-whisper.hf.space/run/predict_1"

# File extensions accepted by the upload endpoint (compared case-insensitively).
_ALLOWED_EXTENSIONS = (".mp3", ".wav")

# End-time offset used when the following line carries no usable timestamp.
_DEFAULT_CHUNK_DURATION = 0.5

# (connect, read) timeouts in seconds for the upstream call. The read timeout
# is generous on the assumption that transcription is slow, but it keeps a
# hung upstream from blocking a request forever.
_UPSTREAM_TIMEOUT = (10, 600)


def _leading_timestamp(line: str) -> Optional[float]:
    """Return the finite number inside a leading ``[...]`` marker, else None."""
    if not line.startswith("["):
        return None
    closing = line.find("]")
    if closing == -1:
        return None
    try:
        value = float(line[1:closing])
    except ValueError:
        return None
    # nan/inf parse as floats but cannot be serialised in a JSON response.
    return value if math.isfinite(value) else None


def parse_transcript(text: str) -> List[Dict[str, Any]]:
    """Parse ``[<seconds>] <text>`` lines into timestamped chunks.

    Each line that starts with a bracketed number yields one chunk. The
    chunk's end time is the timestamp of the immediately following line when
    that line is itself timestamped; otherwise it is the start time plus
    ``_DEFAULT_CHUNK_DURATION``. Lines without a valid leading timestamp are
    skipped.

    Args:
        text: Raw transcript, one entry per line.

    Returns:
        A list of ``{"text": str, "timestamp": [start, end]}`` dicts in input
        order; empty when no line carries a valid timestamp.
    """
    # Strip every line up front so the look-ahead sees the same normalised
    # form as the line being parsed.
    lines = [raw.strip() for raw in text.strip().split("\n")]
    chunks: List[Dict[str, Any]] = []
    for index, line in enumerate(lines):
        start = _leading_timestamp(line)
        if start is None:
            continue
        # Drop surrounding whitespace and separator commas around the text.
        word = line[line.index("]") + 1:].strip().strip(",").strip()
        end = None
        if index + 1 < len(lines):
            end = _leading_timestamp(lines[index + 1])
        if end is None:
            # A missing or malformed next timestamp must not discard this
            # otherwise valid chunk; fall back to a fixed duration.
            end = start + _DEFAULT_CHUNK_DURATION
        chunks.append({"text": word, "timestamp": [start, end]})
    return chunks


def _post_audio(path: str) -> requests.Response:
    """Send the audio file at *path* to the upstream Space (blocking)."""
    with open(path, "rb") as audio:
        # NOTE(review): per the requests documentation, ``json`` is ignored
        # when ``files`` is passed, so the [None, "transcribe"] payload is
        # probably never sent. The call is left unchanged because the upstream
        # contract is not visible here -- confirm and switch to ``data=`` if
        # the Space needs those fields.
        return requests.post(
            CRISPER_SPACE_API,
            files={"data": audio},
            json={"data": [None, "transcribe"]},
            timeout=_UPSTREAM_TIMEOUT,
        )


@app.post("/speech2text")
async def speech2text(file: UploadFile = File(...)):
    """Transcribe an uploaded ``.mp3``/``.wav`` file via the upstream Space.

    Returns:
        JSON with ``text`` (the raw transcript) and ``chunks`` (the output of
        ``parse_transcript``).

    Raises:
        HTTPException: 400 when the upload has no name or an unsupported
            extension; 500 when the upstream call fails, returns a non-200
            status, or returns an unexpected payload.
    """
    filename = file.filename or ""
    if not filename.lower().endswith(_ALLOWED_EXTENSIONS):
        raise HTTPException(status_code=400, detail="Only .mp3 or .wav files are supported.")

    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=filename[-4:].lower()) as tmp:
            # Record the path before writing so cleanup still runs if the
            # write itself fails.
            tmp_path = tmp.name
            tmp.write(await file.read())

        # requests is blocking; run it in a worker thread so the event loop
        # keeps serving other requests while the upstream transcribes.
        response = await run_in_threadpool(_post_audio, tmp_path)

        if response.status_code != 200:
            raise HTTPException(status_code=500, detail="Failed to get response from upstream Space")

        payload = response.json()
        data = payload.get("data") if isinstance(payload, dict) else None
        output_text = data[0] if data else ""
        if not isinstance(output_text, str):
            raise HTTPException(status_code=500, detail="Unexpected response format from upstream Space")

        chunks = parse_transcript(output_text)
        return JSONResponse(content={"text": output_text, "chunks": chunks})
    except HTTPException:
        # Preserve deliberately raised HTTP errors instead of re-wrapping them.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # The temp file is created with delete=False, so remove it explicitly.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass