|
|
import tempfile |
|
|
import requests |
|
|
from fastapi import FastAPI, UploadFile, File, HTTPException |
|
|
from fastapi.responses import JSONResponse |
|
|
|
|
|
# ASGI application object; the route handlers in this module register on it.
app = FastAPI()

# Upstream endpoint that uploaded audio is forwarded to for transcription.
CRISPER_SPACE_API = "https://rafaaa2105-crisper-whisper.hf.space/run/predict_1"
|
|
|
|
|
def _leading_timestamp(line):
    """Return the float inside a leading ``[...]`` marker, or None if absent or malformed."""
    if not line.startswith("["):
        return None
    closing = line.find("]")
    if closing == -1:
        return None
    try:
        return float(line[1:closing])
    except ValueError:
        return None


def parse_transcript(text: str):
    """Split a ``[seconds] word`` transcript into timestamped chunks.

    Each non-blank line that starts with a ``[<float>]`` marker becomes one
    chunk. The chunk's end time is the start time of the following non-blank
    line when that line carries a valid marker; otherwise it falls back to
    ``start + 0.5``. Lines without a valid marker are skipped.

    Args:
        text: Raw transcript, one entry per line. Empty/None yields ``[]``.

    Returns:
        A list of ``{"text": str, "timestamp": [start, end]}`` dicts in
        input order.
    """
    if not text:
        return []

    # Strip and drop blank lines up front so the "next line" lookup below
    # always sees the next real entry, regardless of indentation or spacing.
    stripped = (raw.strip() for raw in text.strip().split("\n"))
    lines = [line for line in stripped if line]

    chunks = []
    for i, line in enumerate(lines):
        start = _leading_timestamp(line)
        if start is None:
            continue

        word_part = line[line.index("]") + 1:].strip().strip(",")

        # A malformed following line must not discard this (valid) chunk;
        # it only means we have no better end time than the default.
        end = _leading_timestamp(lines[i + 1]) if i + 1 < len(lines) else None
        if end is None:
            end = start + 0.5

        chunks.append({"text": word_part, "timestamp": [start, end]})

    return chunks
|
|
|
|
|
@app.post("/speech2text")
async def speech2text(file: UploadFile = File(...)):
    """Transcribe an uploaded ``.mp3``/``.wav`` file via the upstream Space.

    The upload is written to a temporary file, posted to
    ``CRISPER_SPACE_API``, and the transcript in the response is split into
    timestamped chunks with ``parse_transcript``.

    Args:
        file: The uploaded audio file; its name must end in ``.mp3`` or
            ``.wav`` (case-insensitive).

    Returns:
        A ``JSONResponse`` with ``text`` (the raw transcript) and ``chunks``
        (the parsed list).

    Raises:
        HTTPException: 400 when the extension is unsupported; 500 when the
            upstream call does not return 200 or any other error occurs.
    """
    # UploadFile.filename may be None; treat that as "no extension".
    filename = file.filename or ""
    if not filename.lower().endswith((".mp3", ".wav")):
        raise HTTPException(status_code=400, detail="Only .mp3 or .wav files are supported.")
    suffix = filename[-4:].lower()

    try:
        # Default delete=True removes the temp file when the block exits, so
        # nothing is left behind on disk whether the request succeeds or fails.
        with tempfile.NamedTemporaryFile(suffix=suffix) as tmp:
            tmp.write(await file.read())
            tmp.seek(0)  # rewind so the upload below reads from the start

            # NOTE(review): requests ignores ``json=`` whenever ``files=`` is
            # passed, so the JSON payload below is never sent. Left as-is
            # because the upstream's expected wire format is not visible
            # here; confirm against the Space's API.
            response = requests.post(
                CRISPER_SPACE_API,
                files={"data": tmp},
                json={"data": [None, "transcribe"]},
                # (connect, read) seconds: avoid hanging forever on a dead upstream.
                timeout=(10, 300),
            )

        if response.status_code != 200:
            raise HTTPException(status_code=500, detail="Failed to get response from upstream Space")

        # Tolerate a missing, empty or null "data" entry as an empty transcript.
        data = response.json().get("data") or [""]
        output_text = data[0] or ""
        chunks = parse_transcript(output_text)
        return JSONResponse(content={"text": output_text, "chunks": chunks})

    except HTTPException:
        # Pass our own HTTP errors through untouched instead of re-wrapping them.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|