File size: 1,909 Bytes
48cbe41
059fca6
48cbe41
 
 
 
059fca6
48cbe41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
059fca6
48cbe41
059fca6
 
48cbe41
 
 
 
 
059fca6
 
48cbe41
 
059fca6
48cbe41
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import tempfile
import requests
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import JSONResponse

# ASGI application object; the route below is registered on it via @app.post.
app = FastAPI()
# Upstream endpoint that speech2text() POSTs the uploaded audio to.
CRISPER_SPACE_API = "https://rafaaa2105-crisper-whisper.hf.space/run/predict_1"

def _split_timestamp_line(line: str):
    """Split a ``"[<seconds>] <text>"`` line into ``(seconds, text)``.

    Returns ``None`` when the line is not of that shape (no leading ``[``,
    no closing ``]``, or a stamp that is not a valid float).
    """
    line = line.strip()
    if not line.startswith("["):
        return None
    stamp, closing, remainder = line[1:].partition("]")
    if not closing:
        return None
    try:
        seconds = float(stamp)
    except ValueError:
        return None
    # Drop surrounding whitespace first, then any leading/trailing commas.
    return seconds, remainder.strip().strip(",")


def parse_transcript(text: str):
    """Convert a timestamped transcript into a list of chunks.

    Each line of the form ``[<start>] <text>`` becomes
    ``{"text": <text>, "timestamp": [<start>, <end>]}``. ``<end>`` is the
    start time of the immediately following line when that line carries a
    valid timestamp, otherwise ``<start> + 0.5``. Lines that do not match
    the pattern are skipped.

    Args:
        text: Raw transcript, one entry per line. ``None`` or an empty
            string yields an empty list.

    Returns:
        A list of chunk dictionaries in input order.
    """
    if not text:
        return []

    lines = text.strip().split("\n")
    chunks = []
    for position, raw_line in enumerate(lines):
        current = _split_timestamp_line(raw_line)
        if current is None:
            continue
        start, words = current

        # Only the directly following line is consulted for the end time; a
        # malformed or missing neighbour must not discard this valid entry.
        following = (
            _split_timestamp_line(lines[position + 1])
            if position + 1 < len(lines)
            else None
        )
        end = following[0] if following is not None else start + 0.5

        chunks.append({"text": words, "timestamp": [start, end]})
    return chunks

@app.post("/speech2text")
async def speech2text(file: UploadFile = File(...)):
    """Transcribe an uploaded ``.mp3``/``.wav`` file via the upstream Space.

    The upload is spooled to a temporary file, POSTed to
    ``CRISPER_SPACE_API``, and the first element of the upstream ``data``
    list is parsed with ``parse_transcript``.

    Args:
        file: The uploaded audio file.

    Returns:
        A JSON response with ``text`` (the raw upstream transcript) and
        ``chunks`` (the parsed per-line entries).

    Raises:
        HTTPException: 400 when the file extension is not ``.mp3``/``.wav``;
            500 when the upstream call fails, returns a non-200 status, or
            returns a payload whose first ``data`` element is not a string.
    """
    # The filename can be absent on a multipart part; treat that as unsupported
    # instead of failing with an AttributeError. The check is case-insensitive.
    filename = file.filename or ""
    suffix = filename[-4:].lower()
    if suffix not in (".mp3", ".wav"):
        raise HTTPException(status_code=400, detail="Only .mp3 or .wav files are supported.")

    try:
        audio_bytes = await file.read()

        # The context manager removes the temporary file as soon as the upload
        # finishes, so requests no longer leave files behind on disk.
        with tempfile.NamedTemporaryFile(suffix=suffix) as tmp:
            tmp.write(audio_bytes)
            tmp.seek(0)
            # NOTE(review): requests documents that ``json=`` is only used when
            # neither ``files`` nor ``data`` is given, so the JSON payload below
            # is presumably not sent — confirm what the upstream Space expects.
            # NOTE(review): this is a blocking call inside an async handler.
            response = requests.post(CRISPER_SPACE_API, files={"data": tmp}, json={"data": [None, "transcribe"]})

        if response.status_code != 200:
            raise HTTPException(status_code=500, detail="Failed to get response from upstream Space")

        results = response.json().get("data", [""])
        if not isinstance(results, list) or not results or not isinstance(results[0], str):
            raise HTTPException(status_code=500, detail="Unexpected response format from upstream Space")

        output_text = results[0]
        chunks = parse_transcript(output_text)
        return JSONResponse(content={"text": output_text, "chunks": chunks})

    except HTTPException:
        # Deliberate HTTP errors raised above must reach the client unchanged
        # rather than being re-wrapped by the generic handler below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e