filler-p / app.py
cheesecz's picture
Update app.py
059fca6 verified
import os
import tempfile

import requests
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse
# ASGI application object; the /speech2text route below is registered on it.
app = FastAPI()
# URL of the upstream Hugging Face Space endpoint that uploaded audio is
# forwarded to for transcription.
CRISPER_SPACE_API = "https://rafaaa2105-crisper-whisper.hf.space/run/predict_1"
def _parse_timestamped_line(raw_line):
    """Split one ``[seconds] text`` line into ``(seconds, text)``.

    Returns ``None`` when the line is blank, is not of that form, or its
    bracketed part is not a valid float.
    """
    line = raw_line.strip()
    if not line.startswith("["):
        return None
    stamp, bracket, remainder = line[1:].partition("]")
    if not bracket:
        # No closing bracket: not a timestamped line.
        return None
    try:
        seconds = float(stamp)
    except ValueError:
        return None
    # Drop surrounding whitespace and a trailing/leading separator comma.
    return seconds, remainder.strip().strip(",")


def parse_transcript(text: str, *, default_duration: float = 0.5):
    """Convert a timestamped transcript into a list of chunk dictionaries.

    Every line of the form ``[<seconds>] <text>`` yields one chunk
    ``{"text": <text>, "timestamp": [start, end]}``. ``start`` is the
    line's own timestamp. ``end`` is the timestamp of the line directly
    after it when that line is itself a valid timestamped line; otherwise
    it is ``start + default_duration``.

    Lines that are blank or not in the expected form are skipped. A
    malformed following line no longer causes the current (valid) line to
    be dropped; the fallback end time is used instead.

    Args:
        text: Transcript text, one entry per line. ``None`` or an empty
            string yields an empty list.
        default_duration: Seconds added to ``start`` when no end time can
            be taken from the following line.

    Returns:
        A list of ``{"text": str, "timestamp": [float, float]}`` dicts in
        the order the lines appear.
    """
    if not text:
        return []

    parsed = [_parse_timestamped_line(raw) for raw in text.strip().split("\n")]

    chunks = []
    # Pair each line with its successor; the last line is paired with None.
    for current, following in zip(parsed, parsed[1:] + [None]):
        if current is None:
            continue
        start, words = current
        end = following[0] if following is not None else start + default_duration
        chunks.append({"text": words, "timestamp": [start, end]})
    return chunks
# Extensions accepted by the /speech2text endpoint (compared case-insensitively).
_SUPPORTED_AUDIO_EXTENSIONS = (".mp3", ".wav")
# (connect, read) timeouts in seconds for the upstream call, so a stalled
# Space cannot hang a request forever.
_UPSTREAM_TIMEOUT_SECONDS = (10, 300)


def _post_audio_to_space(audio_path):
    """Upload the file at *audio_path* to the upstream Space and return the response.

    This is a blocking call; the endpoint runs it in a worker thread.
    """
    with open(audio_path, "rb") as audio:
        # NOTE(review): when ``files`` is supplied, requests builds a multipart
        # body and does not send the ``json`` payload, so this ``json`` argument
        # presumably never reaches the Space. Kept unchanged because the request
        # shape the upstream API expects is not visible here -- confirm and fix.
        return requests.post(
            CRISPER_SPACE_API,
            files={"data": audio},
            json={"data": [None, "transcribe"]},
            timeout=_UPSTREAM_TIMEOUT_SECONDS,
        )


@app.post("/speech2text")
async def speech2text(file: UploadFile = File(...)):
    """Transcribe an uploaded ``.mp3``/``.wav`` file via the upstream Space.

    The upload is written to a temporary file, forwarded to
    ``CRISPER_SPACE_API``, and the first element of the response's ``data``
    list is parsed with ``parse_transcript``. The temporary file is always
    removed before the handler returns.

    Args:
        file: The uploaded audio file.

    Returns:
        A ``JSONResponse`` with ``{"text": <raw transcript>, "chunks": [...]}``.

    Raises:
        HTTPException: 400 when the filename is missing or does not end in
            ``.mp3``/``.wav``; 500 when the upstream call does not return
            HTTP 200 or any other error occurs while processing.
    """
    # ``filename`` may be absent on an upload; treat that as unsupported
    # instead of crashing on ``None.endswith``.
    filename = (file.filename or "").lower()
    if not filename.endswith(_SUPPORTED_AUDIO_EXTENSIONS):
        raise HTTPException(status_code=400, detail="Only .mp3 or .wav files are supported.")

    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=filename[-4:]) as tmp:
            tmp_path = tmp.name
            tmp.write(await file.read())

        # requests is synchronous; run it off the event loop so other
        # requests are not stalled while the Space is transcribing.
        response = await run_in_threadpool(_post_audio_to_space, tmp_path)
        if response.status_code != 200:
            raise HTTPException(status_code=500, detail="Failed to get response from upstream Space")

        # Fall back to an empty transcript when "data" is missing or empty.
        data = response.json().get("data") or [""]
        output_text = data[0]
        chunks = parse_transcript(output_text)
        return JSONResponse(content={"text": output_text, "chunks": chunks})
    except HTTPException:
        # Already a well-formed HTTP error; do not re-wrap it below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # The temp file is created with delete=False, so remove it explicitly.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                # Best-effort cleanup; a failed delete must not mask the result.
                pass