Spaces:

MubarakB
/

edu

Sleeping

File size: 3,311 Bytes

import os
import tempfile

import jiwer
import whisper
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import JSONResponse

# Application object; the @app.get / @app.post decorators in this module
# register their handlers on it.
app = FastAPI()

# Load once at startup (not per request)
# This call runs at import time, and the resulting module-level `model` is the
# single instance every endpoint in this file calls `transcribe` on.
model = whisper.load_model("base")

@app.get("/")
def root():
    """Report that the service is up (simple liveness probe)."""
    status_payload = dict(status="Speech API is running")
    return status_payload


# --- 1. Speech to Text ---
@app.post("/stt")
async def speech_to_text(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file.

    The upload is copied to a temporary ``.wav``-suffixed file because
    ``model.transcribe`` is given a filesystem path rather than the bytes.

    Args:
        file: The uploaded audio, sent as multipart form data.

    Returns:
        A dict with a single ``"transcription"`` key holding the ``"text"``
        entry of the transcription result.
    """
    tmp_path = None
    try:
        # delete=False so the file survives the ``with`` block and can be
        # reopened by path; removal is handled in ``finally`` below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp_path = tmp.name
            tmp.write(await file.read())

        # NOTE(review): this is a synchronous call inside an async handler,
        # so it presumably holds the event loop while it runs -- consider
        # offloading to a worker thread if concurrent requests matter.
        result = model.transcribe(tmp_path)
    finally:
        # Always clean up, including when reading the upload or the
        # transcription raises; otherwise every failed request leaks a file.
        if tmp_path is not None:
            os.unlink(tmp_path)

    return {"transcription": result["text"]}


# --- 2. Fluency Check ---
def _normalize_words(text):
    """Return the lowercase words of *text* with punctuation stripped.

    Every character that is neither alphanumeric nor an apostrophe is treated
    as a word separator, and apostrophes are trimmed from word edges so that
    quoted words (``'hello'``) compare equal to unquoted ones while
    contractions (``don't``) stay intact.
    """
    cleaned = "".join(
        ch if ch.isalnum() or ch == "'" else " " for ch in text.lower()
    )
    trimmed = (token.strip("'") for token in cleaned.split())
    return [word for word in trimmed if word]


@app.post("/fluency")
async def fluency_check(
    file: UploadFile = File(...),
    reference_text: str = Form(...)
):
    """Score how closely the uploaded speech matches ``reference_text``.

    The audio is transcribed, both texts are reduced to lowercase words
    without punctuation, and the word error rate (WER) between them is
    turned into a 0-100 fluency score.

    Args:
        file: The uploaded audio, sent as multipart form data.
        reference_text: The text the speaker was asked to read.

    Returns:
        A dict with the raw transcription, the reference text, the fluency
        score (as a percentage string), the WER capped at 1.0, a
        ``matched/total`` word count, a verdict label and feedback text.

    Raises:
        HTTPException: 400 when ``reference_text`` contains no words, since
            a WER against an empty reference is undefined.
    """
    reference_words = _normalize_words(reference_text)
    if not reference_words:
        # Reject before doing any transcription work.
        raise HTTPException(
            status_code=400,
            detail="reference_text must contain at least one word.",
        )

    tmp_path = None
    try:
        # delete=False so the file can be reopened by path after the
        # ``with`` block closes it; removal happens in ``finally``.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp_path = tmp.name
            tmp.write(await file.read())

        result = model.transcribe(tmp_path)
    finally:
        # Clean up even when reading the upload or transcribing fails.
        if tmp_path is not None:
            os.unlink(tmp_path)

    transcription = result["text"]
    spoken_words = _normalize_words(transcription)

    # Calculate Word Error Rate (lower = more fluent) on the normalized
    # words so punctuation and casing in the transcription are not counted
    # as reading mistakes. With nothing spoken every reference word is a
    # deletion, which is a WER of exactly 1.0.
    if spoken_words:
        wer = jiwer.wer(" ".join(reference_words), " ".join(spoken_words))
    else:
        wer = 1.0

    # Cap fluency score at 0% minimum (WER can exceed 1.0 when the speaker
    # inserts many extra words, which would otherwise go negative).
    fluency_score = round(max(0, (1 - wer)) * 100, 2)

    # Count how many reference words appeared in the transcription. The set
    # is built once so each lookup is O(1).
    words_in_reference = len(reference_words)
    spoken_vocabulary = set(spoken_words)
    words_matched = sum(
        1 for word in reference_words if word in spoken_vocabulary
    )

    # Verdict with feedback
    if fluency_score >= 80:
        verdict = "Good"
        feedback = "Great job! You read the text accurately."
    elif fluency_score >= 50:
        verdict = "Average"
        feedback = f"You matched {words_matched} out of {words_in_reference} words. Keep practicing!"
    elif fluency_score > 0:
        verdict = "Needs Improvement"
        feedback = f"Only {words_matched} out of {words_in_reference} words matched. Try reading more slowly."
    else:
        verdict = "Wrong Content"
        feedback = "The audio does not match the reference text at all. Please read the given text."

    return {
        "transcription": transcription,
        "reference_text": reference_text,
        "fluency_score": f"{fluency_score}%",
        "word_error_rate": round(min(wer, 1.0), 3),  # Cap WER at 1.0
        "words_matched": f"{words_matched}/{words_in_reference}",
        "verdict": verdict,
        "feedback": feedback
    }


# --- 3. Speech Verification ---
def _spoken_tokens(text):
    """Return the lowercase words of *text* with punctuation stripped.

    Non-alphanumeric characters other than apostrophes act as separators;
    apostrophes are trimmed from word edges but kept inside contractions.
    """
    cleaned = "".join(
        ch if ch.isalnum() or ch == "'" else " " for ch in text.lower()
    )
    trimmed = (token.strip("'") for token in cleaned.split())
    return [word for word in trimmed if word]


def _contains_phrase(spoken, phrase):
    """Return True when the word list *phrase* occurs contiguously in *spoken*."""
    span = len(phrase)
    return any(
        spoken[start:start + span] == phrase
        for start in range(len(spoken) - span + 1)
    )


@app.post("/verify")
async def speech_verify(
    file: UploadFile = File(...),
    target_word: str = Form(...)
):
    """Check whether ``target_word`` was spoken in the uploaded audio.

    The audio is transcribed and the target is matched on whole words
    (case-insensitive, punctuation ignored), so ``"cat"`` is not reported as
    found inside ``"concatenate"``. A multi-word target must appear as a
    contiguous run of words.

    Args:
        file: The uploaded audio, sent as multipart form data.
        target_word: The word or phrase to look for.

    Returns:
        A dict with the lowercased transcription, the target as submitted,
        a boolean ``verified`` flag, a confidence label and feedback text.

    Raises:
        HTTPException: 400 when ``target_word`` contains no words; an empty
            target would otherwise trivially "match" any audio.
    """
    target_tokens = _spoken_tokens(target_word)
    if not target_tokens:
        # Reject before doing any transcription work.
        raise HTTPException(
            status_code=400,
            detail="target_word must contain at least one word.",
        )

    tmp_path = None
    try:
        # delete=False so the file can be reopened by path after the
        # ``with`` block closes it; removal happens in ``finally``.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp_path = tmp.name
            tmp.write(await file.read())

        result = model.transcribe(tmp_path)
    finally:
        # Clean up even when reading the upload or transcribing fails.
        if tmp_path is not None:
            os.unlink(tmp_path)

    transcription = result["text"].lower()
    found = _contains_phrase(_spoken_tokens(transcription), target_tokens)

    return {
        "transcription": transcription,
        "target_word": target_word,
        "verified": found,
        "confidence": "high" if found else "not detected",
        "feedback": f"'{target_word}' was detected in your speech." if found else f"'{target_word}' was NOT detected. Please try again."
    }