# edu / app.py
# MubarakB's picture
# Update app.py
# e558c6d verified
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import JSONResponse
import whisper
import jiwer
import tempfile
import os
# ASGI application object; the route decorators in this file register on it.
app = FastAPI()

# The Whisper "base" checkpoint is loaded a single time at import so that
# request handlers reuse it instead of reloading it per request.
model = whisper.load_model(name="base")
@app.get("/")
def root():
    """Return a fixed status payload showing that the API is reachable."""
    status_payload = {"status": "Speech API is running"}
    return status_payload
# --- 1. Speech to Text ---
@app.post("/stt")
async def speech_to_text(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file.

    Args:
        file: The uploaded audio. Its bytes are spooled to a temporary file
            because ``model.transcribe`` is given a filesystem path.

    Returns:
        A dict with a single ``"transcription"`` key holding the text
        produced by the model.
    """
    # Read the upload first so a failed read never leaves a temp file behind.
    audio_bytes = await file.read()

    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    try:
        # Close the handle before transcribing so the file is fully flushed.
        with tmp:
            tmp.write(audio_bytes)
        # NOTE(review): this is a synchronous call inside an async handler;
        # other requests wait while it runs.
        result = model.transcribe(tmp.name)
    finally:
        # delete=False means cleanup is our job, including when writing or
        # transcription raises.
        os.unlink(tmp.name)

    return {"transcription": result["text"]}
# --- 2. Fluency Check ---
def _fluency_words(text):
    """Split *text* into lowercase words with punctuation removed.

    Apostrophes (straight or curly) are dropped, so "don't" becomes "dont";
    every other non-alphanumeric character acts as a word separator. This
    keeps punctuation in either the reference or the transcription from
    being counted as a reading error.
    """
    cleaned = []
    for ch in text.lower():
        if ch.isalnum():
            cleaned.append(ch)
        elif ch not in "'\u2019":
            cleaned.append(" ")
    return "".join(cleaned).split()


@app.post("/fluency")
async def fluency_check(
    file: UploadFile = File(...),
    reference_text: str = Form(...)
):
    """Score how closely an uploaded reading matches ``reference_text``.

    The audio is transcribed, both texts are normalised (lowercased,
    punctuation removed) and compared with word error rate (WER).

    Args:
        file: Uploaded audio of the speaker reading the reference text.
        reference_text: The text the speaker was asked to read.

    Returns:
        A dict with the raw transcription, the reference text, a percentage
        ``fluency_score``, the WER capped at 1.0, a ``matched/total`` word
        count, a verdict and a feedback sentence. If ``reference_text``
        contains no words, a 400 JSON response with a ``detail`` message is
        returned instead.
    """
    reference_words = _fluency_words(reference_text)
    if not reference_words:
        # WER is undefined for an empty reference; reject it up front rather
        # than letting the metric fail later.
        return JSONResponse(
            status_code=400,
            content={"detail": "reference_text must contain at least one word."},
        )

    # Read the upload first so a failed read never leaves a temp file behind.
    audio_bytes = await file.read()

    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    try:
        # Close the handle before transcribing so the file is fully flushed.
        with tmp:
            tmp.write(audio_bytes)
        result = model.transcribe(tmp.name)
    finally:
        # delete=False means cleanup is our job, even when transcription raises.
        os.unlink(tmp.name)

    transcription = result["text"]
    spoken_words = _fluency_words(transcription)

    # Calculate Word Error Rate (lower = more fluent)
    if spoken_words:
        wer = jiwer.wer(" ".join(reference_words), " ".join(spoken_words))
    else:
        # Nothing was recognised: every reference word counts as missing.
        wer = 1.0

    # Cap fluency score at 0% minimum (WER can exceed 1.0)
    fluency_score = round(max(0, (1 - wer)) * 100, 2)

    # Count how many reference words appeared anywhere in the transcription.
    # The set is built once instead of re-splitting the text per word.
    spoken_vocabulary = set(spoken_words)
    words_in_reference = len(reference_words)
    words_matched = sum(1 for w in reference_words if w in spoken_vocabulary)

    # Verdict with feedback
    if fluency_score >= 80:
        verdict = "Good"
        feedback = "Great job! You read the text accurately."
    elif fluency_score >= 50:
        verdict = "Average"
        feedback = f"You matched {words_matched} out of {words_in_reference} words. Keep practicing!"
    elif fluency_score > 0:
        verdict = "Needs Improvement"
        feedback = f"Only {words_matched} out of {words_in_reference} words matched. Try reading more slowly."
    else:
        verdict = "Wrong Content"
        feedback = "The audio does not match the reference text at all. Please read the given text."

    return {
        "transcription": transcription,
        "reference_text": reference_text,
        "fluency_score": f"{fluency_score}%",
        "word_error_rate": round(min(wer, 1.0), 3),  # Cap WER at 1.0
        "words_matched": f"{words_matched}/{words_in_reference}",
        "verdict": verdict,
        "feedback": feedback
    }
# --- 3. Speech Verification ---
def _verify_words(text):
    """Split *text* into lowercase words with punctuation removed.

    Apostrophes (straight or curly) are dropped; every other
    non-alphanumeric character acts as a word separator.
    """
    cleaned = []
    for ch in text.lower():
        if ch.isalnum():
            cleaned.append(ch)
        elif ch not in "'\u2019":
            cleaned.append(" ")
    return "".join(cleaned).split()


@app.post("/verify")
async def speech_verify(
    file: UploadFile = File(...),
    target_word: str = Form(...)
):
    """Check whether ``target_word`` was spoken in the uploaded audio.

    Matching is done on whole words after lowercasing and removing
    punctuation, so "cat" is not verified by "category". A multi-word
    target must appear as a contiguous run of words.

    Args:
        file: Uploaded audio to transcribe.
        target_word: The word (or short phrase) expected in the speech.

    Returns:
        A dict with the lowercased transcription, the target as submitted,
        a boolean ``verified`` flag, a ``confidence`` label and a feedback
        sentence. If ``target_word`` contains no words, a 400 JSON response
        with a ``detail`` message is returned instead.
    """
    target_words = _verify_words(target_word)
    if not target_words:
        # An empty target would otherwise match every transcription.
        return JSONResponse(
            status_code=400,
            content={"detail": "target_word must contain at least one word."},
        )

    # Read the upload first so a failed read never leaves a temp file behind.
    audio_bytes = await file.read()

    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    try:
        # Close the handle before transcribing so the file is fully flushed.
        with tmp:
            tmp.write(audio_bytes)
        result = model.transcribe(tmp.name)
    finally:
        # delete=False means cleanup is our job, even when transcription raises.
        os.unlink(tmp.name)

    transcription = result["text"].lower()

    # Pad both sides with spaces so only whole-word matches succeed.
    spoken = " ".join(_verify_words(transcription))
    target = " ".join(target_words)
    found = f" {target} " in f" {spoken} "

    return {
        "transcription": transcription,
        "target_word": target_word,
        "verified": found,
        "confidence": "high" if found else "not detected",
        "feedback": f"'{target_word}' was detected in your speech." if found else f"'{target_word}' was NOT detected. Please try again."
    }