import os
import re
import tempfile

import jiwer
import whisper
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import JSONResponse
|
|
# ASGI application object; the route decorators in this module register
# their handlers on it.
app = FastAPI()

# Load the Whisper "base" checkpoint once, at import time, so every request
# handler shares the same model instance.
model = whisper.load_model("base")
|
|
@app.get("/")
def root():
    """Report that the service is up.

    Returns:
        A dict with a single ``"status"`` key.
    """
    status_payload = {"status": "Speech API is running"}
    return status_payload
|
|
|
|
| |
@app.post("/stt")
async def speech_to_text(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the module-level Whisper model.

    The upload is written to a temporary file because the model is given a
    filesystem path to transcribe.

    Args:
        file: Uploaded audio, sent as multipart form data.

    Returns:
        A dict with a single ``"transcription"`` key holding the text the
        model produced.
    """
    audio_bytes = await file.read()

    # delete=False keeps the file on disk after the handle is closed, so the
    # path can be passed to the model; removal is therefore our job.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    try:
        with tmp:
            tmp.write(audio_bytes)
        result = model.transcribe(tmp.name)
    finally:
        # Runs on success and on failure, so a failed write or transcription
        # no longer leaves the temp file behind.
        os.unlink(tmp.name)

    return {"transcription": result["text"]}
|
|
|
|
| |
@app.post("/fluency")
async def fluency_check(
    file: UploadFile = File(...),
    reference_text: str = Form(...),
):
    """Score how closely the spoken audio matches ``reference_text``.

    The audio is transcribed with the module-level Whisper model, then the
    transcription is compared with the reference using word error rate (WER).
    Both texts are lowercased and stripped of punctuation before comparison,
    so differences in casing or punctuation are not counted as errors.

    Args:
        file: Uploaded audio, sent as multipart form data.
        reference_text: The text the speaker was supposed to read.

    Returns:
        A dict with the raw transcription, the reference text, the fluency
        score as a percentage string, the WER capped at 1.0, the matched-word
        count as ``"matched/total"``, a verdict label and a feedback message.

    Raises:
        HTTPException: 400 when ``reference_text`` contains no words, since
            a word error rate cannot be computed against an empty reference.
    """

    def _words(text):
        # Lowercase and drop punctuation so "Hello," and "hello" compare equal.
        return re.sub(r"[^\w\s]", "", text.lower()).split()

    # Validate before doing any transcription work.
    reference_words = _words(reference_text)
    if not reference_words:
        raise HTTPException(
            status_code=400,
            detail="reference_text must contain at least one word.",
        )

    audio_bytes = await file.read()

    # delete=False keeps the file on disk after the handle is closed, so the
    # path can be passed to the model; removal is therefore our job.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    try:
        with tmp:
            tmp.write(audio_bytes)
        result = model.transcribe(tmp.name)
    finally:
        # Runs on success and on failure, so the temp file is never leaked.
        os.unlink(tmp.name)

    transcription = result["text"]
    spoken_words = _words(transcription)

    if spoken_words:
        wer = jiwer.wer(" ".join(reference_words), " ".join(spoken_words))
    else:
        # Nothing was recognised: every reference word is missing.
        wer = 1.0

    # WER can exceed 1.0 when the transcription has many extra words, so the
    # score is floored at zero. 0.0 (not 0) keeps the result a float, so the
    # percentage string is formatted the same way for every score.
    fluency_score = round(max(0.0, 1 - wer) * 100, 2)

    # Build the lookup set once instead of re-splitting the transcription for
    # every reference word.
    words_in_reference = len(reference_words)
    spoken_vocab = set(spoken_words)
    words_matched = sum(1 for w in reference_words if w in spoken_vocab)

    if fluency_score >= 80:
        verdict = "Good"
        feedback = "Great job! You read the text accurately."
    elif fluency_score >= 50:
        verdict = "Average"
        feedback = f"You matched {words_matched} out of {words_in_reference} words. Keep practicing!"
    elif fluency_score > 0:
        verdict = "Needs Improvement"
        feedback = f"Only {words_matched} out of {words_in_reference} words matched. Try reading more slowly."
    else:
        verdict = "Wrong Content"
        feedback = "The audio does not match the reference text at all. Please read the given text."

    return {
        "transcription": transcription,
        "reference_text": reference_text,
        "fluency_score": f"{fluency_score}%",
        "word_error_rate": round(min(wer, 1.0), 3),
        "words_matched": f"{words_matched}/{words_in_reference}",
        "verdict": verdict,
        "feedback": feedback,
    }
|
|
|
|
| |
@app.post("/verify")
async def speech_verify(
    file: UploadFile = File(...),
    target_word: str = Form(...),
):
    """Check whether ``target_word`` was spoken in the uploaded audio.

    The audio is transcribed with the module-level Whisper model. The target
    and the transcription are lowercased and stripped of punctuation, then
    compared on whole-word boundaries, so "cat" is not reported as found
    inside "category". A multi-word target matches only as a contiguous run
    of words.

    Args:
        file: Uploaded audio, sent as multipart form data.
        target_word: The word (or phrase) to look for.

    Returns:
        A dict with the lowercased transcription, the target as submitted,
        a ``"verified"`` boolean, a confidence label and a feedback message.

    Raises:
        HTTPException: 400 when ``target_word`` contains no words; an empty
            target would otherwise be reported as found in any audio.
    """
    # Validate before doing any transcription work.
    target_words = re.sub(r"[^\w\s]", "", target_word.lower()).split()
    if not target_words:
        raise HTTPException(
            status_code=400,
            detail="target_word must contain at least one word.",
        )

    audio_bytes = await file.read()

    # delete=False keeps the file on disk after the handle is closed, so the
    # path can be passed to the model; removal is therefore our job.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    try:
        with tmp:
            tmp.write(audio_bytes)
        result = model.transcribe(tmp.name)
    finally:
        # Runs on success and on failure, so the temp file is never leaked.
        os.unlink(tmp.name)

    transcription = result["text"].lower()
    spoken_words = re.sub(r"[^\w\s]", "", transcription).split()

    # Padding both sides with spaces makes the substring test respect word
    # boundaries at the start, middle and end of the transcription.
    found = f" {' '.join(target_words)} " in f" {' '.join(spoken_words)} "

    return {
        "transcription": transcription,
        "target_word": target_word,
        "verified": found,
        "confidence": "high" if found else "not detected",
        "feedback": f"'{target_word}' was detected in your speech." if found else f"'{target_word}' was NOT detected. Please try again.",
    }