Spaces:

finalyear226
/

urtox-api

Sleeping

inayatarshad committed 22 days ago

Commit

4807727

1 Parent(s): 249e156

Use stronger Whisper ASR model

Files changed (2) hide show

README.md CHANGED Viewed

@@ -40,7 +40,13 @@ The Space repo should stay small. Do not commit the `artifacts/` folder to this
 Text mode now runs the saved `Urtox_attempt1` XLM-RoBERTa token-classification model and returns BIO toxic-span predictions.
-Audio mode transcribes speech with `openai/whisper-tiny`, runs the transcript through the text toxic-span model, and also runs `facebook/wav2vec2-base` plus the saved `audio_toxic_classifier.pt` head for an audio-level toxic/non-toxic label.
 ## Test endpoint

 Text mode now runs the saved `Urtox_attempt1` XLM-RoBERTa token-classification model and returns BIO toxic-span predictions.
+Audio mode transcribes speech with Whisper, runs the transcript through the text toxic-span model, and also runs `facebook/wav2vec2-base` plus the saved `audio_toxic_classifier.pt` head for an audio-level toxic/non-toxic label.
+The default ASR model is `openai/whisper-small`. You can override it by setting the `ASR_MODEL_ID` environment variable, for example:
+```text
+ASR_MODEL_ID=openai/whisper-base
+```
 ## Test endpoint

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import base64
 import re
 import shutil
 import subprocess
@@ -22,6 +23,7 @@ app = FastAPI(title="URTOX Toxic Span Detection API")
 MODEL_REPO_ID = "finalyear226/urdu-toxic-span-detector"
 MODEL_ZIP_NAME = "urtox_deploy_artifacts.zip"
 ARTIFACTS_DIR = Path("artifacts")
 TEXT_MODEL_DIR = ARTIFACTS_DIR / "Urtox_attempt1"
 AUDIO_MODEL_PATH = ARTIFACTS_DIR / "audio_toxic_classifier.pt"
@@ -192,7 +194,7 @@ def load_asr_pipeline():
     ASR_PIPELINE = pipeline(
         "automatic-speech-recognition",
-        model="openai/whisper-tiny",
         device=0 if DEVICE.type == "cuda" else -1,
     )
     return ASR_PIPELINE
@@ -472,6 +474,7 @@ def health():
         "textModelLoaded": TEXT_MODEL is not None,
         "audioModelLoaded": AUDIO_CLASSIFIER is not None,
         "asrLoaded": ASR_PIPELINE is not None,
         "device": str(DEVICE),
     }

 import base64
+import os
 import re
 import shutil
 import subprocess
 MODEL_REPO_ID = "finalyear226/urdu-toxic-span-detector"
 MODEL_ZIP_NAME = "urtox_deploy_artifacts.zip"
+ASR_MODEL_ID = os.getenv("ASR_MODEL_ID", "openai/whisper-small")
 ARTIFACTS_DIR = Path("artifacts")
 TEXT_MODEL_DIR = ARTIFACTS_DIR / "Urtox_attempt1"
 AUDIO_MODEL_PATH = ARTIFACTS_DIR / "audio_toxic_classifier.pt"
     ASR_PIPELINE = pipeline(
         "automatic-speech-recognition",
+        model=ASR_MODEL_ID,
         device=0 if DEVICE.type == "cuda" else -1,
     )
     return ASR_PIPELINE
         "textModelLoaded": TEXT_MODEL is not None,
         "audioModelLoaded": AUDIO_CLASSIFIER is not None,
         "asrLoaded": ASR_PIPELINE is not None,
+        "asrModel": ASR_MODEL_ID,
         "device": str(DEVICE),
     }