inayatarshad committed on
Commit
4807727
·
1 Parent(s): 249e156

Use stronger Whisper ASR model

Browse files
Files changed (2) hide show
  1. README.md +7 -1
  2. app.py +4 -1
README.md CHANGED
@@ -40,7 +40,13 @@ The Space repo should stay small. Do not commit the `artifacts/` folder to this
40
 
41
  Text mode now runs the saved `Urtox_attempt1` XLM-RoBERTa token-classification model and returns BIO toxic-span predictions.
42
 
43
- Audio mode transcribes speech with `openai/whisper-tiny`, runs the transcript through the text toxic-span model, and also runs `facebook/wav2vec2-base` plus the saved `audio_toxic_classifier.pt` head for an audio-level toxic/non-toxic label.
 
 
 
 
 
 
44
 
45
  ## Test endpoint
46
 
 
40
 
41
  Text mode now runs the saved `Urtox_attempt1` XLM-RoBERTa token-classification model and returns BIO toxic-span predictions.
42
 
43
+ Audio mode transcribes speech with Whisper, runs the transcript through the text toxic-span model, and also runs `facebook/wav2vec2-base` plus the saved `audio_toxic_classifier.pt` head for an audio-level toxic/non-toxic label.
44
+
45
+ The default ASR model is `openai/whisper-small`. You can override it by setting the `ASR_MODEL_ID` environment variable before the app starts:
46
+
47
+ ```text
48
+ ASR_MODEL_ID=openai/whisper-base
49
+ ```
50
 
51
  ## Test endpoint
52
 
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import base64
 
2
  import re
3
  import shutil
4
  import subprocess
@@ -22,6 +23,7 @@ app = FastAPI(title="URTOX Toxic Span Detection API")
22
 
23
  MODEL_REPO_ID = "finalyear226/urdu-toxic-span-detector"
24
  MODEL_ZIP_NAME = "urtox_deploy_artifacts.zip"
 
25
  ARTIFACTS_DIR = Path("artifacts")
26
  TEXT_MODEL_DIR = ARTIFACTS_DIR / "Urtox_attempt1"
27
  AUDIO_MODEL_PATH = ARTIFACTS_DIR / "audio_toxic_classifier.pt"
@@ -192,7 +194,7 @@ def load_asr_pipeline():
192
 
193
  ASR_PIPELINE = pipeline(
194
  "automatic-speech-recognition",
195
- model="openai/whisper-tiny",
196
  device=0 if DEVICE.type == "cuda" else -1,
197
  )
198
  return ASR_PIPELINE
@@ -472,6 +474,7 @@ def health():
472
  "textModelLoaded": TEXT_MODEL is not None,
473
  "audioModelLoaded": AUDIO_CLASSIFIER is not None,
474
  "asrLoaded": ASR_PIPELINE is not None,
 
475
  "device": str(DEVICE),
476
  }
477
 
 
1
  import base64
2
+ import os
3
  import re
4
  import shutil
5
  import subprocess
 
23
 
24
  MODEL_REPO_ID = "finalyear226/urdu-toxic-span-detector"
25
  MODEL_ZIP_NAME = "urtox_deploy_artifacts.zip"
26
+ ASR_MODEL_ID = os.getenv("ASR_MODEL_ID", "openai/whisper-small")
27
  ARTIFACTS_DIR = Path("artifacts")
28
  TEXT_MODEL_DIR = ARTIFACTS_DIR / "Urtox_attempt1"
29
  AUDIO_MODEL_PATH = ARTIFACTS_DIR / "audio_toxic_classifier.pt"
 
194
 
195
  ASR_PIPELINE = pipeline(
196
  "automatic-speech-recognition",
197
+ model=ASR_MODEL_ID,
198
  device=0 if DEVICE.type == "cuda" else -1,
199
  )
200
  return ASR_PIPELINE
 
474
  "textModelLoaded": TEXT_MODEL is not None,
475
  "audioModelLoaded": AUDIO_CLASSIFIER is not None,
476
  "asrLoaded": ASR_PIPELINE is not None,
477
+ "asrModel": ASR_MODEL_ID,
478
  "device": str(DEVICE),
479
  }
480