Spaces:
Sleeping
Sleeping
Commit ·
4807727
1
Parent(s): 249e156
Use stronger Whisper ASR model
Browse files
README.md
CHANGED
|
@@ -40,7 +40,13 @@ The Space repo should stay small. Do not commit the `artifacts/` folder to this
|
|
| 40 |
|
| 41 |
Text mode now runs the saved `Urtox_attempt1` XLM-RoBERTa token-classification model and returns BIO toxic-span predictions.
|
| 42 |
|
| 43 |
-
Audio mode transcribes speech with
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
## Test endpoint
|
| 46 |
|
|
|
|
| 40 |
|
| 41 |
Text mode now runs the saved `Urtox_attempt1` XLM-RoBERTa token-classification model and returns BIO toxic-span predictions.
|
| 42 |
|
| 43 |
+
Audio mode transcribes speech with Whisper, runs the transcript through the text toxic-span model, and also runs `facebook/wav2vec2-base` plus the saved `audio_toxic_classifier.pt` head for an audio-level toxic/non-toxic label.
|
| 44 |
+
|
| 45 |
+
The default ASR model is `openai/whisper-small`. You can override it by setting the `ASR_MODEL_ID` environment variable, for example:
|
| 46 |
+
|
| 47 |
+
```text
|
| 48 |
+
ASR_MODEL_ID=openai/whisper-base
|
| 49 |
+
```
|
| 50 |
|
| 51 |
## Test endpoint
|
| 52 |
|
app.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import base64
|
|
|
|
| 2 |
import re
|
| 3 |
import shutil
|
| 4 |
import subprocess
|
|
@@ -22,6 +23,7 @@ app = FastAPI(title="URTOX Toxic Span Detection API")
|
|
| 22 |
|
| 23 |
MODEL_REPO_ID = "finalyear226/urdu-toxic-span-detector"
|
| 24 |
MODEL_ZIP_NAME = "urtox_deploy_artifacts.zip"
|
|
|
|
| 25 |
ARTIFACTS_DIR = Path("artifacts")
|
| 26 |
TEXT_MODEL_DIR = ARTIFACTS_DIR / "Urtox_attempt1"
|
| 27 |
AUDIO_MODEL_PATH = ARTIFACTS_DIR / "audio_toxic_classifier.pt"
|
|
@@ -192,7 +194,7 @@ def load_asr_pipeline():
|
|
| 192 |
|
| 193 |
ASR_PIPELINE = pipeline(
|
| 194 |
"automatic-speech-recognition",
|
| 195 |
-
model=
|
| 196 |
device=0 if DEVICE.type == "cuda" else -1,
|
| 197 |
)
|
| 198 |
return ASR_PIPELINE
|
|
@@ -472,6 +474,7 @@ def health():
|
|
| 472 |
"textModelLoaded": TEXT_MODEL is not None,
|
| 473 |
"audioModelLoaded": AUDIO_CLASSIFIER is not None,
|
| 474 |
"asrLoaded": ASR_PIPELINE is not None,
|
|
|
|
| 475 |
"device": str(DEVICE),
|
| 476 |
}
|
| 477 |
|
|
|
|
| 1 |
import base64
|
| 2 |
+
import os
|
| 3 |
import re
|
| 4 |
import shutil
|
| 5 |
import subprocess
|
|
|
|
| 23 |
|
| 24 |
MODEL_REPO_ID = "finalyear226/urdu-toxic-span-detector"
|
| 25 |
MODEL_ZIP_NAME = "urtox_deploy_artifacts.zip"
|
| 26 |
+
ASR_MODEL_ID = os.getenv("ASR_MODEL_ID", "openai/whisper-small")
|
| 27 |
ARTIFACTS_DIR = Path("artifacts")
|
| 28 |
TEXT_MODEL_DIR = ARTIFACTS_DIR / "Urtox_attempt1"
|
| 29 |
AUDIO_MODEL_PATH = ARTIFACTS_DIR / "audio_toxic_classifier.pt"
|
|
|
|
| 194 |
|
| 195 |
ASR_PIPELINE = pipeline(
|
| 196 |
"automatic-speech-recognition",
|
| 197 |
+
model=ASR_MODEL_ID,
|
| 198 |
device=0 if DEVICE.type == "cuda" else -1,
|
| 199 |
)
|
| 200 |
return ASR_PIPELINE
|
|
|
|
| 474 |
"textModelLoaded": TEXT_MODEL is not None,
|
| 475 |
"audioModelLoaded": AUDIO_CLASSIFIER is not None,
|
| 476 |
"asrLoaded": ASR_PIPELINE is not None,
|
| 477 |
+
"asrModel": ASR_MODEL_ID,
|
| 478 |
"device": str(DEVICE),
|
| 479 |
}
|
| 480 |
|