Update eval.py
Browse files
eval.py
CHANGED
|
@@ -6,6 +6,7 @@ from typing import Dict
|
|
| 6 |
import torch
|
| 7 |
from datasets import Audio, Dataset, load_dataset, load_metric
|
| 8 |
from num2words import num2words as n2w
|
|
|
|
| 9 |
|
| 10 |
from transformers import AutoFeatureExtractor, AutoModelForCTC, pipeline, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM, Wav2Vec2FeatureExtractor
|
| 11 |
# from pyctcdecode import BeamSearchDecoderCTC
|
|
@@ -17,7 +18,7 @@ def log_results(result: Dataset, args: Dict[str, str]):
|
|
| 17 |
log_outputs = args.log_outputs
|
| 18 |
lm = "withLM" if args.use_lm else "noLM"
|
| 19 |
model_id = args.model_id.replace("/", "_").replace(".", "")
|
| 20 |
-
dataset_id = "_".join([model_id] + args.dataset.split("/") + [args.config, args.split, lm])
|
| 21 |
|
| 22 |
# load metric
|
| 23 |
wer = load_metric("wer")
|
|
@@ -203,7 +204,7 @@ if __name__ == "__main__":
|
|
| 203 |
"--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'` for Common Voice"
|
| 204 |
)
|
| 205 |
parser.add_argument(
|
| 206 |
-
"--filter", type=str, default="", help="Simple filter on attributes. *E.g.* `region_of_youth:Troms` would
|
| 207 |
)
|
| 208 |
parser.add_argument("--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`")
|
| 209 |
parser.add_argument(
|
|
|
|
| 6 |
import torch
|
| 7 |
from datasets import Audio, Dataset, load_dataset, load_metric
|
| 8 |
from num2words import num2words as n2w
|
| 9 |
+
from slugify import slugify
|
| 10 |
|
| 11 |
from transformers import AutoFeatureExtractor, AutoModelForCTC, pipeline, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM, Wav2Vec2FeatureExtractor
|
| 12 |
# from pyctcdecode import BeamSearchDecoderCTC
|
|
|
|
| 18 |
log_outputs = args.log_outputs
|
| 19 |
lm = "withLM" if args.use_lm else "noLM"
|
| 20 |
model_id = args.model_id.replace("/", "_").replace(".", "")
|
| 21 |
+
dataset_id = "_".join([model_id] + args.dataset.split("/") + [args.config, slugify(args.filter), args.split, lm])
|
| 22 |
|
| 23 |
# load metric
|
| 24 |
wer = load_metric("wer")
|
|
|
|
| 204 |
"--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'` for Common Voice"
|
| 205 |
)
|
| 206 |
parser.add_argument(
|
| 207 |
+
"--filter", type=str, default="", help="Simple filter on attributes. *E.g.* `region_of_youth:Troms` would only keep those samples for which the condition is met"
|
| 208 |
)
|
| 209 |
parser.add_argument("--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`")
|
| 210 |
parser.add_argument(
|