test
Browse files
run_flax_speech_recognition_seq2seq_streaming_v3_pere.py
CHANGED
|
@@ -710,17 +710,18 @@ def main():
|
|
| 710 |
def write_stats(eval_metrics, pred_ids, label_ids):
|
| 711 |
import pandas as pd
|
| 712 |
df = pd.DataFrame(columns=['source', 'prediction'])
|
| 713 |
-
|
| 714 |
|
| 715 |
for pred,label in zip(pred_ids,label_ids):
|
| 716 |
-
pred_text = tokenizer.decode(
|
| 717 |
-
label_text = tokenizer.decode(
|
| 718 |
df = df.append({'source': label_text, 'column2': pred_text}, ignore_index=True)
|
| 719 |
|
| 720 |
-
|
|
|
|
| 721 |
|
| 722 |
print("Writing stats")
|
| 723 |
-
|
| 724 |
|
| 725 |
# 9. Save feature extractor, tokenizer and config
|
| 726 |
feature_extractor.save_pretrained(training_args.output_dir)
|
|
|
|
| 710 |
def write_stats(eval_metrics, pred_ids, label_ids):
    """Decode predictions and labels into a source/prediction DataFrame.

    Args:
        eval_metrics: evaluation metrics (currently unused here; kept for
            interface compatibility with the caller).
        pred_ids: iterable of predicted token-id sequences.
        label_ids: iterable of ground-truth token-id sequences.

    Returns:
        pandas.DataFrame with columns ['source', 'prediction'] — one row per
        (label, prediction) pair.

    NOTE(review): relies on a module-level `tokenizer` being in scope
    (defined earlier in this script).
    """
    import pandas as pd

    # Build all rows first and construct the DataFrame once:
    # DataFrame.append was deprecated and removed in pandas 2.0, and
    # calling it per-row copies the frame each time (quadratic).
    rows = []
    for pred, label in zip(pred_ids, label_ids):
        pred_text = tokenizer.decode(pred, skip_special_tokens=True)
        label_text = tokenizer.decode(label, skip_special_tokens=True)
        # Bug fix: the original stored the prediction under the key
        # 'column2', leaving the declared 'prediction' column all-NaN.
        rows.append({'source': label_text, 'prediction': pred_text})

    df = pd.DataFrame(rows, columns=['source', 'prediction'])

    # Removed leftover breakpoint() debug statement.
    print("Writing stats")
    return df
|
| 724 |
+
|
| 725 |
|
| 726 |
# 9. Save feature extractor, tokenizer and config
|
| 727 |
feature_extractor.save_pretrained(training_args.output_dir)
|