pere committed on
Commit
75676fa
·
1 Parent(s): a9ed39d
run_flax_speech_recognition_seq2seq_streaming_v3_pere.py CHANGED
@@ -710,17 +710,18 @@ def main():
710
  def write_stats(eval_metrics, pred_ids, label_ids):
711
  import pandas as pd
712
  df = pd.DataFrame(columns=['source', 'prediction'])
713
- breakpoint()
714
 
715
  for pred,label in zip(pred_ids,label_ids):
716
- pred_text = tokenizer.decode(pred_ids)
717
- label_text = tokenizer.decode(label_ids)
718
  df = df.append({'source': label_text, 'column2': pred_text}, ignore_index=True)
719
 
720
-
 
721
 
722
  print("Writing stats")
723
- breakpoint()
724
 
725
  # 9. Save feature extractor, tokenizer and config
726
  feature_extractor.save_pretrained(training_args.output_dir)
 
710
  def write_stats(eval_metrics, pred_ids, label_ids):
711
  import pandas as pd
712
  df = pd.DataFrame(columns=['source', 'prediction'])
713
+
714
 
715
  for pred,label in zip(pred_ids,label_ids):
716
+ pred_text = tokenizer.decode(pred,skip_special_tokens=True)
717
+ label_text = tokenizer.decode(label,skip_special_tokens=True)
718
  df = df.append({'source': label_text, 'column2': pred_text}, ignore_index=True)
719
 
720
+ breakpoint()
721
+
722
 
723
  print("Writing stats")
724
+
725
 
726
  # 9. Save feature extractor, tokenizer and config
727
  feature_extractor.save_pretrained(training_args.output_dir)