Robert Gale committed on
Commit
e6b8050
1 Parent(s): 4bb3829
Files changed (1) hide show
  1. README.md +4 -0
README.md CHANGED
@@ -55,6 +55,7 @@ from combining phonemes.)
55
  ```python
56
  from transformers import AutoTokenizer, BartForConditionalGeneration
57
 
 
58
  in_texts = [
59
  "Due to its coastal location, Long ·aɪlən·d winter temperatures are milder than most of the state.",
60
  "Due to its coastal location, Long ·b·iʧ winter temperatures are milder than most of the state.",
@@ -62,13 +63,16 @@ in_texts = [
62
  "Due to its coastal location, lɔŋ ·b·iʧ winter temperatures are milder than most of the state.",
63
  ]
64
 
 
65
  tokenizer = AutoTokenizer.from_pretrained("palat/bort")
66
  model = BartForConditionalGeneration.from_pretrained("palat/bort")
67
 
 
68
  inputs = tokenizer(in_texts, return_tensors="pt", padding=True)
69
  summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length=0, max_length=2048)
70
  decoded = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
71
 
 
72
  for in_text, out_text in zip(in_texts, decoded):
73
  print(f"In: \t{in_text}")
74
  print(f"Out: \t{out_text}")
 
55
  ```python
56
  from transformers import AutoTokenizer, BartForConditionalGeneration
57
 
58
+ # Examples of mixed orthography and IPA phonemes:
59
  in_texts = [
60
  "Due to its coastal location, Long ·aɪlən·d winter temperatures are milder than most of the state.",
61
  "Due to its coastal location, Long ·b·iʧ winter temperatures are milder than most of the state.",
 
63
  "Due to its coastal location, lɔŋ ·b·iʧ winter temperatures are milder than most of the state.",
64
  ]
65
 
66
+ # Set up model and tokenizer:
67
  tokenizer = AutoTokenizer.from_pretrained("palat/bort")
68
  model = BartForConditionalGeneration.from_pretrained("palat/bort")
69
 
70
+ # Run generative inference for the batch of examples:
71
  inputs = tokenizer(in_texts, return_tensors="pt", padding=True)
72
  summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length=0, max_length=2048)
73
  decoded = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
74
 
75
+ # Print the translated text:
76
  for in_text, out_text in zip(in_texts, decoded):
77
  print(f"In: \t{in_text}")
78
  print(f"Out: \t{out_text}")