Robert Gale
committed on
Commit
·
e6b8050
1
Parent(s):
4bb3829
fiwejoiwe
Browse files
README.md
CHANGED
|
@@ -55,6 +55,7 @@ from combining phonemes.)
|
|
| 55 |
```python
|
| 56 |
from transformers import AutoTokenizer, BartForConditionalGeneration
|
| 57 |
|
|
|
|
| 58 |
in_texts = [
|
| 59 |
"Due to its coastal location, Long ·aɪlən·d winter temperatures are milder than most of the state.",
|
| 60 |
"Due to its coastal location, Long ·b·iʧ winter temperatures are milder than most of the state.",
|
|
@@ -62,13 +63,16 @@ in_texts = [
|
|
| 62 |
"Due to its coastal location, lɔŋ ·b·iʧ winter temperatures are milder than most of the state.",
|
| 63 |
]
|
| 64 |
|
|
|
|
| 65 |
tokenizer = AutoTokenizer.from_pretrained("palat/bort")
|
| 66 |
model = BartForConditionalGeneration.from_pretrained("palat/bort")
|
| 67 |
|
|
|
|
| 68 |
inputs = tokenizer(in_texts, return_tensors="pt", padding=True)
|
| 69 |
summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length=0, max_length=2048)
|
| 70 |
decoded = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
|
| 71 |
|
|
|
|
| 72 |
for in_text, out_text in zip(in_texts, decoded):
|
| 73 |
print(f"In: \t{in_text}")
|
| 74 |
print(f"Out: \t{out_text}")
|
|
|
|
| 55 |
```python
|
| 56 |
from transformers import AutoTokenizer, BartForConditionalGeneration
|
| 57 |
|
| 58 |
+
# Examples of mixed orthography and IPA phonemes:
|
| 59 |
in_texts = [
|
| 60 |
"Due to its coastal location, Long ·aɪlən·d winter temperatures are milder than most of the state.",
|
| 61 |
"Due to its coastal location, Long ·b·iʧ winter temperatures are milder than most of the state.",
|
|
|
|
| 63 |
"Due to its coastal location, lɔŋ ·b·iʧ winter temperatures are milder than most of the state.",
|
| 64 |
]
|
| 65 |
|
| 66 |
+
# Set up model and tokenizer:
|
| 67 |
tokenizer = AutoTokenizer.from_pretrained("palat/bort")
|
| 68 |
model = BartForConditionalGeneration.from_pretrained("palat/bort")
|
| 69 |
|
| 70 |
+
# Run generative inference for the batch of examples:
|
| 71 |
inputs = tokenizer(in_texts, return_tensors="pt", padding=True)
|
| 72 |
summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length=0, max_length=2048)
|
| 73 |
decoded = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
|
| 74 |
|
| 75 |
+
# Print the translated text:
|
| 76 |
for in_text, out_text in zip(in_texts, decoded):
|
| 77 |
print(f"In: \t{in_text}")
|
| 78 |
print(f"Out: \t{out_text}")
|