Commit ·
230b9d1
1
Parent(s): 51e4ef6
Add
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +24 -0
- evaluation/generation/agg.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_0.json +1 -0
- evaluation/generation/agg.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_1.json +1 -0
- evaluation/generation/agg.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_0.json +1 -0
- evaluation/generation/agg.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_1.json +1 -0
- evaluation/generation/agg.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_0.json +1 -0
- evaluation/generation/agg.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_1.json +1 -0
- evaluation/generation/agg.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_2.json +1 -0
- evaluation/generation/agg.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_3.json +1 -0
- evaluation/generation/agg.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_4.json +1 -0
- evaluation/generation/agg.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_5.json +1 -0
- evaluation/generation/agg.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_0.json +1 -0
- evaluation/generation/agg.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_1.json +1 -0
- evaluation/generation/agg.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_2.json +1 -0
- evaluation/generation/agg.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_3.json +1 -0
- evaluation/generation/agg.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_4.json +1 -0
- evaluation/generation/agg.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_5.json +1 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_0.jsonl +3 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_1.jsonl +3 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_2.jsonl +0 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_3.jsonl +0 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_4.jsonl +0 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_5.jsonl +0 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_0.jsonl +3 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_1.jsonl +3 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_2.jsonl +0 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_3.jsonl +0 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_4.jsonl +0 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_5.jsonl +0 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl +3 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl +3 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl +3 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl +3 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl +3 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl +3 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_0.jsonl +3 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_1.jsonl +3 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_2.jsonl +3 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_3.jsonl +3 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_4.jsonl +3 -0
- evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_5.jsonl +3 -0
- evaluation/generation/slim.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_0.json +133 -0
- evaluation/generation/slim.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_1.json +133 -0
- evaluation/generation/slim.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_0.json +133 -0
- evaluation/generation/slim.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_1.json +133 -0
- evaluation/generation/slim.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_0.json +133 -0
- evaluation/generation/slim.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_1.json +133 -0
- evaluation/generation/slim.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_2.json +133 -0
- evaluation/generation/slim.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_3.json +133 -0
- evaluation/generation/slim.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_4.json +133 -0
.gitattributes
CHANGED
|
@@ -32,3 +32,27 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
|
evaluation/generation/agg.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_0.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.30792186288041745, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.022591197439731495}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07098986968617506, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0020679786797452465}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.3069347608020833, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00517507368107689}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.10294623137452774, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020587417894922925}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.031171214011889447, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009608900637588894}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.14183320494534776, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0033211715439776072}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.04712663066068758, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012276103918669153}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06796024562406605, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0019370776267962208}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.29664082837766304, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.005024240535996322}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.09881593036325725, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001917130031576468}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.0670791406212816, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00196698409746432}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.2878672957371347, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004800974945636242}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.09698777350644844, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019324455424936378}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/generation/agg.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_1.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.5029735260771602, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.037504230092449804}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.13619661582031448, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0043020137724201464}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.316637398074897, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0047970506632331504}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.15768658147508735, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003494665966696393}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.06820976880895747, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0029059822832858398}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.15837063857057912, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0034375560556260923}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.07782418733894164, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0023362205542013246}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.12413525469005826, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0038754321440188466}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2992719822505979, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004524889522002559}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.14491859595914175, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.003065753619906709}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.12668637709059938, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003967270717894955}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3010027831236105, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004513515461941868}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.14722111353362036, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0031421471913191945}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/generation/agg.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_0.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.1677624908585186, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002227107904916062}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.2596588931478101, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002997542713484213}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.1877757783616715, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002098472966964512}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.03900987564301008, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009646264507463447}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.06275053320845367, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016213745197323872}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.04392454941063653, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001027826782042431}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.1321786244987408, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015898806527527606}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.2108343289151636, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024185861125649503}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.1495463996881597, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001529629957138695}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.15492456336179825, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0020566337458244716}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.24059158197735586, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0028186220199704517}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.1735928623880009, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019465237397107696}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.443321935592914, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09064724887478164}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/generation/agg.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_1.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.2506949292837622, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0035826466742239185}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.216927054352879, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0028088091990487226}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.19603976253059838, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002209711985991064}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.06671631551602682, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0021245172015467873}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.051740103428563375, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014534357317337603}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.047953521910444996, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012655545908108951}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.1934840203286638, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002982369736201223}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.16375914032560498, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021690067639137493}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.14805027301359455, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016833891759156644}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.23568777027923624, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003415634236822358}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.2032981534504797, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002632061036819137}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.18360306023241374, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00205910275267234}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.897173783637095, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04723692099575378}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/generation/agg.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_0.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 5.941956523442191, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09932749482493093}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.20220167360973929, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017775397370267494}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.40694948255704005, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0030318532028052545}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.266035838888047, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.00214687581319049}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.08918451858000426, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0011232374812415247}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.1824874010352202, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002231269996765666}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.11793161527142047, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0014395777814735315}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.171931143567588, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014094508987155406}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3506402050881267, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002593888393315589}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.22717154379396048, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017325449588314497}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.18007224082622914, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001637841819840828}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.36377758074636973, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0028984303974106587}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.23720221029041466, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002003449214467719}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/generation/agg.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_1.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 12.380602370008575, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.19219222099661637}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5762149768264377, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003318783916958475}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4407794090538949, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0029897484855389944}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4721348830907075, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0023404970653199955}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2814540689762945, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002795334538538015}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.21174089039601776, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0022062395085723743}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.22711440839557615, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0020751801779315127}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4229881150623638, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003072919322994061}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.32033975746914833, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002489730198496103}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3439998053438798, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00212844149551342}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4731321806462157, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0032652927514093106}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.36070812284253606, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027712237046193665}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3866905695770315, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023474299675248548}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/generation/agg.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_2.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 14.191488024972156, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.19907510301810782}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5888620057074112, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00325974855044316}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.46734675933757636, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002910336590920325}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.49478002813152466, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022132976290390608}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.298835335925771, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0028167958629137044}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2332339594931545, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002268844314664733}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.24713224212661672, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0020909646942975984}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4380934657095154, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003041090518262408}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3447152118587733, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002445756611409155}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3659238384457185, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002069857446914006}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.491576051493737, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003239734182513254}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.38921339856167103, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027477384856747842}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.41241936907841864, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022987607835818604}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/generation/agg.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_3.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 14.64881594516423, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.14225876634497212}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5907860432410378, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0031560435722029602}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.47164290046583734, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0028460616360397144}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.5004124175592417, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022065167958013948}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.30074421250718314, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0027356725355736525}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.23739401119844838, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0022738788955147583}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.25184632683249925, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0021066699664814875}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.437299096878406, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002968957729484418}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3468480918537638, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024452067199333613}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3686141972958827, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0021055632446747067}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4939511257717702, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003153232253904959}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.39417799230568645, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027496846190725264}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.4182292558816836, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023341754403372627}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/generation/agg.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_4.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 15.098333020388077, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12831977294846147}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5966648139686145, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032302255772308427}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.47760538890316145, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00285398486806638}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.5071734526172544, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002256209019372641}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.3049813003705614, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0028143522315018875}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.24128439152213985, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002326306182449381}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.2561891586621636, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002175088764672847}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.4375644960495563, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0029585544115121273}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3490377714500209, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024749540858779945}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.37081754277374707, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002131577935728707}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.49662519790850435, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003207650849575026}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3977409010762705, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027651459862259265}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.4222811145975071, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002382321269391712}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/generation/agg.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_5.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 15.062931767147242, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.19067140642933314}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.5946866907857069, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0032041378906118583}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4773278231081852, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0027831508718061987}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.5077222285813221, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002220732488512632}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.30451374992065544, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002789941472506245}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2417166711851211, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0022968048020895614}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.2570060960713732, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0021518885893666365}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.43676493018556145, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002927133353052165}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.34988140572226323, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024419111341844576}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3721494155980024, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002117723784512971}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.49777714097705755, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0031788848137924734}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.39973890243703447, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027139961037825227}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.42510332300543213, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023482381852689186}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/generation/agg.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_0.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.14732873010394443, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0018472239906953342}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.35199358110459894, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004413516152222082}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.20505824244715506, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0024783005153629807}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0324785643615811, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010795578817257175}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.08244261487005299, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002863077391938578}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.04590441078806022, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0015202206206850909}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.108970601383633, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001382629098764731}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.2622701817944651, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0035205468161132866}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.1519188383604905, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018845750655938495}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.1173139370778848, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001569538852204758}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.2818166643830611, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0038965162010598576}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.16348753229063173, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0021323303474861774}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.750432821724133, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11024260307164419}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/generation/agg.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_1.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.23169122870326006, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004045147587622825}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.26815002474583066, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004110152736623833}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.22983801220388622, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0032940906774565366}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.052796359945369666, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0023703879179179594}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.06066441266683632, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0024082157247570543}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.05179388258497403, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0020972850091121355}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.1746429223847538, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003336919221467921}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.2011435130769754, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0032461458681743278}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.17244857599829136, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00267315623596849}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.1772831820859513, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003324161176359277}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.20683242721981307, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0034228474156704266}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.1758880230368807, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0026930773402095492}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 2.4943571404092832, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12369471155469476}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/generation/agg.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_2.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.25217889274484556, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004187176500529052}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2667996083056836, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003952166510919622}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.24350643855178628, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0034168567984665956}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.06179253514253428, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0026427530004505444}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.06403547304250448, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0024621761069962894}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.05864225314595189, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.002293228927176543}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.18918049038921544, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0034708834616706697}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.20001592844363475, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003205668683161223}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.18232363203561192, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0028221690513137707}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.19188184345223513, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0034530710844608826}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.20518196814507939, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0033724061824908134}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.18572746911328056, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002846842492367856}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 2.9857214106954544, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.14101412936916624}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/generation/agg.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_3.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.2484813044251097, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0042984090370612816}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.25675482709920644, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00429153056298093}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.238220796075611, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003745482282361713}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.06066711842193548, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0025878516044052237}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.06342202286918776, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002586925750413113}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.058505610925660055, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0023584061095609406}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.1878574292139355, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003601580338442701}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.19283239743534678, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0034899752275218544}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.17904839989000923, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0030840608870087312}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.19005537260387176, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0035974744150852533}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.1966886872714737, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0035964115374883606}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.18170572127239473, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.003101828200101794}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 3.2870789581611426, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.21202333940864973}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/generation/agg.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_4.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.06527208935660792, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004136508171521384}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.05998571726736939, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003713797694626656}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.056660365933148535, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0033872772198712937}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.01545632928618081, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0016742571770703835}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.014726543547135009, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014902502152271866}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.013504510872103278, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001329260201745657}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.0505669228329614, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0033876169186837024}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.04522270312870648, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0029093653261490714}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.04292181483636768, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002659025122857177}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.051289550137691375, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0034102478996916284}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.04641007879693294, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002993715279238018}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.04378042063061544, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0026997367087303044}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.20657036807352894, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.057452674047792286}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/generation/agg.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_5.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.0027295547123993985, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0009121185765318135}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.0019729049386547235, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0005615055607779773}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.002178348093993909, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0006423774239749017}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0005583570131732993, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0003699378783076142}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.00025688148329657765, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00014579873768176573}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.0003426428225561565, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00020535656561021637}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.0022248632641988773, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0007092471581067394}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0015861083611375401, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00043284232233835025}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.0017770068245789574, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.000505751759280628}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0023748236425604474, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0007550781242137756}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.0017618177197440384, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0004963241045032187}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0019231486237150143, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0005504938348202478}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.444458864274154e-40, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 1.5488576236449342e-34}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_0.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeede8a8eb388313f0ce068e198c5129c3722eef0c207cbe4a9aba6dcb6bf7b1
|
| 3 |
+
size 4211918
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_1.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd649f263e0eb797cde0735b9607cfd919db8c4c1e9c8bbe67ea1d45cf4067e0
|
| 3 |
+
size 4766627
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_2.jsonl
ADDED
|
File without changes
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_3.jsonl
ADDED
|
File without changes
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_4.jsonl
ADDED
|
File without changes
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_5.jsonl
ADDED
|
File without changes
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_0.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff65078b7aafc1e78235b2d1a0c1056e6575648c8fa068277c5eb620110bbcb5
|
| 3 |
+
size 7662801
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_1.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9fc033390efadb96f677cffa155a4e8eceb8483ee358a9b4e40651507708b53
|
| 3 |
+
size 13013328
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_2.jsonl
ADDED
|
File without changes
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_3.jsonl
ADDED
|
File without changes
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_4.jsonl
ADDED
|
File without changes
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_5.jsonl
ADDED
|
File without changes
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4f43abd5b7b62a2d30f00800086b7fbf4bc4bd348d470cb0036292ca9c1dcd9
|
| 3 |
+
size 4442035
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54ad044cd4ee988067c0fc632c906c640f10d69f465a679c2446a90393f8f7ed
|
| 3 |
+
size 5004165
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e32692d733a0c6839c64367c1fc7abc12049c4bf3cdb3ba316ae7538903028a9
|
| 3 |
+
size 6096820
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e8ec67c67379579999df2a0407490bd0a5cd90a0882959fa253596f64c1fedfc
|
| 3 |
+
size 7177851
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:577b2a15ed121110e6b03b0a3f70d3e1f5f71c907f676c4634525634c79d9ede
|
| 3 |
+
size 8257061
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53551673ae0235240be1ececebf2b0c8c6923d2da3eade250e1e5c79351ef738
|
| 3 |
+
size 9345047
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_0.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b1462350d653e58054585e18f70b3e9533ac5d287bd34e6d54e2d501c6b6314
|
| 3 |
+
size 2833595
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_1.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd120d63650da12ea034812ce0f09eea3ffd7ec227ee094b2d86afcc620ee960
|
| 3 |
+
size 4947829
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_2.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ced14626cf706debd42acc11e81f4e0d01a9560fb221cf2d7fced9bfc35cbcbb
|
| 3 |
+
size 7201408
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_3.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a11ed3fa28e18394fb53da16704f603fc1b017c4c75d0a452fccc2636bfd5d1
|
| 3 |
+
size 9478985
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_4.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5633247857fd203d0e8d00bb75b082e114f0e0a6402965a340384c9170bb1a73
|
| 3 |
+
size 11632660
|
evaluation/generation/examples.lm1-4b2-84b-oscarroots_gem_xsum_article_DOC_summary_5.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03989bdc825c1fcc9871ac832f3e737efbc7f138f23c070f392448ecc64248bc
|
| 3 |
+
size 13897411
|
evaluation/generation/slim.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_0.json
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": [
|
| 3 |
+
{
|
| 4 |
+
"task_name": "GEM/web_nlg_en",
|
| 5 |
+
"prompt_name": "PALM_prompt",
|
| 6 |
+
"bleu": 0.30792186288041745,
|
| 7 |
+
"dataset_path": "GEM/web_nlg",
|
| 8 |
+
"dataset_name": "en",
|
| 9 |
+
"subset": null,
|
| 10 |
+
"bleu_stderr": 0.022591197439731495
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"task_name": "GEM/web_nlg_en",
|
| 14 |
+
"prompt_name": "PALM_prompt",
|
| 15 |
+
"rouge1_precision": 0.07098986968617506,
|
| 16 |
+
"dataset_path": "GEM/web_nlg",
|
| 17 |
+
"dataset_name": "en",
|
| 18 |
+
"subset": null,
|
| 19 |
+
"rouge1_precision_stderr": 0.0020679786797452465
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"task_name": "GEM/web_nlg_en",
|
| 23 |
+
"prompt_name": "PALM_prompt",
|
| 24 |
+
"rouge1_recall": 0.3069347608020833,
|
| 25 |
+
"dataset_path": "GEM/web_nlg",
|
| 26 |
+
"dataset_name": "en",
|
| 27 |
+
"subset": null,
|
| 28 |
+
"rouge1_recall_stderr": 0.00517507368107689
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"task_name": "GEM/web_nlg_en",
|
| 32 |
+
"prompt_name": "PALM_prompt",
|
| 33 |
+
"rouge1_fmeasure": 0.10294623137452774,
|
| 34 |
+
"dataset_path": "GEM/web_nlg",
|
| 35 |
+
"dataset_name": "en",
|
| 36 |
+
"subset": null,
|
| 37 |
+
"rouge1_fmeasure_stderr": 0.0020587417894922925
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"task_name": "GEM/web_nlg_en",
|
| 41 |
+
"prompt_name": "PALM_prompt",
|
| 42 |
+
"rouge2_precision": 0.031171214011889447,
|
| 43 |
+
"dataset_path": "GEM/web_nlg",
|
| 44 |
+
"dataset_name": "en",
|
| 45 |
+
"subset": null,
|
| 46 |
+
"rouge2_precision_stderr": 0.0009608900637588894
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"task_name": "GEM/web_nlg_en",
|
| 50 |
+
"prompt_name": "PALM_prompt",
|
| 51 |
+
"rouge2_recall": 0.14183320494534776,
|
| 52 |
+
"dataset_path": "GEM/web_nlg",
|
| 53 |
+
"dataset_name": "en",
|
| 54 |
+
"subset": null,
|
| 55 |
+
"rouge2_recall_stderr": 0.0033211715439776072
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"task_name": "GEM/web_nlg_en",
|
| 59 |
+
"prompt_name": "PALM_prompt",
|
| 60 |
+
"rouge2_fmeasure": 0.04712663066068758,
|
| 61 |
+
"dataset_path": "GEM/web_nlg",
|
| 62 |
+
"dataset_name": "en",
|
| 63 |
+
"subset": null,
|
| 64 |
+
"rouge2_fmeasure_stderr": 0.0012276103918669153
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"task_name": "GEM/web_nlg_en",
|
| 68 |
+
"prompt_name": "PALM_prompt",
|
| 69 |
+
"rougeL_precision": 0.06796024562406605,
|
| 70 |
+
"dataset_path": "GEM/web_nlg",
|
| 71 |
+
"dataset_name": "en",
|
| 72 |
+
"subset": null,
|
| 73 |
+
"rougeL_precision_stderr": 0.0019370776267962208
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"task_name": "GEM/web_nlg_en",
|
| 77 |
+
"prompt_name": "PALM_prompt",
|
| 78 |
+
"rougeL_recall": 0.29664082837766304,
|
| 79 |
+
"dataset_path": "GEM/web_nlg",
|
| 80 |
+
"dataset_name": "en",
|
| 81 |
+
"subset": null,
|
| 82 |
+
"rougeL_recall_stderr": 0.005024240535996322
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"task_name": "GEM/web_nlg_en",
|
| 86 |
+
"prompt_name": "PALM_prompt",
|
| 87 |
+
"rougeL_fmeasure": 0.09881593036325725,
|
| 88 |
+
"dataset_path": "GEM/web_nlg",
|
| 89 |
+
"dataset_name": "en",
|
| 90 |
+
"subset": null,
|
| 91 |
+
"rougeL_fmeasure_stderr": 0.001917130031576468
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"task_name": "GEM/web_nlg_en",
|
| 95 |
+
"prompt_name": "PALM_prompt",
|
| 96 |
+
"rougeLsum_precision": 0.0670791406212816,
|
| 97 |
+
"dataset_path": "GEM/web_nlg",
|
| 98 |
+
"dataset_name": "en",
|
| 99 |
+
"subset": null,
|
| 100 |
+
"rougeLsum_precision_stderr": 0.00196698409746432
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"task_name": "GEM/web_nlg_en",
|
| 104 |
+
"prompt_name": "PALM_prompt",
|
| 105 |
+
"rougeLsum_recall": 0.2878672957371347,
|
| 106 |
+
"dataset_path": "GEM/web_nlg",
|
| 107 |
+
"dataset_name": "en",
|
| 108 |
+
"subset": null,
|
| 109 |
+
"rougeLsum_recall_stderr": 0.004800974945636242
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"task_name": "GEM/web_nlg_en",
|
| 113 |
+
"prompt_name": "PALM_prompt",
|
| 114 |
+
"rougeLsum_fmeasure": 0.09698777350644844,
|
| 115 |
+
"dataset_path": "GEM/web_nlg",
|
| 116 |
+
"dataset_name": "en",
|
| 117 |
+
"subset": null,
|
| 118 |
+
"rougeLsum_fmeasure_stderr": 0.0019324455424936378
|
| 119 |
+
}
|
| 120 |
+
],
|
| 121 |
+
"config": {
|
| 122 |
+
"model": "hf-causal",
|
| 123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
| 124 |
+
"task_args": "",
|
| 125 |
+
"num_fewshot": 0,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"device": "cuda",
|
| 128 |
+
"use_cache": false,
|
| 129 |
+
"limit": 3000,
|
| 130 |
+
"bootstrap_iters": 10,
|
| 131 |
+
"seed": 1234
|
| 132 |
+
}
|
| 133 |
+
}
|
evaluation/generation/slim.lm1-4b2-84b-oscarroots_GEM-web_nlg_en_PALM_prompt_1.json
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": [
|
| 3 |
+
{
|
| 4 |
+
"task_name": "GEM/web_nlg_en",
|
| 5 |
+
"prompt_name": "PALM_prompt",
|
| 6 |
+
"bleu": 0.5029735260771602,
|
| 7 |
+
"dataset_path": "GEM/web_nlg",
|
| 8 |
+
"dataset_name": "en",
|
| 9 |
+
"subset": null,
|
| 10 |
+
"bleu_stderr": 0.037504230092449804
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"task_name": "GEM/web_nlg_en",
|
| 14 |
+
"prompt_name": "PALM_prompt",
|
| 15 |
+
"rouge1_precision": 0.13619661582031448,
|
| 16 |
+
"dataset_path": "GEM/web_nlg",
|
| 17 |
+
"dataset_name": "en",
|
| 18 |
+
"subset": null,
|
| 19 |
+
"rouge1_precision_stderr": 0.0043020137724201464
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"task_name": "GEM/web_nlg_en",
|
| 23 |
+
"prompt_name": "PALM_prompt",
|
| 24 |
+
"rouge1_recall": 0.316637398074897,
|
| 25 |
+
"dataset_path": "GEM/web_nlg",
|
| 26 |
+
"dataset_name": "en",
|
| 27 |
+
"subset": null,
|
| 28 |
+
"rouge1_recall_stderr": 0.0047970506632331504
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"task_name": "GEM/web_nlg_en",
|
| 32 |
+
"prompt_name": "PALM_prompt",
|
| 33 |
+
"rouge1_fmeasure": 0.15768658147508735,
|
| 34 |
+
"dataset_path": "GEM/web_nlg",
|
| 35 |
+
"dataset_name": "en",
|
| 36 |
+
"subset": null,
|
| 37 |
+
"rouge1_fmeasure_stderr": 0.003494665966696393
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"task_name": "GEM/web_nlg_en",
|
| 41 |
+
"prompt_name": "PALM_prompt",
|
| 42 |
+
"rouge2_precision": 0.06820976880895747,
|
| 43 |
+
"dataset_path": "GEM/web_nlg",
|
| 44 |
+
"dataset_name": "en",
|
| 45 |
+
"subset": null,
|
| 46 |
+
"rouge2_precision_stderr": 0.0029059822832858398
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"task_name": "GEM/web_nlg_en",
|
| 50 |
+
"prompt_name": "PALM_prompt",
|
| 51 |
+
"rouge2_recall": 0.15837063857057912,
|
| 52 |
+
"dataset_path": "GEM/web_nlg",
|
| 53 |
+
"dataset_name": "en",
|
| 54 |
+
"subset": null,
|
| 55 |
+
"rouge2_recall_stderr": 0.0034375560556260923
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"task_name": "GEM/web_nlg_en",
|
| 59 |
+
"prompt_name": "PALM_prompt",
|
| 60 |
+
"rouge2_fmeasure": 0.07782418733894164,
|
| 61 |
+
"dataset_path": "GEM/web_nlg",
|
| 62 |
+
"dataset_name": "en",
|
| 63 |
+
"subset": null,
|
| 64 |
+
"rouge2_fmeasure_stderr": 0.0023362205542013246
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"task_name": "GEM/web_nlg_en",
|
| 68 |
+
"prompt_name": "PALM_prompt",
|
| 69 |
+
"rougeL_precision": 0.12413525469005826,
|
| 70 |
+
"dataset_path": "GEM/web_nlg",
|
| 71 |
+
"dataset_name": "en",
|
| 72 |
+
"subset": null,
|
| 73 |
+
"rougeL_precision_stderr": 0.0038754321440188466
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"task_name": "GEM/web_nlg_en",
|
| 77 |
+
"prompt_name": "PALM_prompt",
|
| 78 |
+
"rougeL_recall": 0.2992719822505979,
|
| 79 |
+
"dataset_path": "GEM/web_nlg",
|
| 80 |
+
"dataset_name": "en",
|
| 81 |
+
"subset": null,
|
| 82 |
+
"rougeL_recall_stderr": 0.004524889522002559
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"task_name": "GEM/web_nlg_en",
|
| 86 |
+
"prompt_name": "PALM_prompt",
|
| 87 |
+
"rougeL_fmeasure": 0.14491859595914175,
|
| 88 |
+
"dataset_path": "GEM/web_nlg",
|
| 89 |
+
"dataset_name": "en",
|
| 90 |
+
"subset": null,
|
| 91 |
+
"rougeL_fmeasure_stderr": 0.003065753619906709
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"task_name": "GEM/web_nlg_en",
|
| 95 |
+
"prompt_name": "PALM_prompt",
|
| 96 |
+
"rougeLsum_precision": 0.12668637709059938,
|
| 97 |
+
"dataset_path": "GEM/web_nlg",
|
| 98 |
+
"dataset_name": "en",
|
| 99 |
+
"subset": null,
|
| 100 |
+
"rougeLsum_precision_stderr": 0.003967270717894955
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"task_name": "GEM/web_nlg_en",
|
| 104 |
+
"prompt_name": "PALM_prompt",
|
| 105 |
+
"rougeLsum_recall": 0.3010027831236105,
|
| 106 |
+
"dataset_path": "GEM/web_nlg",
|
| 107 |
+
"dataset_name": "en",
|
| 108 |
+
"subset": null,
|
| 109 |
+
"rougeLsum_recall_stderr": 0.004513515461941868
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"task_name": "GEM/web_nlg_en",
|
| 113 |
+
"prompt_name": "PALM_prompt",
|
| 114 |
+
"rougeLsum_fmeasure": 0.14722111353362036,
|
| 115 |
+
"dataset_path": "GEM/web_nlg",
|
| 116 |
+
"dataset_name": "en",
|
| 117 |
+
"subset": null,
|
| 118 |
+
"rougeLsum_fmeasure_stderr": 0.0031421471913191945
|
| 119 |
+
}
|
| 120 |
+
],
|
| 121 |
+
"config": {
|
| 122 |
+
"model": "hf-causal",
|
| 123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
| 124 |
+
"task_args": "",
|
| 125 |
+
"num_fewshot": 1,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"device": "cuda",
|
| 128 |
+
"use_cache": false,
|
| 129 |
+
"limit": 3000,
|
| 130 |
+
"bootstrap_iters": 10,
|
| 131 |
+
"seed": 1234
|
| 132 |
+
}
|
| 133 |
+
}
|
evaluation/generation/slim.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_0.json
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": [
|
| 3 |
+
{
|
| 4 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 5 |
+
"prompt_name": "tldr_en",
|
| 6 |
+
"rouge1_precision": 0.1677624908585186,
|
| 7 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 8 |
+
"dataset_name": "en",
|
| 9 |
+
"subset": null,
|
| 10 |
+
"rouge1_precision_stderr": 0.002227107904916062
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 14 |
+
"prompt_name": "tldr_en",
|
| 15 |
+
"rouge1_recall": 0.2596588931478101,
|
| 16 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 17 |
+
"dataset_name": "en",
|
| 18 |
+
"subset": null,
|
| 19 |
+
"rouge1_recall_stderr": 0.002997542713484213
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 23 |
+
"prompt_name": "tldr_en",
|
| 24 |
+
"rouge1_fmeasure": 0.1877757783616715,
|
| 25 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 26 |
+
"dataset_name": "en",
|
| 27 |
+
"subset": null,
|
| 28 |
+
"rouge1_fmeasure_stderr": 0.002098472966964512
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 32 |
+
"prompt_name": "tldr_en",
|
| 33 |
+
"rouge2_precision": 0.03900987564301008,
|
| 34 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 35 |
+
"dataset_name": "en",
|
| 36 |
+
"subset": null,
|
| 37 |
+
"rouge2_precision_stderr": 0.0009646264507463447
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 41 |
+
"prompt_name": "tldr_en",
|
| 42 |
+
"rouge2_recall": 0.06275053320845367,
|
| 43 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 44 |
+
"dataset_name": "en",
|
| 45 |
+
"subset": null,
|
| 46 |
+
"rouge2_recall_stderr": 0.0016213745197323872
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 50 |
+
"prompt_name": "tldr_en",
|
| 51 |
+
"rouge2_fmeasure": 0.04392454941063653,
|
| 52 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 53 |
+
"dataset_name": "en",
|
| 54 |
+
"subset": null,
|
| 55 |
+
"rouge2_fmeasure_stderr": 0.001027826782042431
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 59 |
+
"prompt_name": "tldr_en",
|
| 60 |
+
"rougeL_precision": 0.1321786244987408,
|
| 61 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 62 |
+
"dataset_name": "en",
|
| 63 |
+
"subset": null,
|
| 64 |
+
"rougeL_precision_stderr": 0.0015898806527527606
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 68 |
+
"prompt_name": "tldr_en",
|
| 69 |
+
"rougeL_recall": 0.2108343289151636,
|
| 70 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 71 |
+
"dataset_name": "en",
|
| 72 |
+
"subset": null,
|
| 73 |
+
"rougeL_recall_stderr": 0.0024185861125649503
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 77 |
+
"prompt_name": "tldr_en",
|
| 78 |
+
"rougeL_fmeasure": 0.1495463996881597,
|
| 79 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 80 |
+
"dataset_name": "en",
|
| 81 |
+
"subset": null,
|
| 82 |
+
"rougeL_fmeasure_stderr": 0.001529629957138695
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 86 |
+
"prompt_name": "tldr_en",
|
| 87 |
+
"rougeLsum_precision": 0.15492456336179825,
|
| 88 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 89 |
+
"dataset_name": "en",
|
| 90 |
+
"subset": null,
|
| 91 |
+
"rougeLsum_precision_stderr": 0.0020566337458244716
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 95 |
+
"prompt_name": "tldr_en",
|
| 96 |
+
"rougeLsum_recall": 0.24059158197735586,
|
| 97 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 98 |
+
"dataset_name": "en",
|
| 99 |
+
"subset": null,
|
| 100 |
+
"rougeLsum_recall_stderr": 0.0028186220199704517
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 104 |
+
"prompt_name": "tldr_en",
|
| 105 |
+
"rougeLsum_fmeasure": 0.1735928623880009,
|
| 106 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 107 |
+
"dataset_name": "en",
|
| 108 |
+
"subset": null,
|
| 109 |
+
"rougeLsum_fmeasure_stderr": 0.0019465237397107696
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 113 |
+
"prompt_name": "tldr_en",
|
| 114 |
+
"bleu": 2.443321935592914,
|
| 115 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 116 |
+
"dataset_name": "en",
|
| 117 |
+
"subset": null,
|
| 118 |
+
"bleu_stderr": 0.09064724887478164
|
| 119 |
+
}
|
| 120 |
+
],
|
| 121 |
+
"config": {
|
| 122 |
+
"model": "hf-causal",
|
| 123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
| 124 |
+
"task_args": "",
|
| 125 |
+
"num_fewshot": 0,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"device": "cuda",
|
| 128 |
+
"use_cache": false,
|
| 129 |
+
"limit": 3000,
|
| 130 |
+
"bootstrap_iters": 10,
|
| 131 |
+
"seed": 1234
|
| 132 |
+
}
|
| 133 |
+
}
|
evaluation/generation/slim.lm1-4b2-84b-oscarroots_GEM-wiki_lingua_en_tldr_en_1.json
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": [
|
| 3 |
+
{
|
| 4 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 5 |
+
"prompt_name": "tldr_en",
|
| 6 |
+
"rouge1_precision": 0.2506949292837622,
|
| 7 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 8 |
+
"dataset_name": "en",
|
| 9 |
+
"subset": null,
|
| 10 |
+
"rouge1_precision_stderr": 0.0035826466742239185
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 14 |
+
"prompt_name": "tldr_en",
|
| 15 |
+
"rouge1_recall": 0.216927054352879,
|
| 16 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 17 |
+
"dataset_name": "en",
|
| 18 |
+
"subset": null,
|
| 19 |
+
"rouge1_recall_stderr": 0.0028088091990487226
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 23 |
+
"prompt_name": "tldr_en",
|
| 24 |
+
"rouge1_fmeasure": 0.19603976253059838,
|
| 25 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 26 |
+
"dataset_name": "en",
|
| 27 |
+
"subset": null,
|
| 28 |
+
"rouge1_fmeasure_stderr": 0.002209711985991064
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 32 |
+
"prompt_name": "tldr_en",
|
| 33 |
+
"rouge2_precision": 0.06671631551602682,
|
| 34 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 35 |
+
"dataset_name": "en",
|
| 36 |
+
"subset": null,
|
| 37 |
+
"rouge2_precision_stderr": 0.0021245172015467873
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 41 |
+
"prompt_name": "tldr_en",
|
| 42 |
+
"rouge2_recall": 0.051740103428563375,
|
| 43 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 44 |
+
"dataset_name": "en",
|
| 45 |
+
"subset": null,
|
| 46 |
+
"rouge2_recall_stderr": 0.0014534357317337603
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 50 |
+
"prompt_name": "tldr_en",
|
| 51 |
+
"rouge2_fmeasure": 0.047953521910444996,
|
| 52 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 53 |
+
"dataset_name": "en",
|
| 54 |
+
"subset": null,
|
| 55 |
+
"rouge2_fmeasure_stderr": 0.0012655545908108951
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 59 |
+
"prompt_name": "tldr_en",
|
| 60 |
+
"rougeL_precision": 0.1934840203286638,
|
| 61 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 62 |
+
"dataset_name": "en",
|
| 63 |
+
"subset": null,
|
| 64 |
+
"rougeL_precision_stderr": 0.002982369736201223
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 68 |
+
"prompt_name": "tldr_en",
|
| 69 |
+
"rougeL_recall": 0.16375914032560498,
|
| 70 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 71 |
+
"dataset_name": "en",
|
| 72 |
+
"subset": null,
|
| 73 |
+
"rougeL_recall_stderr": 0.0021690067639137493
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 77 |
+
"prompt_name": "tldr_en",
|
| 78 |
+
"rougeL_fmeasure": 0.14805027301359455,
|
| 79 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 80 |
+
"dataset_name": "en",
|
| 81 |
+
"subset": null,
|
| 82 |
+
"rougeL_fmeasure_stderr": 0.0016833891759156644
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 86 |
+
"prompt_name": "tldr_en",
|
| 87 |
+
"rougeLsum_precision": 0.23568777027923624,
|
| 88 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 89 |
+
"dataset_name": "en",
|
| 90 |
+
"subset": null,
|
| 91 |
+
"rougeLsum_precision_stderr": 0.003415634236822358
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 95 |
+
"prompt_name": "tldr_en",
|
| 96 |
+
"rougeLsum_recall": 0.2032981534504797,
|
| 97 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 98 |
+
"dataset_name": "en",
|
| 99 |
+
"subset": null,
|
| 100 |
+
"rougeLsum_recall_stderr": 0.002632061036819137
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 104 |
+
"prompt_name": "tldr_en",
|
| 105 |
+
"rougeLsum_fmeasure": 0.18360306023241374,
|
| 106 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 107 |
+
"dataset_name": "en",
|
| 108 |
+
"subset": null,
|
| 109 |
+
"rougeLsum_fmeasure_stderr": 0.00205910275267234
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"task_name": "GEM/wiki_lingua_en",
|
| 113 |
+
"prompt_name": "tldr_en",
|
| 114 |
+
"bleu": 2.897173783637095,
|
| 115 |
+
"dataset_path": "GEM/wiki_lingua",
|
| 116 |
+
"dataset_name": "en",
|
| 117 |
+
"subset": null,
|
| 118 |
+
"bleu_stderr": 0.04723692099575378
|
| 119 |
+
}
|
| 120 |
+
],
|
| 121 |
+
"config": {
|
| 122 |
+
"model": "hf-causal",
|
| 123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
| 124 |
+
"task_args": "",
|
| 125 |
+
"num_fewshot": 1,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"device": "cuda",
|
| 128 |
+
"use_cache": false,
|
| 129 |
+
"limit": 3000,
|
| 130 |
+
"bootstrap_iters": 10,
|
| 131 |
+
"seed": 1234
|
| 132 |
+
}
|
| 133 |
+
}
|
evaluation/generation/slim.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_0.json
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": [
|
| 3 |
+
{
|
| 4 |
+
"task_name": "e2e_nlg_cleaned",
|
| 5 |
+
"prompt_name": "generate_text_restaurant",
|
| 6 |
+
"bleu": 5.941956523442191,
|
| 7 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 8 |
+
"dataset_name": null,
|
| 9 |
+
"subset": null,
|
| 10 |
+
"bleu_stderr": 0.09932749482493093
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"task_name": "e2e_nlg_cleaned",
|
| 14 |
+
"prompt_name": "generate_text_restaurant",
|
| 15 |
+
"rouge1_precision": 0.20220167360973929,
|
| 16 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 17 |
+
"dataset_name": null,
|
| 18 |
+
"subset": null,
|
| 19 |
+
"rouge1_precision_stderr": 0.0017775397370267494
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"task_name": "e2e_nlg_cleaned",
|
| 23 |
+
"prompt_name": "generate_text_restaurant",
|
| 24 |
+
"rouge1_recall": 0.40694948255704005,
|
| 25 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 26 |
+
"dataset_name": null,
|
| 27 |
+
"subset": null,
|
| 28 |
+
"rouge1_recall_stderr": 0.0030318532028052545
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"task_name": "e2e_nlg_cleaned",
|
| 32 |
+
"prompt_name": "generate_text_restaurant",
|
| 33 |
+
"rouge1_fmeasure": 0.266035838888047,
|
| 34 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 35 |
+
"dataset_name": null,
|
| 36 |
+
"subset": null,
|
| 37 |
+
"rouge1_fmeasure_stderr": 0.00214687581319049
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"task_name": "e2e_nlg_cleaned",
|
| 41 |
+
"prompt_name": "generate_text_restaurant",
|
| 42 |
+
"rouge2_precision": 0.08918451858000426,
|
| 43 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 44 |
+
"dataset_name": null,
|
| 45 |
+
"subset": null,
|
| 46 |
+
"rouge2_precision_stderr": 0.0011232374812415247
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"task_name": "e2e_nlg_cleaned",
|
| 50 |
+
"prompt_name": "generate_text_restaurant",
|
| 51 |
+
"rouge2_recall": 0.1824874010352202,
|
| 52 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 53 |
+
"dataset_name": null,
|
| 54 |
+
"subset": null,
|
| 55 |
+
"rouge2_recall_stderr": 0.002231269996765666
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"task_name": "e2e_nlg_cleaned",
|
| 59 |
+
"prompt_name": "generate_text_restaurant",
|
| 60 |
+
"rouge2_fmeasure": 0.11793161527142047,
|
| 61 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 62 |
+
"dataset_name": null,
|
| 63 |
+
"subset": null,
|
| 64 |
+
"rouge2_fmeasure_stderr": 0.0014395777814735315
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"task_name": "e2e_nlg_cleaned",
|
| 68 |
+
"prompt_name": "generate_text_restaurant",
|
| 69 |
+
"rougeL_precision": 0.171931143567588,
|
| 70 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 71 |
+
"dataset_name": null,
|
| 72 |
+
"subset": null,
|
| 73 |
+
"rougeL_precision_stderr": 0.0014094508987155406
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"task_name": "e2e_nlg_cleaned",
|
| 77 |
+
"prompt_name": "generate_text_restaurant",
|
| 78 |
+
"rougeL_recall": 0.3506402050881267,
|
| 79 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 80 |
+
"dataset_name": null,
|
| 81 |
+
"subset": null,
|
| 82 |
+
"rougeL_recall_stderr": 0.002593888393315589
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"task_name": "e2e_nlg_cleaned",
|
| 86 |
+
"prompt_name": "generate_text_restaurant",
|
| 87 |
+
"rougeL_fmeasure": 0.22717154379396048,
|
| 88 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 89 |
+
"dataset_name": null,
|
| 90 |
+
"subset": null,
|
| 91 |
+
"rougeL_fmeasure_stderr": 0.0017325449588314497
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"task_name": "e2e_nlg_cleaned",
|
| 95 |
+
"prompt_name": "generate_text_restaurant",
|
| 96 |
+
"rougeLsum_precision": 0.18007224082622914,
|
| 97 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 98 |
+
"dataset_name": null,
|
| 99 |
+
"subset": null,
|
| 100 |
+
"rougeLsum_precision_stderr": 0.001637841819840828
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"task_name": "e2e_nlg_cleaned",
|
| 104 |
+
"prompt_name": "generate_text_restaurant",
|
| 105 |
+
"rougeLsum_recall": 0.36377758074636973,
|
| 106 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 107 |
+
"dataset_name": null,
|
| 108 |
+
"subset": null,
|
| 109 |
+
"rougeLsum_recall_stderr": 0.0028984303974106587
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"task_name": "e2e_nlg_cleaned",
|
| 113 |
+
"prompt_name": "generate_text_restaurant",
|
| 114 |
+
"rougeLsum_fmeasure": 0.23720221029041466,
|
| 115 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 116 |
+
"dataset_name": null,
|
| 117 |
+
"subset": null,
|
| 118 |
+
"rougeLsum_fmeasure_stderr": 0.002003449214467719
|
| 119 |
+
}
|
| 120 |
+
],
|
| 121 |
+
"config": {
|
| 122 |
+
"model": "hf-causal",
|
| 123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
| 124 |
+
"task_args": "",
|
| 125 |
+
"num_fewshot": 0,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"device": "cuda",
|
| 128 |
+
"use_cache": false,
|
| 129 |
+
"limit": 3000,
|
| 130 |
+
"bootstrap_iters": 10,
|
| 131 |
+
"seed": 1234
|
| 132 |
+
}
|
| 133 |
+
}
|
evaluation/generation/slim.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_1.json
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": [
|
| 3 |
+
{
|
| 4 |
+
"task_name": "e2e_nlg_cleaned",
|
| 5 |
+
"prompt_name": "generate_text_restaurant",
|
| 6 |
+
"bleu": 12.380602370008575,
|
| 7 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 8 |
+
"dataset_name": null,
|
| 9 |
+
"subset": null,
|
| 10 |
+
"bleu_stderr": 0.19219222099661637
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"task_name": "e2e_nlg_cleaned",
|
| 14 |
+
"prompt_name": "generate_text_restaurant",
|
| 15 |
+
"rouge1_precision": 0.5762149768264377,
|
| 16 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 17 |
+
"dataset_name": null,
|
| 18 |
+
"subset": null,
|
| 19 |
+
"rouge1_precision_stderr": 0.003318783916958475
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"task_name": "e2e_nlg_cleaned",
|
| 23 |
+
"prompt_name": "generate_text_restaurant",
|
| 24 |
+
"rouge1_recall": 0.4407794090538949,
|
| 25 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 26 |
+
"dataset_name": null,
|
| 27 |
+
"subset": null,
|
| 28 |
+
"rouge1_recall_stderr": 0.0029897484855389944
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"task_name": "e2e_nlg_cleaned",
|
| 32 |
+
"prompt_name": "generate_text_restaurant",
|
| 33 |
+
"rouge1_fmeasure": 0.4721348830907075,
|
| 34 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 35 |
+
"dataset_name": null,
|
| 36 |
+
"subset": null,
|
| 37 |
+
"rouge1_fmeasure_stderr": 0.0023404970653199955
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"task_name": "e2e_nlg_cleaned",
|
| 41 |
+
"prompt_name": "generate_text_restaurant",
|
| 42 |
+
"rouge2_precision": 0.2814540689762945,
|
| 43 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 44 |
+
"dataset_name": null,
|
| 45 |
+
"subset": null,
|
| 46 |
+
"rouge2_precision_stderr": 0.002795334538538015
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"task_name": "e2e_nlg_cleaned",
|
| 50 |
+
"prompt_name": "generate_text_restaurant",
|
| 51 |
+
"rouge2_recall": 0.21174089039601776,
|
| 52 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 53 |
+
"dataset_name": null,
|
| 54 |
+
"subset": null,
|
| 55 |
+
"rouge2_recall_stderr": 0.0022062395085723743
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"task_name": "e2e_nlg_cleaned",
|
| 59 |
+
"prompt_name": "generate_text_restaurant",
|
| 60 |
+
"rouge2_fmeasure": 0.22711440839557615,
|
| 61 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 62 |
+
"dataset_name": null,
|
| 63 |
+
"subset": null,
|
| 64 |
+
"rouge2_fmeasure_stderr": 0.0020751801779315127
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"task_name": "e2e_nlg_cleaned",
|
| 68 |
+
"prompt_name": "generate_text_restaurant",
|
| 69 |
+
"rougeL_precision": 0.4229881150623638,
|
| 70 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 71 |
+
"dataset_name": null,
|
| 72 |
+
"subset": null,
|
| 73 |
+
"rougeL_precision_stderr": 0.003072919322994061
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"task_name": "e2e_nlg_cleaned",
|
| 77 |
+
"prompt_name": "generate_text_restaurant",
|
| 78 |
+
"rougeL_recall": 0.32033975746914833,
|
| 79 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 80 |
+
"dataset_name": null,
|
| 81 |
+
"subset": null,
|
| 82 |
+
"rougeL_recall_stderr": 0.002489730198496103
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"task_name": "e2e_nlg_cleaned",
|
| 86 |
+
"prompt_name": "generate_text_restaurant",
|
| 87 |
+
"rougeL_fmeasure": 0.3439998053438798,
|
| 88 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 89 |
+
"dataset_name": null,
|
| 90 |
+
"subset": null,
|
| 91 |
+
"rougeL_fmeasure_stderr": 0.00212844149551342
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"task_name": "e2e_nlg_cleaned",
|
| 95 |
+
"prompt_name": "generate_text_restaurant",
|
| 96 |
+
"rougeLsum_precision": 0.4731321806462157,
|
| 97 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 98 |
+
"dataset_name": null,
|
| 99 |
+
"subset": null,
|
| 100 |
+
"rougeLsum_precision_stderr": 0.0032652927514093106
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"task_name": "e2e_nlg_cleaned",
|
| 104 |
+
"prompt_name": "generate_text_restaurant",
|
| 105 |
+
"rougeLsum_recall": 0.36070812284253606,
|
| 106 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 107 |
+
"dataset_name": null,
|
| 108 |
+
"subset": null,
|
| 109 |
+
"rougeLsum_recall_stderr": 0.0027712237046193665
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"task_name": "e2e_nlg_cleaned",
|
| 113 |
+
"prompt_name": "generate_text_restaurant",
|
| 114 |
+
"rougeLsum_fmeasure": 0.3866905695770315,
|
| 115 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 116 |
+
"dataset_name": null,
|
| 117 |
+
"subset": null,
|
| 118 |
+
"rougeLsum_fmeasure_stderr": 0.0023474299675248548
|
| 119 |
+
}
|
| 120 |
+
],
|
| 121 |
+
"config": {
|
| 122 |
+
"model": "hf-causal",
|
| 123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
| 124 |
+
"task_args": "",
|
| 125 |
+
"num_fewshot": 1,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"device": "cuda",
|
| 128 |
+
"use_cache": false,
|
| 129 |
+
"limit": 3000,
|
| 130 |
+
"bootstrap_iters": 10,
|
| 131 |
+
"seed": 1234
|
| 132 |
+
}
|
| 133 |
+
}
|
evaluation/generation/slim.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_2.json
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": [
|
| 3 |
+
{
|
| 4 |
+
"task_name": "e2e_nlg_cleaned",
|
| 5 |
+
"prompt_name": "generate_text_restaurant",
|
| 6 |
+
"bleu": 14.191488024972156,
|
| 7 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 8 |
+
"dataset_name": null,
|
| 9 |
+
"subset": null,
|
| 10 |
+
"bleu_stderr": 0.19907510301810782
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"task_name": "e2e_nlg_cleaned",
|
| 14 |
+
"prompt_name": "generate_text_restaurant",
|
| 15 |
+
"rouge1_precision": 0.5888620057074112,
|
| 16 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 17 |
+
"dataset_name": null,
|
| 18 |
+
"subset": null,
|
| 19 |
+
"rouge1_precision_stderr": 0.00325974855044316
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"task_name": "e2e_nlg_cleaned",
|
| 23 |
+
"prompt_name": "generate_text_restaurant",
|
| 24 |
+
"rouge1_recall": 0.46734675933757636,
|
| 25 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 26 |
+
"dataset_name": null,
|
| 27 |
+
"subset": null,
|
| 28 |
+
"rouge1_recall_stderr": 0.002910336590920325
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"task_name": "e2e_nlg_cleaned",
|
| 32 |
+
"prompt_name": "generate_text_restaurant",
|
| 33 |
+
"rouge1_fmeasure": 0.49478002813152466,
|
| 34 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 35 |
+
"dataset_name": null,
|
| 36 |
+
"subset": null,
|
| 37 |
+
"rouge1_fmeasure_stderr": 0.0022132976290390608
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"task_name": "e2e_nlg_cleaned",
|
| 41 |
+
"prompt_name": "generate_text_restaurant",
|
| 42 |
+
"rouge2_precision": 0.298835335925771,
|
| 43 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 44 |
+
"dataset_name": null,
|
| 45 |
+
"subset": null,
|
| 46 |
+
"rouge2_precision_stderr": 0.0028167958629137044
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"task_name": "e2e_nlg_cleaned",
|
| 50 |
+
"prompt_name": "generate_text_restaurant",
|
| 51 |
+
"rouge2_recall": 0.2332339594931545,
|
| 52 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 53 |
+
"dataset_name": null,
|
| 54 |
+
"subset": null,
|
| 55 |
+
"rouge2_recall_stderr": 0.002268844314664733
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"task_name": "e2e_nlg_cleaned",
|
| 59 |
+
"prompt_name": "generate_text_restaurant",
|
| 60 |
+
"rouge2_fmeasure": 0.24713224212661672,
|
| 61 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 62 |
+
"dataset_name": null,
|
| 63 |
+
"subset": null,
|
| 64 |
+
"rouge2_fmeasure_stderr": 0.0020909646942975984
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"task_name": "e2e_nlg_cleaned",
|
| 68 |
+
"prompt_name": "generate_text_restaurant",
|
| 69 |
+
"rougeL_precision": 0.4380934657095154,
|
| 70 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 71 |
+
"dataset_name": null,
|
| 72 |
+
"subset": null,
|
| 73 |
+
"rougeL_precision_stderr": 0.003041090518262408
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"task_name": "e2e_nlg_cleaned",
|
| 77 |
+
"prompt_name": "generate_text_restaurant",
|
| 78 |
+
"rougeL_recall": 0.3447152118587733,
|
| 79 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 80 |
+
"dataset_name": null,
|
| 81 |
+
"subset": null,
|
| 82 |
+
"rougeL_recall_stderr": 0.002445756611409155
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"task_name": "e2e_nlg_cleaned",
|
| 86 |
+
"prompt_name": "generate_text_restaurant",
|
| 87 |
+
"rougeL_fmeasure": 0.3659238384457185,
|
| 88 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 89 |
+
"dataset_name": null,
|
| 90 |
+
"subset": null,
|
| 91 |
+
"rougeL_fmeasure_stderr": 0.002069857446914006
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"task_name": "e2e_nlg_cleaned",
|
| 95 |
+
"prompt_name": "generate_text_restaurant",
|
| 96 |
+
"rougeLsum_precision": 0.491576051493737,
|
| 97 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 98 |
+
"dataset_name": null,
|
| 99 |
+
"subset": null,
|
| 100 |
+
"rougeLsum_precision_stderr": 0.003239734182513254
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"task_name": "e2e_nlg_cleaned",
|
| 104 |
+
"prompt_name": "generate_text_restaurant",
|
| 105 |
+
"rougeLsum_recall": 0.38921339856167103,
|
| 106 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 107 |
+
"dataset_name": null,
|
| 108 |
+
"subset": null,
|
| 109 |
+
"rougeLsum_recall_stderr": 0.0027477384856747842
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"task_name": "e2e_nlg_cleaned",
|
| 113 |
+
"prompt_name": "generate_text_restaurant",
|
| 114 |
+
"rougeLsum_fmeasure": 0.41241936907841864,
|
| 115 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 116 |
+
"dataset_name": null,
|
| 117 |
+
"subset": null,
|
| 118 |
+
"rougeLsum_fmeasure_stderr": 0.0022987607835818604
|
| 119 |
+
}
|
| 120 |
+
],
|
| 121 |
+
"config": {
|
| 122 |
+
"model": "hf-causal",
|
| 123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
| 124 |
+
"task_args": "",
|
| 125 |
+
"num_fewshot": 2,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"device": "cuda",
|
| 128 |
+
"use_cache": false,
|
| 129 |
+
"limit": 3000,
|
| 130 |
+
"bootstrap_iters": 10,
|
| 131 |
+
"seed": 1234
|
| 132 |
+
}
|
| 133 |
+
}
|
evaluation/generation/slim.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_3.json
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": [
|
| 3 |
+
{
|
| 4 |
+
"task_name": "e2e_nlg_cleaned",
|
| 5 |
+
"prompt_name": "generate_text_restaurant",
|
| 6 |
+
"bleu": 14.64881594516423,
|
| 7 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 8 |
+
"dataset_name": null,
|
| 9 |
+
"subset": null,
|
| 10 |
+
"bleu_stderr": 0.14225876634497212
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"task_name": "e2e_nlg_cleaned",
|
| 14 |
+
"prompt_name": "generate_text_restaurant",
|
| 15 |
+
"rouge1_precision": 0.5907860432410378,
|
| 16 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 17 |
+
"dataset_name": null,
|
| 18 |
+
"subset": null,
|
| 19 |
+
"rouge1_precision_stderr": 0.0031560435722029602
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"task_name": "e2e_nlg_cleaned",
|
| 23 |
+
"prompt_name": "generate_text_restaurant",
|
| 24 |
+
"rouge1_recall": 0.47164290046583734,
|
| 25 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 26 |
+
"dataset_name": null,
|
| 27 |
+
"subset": null,
|
| 28 |
+
"rouge1_recall_stderr": 0.0028460616360397144
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"task_name": "e2e_nlg_cleaned",
|
| 32 |
+
"prompt_name": "generate_text_restaurant",
|
| 33 |
+
"rouge1_fmeasure": 0.5004124175592417,
|
| 34 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 35 |
+
"dataset_name": null,
|
| 36 |
+
"subset": null,
|
| 37 |
+
"rouge1_fmeasure_stderr": 0.0022065167958013948
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"task_name": "e2e_nlg_cleaned",
|
| 41 |
+
"prompt_name": "generate_text_restaurant",
|
| 42 |
+
"rouge2_precision": 0.30074421250718314,
|
| 43 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 44 |
+
"dataset_name": null,
|
| 45 |
+
"subset": null,
|
| 46 |
+
"rouge2_precision_stderr": 0.0027356725355736525
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"task_name": "e2e_nlg_cleaned",
|
| 50 |
+
"prompt_name": "generate_text_restaurant",
|
| 51 |
+
"rouge2_recall": 0.23739401119844838,
|
| 52 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 53 |
+
"dataset_name": null,
|
| 54 |
+
"subset": null,
|
| 55 |
+
"rouge2_recall_stderr": 0.0022738788955147583
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"task_name": "e2e_nlg_cleaned",
|
| 59 |
+
"prompt_name": "generate_text_restaurant",
|
| 60 |
+
"rouge2_fmeasure": 0.25184632683249925,
|
| 61 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 62 |
+
"dataset_name": null,
|
| 63 |
+
"subset": null,
|
| 64 |
+
"rouge2_fmeasure_stderr": 0.0021066699664814875
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"task_name": "e2e_nlg_cleaned",
|
| 68 |
+
"prompt_name": "generate_text_restaurant",
|
| 69 |
+
"rougeL_precision": 0.437299096878406,
|
| 70 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 71 |
+
"dataset_name": null,
|
| 72 |
+
"subset": null,
|
| 73 |
+
"rougeL_precision_stderr": 0.002968957729484418
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"task_name": "e2e_nlg_cleaned",
|
| 77 |
+
"prompt_name": "generate_text_restaurant",
|
| 78 |
+
"rougeL_recall": 0.3468480918537638,
|
| 79 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 80 |
+
"dataset_name": null,
|
| 81 |
+
"subset": null,
|
| 82 |
+
"rougeL_recall_stderr": 0.0024452067199333613
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"task_name": "e2e_nlg_cleaned",
|
| 86 |
+
"prompt_name": "generate_text_restaurant",
|
| 87 |
+
"rougeL_fmeasure": 0.3686141972958827,
|
| 88 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 89 |
+
"dataset_name": null,
|
| 90 |
+
"subset": null,
|
| 91 |
+
"rougeL_fmeasure_stderr": 0.0021055632446747067
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"task_name": "e2e_nlg_cleaned",
|
| 95 |
+
"prompt_name": "generate_text_restaurant",
|
| 96 |
+
"rougeLsum_precision": 0.4939511257717702,
|
| 97 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 98 |
+
"dataset_name": null,
|
| 99 |
+
"subset": null,
|
| 100 |
+
"rougeLsum_precision_stderr": 0.003153232253904959
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"task_name": "e2e_nlg_cleaned",
|
| 104 |
+
"prompt_name": "generate_text_restaurant",
|
| 105 |
+
"rougeLsum_recall": 0.39417799230568645,
|
| 106 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 107 |
+
"dataset_name": null,
|
| 108 |
+
"subset": null,
|
| 109 |
+
"rougeLsum_recall_stderr": 0.0027496846190725264
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"task_name": "e2e_nlg_cleaned",
|
| 113 |
+
"prompt_name": "generate_text_restaurant",
|
| 114 |
+
"rougeLsum_fmeasure": 0.4182292558816836,
|
| 115 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 116 |
+
"dataset_name": null,
|
| 117 |
+
"subset": null,
|
| 118 |
+
"rougeLsum_fmeasure_stderr": 0.0023341754403372627
|
| 119 |
+
}
|
| 120 |
+
],
|
| 121 |
+
"config": {
|
| 122 |
+
"model": "hf-causal",
|
| 123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
| 124 |
+
"task_args": "",
|
| 125 |
+
"num_fewshot": 3,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"device": "cuda",
|
| 128 |
+
"use_cache": false,
|
| 129 |
+
"limit": 3000,
|
| 130 |
+
"bootstrap_iters": 10,
|
| 131 |
+
"seed": 1234
|
| 132 |
+
}
|
| 133 |
+
}
|
evaluation/generation/slim.lm1-4b2-84b-oscarroots_e2e_nlg_cleaned_generate_text_restaurant_4.json
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": [
|
| 3 |
+
{
|
| 4 |
+
"task_name": "e2e_nlg_cleaned",
|
| 5 |
+
"prompt_name": "generate_text_restaurant",
|
| 6 |
+
"bleu": 15.098333020388077,
|
| 7 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 8 |
+
"dataset_name": null,
|
| 9 |
+
"subset": null,
|
| 10 |
+
"bleu_stderr": 0.12831977294846147
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"task_name": "e2e_nlg_cleaned",
|
| 14 |
+
"prompt_name": "generate_text_restaurant",
|
| 15 |
+
"rouge1_precision": 0.5966648139686145,
|
| 16 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 17 |
+
"dataset_name": null,
|
| 18 |
+
"subset": null,
|
| 19 |
+
"rouge1_precision_stderr": 0.0032302255772308427
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"task_name": "e2e_nlg_cleaned",
|
| 23 |
+
"prompt_name": "generate_text_restaurant",
|
| 24 |
+
"rouge1_recall": 0.47760538890316145,
|
| 25 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 26 |
+
"dataset_name": null,
|
| 27 |
+
"subset": null,
|
| 28 |
+
"rouge1_recall_stderr": 0.00285398486806638
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"task_name": "e2e_nlg_cleaned",
|
| 32 |
+
"prompt_name": "generate_text_restaurant",
|
| 33 |
+
"rouge1_fmeasure": 0.5071734526172544,
|
| 34 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 35 |
+
"dataset_name": null,
|
| 36 |
+
"subset": null,
|
| 37 |
+
"rouge1_fmeasure_stderr": 0.002256209019372641
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"task_name": "e2e_nlg_cleaned",
|
| 41 |
+
"prompt_name": "generate_text_restaurant",
|
| 42 |
+
"rouge2_precision": 0.3049813003705614,
|
| 43 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 44 |
+
"dataset_name": null,
|
| 45 |
+
"subset": null,
|
| 46 |
+
"rouge2_precision_stderr": 0.0028143522315018875
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"task_name": "e2e_nlg_cleaned",
|
| 50 |
+
"prompt_name": "generate_text_restaurant",
|
| 51 |
+
"rouge2_recall": 0.24128439152213985,
|
| 52 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 53 |
+
"dataset_name": null,
|
| 54 |
+
"subset": null,
|
| 55 |
+
"rouge2_recall_stderr": 0.002326306182449381
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"task_name": "e2e_nlg_cleaned",
|
| 59 |
+
"prompt_name": "generate_text_restaurant",
|
| 60 |
+
"rouge2_fmeasure": 0.2561891586621636,
|
| 61 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 62 |
+
"dataset_name": null,
|
| 63 |
+
"subset": null,
|
| 64 |
+
"rouge2_fmeasure_stderr": 0.002175088764672847
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"task_name": "e2e_nlg_cleaned",
|
| 68 |
+
"prompt_name": "generate_text_restaurant",
|
| 69 |
+
"rougeL_precision": 0.4375644960495563,
|
| 70 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 71 |
+
"dataset_name": null,
|
| 72 |
+
"subset": null,
|
| 73 |
+
"rougeL_precision_stderr": 0.0029585544115121273
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"task_name": "e2e_nlg_cleaned",
|
| 77 |
+
"prompt_name": "generate_text_restaurant",
|
| 78 |
+
"rougeL_recall": 0.3490377714500209,
|
| 79 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 80 |
+
"dataset_name": null,
|
| 81 |
+
"subset": null,
|
| 82 |
+
"rougeL_recall_stderr": 0.0024749540858779945
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"task_name": "e2e_nlg_cleaned",
|
| 86 |
+
"prompt_name": "generate_text_restaurant",
|
| 87 |
+
"rougeL_fmeasure": 0.37081754277374707,
|
| 88 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 89 |
+
"dataset_name": null,
|
| 90 |
+
"subset": null,
|
| 91 |
+
"rougeL_fmeasure_stderr": 0.002131577935728707
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"task_name": "e2e_nlg_cleaned",
|
| 95 |
+
"prompt_name": "generate_text_restaurant",
|
| 96 |
+
"rougeLsum_precision": 0.49662519790850435,
|
| 97 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 98 |
+
"dataset_name": null,
|
| 99 |
+
"subset": null,
|
| 100 |
+
"rougeLsum_precision_stderr": 0.003207650849575026
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"task_name": "e2e_nlg_cleaned",
|
| 104 |
+
"prompt_name": "generate_text_restaurant",
|
| 105 |
+
"rougeLsum_recall": 0.3977409010762705,
|
| 106 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 107 |
+
"dataset_name": null,
|
| 108 |
+
"subset": null,
|
| 109 |
+
"rougeLsum_recall_stderr": 0.0027651459862259265
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"task_name": "e2e_nlg_cleaned",
|
| 113 |
+
"prompt_name": "generate_text_restaurant",
|
| 114 |
+
"rougeLsum_fmeasure": 0.4222811145975071,
|
| 115 |
+
"dataset_path": "e2e_nlg_cleaned",
|
| 116 |
+
"dataset_name": null,
|
| 117 |
+
"subset": null,
|
| 118 |
+
"rougeLsum_fmeasure_stderr": 0.002382321269391712
|
| 119 |
+
}
|
| 120 |
+
],
|
| 121 |
+
"config": {
|
| 122 |
+
"model": "hf-causal",
|
| 123 |
+
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-4b2-84b-oscarroots/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
|
| 124 |
+
"task_args": "",
|
| 125 |
+
"num_fewshot": 4,
|
| 126 |
+
"batch_size": 16,
|
| 127 |
+
"device": "cuda",
|
| 128 |
+
"use_cache": false,
|
| 129 |
+
"limit": 3000,
|
| 130 |
+
"bootstrap_iters": 10,
|
| 131 |
+
"seed": 1234
|
| 132 |
+
}
|
| 133 |
+
}
|