diana3135 committed
Commit 60cc9a1 · 1 Parent(s): a631890

specify evaluation standards in prompt

Files changed (1):
  utils.py +17 -19
utils.py CHANGED

@@ -69,22 +69,20 @@ def modify_with_suggestion(task_description, text, suggestions, api_key = None):
         f"Modify the answer based on the following suggestions: {suggestions}"
     return generate_text_with_gpt(prompt, api_key)
 
-# def get_evaluation_with_gpt(task_description, text, api_key = None):
-#     prompt = f"Given the task:{task_description}, the answer provided is: {text}\n" + \
-#         f"Evaluate the answer and provide scores between 0 and 10,\n" + \
-#         f"where criteria for evaluation are Novelty, Feasibility, and Defensibility.\n" + \
-#         f"Use a professional standard, where 5 represents an acceptable quality,\n" + \
-#         f"and scores closer to 10 should reflect exceptional quality.\n" + \
-#         f"Finally conclude with an overall score in the range of 0 to 10."
-#     return generate_text_with_gpt(prompt, api_key)
-
-def get_evaluation_with_gpt(task_description, text, api_key = None):
-    prompt = f"Given the task: {task_description}, the provided answer is: {text}\n" + \
-        f"Evaluate the answer according to the following criteria, providing both a score (0-10) and a brief comment (1 sentence) for each.\n" + \
-        f"Use a professional standard, where 5 represents acceptable quality, and scores closer to 10 reflect exceptional quality.\n" + \
-        f"Please format the output exactly as follows:\n" + \
-        f"Novelty: [Score]\nComment: [Short comment on Novelty]\n" + \
-        f"Feasibility: [Score]\nComment: [Short comment on Feasibility]\n" + \
-        f"Defensibility: [Score]\nComment: [Short comment on Defensibility]\n" + \
-        f"Overall Score: [Score]\nOverall Comment: [Overall feedback on the answer]\n"
-    return generate_text_with_gpt(prompt, api_key)
+def get_evaluation_with_gpt(task_description, text, api_key=None):
+    prompt = (
+        f"Given the task: {task_description}, the provided answer is: {text}\n"
+        f"Please evaluate the answer based on the following criteria, using a scale from 0 to 10, where:\n"
+        f"0-2 reflects very poor quality, with minimal value or relevance.\n"
+        f"3-4 indicates below-average quality with significant shortcomings.\n"
+        f"5 represents acceptable quality.\n"
+        f"6-8 signifies good to very good quality, showing substantial thought and accuracy.\n"
+        f"9-10 represents exceptional quality, with outstanding insight or depth.\n\n"
+        f"Provide both a score and a brief comment (1 sentence) for each criterion.\n"
+        f"Please format the output exactly as follows:\n"
+        f"Novelty: [Score]\nComment: [Short comment on Novelty]\n"
+        f"Implementability: [Score]\nComment: [Short comment on Implementability]\n"
+        f"Defensibility: [Score]\nComment: [Short comment on Defensibility]\n"
+        f"Overall Score: [Score]\nOverall Comment: [Overall feedback on the answer]\n"
+    )
+    return generate_text_with_gpt(prompt, api_key)
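
Because the new prompt pins the model's output to an exact template, callers can parse the evaluation mechanically instead of reading free text. Below is a minimal parsing sketch, not part of this commit: the function name, the regex, and the sample response are illustrative assumptions about output that follows the requested format.

import re

# Sketch only -- not part of this commit. Matches lines in the exact
# template the prompt requests, e.g. "Novelty: 7" followed by "Comment: ...".
CRITERION_RE = re.compile(
    r"^(Novelty|Implementability|Defensibility|Overall Score): \[?(\d+(?:\.\d+)?)\]?\n"
    r"(?:Overall )?Comment: (.+)$",
    re.MULTILINE,
)

def parse_evaluation(response):
    """Turn a formatted evaluation into {criterion: (score, comment)}."""
    return {
        name: (float(score), comment.strip())
        for name, score, comment in CRITERION_RE.findall(response)
    }

# Hypothetical model output that follows the requested format:
sample = (
    "Novelty: 7\nComment: Recombines known methods in a fresh way.\n"
    "Implementability: 6\nComment: Feasible, but needs a large labeled dataset.\n"
    "Defensibility: 5\nComment: Claims are plausible yet thinly evidenced.\n"
    "Overall Score: 6\nOverall Comment: Solid proposal; tighten the evidence.\n"
)
print(parse_evaluation(sample))

Keeping the criterion names in one alternation also makes mismatches easy to spot, such as a format string labeling a criterion "Implementability" while its comment placeholder still says "Feasibility".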