diana3135 committed
Commit a631890 · Parent: 35425cb

update evaluation prompt, formatting

Files changed (2):
  1. constants.py +1 -1
  2. utils.py +20 -9
constants.py CHANGED
@@ -20,7 +20,7 @@ SDG_DETAILS = """
 
 
 GPT_PROMPT_parallel="""
-For AI generted/modified parts, our base model is GPT-3.5-turbo.
+For AI generated/modified parts, our base model is GPT-3.5-turbo.
 
 For direct generation, we use instructions like following:
 "Given the task as: [task_description], provide an answer: "
utils.py CHANGED
@@ -49,8 +49,8 @@ def generate_text_with_gpt(prompts, api_key = None):
     return ""
 
 
-def generate_ai_initial_answer(task_descriotion, api_key=None):
-    prompt = f"Given the task: {task_descriotion}, provide an answer: "
+def generate_ai_initial_answer(task_description, api_key=None):
+    prompt = f"Given the task: {task_description}, provide an answer: "
     return generate_text_with_gpt(prompt, api_key)
 
 def merge_texts_parallel(task_description, human_text, ai_text, api_key = None):
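
The first hunk corrects the misspelled task_descriotion parameter. A minimal sketch of calling the fixed helper, assuming utils.py is importable and the API key is read from an environment variable (that lookup is an assumption for illustration, not something shown in the diff):

import os

from utils import generate_ai_initial_answer  # assumes utils.py is on the path

# Reading OPENAI_API_KEY from the environment is an assumption; the diff
# only shows that an api_key argument is accepted.
answer = generate_ai_initial_answer(
    task_description="propose a method to reduce urban traffic congestion",
    api_key=os.environ.get("OPENAI_API_KEY"),
)
print(answer)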
 
@@ -69,11 +69,22 @@ def modify_with_suggestion(task_description, text, suggestions, api_key = None):
         f"Modify the answer based on the following suggestions: {suggestions}"
     return generate_text_with_gpt(prompt, api_key)
 
+# def get_evaluation_with_gpt(task_description, text, api_key = None):
+#     prompt = f"Given the task:{task_description}, the answer provided is: {text}\n" + \
+#         f"Evaluate the answer and provide scores between 0 and 10,\n" + \
+#         f"where criteria for evaluation are Novelty, Feasibility, and Defensibility.\n" + \
+#         f"Use a professional standard, where 5 represents an acceptable quality,\n" + \
+#         f"and scores closer to 10 should reflect exceptional quality.\n" + \
+#         f"Finally conclude with an overall score in the range of 0 to 10."
+#     return generate_text_with_gpt(prompt, api_key)
+
 def get_evaluation_with_gpt(task_description, text, api_key = None):
-    prompt = f"Given the task:{task_description}, the answer provided is: {text}\n" + \
-        f"Evaluate the answer and provide scores between 0 and 10,\n" + \
-        f"where criteria for evaluation are Novelty, Feasibility, and Defensibility.\n" + \
-        f"Use a professional standard, where 5 represents an acceptable quality,\n" + \
-        f"and scores closer to 10 should reflect exceptional quality.\n" + \
-        f"Finally conclude with an overall score in the range of 0 to 10."
-    return generate_text_with_gpt(prompt, api_key)
+    prompt = f"Given the task: {task_description}, the provided answer is: {text}\n" + \
+        f"Evaluate the answer according to the following criteria, providing both a score (0-10) and a brief comment (1 sentence) for each.\n" + \
+        f"Use a professional standard, where 5 represents acceptable quality, and scores closer to 10 reflect exceptional quality.\n" + \
+        f"Please format the output exactly as follows:\n" + \
+        f"Novelty: [Score]\nComment: [Short comment on Novelty]\n" + \
+        f"Feasibility: [Score]\nComment: [Short comment on Feasibility]\n" + \
+        f"Defensibility: [Score]\nComment: [Short comment on Defensibility]\n" + \
+        f"Overall Score: [Score]\nOverall Comment: [Overall feedback on the answer]\n"
+    return generate_text_with_gpt(prompt, api_key)
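
Since the rewritten prompt pins the output to an exact line format, the scores can be recovered mechanically downstream. Below is a sketch of one way to parse that output, assuming the model follows the requested format exactly; parse_evaluation is hypothetical and not part of this commit, and production code would need fallbacks for malformed replies.

import re

def parse_evaluation(raw):
    """Extract the numeric scores the new evaluation prompt asks for."""
    scores = {}
    for criterion in ("Novelty", "Feasibility", "Defensibility", "Overall Score"):
        # Each line is expected to look like "Novelty: 7" per the prompt.
        match = re.search(rf"^{criterion}:\s*([0-9]+(?:\.[0-9]+)?)", raw, re.MULTILINE)
        if match:
            scores[criterion] = float(match.group(1))
    return scores

sample = (
    "Novelty: 6\nComment: Reasonably fresh framing.\n"
    "Feasibility: 8\nComment: Straightforward to implement.\n"
    "Defensibility: 5\nComment: Needs stronger supporting evidence.\n"
    "Overall Score: 6\nOverall Comment: Solid but not exceptional.\n"
)
print(parse_evaluation(sample))
# {'Novelty': 6.0, 'Feasibility': 8.0, 'Defensibility': 5.0, 'Overall Score': 6.0}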