diana3135 committed
Commit a631890 · Parent: 35425cb

update evaluation prompt, formatting

Files changed (2):
  1. constants.py +1 -1
  2. utils.py +20 -9
constants.py CHANGED
@@ -20,7 +20,7 @@ SDG_DETAILS = """
 
 
 GPT_PROMPT_parallel="""
-For AI generted/modified parts, our base model is GPT-3.5-turbo.
+For AI generated/modified parts, our base model is GPT-3.5-turbo.
 
 For direct generation, we use instructions like following:
 "Given the task as: [task_description], provide an answer: "
utils.py CHANGED
@@ -49,8 +49,8 @@ def generate_text_with_gpt(prompts, api_key = None):
     return ""
 
 
-def generate_ai_initial_answer(task_descriotion, api_key=None):
-    prompt = f"Given the task: {task_descriotion}, provide an answer: "
+def generate_ai_initial_answer(task_description, api_key=None):
+    prompt = f"Given the task: {task_description}, provide an answer: "
     return generate_text_with_gpt(prompt, api_key)
 
 def merge_texts_parallel(task_description, human_text, ai_text, api_key = None):
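
The first hunk corrects the misspelled task_descriotion parameter. A minimal sketch of calling the fixed helper, assuming utils.py is importable and the API key is read from an environment variable (that lookup is an assumption for illustration, not something shown in the diff):

import os

from utils import generate_ai_initial_answer  # assumes utils.py is on the path

# Reading OPENAI_API_KEY from the environment is an assumption; the diff
# only shows that an api_key argument is accepted.
answer = generate_ai_initial_answer(
    task_description="propose a method to reduce urban traffic congestion",
    api_key=os.environ.get("OPENAI_API_KEY"),
)
print(answer)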
 
@@ -69,11 +69,22 @@ def modify_with_suggestion(task_description, text, suggestions, api_key = None):
         f"Modify the answer based on the following suggestions: {suggestions}"
     return generate_text_with_gpt(prompt, api_key)
 
+# def get_evaluation_with_gpt(task_description, text, api_key = None):
+#     prompt = f"Given the task:{task_description}, the answer provided is: {text}\n" + \
+#         f"Evaluate the answer and provide scores between 0 and 10,\n" + \
+#         f"where criteria for evaluation are Novelty, Feasibility, and Defensibility.\n" + \
+#         f"Use a professional standard, where 5 represents an acceptable quality,\n" + \
+#         f"and scores closer to 10 should reflect exceptional quality.\n" + \
+#         f"Finally conclude with an overall score in the range of 0 to 10."
+#     return generate_text_with_gpt(prompt, api_key)
+
 def get_evaluation_with_gpt(task_description, text, api_key = None):
-    prompt = f"Given the task:{task_description}, the answer provided is: {text}\n" + \
-        f"Evaluate the answer and provide scores between 0 and 10,\n" + \
-        f"where criteria for evaluation are Novelty, Feasibility, and Defensibility.\n" + \
-        f"Use a professional standard, where 5 represents an acceptable quality,\n" + \
-        f"and scores closer to 10 should reflect exceptional quality.\n" + \
-        f"Finally conclude with an overall score in the range of 0 to 10."
-    return generate_text_with_gpt(prompt, api_key)
+    prompt = f"Given the task: {task_description}, the provided answer is: {text}\n" + \
+        f"Evaluate the answer according to the following criteria, providing both a score (0-10) and a brief comment (1 sentence) for each.\n" + \
+        f"Use a professional standard, where 5 represents acceptable quality, and scores closer to 10 reflect exceptional quality.\n" + \
+        f"Please format the output exactly as follows:\n" + \
+        f"Novelty: [Score]\nComment: [Short comment on Novelty]\n" + \
+        f"Feasibility: [Score]\nComment: [Short comment on Feasibility]\n" + \
+        f"Defensibility: [Score]\nComment: [Short comment on Defensibility]\n" + \
+        f"Overall Score: [Score]\nOverall Comment: [Overall feedback on the answer]\n"
+    return generate_text_with_gpt(prompt, api_key)
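
Since the rewritten prompt pins the output to an exact line format, the scores can be recovered mechanically downstream. Below is a sketch of one way to parse that output, assuming the model follows the requested format exactly; parse_evaluation is hypothetical and not part of this commit, and production code would need fallbacks for malformed replies.

import re

def parse_evaluation(raw):
    """Extract the numeric scores the new evaluation prompt asks for."""
    scores = {}
    for criterion in ("Novelty", "Feasibility", "Defensibility", "Overall Score"):
        # Each line is expected to look like "Novelty: 7" per the prompt.
        match = re.search(rf"^{criterion}:\s*([0-9]+(?:\.[0-9]+)?)", raw, re.MULTILINE)
        if match:
            scores[criterion] = float(match.group(1))
    return scores

sample = (
    "Novelty: 6\nComment: Reasonably fresh framing.\n"
    "Feasibility: 8\nComment: Straightforward to implement.\n"
    "Defensibility: 5\nComment: Needs stronger supporting evidence.\n"
    "Overall Score: 6\nOverall Comment: Solid but not exceptional.\n"
)
print(parse_evaluation(sample))
# {'Novelty': 6.0, 'Feasibility': 8.0, 'Defensibility': 5.0, 'Overall Score': 6.0}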