diana3135 committed
Commit 60cc9a1 · 1 Parent(s): a631890

specify evaluation standards in prompt

Files changed (1):
  utils.py +17 -19
utils.py CHANGED

@@ -69,22 +69,20 @@ def modify_with_suggestion(task_description, text, suggestions, api_key = None):
         f"Modify the answer based on the following suggestions: {suggestions}"
     return generate_text_with_gpt(prompt, api_key)
 
-# def get_evaluation_with_gpt(task_description, text, api_key = None):
-#     prompt = f"Given the task:{task_description}, the answer provided is: {text}\n" + \
-#         f"Evaluate the answer and provide scores between 0 and 10,\n" + \
-#         f"where criteria for evaluation are Novelty, Feasibility, and Defensibility.\n" + \
-#         f"Use a professional standard, where 5 represents an acceptable quality,\n" + \
-#         f"and scores closer to 10 should reflect exceptional quality.\n" + \
-#         f"Finally conclude with an overall score in the range of 0 to 10."
-#     return generate_text_with_gpt(prompt, api_key)
-
-def get_evaluation_with_gpt(task_description, text, api_key = None):
-    prompt = f"Given the task: {task_description}, the provided answer is: {text}\n" + \
-        f"Evaluate the answer according to the following criteria, providing both a score (0-10) and a brief comment (1 sentence) for each.\n" + \
-        f"Use a professional standard, where 5 represents acceptable quality, and scores closer to 10 reflect exceptional quality.\n" + \
-        f"Please format the output exactly as follows:\n" + \
-        f"Novelty: [Score]\nComment: [Short comment on Novelty]\n" + \
-        f"Feasibility: [Score]\nComment: [Short comment on Feasibility]\n" + \
-        f"Defensibility: [Score]\nComment: [Short comment on Defensibility]\n" + \
-        f"Overall Score: [Score]\nOverall Comment: [Overall feedback on the answer]\n"
-    return generate_text_with_gpt(prompt, api_key)
+def get_evaluation_with_gpt(task_description, text, api_key=None):
+    prompt = (
+        f"Given the task: {task_description}, the provided answer is: {text}\n"
+        f"Please evaluate the answer based on the following criteria, using a scale from 0 to 10, where:\n"
+        f"0-2 reflects very poor quality, with minimal value or relevance.\n"
+        f"3-4 indicates below-average quality with significant shortcomings.\n"
+        f"5 represents acceptable quality.\n"
+        f"6-8 signifies good to very good quality, showing substantial thought and accuracy.\n"
+        f"9-10 represents exceptional quality, with outstanding insight or depth.\n\n"
+        f"Provide both a score and a brief comment (1 sentence) for each criterion.\n"
+        f"Please format the output exactly as follows:\n"
+        f"Novelty: [Score]\nComment: [Short comment on Novelty]\n"
+        f"Implementability: [Score]\nComment: [Short comment on Implementability]\n"
+        f"Defensibility: [Score]\nComment: [Short comment on Defensibility]\n"
+        f"Overall Score: [Score]\nOverall Comment: [Overall feedback on the answer]\n"
+    )
+    return generate_text_with_gpt(prompt, api_key)
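
Because the new prompt pins the model's output to an exact template, callers can parse the evaluation mechanically instead of reading free text. Below is a minimal parsing sketch, not part of this commit: the function name, the regex, and the sample response are illustrative assumptions about output that follows the requested format.

import re

# Sketch only -- not part of this commit. Matches lines in the exact
# template the prompt requests, e.g. "Novelty: 7" followed by "Comment: ...".
CRITERION_RE = re.compile(
    r"^(Novelty|Implementability|Defensibility|Overall Score): \[?(\d+(?:\.\d+)?)\]?\n"
    r"(?:Overall )?Comment: (.+)$",
    re.MULTILINE,
)

def parse_evaluation(response):
    """Turn a formatted evaluation into {criterion: (score, comment)}."""
    return {
        name: (float(score), comment.strip())
        for name, score, comment in CRITERION_RE.findall(response)
    }

# Hypothetical model output that follows the requested format:
sample = (
    "Novelty: 7\nComment: Recombines known methods in a fresh way.\n"
    "Implementability: 6\nComment: Feasible, but needs a large labeled dataset.\n"
    "Defensibility: 5\nComment: Claims are plausible yet thinly evidenced.\n"
    "Overall Score: 6\nOverall Comment: Solid proposal; tighten the evidence.\n"
)
print(parse_evaluation(sample))

Keeping the criterion names in one alternation also makes mismatches easy to spot, such as a format string labeling a criterion "Implementability" while its comment placeholder still says "Feasibility".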