diana3135 committed
Commit a631890 · 1 Parent(s): 35425cb

update evaluation prompt, formatting

Files changed:
- constants.py +1 -1
- utils.py +20 -9
constants.py CHANGED

@@ -20,7 +20,7 @@ SDG_DETAILS = """
 
 
 GPT_PROMPT_parallel="""
-For AI
+For AI generated/modified parts, our base model is GPT-3.5-turbo.
 
 For direct generation, we use instructions like following:
 "Given the task as: [task_description], provide an answer: "
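The bracketed placeholder in the template above is filled in with the concrete task at call time. A minimal sketch of that substitution, with an invented task string:

# Hypothetical illustration: filling the direct-generation template from the docstring above.
template = "Given the task as: [task_description], provide an answer: "
task_description = "design a biodegradable phone case"  # invented example task
prompt = template.replace("[task_description]", task_description)
# prompt is now "Given the task as: design a biodegradable phone case, provide an answer: "

(utils.py builds the same prompt directly with an f-string, as the next diff shows.)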
utils.py CHANGED

@@ -49,8 +49,8 @@ def generate_text_with_gpt(prompts, api_key = None):
     return ""
 
 
-def generate_ai_initial_answer(
-    prompt = f"Given the task: {
+def generate_ai_initial_answer(task_description, api_key=None):
+    prompt = f"Given the task: {task_description}, provide an answer: "
     return generate_text_with_gpt(prompt, api_key)
 
 def merge_texts_parallel(task_description, human_text, ai_text, api_key = None):
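A quick usage sketch of the rewritten helper. The task string and key are placeholder values, and generate_text_with_gpt is the wrapper defined earlier in utils.py (which, per line 49 above, returns an empty string on at least one failure path):

# Hypothetical usage; the task and key values are placeholders.
from utils import generate_ai_initial_answer

answer = generate_ai_initial_answer(
    task_description="propose a low-cost water filtration method",  # invented example
    api_key="sk-...",  # an OpenAI-style API key
)
print(answer or "generation failed")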
@@ -69,11 +69,22 @@ def modify_with_suggestion(task_description, text, suggestions, api_key = None):
         f"Modify the answer based on the following suggestions: {suggestions}"
     return generate_text_with_gpt(prompt, api_key)
 
+# def get_evaluation_with_gpt(task_description, text, api_key = None):
+#     prompt = f"Given the task:{task_description}, the answer provided is: {text}\n" + \
+#         f"Evaluate the answer and provide scores between 0 and 10,\n" + \
+#         f"where criteria for evaluation are Novelty, Feasibility, and Defensibility.\n" + \
+#         f"Use a professional standard, where 5 represents an acceptable quality,\n" + \
+#         f"and scores closer to 10 should reflect exceptional quality.\n" + \
+#         f"Finally conclude with an overall score in the range of 0 to 10."
+#     return generate_text_with_gpt(prompt, api_key)
+
 def get_evaluation_with_gpt(task_description, text, api_key = None):
-    prompt = f"Given the task:{task_description}, the answer provided is: {text}\n" + \
-        f"Evaluate the answer and provide scores between 0 and 10,\n" + \
-        f"where criteria for evaluation are Novelty, Feasibility, and Defensibility.\n" + \
-        f"Use a professional standard, where 5 represents an acceptable quality,\n" + \
-        f"and scores closer to 10 should reflect exceptional quality.\n" + \
-        f"Finally conclude with an overall score in the range of 0 to 10."
-    return generate_text_with_gpt(prompt, api_key)
+    prompt = f"Given the task: {task_description}, the provided answer is: {text}\n" + \
+        f"Evaluate the answer according to the following criteria, providing both a score (0-10) and a brief comment (1 sentence) for each.\n" + \
+        f"Use a professional standard, where 5 represents acceptable quality, and scores closer to 10 reflect exceptional quality.\n" + \
+        f"Please format the output exactly as follows:\n" + \
+        f"Novelty: [Score]\nComment: [Short comment on Novelty]\n" + \
+        f"Feasibility: [Score]\nComment: [Short comment on Feasibility]\n" + \
+        f"Defensibility: [Score]\nComment: [Short comment on Defensibility]\n" + \
+        f"Overall Score: [Score]\nOverall Comment: [Overall feedback on the answer]\n"
+    return generate_text_with_gpt(prompt, api_key)
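Because the new prompt fixes an exact reply layout, downstream code can pull the scores out mechanically. A minimal sketch, assuming the model follows the requested format verbatim; parse_evaluation is a hypothetical helper, not part of this commit:

import re

def parse_evaluation(raw):
    """Extract per-criterion scores and comments from the formatted reply."""
    result = {}
    for criterion in ("Novelty", "Feasibility", "Defensibility"):
        # The prompt requests "Name: [Score]" followed by "Comment: ..." for each criterion.
        m = re.search(rf"{criterion}:\s*([\d.]+)\s*Comment:\s*(.+)", raw)
        if m:
            result[criterion] = {"score": float(m.group(1)), "comment": m.group(2).strip()}
    m = re.search(r"Overall Score:\s*([\d.]+)\s*Overall Comment:\s*(.+)", raw)
    if m:
        result["Overall"] = {"score": float(m.group(1)), "comment": m.group(2).strip()}
    return result

A caller would then combine it with the function above, e.g. parse_evaluation(get_evaluation_with_gpt(task, answer, api_key)), and could rank candidate answers by the "Overall" score.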