Zekun Wu
committed on
Commit
·
4d4a56e
1
Parent(s):
a75576c
add
Browse files
- util/evaluator.py +7 -7
util/evaluator.py
CHANGED
|
@@ -34,7 +34,7 @@ class evaluator:
|
|
| 34 |
evaluation_prompt = f"""You are provided with a user's question and the corresponding explanation generated by
|
| 35 |
an AI model. Your task is to evaluate the explanation based on the following five principles. Each principle
|
| 36 |
should be scored on a scale from 0 to 1, where 0 indicates that the principle is not met at all,
|
| 37 |
-
and 1 indicates that the principle is fully satisfied. Additionally, provide a brief explanation for each score to justify your rating.
|
| 38 |
|
| 39 |
Question:
|
| 40 |
{question}
|
|
@@ -69,23 +69,23 @@ class evaluator:
|
|
| 69 |
Example JSON format:
|
| 70 |
{{
|
| 71 |
"Factually Correct": {{
|
| 72 |
-
"Justification": "
|
| 73 |
"Score": 9
|
| 74 |
}},
|
| 75 |
"Useful": {{
|
| 76 |
-
"Justification": "
|
| 77 |
"Score": 8.5
|
| 78 |
}},
|
| 79 |
"Context Specific": {{
|
| 80 |
-
"Justification": "
|
| 81 |
"Score": 8
|
| 82 |
}},
|
| 83 |
"User Specific": {{
|
| 84 |
-
"Justification": "
|
| 85 |
"Score": 7.5
|
| 86 |
}},
|
| 87 |
"Provides Pluralism": {{
|
| 88 |
-
"Justification": "
|
| 89 |
"Score": 7
|
| 90 |
}}
|
| 91 |
}}
|
|
@@ -225,7 +225,7 @@ def write_evaluation_commentary(scores):
|
|
| 225 |
comment = "Lacks diversity in viewpoints, limiting the depth of exploration into the topic."
|
| 226 |
|
| 227 |
evaluation_details.append(
|
| 228 |
-
{'Principle': principle, 'Score': score, '
|
| 229 |
|
| 230 |
return evaluation_details
|
| 231 |
# def write_evaluation_commentary(scores):
|
|
|
|
| 34 |
evaluation_prompt = f"""You are provided with a user's question and the corresponding explanation generated by
|
| 35 |
an AI model. Your task is to evaluate the explanation based on the following five principles. Each principle
|
| 36 |
should be scored on a scale from 0 to 1, where 0 indicates that the principle is not met at all,
|
| 37 |
+
and 1 indicates that the principle is fully satisfied. Additionally, provide a brief ten words explanation for each score to justify your rating.
|
| 38 |
|
| 39 |
Question:
|
| 40 |
{question}
|
|
|
|
| 69 |
Example JSON format:
|
| 70 |
{{
|
| 71 |
"Factually Correct": {{
|
| 72 |
+
"Justification": "xxx",
|
| 73 |
"Score": 9
|
| 74 |
}},
|
| 75 |
"Useful": {{
|
| 76 |
+
"Justification": "xxx",
|
| 77 |
"Score": 8.5
|
| 78 |
}},
|
| 79 |
"Context Specific": {{
|
| 80 |
+
"Justification": "xxx",
|
| 81 |
"Score": 8
|
| 82 |
}},
|
| 83 |
"User Specific": {{
|
| 84 |
+
"Justification": "xxx",
|
| 85 |
"Score": 7.5
|
| 86 |
}},
|
| 87 |
"Provides Pluralism": {{
|
| 88 |
+
"Justification": "xxx",
|
| 89 |
"Score": 7
|
| 90 |
}}
|
| 91 |
}}
|
|
|
|
| 225 |
comment = "Lacks diversity in viewpoints, limiting the depth of exploration into the topic."
|
| 226 |
|
| 227 |
evaluation_details.append(
|
| 228 |
+
{'Principle': principle, 'Score': score, 'Justification': justification,'Commentary': comment})
|
| 229 |
|
| 230 |
return evaluation_details
|
| 231 |
# def write_evaluation_commentary(scores):
|