Spaces:

aail-hf
/

ensemble_machine

Sleeping

diana3135 committed on Nov 15, 2024

Commit

4010b2c

1 Parent(s): 0c0398a

add alignment and inimitability, remove comment

Files changed (1) hide show

utils.py CHANGED Viewed

@@ -71,22 +71,22 @@ def modify_with_suggestion(task_description, text, suggestions, api_key = None):
 def get_evaluation_with_gpt(task_description, text, api_key=None):
     prompt = (
-        f"Given the task: {task_description}, the provided answer is: {text}\n"
-        f"Evaluate the answer using a scale from 0 to 10. Scores should reflect stricter quality thresholds:\n"
-        f"0-4 indicates below-average quality with significant shortcomings.\n"
-        f"5-6 represents acceptable quality.\n"
-        f"7-8 signifies good quality, showing substantial thought.\n"
-        f"9-10 represents exceptional quality, with insightful, and highly concrete suggestions.\n\n"
-        f"When evaluating, use the entire scoring range and avoid defaulting to mid-range scores.\n\n"
-        f"Evaluate based on the criteria:\n\n"
-        f"Novelty: The uniqueness and innovation of the ideas."
-        f"Implementability: The practicality of suggested actions.\n"
-        f"Inimitability: Difficult for competitors to replicate.\n"
-        f"Please format the output exactly as follows:\n"
-        f"Novelty: [Score]\nComment: [Short comment on Novelty]\n"
-        f"Implementability: [Score]\nComment: [Short comment on Implementability]\n"
-        f"Inimitability: [Score]\nComment: [Short comment on Inimitability]\n"
-        f"Overall Score: [Score]\nOverall Comment: [Overall feedback]\n"
     )
     return generate_text_with_gpt(prompt, api_key)

 def get_evaluation_with_gpt(task_description, text, api_key=None):
     prompt = (
+	    f"Given the task: {task_description}, the provided answer is: {text}\n"
+	    f"Evaluate the answer using a scale from 0 to 10. Scores should reflect how well the ideas fit each specific evaluation criteria:\n"
+	    f"0-2: Poor fit; the idea demonstrates minimal relevance to the criteria.\n"
+	    f"3-5: Partial fit; the idea shows some relevance but contains significant shortcomings.\n"
+	    f"6-8: Good fit; the idea aligns well with the criteria, showing clear relevance and thoughtfulness.\n"
+	    f"9-10: Excellent fit; the idea fully aligns with the criteria, demonstrating exceptional insight.\n\n"
+	    f"When evaluating, use the entire scoring range and avoid defaulting to mid-range scores.\n\n"
+	    f"Evaluate based on the following criteria:\n"
+	    f"Novelty: The uniqueness and innovation of the ideas.\n"
+	    f"Implementability: The practicality of suggested actions.\n"
+	    f"Inimitability: The difficulty for competitors to replicate the ideas.\n"
+	    f"Alignment: The degree to which the ideas align with Airbnb’s goals and 17 SDGs.\n\n"
+	    f"Please format the output exactly as follows:\n"
+	    f"Novelty: [Score]\n"
+	    f"Implementability: [Score]\n"
+	    f"Inimitability: [Score]\n"
+	    f"Alignment: [Score]\n"
     )
     return generate_text_with_gpt(prompt, api_key)