Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,12 +22,16 @@ def get_evaluation_questions():
|
|
| 22 |
Both lists must be of equal length.
|
| 23 |
"""
|
| 24 |
questions_str = os.environ.get("TEST_QUESTION_1")
|
|
|
|
| 25 |
expected_str = os.environ.get("TEST_EXPECTED_1")
|
|
|
|
| 26 |
if not questions_str or not expected_str:
|
| 27 |
return []
|
| 28 |
try:
|
| 29 |
questions_list = json.loads(questions_str)
|
|
|
|
| 30 |
expected_list = json.loads(expected_str)
|
|
|
|
| 31 |
except Exception as e:
|
| 32 |
print(f"Error parsing evaluation questions: {str(e)}")
|
| 33 |
return []
|
|
@@ -94,6 +98,7 @@ def submit_prompt(email, name, system_prompt):
|
|
| 94 |
)
|
| 95 |
# Extract the answer from the response object.
|
| 96 |
answer = response.choices[0].message.content.strip()
|
|
|
|
| 97 |
except Exception as e:
|
| 98 |
answer = f"Error during OpenAI API call: {str(e)}"
|
| 99 |
|
|
@@ -101,8 +106,10 @@ def submit_prompt(email, name, system_prompt):
|
|
| 101 |
if expected.lower() in answer.lower():
|
| 102 |
score += 1
|
| 103 |
verdict = "Correct"
|
|
|
|
| 104 |
else:
|
| 105 |
verdict = "Incorrect"
|
|
|
|
| 106 |
|
| 107 |
responses.append(
|
| 108 |
f"Question: {question}\n"
|
|
|
|
| 22 |
Both lists must be of equal length.
|
| 23 |
"""
|
| 24 |
questions_str = os.environ.get("TEST_QUESTION_1")
|
| 25 |
+
print("questions",questions_str)
|
| 26 |
expected_str = os.environ.get("TEST_EXPECTED_1")
|
| 27 |
+
print("expected",expected_str)
|
| 28 |
if not questions_str or not expected_str:
|
| 29 |
return []
|
| 30 |
try:
|
| 31 |
questions_list = json.loads(questions_str)
|
| 32 |
+
print("questions lst ",questions_list)
|
| 33 |
expected_list = json.loads(expected_str)
|
| 34 |
+
print("expected lst",expected_list)
|
| 35 |
except Exception as e:
|
| 36 |
print(f"Error parsing evaluation questions: {str(e)}")
|
| 37 |
return []
|
|
|
|
| 98 |
)
|
| 99 |
# Extract the answer from the response object.
|
| 100 |
answer = response.choices[0].message.content.strip()
|
| 101 |
+
print("llm answer", answer)
|
| 102 |
except Exception as e:
|
| 103 |
answer = f"Error during OpenAI API call: {str(e)}"
|
| 104 |
|
|
|
|
| 106 |
if expected.lower() in answer.lower():
|
| 107 |
score += 1
|
| 108 |
verdict = "Correct"
|
| 109 |
+
print(f"{expected.lower()} MATCHES {answer.lower()}")
|
| 110 |
else:
|
| 111 |
verdict = "Incorrect"
|
| 112 |
+
print(f"{expected.lower()} DOES NOT MATCH {answer.lower()}")
|
| 113 |
|
| 114 |
responses.append(
|
| 115 |
f"Question: {question}\n"
|