Ajaykanth Maddi committed on
Commit ·
cb0a1c3
1
Parent(s): 6d9b4d3
Code Changes - Evaluation Function
Browse files
app.py
CHANGED
|
@@ -102,7 +102,7 @@ def run_evaluation(
|
|
| 102 |
docs = []
|
| 103 |
else:
|
| 104 |
query, ans, docs = processdata(instance, noise_rate, passage_num, dataset, correct_rate)
|
| 105 |
-
print(f"Results: \n*********query: {query}, \n*********Answer: {ans}, \n*********docs: {docs}\n*********\n")
|
| 106 |
|
| 107 |
label, prediction, factlabel = predict(query, ans, docs, model, system, instruction, temperature, dataset)
|
| 108 |
print("f******** Label: {label}\n******** Prediction: {prediction}\n******** factlabel: {factlabel}\n ******** \n")
|
|
@@ -158,10 +158,16 @@ def run_evaluation(
|
|
| 158 |
})
|
| 159 |
|
| 160 |
# Save results
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
return scores
|
| 166 |
|
| 167 |
def evaluate_model_ui(modelname, dataset, api_key, url, temperature, noise_rate, correct_rate, passage_num, factchecking):
|
|
|
|
| 102 |
docs = []
|
| 103 |
else:
|
| 104 |
query, ans, docs = processdata(instance, noise_rate, passage_num, dataset, correct_rate)
|
| 105 |
+
# print(f"Results: \n*********query: {query}, \n*********Answer: {ans}, \n*********docs: {docs}\n*********\n")
|
| 106 |
|
| 107 |
label, prediction, factlabel = predict(query, ans, docs, model, system, instruction, temperature, dataset)
|
| 108 |
print("f******** Label: {label}\n******** Prediction: {prediction}\n******** factlabel: {factlabel}\n ******** \n")
|
|
|
|
| 158 |
})
|
| 159 |
|
| 160 |
# Save results
|
| 161 |
+
try:
|
| 162 |
+
score_file = f"{output_file[:-5]}_result.json"
|
| 163 |
+
with open(score_file, 'w') as f:
|
| 164 |
+
json.dump(scores, f, ensure_ascii=False, indent=4)
|
| 165 |
+
except Exception as e:
|
| 166 |
+
print("Error saving scores:", e)
|
| 167 |
+
|
| 168 |
+
# with open(score_file, 'w') as f:
|
| 169 |
+
# json.dump(scores, f, ensure_ascii=False, indent=4)
|
| 170 |
+
print(f"Scores ares {scores}")
|
| 171 |
return scores
|
| 172 |
|
| 173 |
def evaluate_model_ui(modelname, dataset, api_key, url, temperature, noise_rate, correct_rate, passage_num, factchecking):
|