Update app.py
Browse files
app.py
CHANGED
|
@@ -170,6 +170,13 @@ def run_comparison(target_model, target_adapter, count):
|
|
| 170 |
ft_accuracy, ft_subjects = run_mmlu_evaluation(ft_model, tokenizer, f"{target_adapter}", count)
|
| 171 |
|
| 172 |
progress(1, desc="Evaluation complete.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
output = ''
|
| 175 |
|
|
@@ -273,6 +280,16 @@ def run_comparison(target_model, target_adapter, count):
|
|
| 273 |
if other_changes:
|
| 274 |
output += f"📋 Other: {np.mean(other_changes):+.2f}% avg change ({len(other_changes)} subjects)\n\n"
|
| 275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
return output
|
| 277 |
|
| 278 |
def get_base_model(adapter_repo):
|
|
@@ -420,7 +437,7 @@ with gr.Blocks() as interface:
|
|
| 420 |
output_text_markdown = gr.Markdown("""
|
| 421 |
|
| 422 |
""")
|
| 423 |
-
|
| 424 |
evaluate_button.click(
|
| 425 |
fn=disable_button,
|
| 426 |
inputs=None,
|
|
|
|
| 170 |
ft_accuracy, ft_subjects = run_mmlu_evaluation(ft_model, tokenizer, f"{target_adapter}", count)
|
| 171 |
|
| 172 |
progress(1, desc="Evaluation complete.")
|
| 173 |
+
|
| 174 |
+
data = {
|
| 175 |
+
"base_accuracy": base_accuracy,
|
| 176 |
+
"base_subjects": base_subjects,
|
| 177 |
+
"adapter_accuracy": ft_accuracy,
|
| 178 |
+
"adapter_subjects": ft_subjects
|
| 179 |
+
}
|
| 180 |
|
| 181 |
output = ''
|
| 182 |
|
|
|
|
| 280 |
if other_changes:
|
| 281 |
output += f"📋 Other: {np.mean(other_changes):+.2f}% avg change ({len(other_changes)} subjects)\n\n"
|
| 282 |
|
| 283 |
+
print(data)
|
| 284 |
+
print(output)
|
| 285 |
+
|
| 286 |
+
# Create a temporary file
|
| 287 |
+
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".json")
|
| 288 |
+
|
| 289 |
+
# Save JSON
|
| 290 |
+
with open(temp_file.name, "w", encoding="utf-8") as f:
|
| 291 |
+
json.dump(data, f, indent=2, ensure_ascii=False)
|
| 292 |
+
|
| 293 |
return output
|
| 294 |
|
| 295 |
def get_base_model(adapter_repo):
|
|
|
|
| 437 |
output_text_markdown = gr.Markdown("""
|
| 438 |
|
| 439 |
""")
|
| 440 |
+
|
| 441 |
evaluate_button.click(
|
| 442 |
fn=disable_button,
|
| 443 |
inputs=None,
|