Spaces:
Sleeping
Sleeping
jing committed on
Commit ·
e3b08c9
1
Parent(s): 18b350c
fix a bug
Browse files
app.py
CHANGED
|
@@ -26,6 +26,22 @@ os.makedirs(UPLOAD_DIR, exist_ok=True)
|
|
| 26 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 27 |
DATASET_REPO_ID = "akweury/ELVIS-Human-Results" # Updated with your dataset repo
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
def analyze_and_update():
|
| 30 |
api = HfApi(token=HF_TOKEN)
|
| 31 |
files = api.list_repo_files(DATASET_REPO_ID, repo_type="dataset")
|
|
@@ -71,20 +87,25 @@ def analyze_and_update():
|
|
| 71 |
principle_stats[principle]["count"] += 1
|
| 72 |
if item.get("correct", False):
|
| 73 |
principle_stats[principle]["correct"] += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
if y_true and y_pred:
|
| 75 |
try:
|
| 76 |
acc = np.mean([yt == yp for yt, yp in zip(y_true, y_pred)])
|
| 77 |
-
f1 = f1_score(y_true, y_pred)
|
| 78 |
precision = precision_score(y_true, y_pred, zero_division=0)
|
| 79 |
recall = recall_score(y_true, y_pred, zero_division=0)
|
| 80 |
avg_time = np.mean(solve_times) if solve_times else 0
|
| 81 |
principle_stats[principle]["acc_list"].append(acc)
|
| 82 |
-
principle_stats[principle]["f1_list"]
|
| 83 |
-
|
| 84 |
-
principle_stats[principle]["
|
|
|
|
|
|
|
| 85 |
principle_stats[principle]["solve_time_list"].append(avg_time)
|
| 86 |
except Exception:
|
| 87 |
-
|
| 88 |
|
| 89 |
with open("README.md", "w") as f:
|
| 90 |
f.write("| Principle | Avg Accuracy ± Std | Avg F1 ± Std | Avg Precision ± Std | Avg Recall ± Std | Avg Solve Time (s) ± Std | Avg Hardness ± Std | Count |\n")
|
|
|
|
| 26 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 27 |
DATASET_REPO_ID = "akweury/ELVIS-Human-Results" # Updated with your dataset repo
|
| 28 |
|
| 29 |
+
def confusion_matrix_elements(predictions, ground_truth):
    """Compute binary confusion-matrix counts from 0/1 labels.

    Args:
        predictions: iterable of predicted labels (0 or 1).
        ground_truth: iterable of true labels (0 or 1).

    Returns:
        Tuple ``(TN, FP, FN, TP)``. Items beyond the shorter of the two
        inputs are ignored (``zip`` semantics), matching the original
        behavior; labels other than 0/1 are not counted at all, also as
        in the original.
    """
    TN = FP = FN = TP = 0
    # Single pass over the pairs instead of four separate zip/sum scans.
    for p, gt in zip(predictions, ground_truth):
        if p == 1 and gt == 1:
            TP += 1
        elif p == 1 and gt == 0:
            FP += 1
        elif p == 0 and gt == 1:
            FN += 1
        elif p == 0 and gt == 0:
            TN += 1
    return TN, FP, FN, TP
|
| 36 |
+
|
| 37 |
+
def calculate_metrics(TN, FP, FN, TP):
    """Derive precision, recall and F1 from confusion-matrix counts.

    Any zero denominator (no predicted positives, no actual positives,
    or precision + recall == 0) yields 0 for that metric instead of
    raising ZeroDivisionError.

    Args:
        TN: true-negative count. Unused here; kept so the signature
            mirrors confusion_matrix_elements' return order.
        FP: false-positive count.
        FN: false-negative count.
        TP: true-positive count.

    Returns:
        Tuple ``(precision, recall, f1)``.
    """
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0
    # Local renamed from `f1_score` so it no longer shadows the
    # sklearn.metrics.f1_score function used elsewhere in this file.
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    return precision, recall, f1
|
| 43 |
+
|
| 44 |
+
|
| 45 |
def analyze_and_update():
|
| 46 |
api = HfApi(token=HF_TOKEN)
|
| 47 |
files = api.list_repo_files(DATASET_REPO_ID, repo_type="dataset")
|
|
|
|
| 87 |
principle_stats[principle]["count"] += 1
|
| 88 |
if item.get("correct", False):
|
| 89 |
principle_stats[principle]["correct"] += 1
|
| 90 |
+
|
| 91 |
+
TN, FP, FN, TP = confusion_matrix_elements(y_pred, y_true)
|
| 92 |
+
precision, recall, f1_score = calculate_metrics(TN, FP, FN, TP)
|
| 93 |
+
|
| 94 |
if y_true and y_pred:
|
| 95 |
try:
|
| 96 |
acc = np.mean([yt == yp for yt, yp in zip(y_true, y_pred)])
|
|
|
|
| 97 |
precision = precision_score(y_true, y_pred, zero_division=0)
|
| 98 |
recall = recall_score(y_true, y_pred, zero_division=0)
|
| 99 |
avg_time = np.mean(solve_times) if solve_times else 0
|
| 100 |
principle_stats[principle]["acc_list"].append(acc)
|
| 101 |
+
principle_stats[principle]["f1_list"]= f1_score
|
| 102 |
+
|
| 103 |
+
principle_stats[principle]["precision_list"] = precision
|
| 104 |
+
principle_stats[principle]["recall_list"] = recall
|
| 105 |
+
|
| 106 |
principle_stats[principle]["solve_time_list"].append(avg_time)
|
| 107 |
except Exception:
|
| 108 |
+
raise ValueError("Error in calculating metrics.")
|
| 109 |
|
| 110 |
with open("README.md", "w") as f:
|
| 111 |
f.write("| Principle | Avg Accuracy ± Std | Avg F1 ± Std | Avg Precision ± Std | Avg Recall ± Std | Avg Solve Time (s) ± Std | Avg Hardness ± Std | Count |\n")
|