Spaces:

VyLala
/

BioMetadataAudit

Running

App Files Community

VyLala committed on Dec 21, 2025

Commit

71dd8cb

verified ·

1 Parent(s): 0179b52

Update confidence_score.py

Browse files

Files changed (1) hide show

confidence_score.py +58 -56

confidence_score.py CHANGED Viewed

@@ -144,7 +144,8 @@ def compute_confidence_score_and_tier(
     cons_cfg = rules["consistency"]
     print("start compare gb country and pre country")
     if gb_country is not None and pred_country is not None:
-        if gb_country == pred_country:
             score += cons_cfg["match"]
             explanations.append(
                 "Predicted country matches GenBank country metadata."
@@ -154,6 +155,7 @@ def compute_confidence_score_and_tier(
             explanations.append(
                 "Conflict between predicted country and GenBank country metadata."
             )
     else:
         # Only give "no contradiction" bonus if there is at least some evidence
         if has_geo or has_pubmed or accession_in_text:
@@ -161,7 +163,7 @@ def compute_confidence_score_and_tier(
             explanations.append(
                 "No contradiction detected across available sources."
             )
     # ---------- Signal 3: Evidence density ----------
     num_pubs = int(signals.get("num_publications", 0))
     dens_cfg = rules["evidence_density"]
@@ -207,59 +209,59 @@ def compute_confidence_score_and_tier(
     return score, tier, explanations
-if __name__ == "__main__":
-    # Quick local sanity-check examples (manual smoke tests)
-    rules = set_rules()
-    examples = [
-        {
-            "name": "Strong, clean case",
-            "signals": {
-                "has_geo_loc_name": True,
-                "has_pubmed": True,
-                "accession_found_in_text": True,
-                "predicted_country": "USA",
-                "genbank_country": "United States of America",
-                "num_publications": 3,
-                "missing_key_fields": False,
-                "known_failure_pattern": False,
-            },
-        },
-        {
-            "name": "Weak, conflicting case",
-            "signals": {
-                "has_geo_loc_name": True,
-                "has_pubmed": False,
-                "accession_found_in_text": False,
-                "predicted_country": "Japan",
-                "genbank_country": "France",
-                "num_publications": 0,
-                "missing_key_fields": True,
-                "known_failure_pattern": True,
-            },
-        },
-        {
-            "name": "Medium, sparse but okay",
-            "signals": {
-                "has_geo_loc_name": False,
-                "has_pubmed": True,
-                "accession_found_in_text": False,
-                "predicted_country": "United Kingdom",
-                "genbank_country": None,
-                "num_publications": 1,
-                "missing_key_fields": False,
-                "known_failure_pattern": False,
-            },
-        },
-    ]
-    for ex in examples:
-        score, tier, expl = compute_confidence_score_and_tier(
-            ex["signals"], rules
-        )
-        print("====", ex["name"], "====")
-        print("Score:", score, "| Tier:", tier)
-        print("Reasons:")
-        for e in expl:
-            print(" -", e)
-        print()

     cons_cfg = rules["consistency"]
     print("start compare gb country and pre country")
     if gb_country is not None and pred_country is not None:
+        print("inside comparison")
+        if gb_country.lower() == pred_country.lower():
             score += cons_cfg["match"]
             explanations.append(
                 "Predicted country matches GenBank country metadata."
             explanations.append(
                 "Conflict between predicted country and GenBank country metadata."
             )
+        print("done comparison")
     else:
         # Only give "no contradiction" bonus if there is at least some evidence
         if has_geo or has_pubmed or accession_in_text:
             explanations.append(
                 "No contradiction detected across available sources."
             )
+    print("start evidence density")
     # ---------- Signal 3: Evidence density ----------
     num_pubs = int(signals.get("num_publications", 0))
     dens_cfg = rules["evidence_density"]
     return score, tier, explanations
+# if __name__ == "__main__":
+#     # Quick local sanity-check examples (manual smoke tests)
+#     rules = set_rules()
+#     examples = [
+#         {
+#             "name": "Strong, clean case",
+#             "signals": {
+#                 "has_geo_loc_name": True,
+#                 "has_pubmed": True,
+#                 "accession_found_in_text": True,
+#                 "predicted_country": "USA",
+#                 "genbank_country": "United States of America",
+#                 "num_publications": 3,
+#                 "missing_key_fields": False,
+#                 "known_failure_pattern": False,
+#             },
+#         },
+#         {
+#             "name": "Weak, conflicting case",
+#             "signals": {
+#                 "has_geo_loc_name": True,
+#                 "has_pubmed": False,
+#                 "accession_found_in_text": False,
+#                 "predicted_country": "Japan",
+#                 "genbank_country": "France",
+#                 "num_publications": 0,
+#                 "missing_key_fields": True,
+#                 "known_failure_pattern": True,
+#             },
+#         },
+#         {
+#             "name": "Medium, sparse but okay",
+#             "signals": {
+#                 "has_geo_loc_name": False,
+#                 "has_pubmed": True,
+#                 "accession_found_in_text": False,
+#                 "predicted_country": "United Kingdom",
+#                 "genbank_country": None,
+#                 "num_publications": 1,
+#                 "missing_key_fields": False,
+#                 "known_failure_pattern": False,
+#             },
+#         },
+#     ]
+#     for ex in examples:
+#         score, tier, expl = compute_confidence_score_and_tier(
+#             ex["signals"], rules
+#         )
+#         print("====", ex["name"], "====")
+#         print("Score:", score, "| Tier:", tier)
+#         print("Reasons:")
+#         for e in expl:
+#             print(" -", e)
+#         print()