VyLala committed on
Commit
71dd8cb
·
verified ·
1 Parent(s): 0179b52

Update confidence_score.py

Browse files
Files changed (1) hide show
  1. confidence_score.py +58 -56
confidence_score.py CHANGED
@@ -144,7 +144,8 @@ def compute_confidence_score_and_tier(
144
  cons_cfg = rules["consistency"]
145
  print("start compare gb country and pre country")
146
  if gb_country is not None and pred_country is not None:
147
- if gb_country == pred_country:
 
148
  score += cons_cfg["match"]
149
  explanations.append(
150
  "Predicted country matches GenBank country metadata."
@@ -154,6 +155,7 @@ def compute_confidence_score_and_tier(
154
  explanations.append(
155
  "Conflict between predicted country and GenBank country metadata."
156
  )
 
157
  else:
158
  # Only give "no contradiction" bonus if there is at least some evidence
159
  if has_geo or has_pubmed or accession_in_text:
@@ -161,7 +163,7 @@ def compute_confidence_score_and_tier(
161
  explanations.append(
162
  "No contradiction detected across available sources."
163
  )
164
-
165
  # ---------- Signal 3: Evidence density ----------
166
  num_pubs = int(signals.get("num_publications", 0))
167
  dens_cfg = rules["evidence_density"]
@@ -207,59 +209,59 @@ def compute_confidence_score_and_tier(
207
  return score, tier, explanations
208
 
209
 
210
- if __name__ == "__main__":
211
- # Quick local sanity-check examples (manual smoke tests)
212
- rules = set_rules()
213
 
214
- examples = [
215
- {
216
- "name": "Strong, clean case",
217
- "signals": {
218
- "has_geo_loc_name": True,
219
- "has_pubmed": True,
220
- "accession_found_in_text": True,
221
- "predicted_country": "USA",
222
- "genbank_country": "United States of America",
223
- "num_publications": 3,
224
- "missing_key_fields": False,
225
- "known_failure_pattern": False,
226
- },
227
- },
228
- {
229
- "name": "Weak, conflicting case",
230
- "signals": {
231
- "has_geo_loc_name": True,
232
- "has_pubmed": False,
233
- "accession_found_in_text": False,
234
- "predicted_country": "Japan",
235
- "genbank_country": "France",
236
- "num_publications": 0,
237
- "missing_key_fields": True,
238
- "known_failure_pattern": True,
239
- },
240
- },
241
- {
242
- "name": "Medium, sparse but okay",
243
- "signals": {
244
- "has_geo_loc_name": False,
245
- "has_pubmed": True,
246
- "accession_found_in_text": False,
247
- "predicted_country": "United Kingdom",
248
- "genbank_country": None,
249
- "num_publications": 1,
250
- "missing_key_fields": False,
251
- "known_failure_pattern": False,
252
- },
253
- },
254
- ]
255
 
256
- for ex in examples:
257
- score, tier, expl = compute_confidence_score_and_tier(
258
- ex["signals"], rules
259
- )
260
- print("====", ex["name"], "====")
261
- print("Score:", score, "| Tier:", tier)
262
- print("Reasons:")
263
- for e in expl:
264
- print(" -", e)
265
- print()
 
144
  cons_cfg = rules["consistency"]
145
  print("start compare gb country and pre country")
146
  if gb_country is not None and pred_country is not None:
147
+ print("inside comparison")
148
+ if gb_country.lower() == pred_country.lower():
149
  score += cons_cfg["match"]
150
  explanations.append(
151
  "Predicted country matches GenBank country metadata."
 
155
  explanations.append(
156
  "Conflict between predicted country and GenBank country metadata."
157
  )
158
+ print("done comparison")
159
  else:
160
  # Only give "no contradiction" bonus if there is at least some evidence
161
  if has_geo or has_pubmed or accession_in_text:
 
163
  explanations.append(
164
  "No contradiction detected across available sources."
165
  )
166
+ print("start evidence density")
167
  # ---------- Signal 3: Evidence density ----------
168
  num_pubs = int(signals.get("num_publications", 0))
169
  dens_cfg = rules["evidence_density"]
 
209
  return score, tier, explanations
210
 
211
 
212
+ # if __name__ == "__main__":
213
+ # # Quick local sanity-check examples (manual smoke tests)
214
+ # rules = set_rules()
215
 
216
+ # examples = [
217
+ # {
218
+ # "name": "Strong, clean case",
219
+ # "signals": {
220
+ # "has_geo_loc_name": True,
221
+ # "has_pubmed": True,
222
+ # "accession_found_in_text": True,
223
+ # "predicted_country": "USA",
224
+ # "genbank_country": "United States of America",
225
+ # "num_publications": 3,
226
+ # "missing_key_fields": False,
227
+ # "known_failure_pattern": False,
228
+ # },
229
+ # },
230
+ # {
231
+ # "name": "Weak, conflicting case",
232
+ # "signals": {
233
+ # "has_geo_loc_name": True,
234
+ # "has_pubmed": False,
235
+ # "accession_found_in_text": False,
236
+ # "predicted_country": "Japan",
237
+ # "genbank_country": "France",
238
+ # "num_publications": 0,
239
+ # "missing_key_fields": True,
240
+ # "known_failure_pattern": True,
241
+ # },
242
+ # },
243
+ # {
244
+ # "name": "Medium, sparse but okay",
245
+ # "signals": {
246
+ # "has_geo_loc_name": False,
247
+ # "has_pubmed": True,
248
+ # "accession_found_in_text": False,
249
+ # "predicted_country": "United Kingdom",
250
+ # "genbank_country": None,
251
+ # "num_publications": 1,
252
+ # "missing_key_fields": False,
253
+ # "known_failure_pattern": False,
254
+ # },
255
+ # },
256
+ # ]
257
 
258
+ # for ex in examples:
259
+ # score, tier, expl = compute_confidence_score_and_tier(
260
+ # ex["signals"], rules
261
+ # )
262
+ # print("====", ex["name"], "====")
263
+ # print("Score:", score, "| Tier:", tier)
264
+ # print("Reasons:")
265
+ # for e in expl:
266
+ # print(" -", e)
267
+ # print()