VyLala committed on
Commit
8d12117
·
verified ·
1 Parent(s): 97864c2

Update mtdna_backend.py

Browse files
Files changed (1) hide show
  1. mtdna_backend.py +15 -9
mtdna_backend.py CHANGED
@@ -249,15 +249,20 @@ async def summarize_results(accession, stop_flag=None, niche_cases=None):
249
  if niche_cases:
250
  niche_cases = ", ".join(niche_cases)
251
  print("this is niche case inside summarize result: ", niche_cases)
252
- outputs = await pipeline_classify_sample_location_cached(accession, stop_flag, save_df, niche_cases)
253
  print("do the dummy output")
254
- # outputs = {"PX272359.1":{'isolate': 'A84',
255
- # 'country': {'australia': ['ncbi', 'rag_llm-The geographic location is inferred from "geo_loc_name: Australia: Queensland" which explicitly states Australia as the country.. The sample is inferred to be modern because the text mentions a "collection_date: 19-NOV-2025", indicating a contemporary collection.']},
256
- # 'sample_type': {'modern': ['rag_llm-The geographic location is inferred from "geo_loc_name: Australia: Queensland" which explicitly states Australia as the country.. The sample is inferred to be modern because the text mentions a "collection_date: 19-NOV-2025", indicating a contemporary collection.']},
257
- # 'query_cost': '0.004663', 'time_cost': '23.895 seconds',
258
- # 'source': ['https://pubmed.ncbi.nlm.nih.gov/30528080/', 'https://www.nature.com/articles/srep43402', 'https://www.science.org/doi/10.1126/sciadv.ady9493'],
259
- # 'file_chunk': 'Genomic_evidence_supports_the_long_chronology_for__merged_document.docx',
260
- # 'file_all_output': 'Genomic_evidence_supports_the_long_chronology_for__all_merged_document.docx'}}
 
 
 
 
 
261
  if stop_flag is not None and stop_flag.value:
262
  print(f"🛑 Skipped {accession} mid-pipeline.")
263
  return []
@@ -303,9 +308,10 @@ async def summarize_results(accession, stop_flag=None, niche_cases=None):
303
  # signals for confidence score
304
  signals_confidence_score = outputs[key]["signals"]
305
  rules = confidence_score.set_rules()
 
306
  score, tier, explanations_score = confidence_score.compute_confidence_score_and_tier(signals_confidence_score,rules)
307
  confidence_values = f"{tier} ({score})" + "\n" + explanations_score
308
-
309
  if niche_cases:
310
  row = {
311
  "Sample ID": truncate_cell(label or "unknown"),
 
249
  if niche_cases:
250
  niche_cases = ", ".join(niche_cases)
251
  print("this is niche case inside summarize result: ", niche_cases)
252
+ #outputs = await pipeline_classify_sample_location_cached(accession, stop_flag, save_df, niche_cases)
253
  print("do the dummy output")
254
+ outputs = {"KY680825":{'isolate': 'NAT107',
255
+ 'country':
256
+ {'ecuador': ['ncbi',
257
+ 'rag_llm-The geographic location "Ecuador" is explicitly listed under "geo_loc_name" for the isolate NAT107. The text mentions "217 novel modern mitogenomes", indicating the sample is from a living individual.']},
258
+ 'sample_type':
259
+ {'modern': ['rag_llm-The geographic location "Ecuador" is explicitly listed under "geo_loc_name" for the isolate NAT107. The text mentions "217 novel modern mitogenomes", indicating the sample is from a living individual.']},
260
+ 'query_cost': '0.000941', 'time_cost': '9.246 seconds',
261
+ 'source': ['https://doi.org/10.1093/molbev/msx267', 'https://pubmed.ncbi.nlm.nih.gov/29099937/'],
262
+ 'file_chunk': 'The_Paleo-Indian_Entry_into_South_America_Accordin_merged_document.docx',
263
+ 'file_all_output': 'The_Paleo-Indian_Entry_into_South_America_Accordin_all_merged_document.docx',
264
+ 'signals': {'has_geo_loc_name': True, 'has_pubmed': True, 'accession_found_in_text': True, 'predicted_country': 'ecuador', 'genbank_country': 'ecuador', 'num_publications': 3, 'missing_key_fields': False, 'known_failure_pattern': False}}
265
+ }
266
  if stop_flag is not None and stop_flag.value:
267
  print(f"🛑 Skipped {accession} mid-pipeline.")
268
  return []
 
308
  # signals for confidence score
309
  signals_confidence_score = outputs[key]["signals"]
310
  rules = confidence_score.set_rules()
311
+ print("start to compute confidence score")
312
  score, tier, explanations_score = confidence_score.compute_confidence_score_and_tier(signals_confidence_score,rules)
313
  confidence_values = f"{tier} ({score})" + "\n" + explanations_score
314
+ print("confidence_values: ", confidence_values)
315
  if niche_cases:
316
  row = {
317
  "Sample ID": truncate_cell(label or "unknown"),