Spaces:

VyLala
/

BioMetadataAudit

Running

App Files Community

VyLala committed on Dec 21, 2025

Commit

8d12117

verified ·

1 Parent(s): 97864c2

Update mtdna_backend.py

Browse files

Files changed (1) hide show

mtdna_backend.py +15 -9

mtdna_backend.py CHANGED Viewed

@@ -249,15 +249,20 @@ async def summarize_results(accession, stop_flag=None, niche_cases=None):
         if niche_cases:
             niche_cases = ", ".join(niche_cases)
         print("this is niche case inside summarize result: ", niche_cases)
-        outputs = await pipeline_classify_sample_location_cached(accession, stop_flag, save_df, niche_cases)
         print("do the dummy output")
-        # outputs = {"PX272359.1":{'isolate': 'A84',
-        #                        'country': {'australia': ['ncbi', 'rag_llm-The geographic location is inferred from "geo_loc_name: Australia: Queensland" which explicitly states Australia as the country.. The sample is inferred to be modern because the text mentions a "collection_date: 19-NOV-2025", indicating a contemporary collection.']},
-        #                        'sample_type': {'modern': ['rag_llm-The geographic location is inferred from "geo_loc_name: Australia: Queensland" which explicitly states Australia as the country.. The sample is inferred to be modern because the text mentions a "collection_date: 19-NOV-2025", indicating a contemporary collection.']},
-        #                        'query_cost': '0.004663', 'time_cost': '23.895 seconds',
-        #                        'source': ['https://pubmed.ncbi.nlm.nih.gov/30528080/', 'https://www.nature.com/articles/srep43402', 'https://www.science.org/doi/10.1126/sciadv.ady9493'],
-        #                        'file_chunk': 'Genomic_evidence_supports_the_long_chronology_for__merged_document.docx',
-        #                        'file_all_output': 'Genomic_evidence_supports_the_long_chronology_for__all_merged_document.docx'}}
         if stop_flag is not None and stop_flag.value:
             print(f"🛑 Skipped {accession} mid-pipeline.")
             return []
@@ -303,9 +308,10 @@ async def summarize_results(accession, stop_flag=None, niche_cases=None):
         # signals for confidence score
         signals_confidence_score = outputs[key]["signals"]
         rules = confidence_score.set_rules()
         score, tier, explanations_score = confidence_score.compute_confidence_score_and_tier(signals_confidence_score,rules)
         confidence_values = f"{tier} ({score})" + "\n" + explanations_score
         if niche_cases:
             row = {
                 "Sample ID": truncate_cell(label or "unknown"),

         if niche_cases:
             niche_cases = ", ".join(niche_cases)
         print("this is niche case inside summarize result: ", niche_cases)
+        #outputs = await pipeline_classify_sample_location_cached(accession, stop_flag, save_df, niche_cases)
         print("do the dummy output")
+        outputs = {"KY680825":{'isolate': 'NAT107',
+                   'country':
+                   {'ecuador': ['ncbi',
+                                'rag_llm-The geographic location "Ecuador" is explicitly listed under "geo_loc_name" for the isolate NAT107. The text mentions "217 novel modern mitogenomes", indicating the sample is from a living individual.']},
+                   'sample_type':
+                   {'modern': ['rag_llm-The geographic location "Ecuador" is explicitly listed under "geo_loc_name" for the isolate NAT107. The text mentions "217 novel modern mitogenomes", indicating the sample is from a living individual.']},
+                   'query_cost': '0.000941', 'time_cost': '9.246 seconds',
+                   'source': ['https://doi.org/10.1093/molbev/msx267', 'https://pubmed.ncbi.nlm.nih.gov/29099937/'],
+                   'file_chunk': 'The_Paleo-Indian_Entry_into_South_America_Accordin_merged_document.docx',
+                   'file_all_output': 'The_Paleo-Indian_Entry_into_South_America_Accordin_all_merged_document.docx',
+                   'signals': {'has_geo_loc_name': True, 'has_pubmed': True, 'accession_found_in_text': True, 'predicted_country': 'ecuador', 'genbank_country': 'ecuador', 'num_publications': 3, 'missing_key_fields': False, 'known_failure_pattern': False}}
+                  }
         if stop_flag is not None and stop_flag.value:
             print(f"🛑 Skipped {accession} mid-pipeline.")
             return []
         # signals for confidence score
         signals_confidence_score = outputs[key]["signals"]
         rules = confidence_score.set_rules()
+        print("start to compute confidence score")
         score, tier, explanations_score = confidence_score.compute_confidence_score_and_tier(signals_confidence_score,rules)
         confidence_values = f"{tier} ({score})" + "\n" + explanations_score
+        print("confidence_values: ", confidence_values)
         if niche_cases:
             row = {
                 "Sample ID": truncate_cell(label or "unknown"),