Spaces:
Running
Running
Update mtdna_backend.py
Browse files - mtdna_backend.py +15 -9
mtdna_backend.py
CHANGED
|
@@ -249,15 +249,20 @@ async def summarize_results(accession, stop_flag=None, niche_cases=None):
|
|
| 249 |
if niche_cases:
|
| 250 |
niche_cases = ", ".join(niche_cases)
|
| 251 |
print("this is niche case inside summarize result: ", niche_cases)
|
| 252 |
-
outputs = await pipeline_classify_sample_location_cached(accession, stop_flag, save_df, niche_cases)
|
| 253 |
print("do the dummy output")
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
if stop_flag is not None and stop_flag.value:
|
| 262 |
print(f"🛑 Skipped {accession} mid-pipeline.")
|
| 263 |
return []
|
|
@@ -303,9 +308,10 @@ async def summarize_results(accession, stop_flag=None, niche_cases=None):
|
|
| 303 |
# signals for confidence score
|
| 304 |
signals_confidence_score = outputs[key]["signals"]
|
| 305 |
rules = confidence_score.set_rules()
|
|
|
|
| 306 |
score, tier, explanations_score = confidence_score.compute_confidence_score_and_tier(signals_confidence_score,rules)
|
| 307 |
confidence_values = f"{tier} ({score})" + "\n" + explanations_score
|
| 308 |
-
|
| 309 |
if niche_cases:
|
| 310 |
row = {
|
| 311 |
"Sample ID": truncate_cell(label or "unknown"),
|
|
|
|
| 249 |
if niche_cases:
|
| 250 |
niche_cases = ", ".join(niche_cases)
|
| 251 |
print("this is niche case inside summarize result: ", niche_cases)
|
| 252 |
+
#outputs = await pipeline_classify_sample_location_cached(accession, stop_flag, save_df, niche_cases)
|
| 253 |
print("do the dummy output")
|
| 254 |
+
outputs = {"KY680825":{'isolate': 'NAT107',
|
| 255 |
+
'country':
|
| 256 |
+
{'ecuador': ['ncbi',
|
| 257 |
+
'rag_llm-The geographic location "Ecuador" is explicitly listed under "geo_loc_name" for the isolate NAT107. The text mentions "217 novel modern mitogenomes", indicating the sample is from a living individual.']},
|
| 258 |
+
'sample_type':
|
| 259 |
+
{'modern': ['rag_llm-The geographic location "Ecuador" is explicitly listed under "geo_loc_name" for the isolate NAT107. The text mentions "217 novel modern mitogenomes", indicating the sample is from a living individual.']},
|
| 260 |
+
'query_cost': '0.000941', 'time_cost': '9.246 seconds',
|
| 261 |
+
'source': ['https://doi.org/10.1093/molbev/msx267', 'https://pubmed.ncbi.nlm.nih.gov/29099937/'],
|
| 262 |
+
'file_chunk': 'The_Paleo-Indian_Entry_into_South_America_Accordin_merged_document.docx',
|
| 263 |
+
'file_all_output': 'The_Paleo-Indian_Entry_into_South_America_Accordin_all_merged_document.docx',
|
| 264 |
+
'signals': {'has_geo_loc_name': True, 'has_pubmed': True, 'accession_found_in_text': True, 'predicted_country': 'ecuador', 'genbank_country': 'ecuador', 'num_publications': 3, 'missing_key_fields': False, 'known_failure_pattern': False}}
|
| 265 |
+
}
|
| 266 |
if stop_flag is not None and stop_flag.value:
|
| 267 |
print(f"🛑 Skipped {accession} mid-pipeline.")
|
| 268 |
return []
|
|
|
|
| 308 |
# signals for confidence score
|
| 309 |
signals_confidence_score = outputs[key]["signals"]
|
| 310 |
rules = confidence_score.set_rules()
|
| 311 |
+
print("start to compute confidence score")
|
| 312 |
score, tier, explanations_score = confidence_score.compute_confidence_score_and_tier(signals_confidence_score,rules)
|
| 313 |
confidence_values = f"{tier} ({score})" + "\n" + explanations_score
|
| 314 |
+
print("confidence_values: ", confidence_values)
|
| 315 |
if niche_cases:
|
| 316 |
row = {
|
| 317 |
"Sample ID": truncate_cell(label or "unknown"),
|