Spaces:
Running
Running
Update pipeline.py
Browse files - pipeline.py +8 -6
pipeline.py
CHANGED
|
@@ -403,6 +403,7 @@ def pipeline_with_gemini(accessions,stop_flag=None, niche_cases=None, save_df=No
|
|
| 403 |
print(f"🛑 Stop processing {accession}, aborting early...")
|
| 404 |
return {}
|
| 405 |
# check doi first
|
|
|
|
| 406 |
if len(acc_score["file_all_output"]) == 0 and len(acc_score["file_chunk"]) == 0:
|
| 407 |
if doi != "unknown":
|
| 408 |
link = 'https://doi.org/' + doi
|
|
@@ -445,14 +446,15 @@ def pipeline_with_gemini(accessions,stop_flag=None, niche_cases=None, save_df=No
|
|
| 445 |
links = unique_preserve_order(links)
|
| 446 |
acc_score["source"] = links
|
| 447 |
else:
|
|
|
|
| 448 |
try:
|
| 449 |
temp_source = False
|
| 450 |
if save_df is not None and not save_df.empty:
|
| 451 |
print("save df not none")
|
| 452 |
-
print(str(
|
| 453 |
print(str(all_filename))
|
| 454 |
-
if str(
|
| 455 |
-
link = save_df.loc[save_df["file_chunk"]==str(
|
| 456 |
#link = row["Sources"].iloc[0]
|
| 457 |
if "http" in link:
|
| 458 |
print("yeah http in save df source")
|
|
@@ -558,9 +560,9 @@ def pipeline_with_gemini(accessions,stop_flag=None, niche_cases=None, save_df=No
|
|
| 558 |
if not chunk and not all_output:
|
| 559 |
print("not chunk and all output")
|
| 560 |
# else: check whether we can reuse the chunk and all-output files of an existing accession to find another
|
| 561 |
-
if str(
|
| 562 |
-
print("first time have chunk path: ", str(
|
| 563 |
-
acc_score["file_chunk"] = str(
|
| 564 |
if str(all_filename) != "":
|
| 565 |
print("first time have all path: ", str(all_filename))
|
| 566 |
acc_score["file_all_output"] = str(all_filename)
|
|
|
|
| 403 |
print(f"🛑 Stop processing {accession}, aborting early...")
|
| 404 |
return {}
|
| 405 |
# check doi first
|
| 406 |
+
print("acc sscore for file all output and chunk: ", acc_score["file_all_output"], acc_score["file_chunk"])
|
| 407 |
if len(acc_score["file_all_output"]) == 0 and len(acc_score["file_chunk"]) == 0:
|
| 408 |
if doi != "unknown":
|
| 409 |
link = 'https://doi.org/' + doi
|
|
|
|
| 446 |
links = unique_preserve_order(links)
|
| 447 |
acc_score["source"] = links
|
| 448 |
else:
|
| 449 |
+
print("no chunk or all output")
|
| 450 |
try:
|
| 451 |
temp_source = False
|
| 452 |
if save_df is not None and not save_df.empty:
|
| 453 |
print("save df not none")
|
| 454 |
+
print(str(chunk_filename))
|
| 455 |
print(str(all_filename))
|
| 456 |
+
if str(chunk_filename) != "":
|
| 457 |
+
link = save_df.loc[save_df["file_chunk"]==str(chunk_filename),"Sources"].iloc[0]
|
| 458 |
#link = row["Sources"].iloc[0]
|
| 459 |
if "http" in link:
|
| 460 |
print("yeah http in save df source")
|
|
|
|
| 560 |
if not chunk and not all_output:
|
| 561 |
print("not chunk and all output")
|
| 562 |
# else: check whether we can reuse the chunk and all-output files of an existing accession to find another
|
| 563 |
+
if str(chunk_filename) != "":
|
| 564 |
+
print("first time have chunk path: ", str(chunk_filename))
|
| 565 |
+
acc_score["file_chunk"] = str(chunk_filename)
|
| 566 |
if str(all_filename) != "":
|
| 567 |
print("first time have all path: ", str(all_filename))
|
| 568 |
acc_score["file_all_output"] = str(all_filename)
|