Spaces:
Running
Running
Update pipeline.py
Browse files
- pipeline.py +8 -7
pipeline.py
CHANGED
|
@@ -267,8 +267,12 @@ async def process_link_chunk_allOutput(link, iso, acc, saveLinkFolder, linksWith
|
|
| 267 |
|
| 268 |
return all_output
|
| 269 |
|
| 270 |
-
|
|
|
|
|
|
|
|
|
|
| 271 |
article_text = ""
|
|
|
|
| 272 |
if doi != "unknown":
|
| 273 |
link = 'https://doi.org/' + doi
|
| 274 |
# get the file to create listOfFile for each id
|
|
@@ -365,9 +369,6 @@ async def extractSources(doi, linksWithTexts, links, all_output, iso, acc, saveL
|
|
| 365 |
if more_linksWithTexts: linksWithTexts.update(more_linksWithTexts)
|
| 366 |
return linksWithTexts, links, all_output
|
| 367 |
|
| 368 |
-
from Bio import Entrez
|
| 369 |
-
Entrez.email = "your_email@example.com" # required by NCBI
|
| 370 |
-
|
| 371 |
# Main execution
|
| 372 |
async def pipeline_with_gemini(accessions,stop_flag=None, save_df=None, niche_cases=None):
|
| 373 |
# output: country, sample_type, ethnic, location, money_cost, time_cost, explain
|
|
@@ -503,7 +504,7 @@ async def pipeline_with_gemini(accessions,stop_flag=None, save_df=None, niche_ca
|
|
| 503 |
acc_score["file_all_output"] = str(all_filename)
|
| 504 |
print("acc sscore for file all output: ", acc_score["file_all_output"])
|
| 505 |
if len(acc_score["file_all_output"]) == 0 or doi!="unknown":
|
| 506 |
-
linksWithTexts, links, all_output = await extractSources(
|
| 507 |
links = unique_preserve_order(links)
|
| 508 |
print("this is links: ",links)
|
| 509 |
acc_score["source"] = links
|
|
@@ -539,14 +540,14 @@ async def pipeline_with_gemini(accessions,stop_flag=None, save_df=None, niche_ca
|
|
| 539 |
temp_source = True
|
| 540 |
if temp_source:
|
| 541 |
print("temp source is true so have to try again search link")
|
| 542 |
-
linksWithTexts, links, all_output = await extractSources(
|
| 543 |
links = unique_preserve_order(links)
|
| 544 |
print("links: ", links)
|
| 545 |
acc_score["source"] = links
|
| 546 |
except:
|
| 547 |
try:
|
| 548 |
print("in the exception and start to get link")
|
| 549 |
-
linksWithTexts, links, all_output = await extractSources(
|
| 550 |
links = unique_preserve_order(links)
|
| 551 |
print("this is links: ",links)
|
| 552 |
acc_score["source"] = links
|
|
|
|
| 267 |
|
| 268 |
return all_output
|
| 269 |
|
| 270 |
+
from Bio import Entrez
|
| 271 |
+
Entrez.email = "your_email@example.com" # required by NCBI
|
| 272 |
+
|
| 273 |
+
async def extractSources(meta, linksWithTexts, links, all_output, acc, saveLinkFolder, niche_cases=None):
|
| 274 |
article_text = ""
|
| 275 |
+
iso, title, doi, pudID, features = meta["country"], meta["specific_location"], meta["ethnicity"], meta["sample_type"], meta["collection_date"], meta["isolate"], meta["title"], meta["doi"], meta["pubmed_id"], meta["all_features"]
|
| 276 |
if doi != "unknown":
|
| 277 |
link = 'https://doi.org/' + doi
|
| 278 |
# get the file to create listOfFile for each id
|
|
|
|
| 369 |
if more_linksWithTexts: linksWithTexts.update(more_linksWithTexts)
|
| 370 |
return linksWithTexts, links, all_output
|
| 371 |
|
|
|
|
|
|
|
|
|
|
| 372 |
# Main execution
|
| 373 |
async def pipeline_with_gemini(accessions,stop_flag=None, save_df=None, niche_cases=None):
|
| 374 |
# output: country, sample_type, ethnic, location, money_cost, time_cost, explain
|
|
|
|
| 504 |
acc_score["file_all_output"] = str(all_filename)
|
| 505 |
print("acc sscore for file all output: ", acc_score["file_all_output"])
|
| 506 |
if len(acc_score["file_all_output"]) == 0 or doi!="unknown":
|
| 507 |
+
linksWithTexts, links, all_output = await extractSources(meta, linksWithTexts, links, all_output, acc, sample_folder_id, niche_cases)
|
| 508 |
links = unique_preserve_order(links)
|
| 509 |
print("this is links: ",links)
|
| 510 |
acc_score["source"] = links
|
|
|
|
| 540 |
temp_source = True
|
| 541 |
if temp_source:
|
| 542 |
print("temp source is true so have to try again search link")
|
| 543 |
+
linksWithTexts, links, all_output = await extractSources(meta, linksWithTexts, links, all_output, acc, sample_folder_id, niche_cases)
|
| 544 |
links = unique_preserve_order(links)
|
| 545 |
print("links: ", links)
|
| 546 |
acc_score["source"] = links
|
| 547 |
except:
|
| 548 |
try:
|
| 549 |
print("in the exception and start to get link")
|
| 550 |
+
linksWithTexts, links, all_output = await extractSources(meta, linksWithTexts, links, all_output, acc, sample_folder_id, niche_cases)
|
| 551 |
links = unique_preserve_order(links)
|
| 552 |
print("this is links: ",links)
|
| 553 |
acc_score["source"] = links
|