VyLala committed on
Commit
5c9f29c
·
verified ·
1 Parent(s): 08ddb3f

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +8 -7
pipeline.py CHANGED
@@ -267,8 +267,12 @@ async def process_link_chunk_allOutput(link, iso, acc, saveLinkFolder, linksWith
267
 
268
  return all_output
269
 
270
- async def extractSources(doi, linksWithTexts, links, all_output, iso, acc, saveLinkFolder, niche_cases=None):
 
 
 
271
  article_text = ""
 
272
  if doi != "unknown":
273
  link = 'https://doi.org/' + doi
274
  # get the file to create listOfFile for each id
@@ -365,9 +369,6 @@ async def extractSources(doi, linksWithTexts, links, all_output, iso, acc, saveL
365
  if more_linksWithTexts: linksWithTexts.update(more_linksWithTexts)
366
  return linksWithTexts, links, all_output
367
 
368
- from Bio import Entrez
369
- Entrez.email = "your_email@example.com" # required by NCBI
370
-
371
  # Main execution
372
  async def pipeline_with_gemini(accessions,stop_flag=None, save_df=None, niche_cases=None):
373
  # output: country, sample_type, ethnic, location, money_cost, time_cost, explain
@@ -503,7 +504,7 @@ async def pipeline_with_gemini(accessions,stop_flag=None, save_df=None, niche_ca
503
  acc_score["file_all_output"] = str(all_filename)
504
  print("acc sscore for file all output: ", acc_score["file_all_output"])
505
  if len(acc_score["file_all_output"]) == 0 or doi!="unknown":
506
- linksWithTexts, links, all_output = await extractSources(doi, linksWithTexts, links, all_output, iso, acc, sample_folder_id, niche_cases)
507
  links = unique_preserve_order(links)
508
  print("this is links: ",links)
509
  acc_score["source"] = links
@@ -539,14 +540,14 @@ async def pipeline_with_gemini(accessions,stop_flag=None, save_df=None, niche_ca
539
  temp_source = True
540
  if temp_source:
541
  print("temp source is true so have to try again search link")
542
- linksWithTexts, links, all_output = await extractSources(doi, linksWithTexts, links, all_output, iso, acc, sample_folder_id, niche_cases)
543
  links = unique_preserve_order(links)
544
  print("links: ", links)
545
  acc_score["source"] = links
546
  except:
547
  try:
548
  print("in the exception and start to get link")
549
- linksWithTexts, links, all_output = await extractSources(doi, linksWithTexts, links, all_output, iso, acc, sample_folder_id, niche_cases)
550
  links = unique_preserve_order(links)
551
  print("this is links: ",links)
552
  acc_score["source"] = links
 
267
 
268
  return all_output
269
 
270
+ from Bio import Entrez
271
+ Entrez.email = "your_email@example.com" # required by NCBI
272
+
273
+ async def extractSources(meta, linksWithTexts, links, all_output, acc, saveLinkFolder, niche_cases=None):
274
  article_text = ""
275
+ iso, title, doi, pudID, features = meta["country"], meta["specific_location"], meta["ethnicity"], meta["sample_type"], meta["collection_date"], meta["isolate"], meta["title"], meta["doi"], meta["pubmed_id"], meta["all_features"]
276
  if doi != "unknown":
277
  link = 'https://doi.org/' + doi
278
  # get the file to create listOfFile for each id
 
369
  if more_linksWithTexts: linksWithTexts.update(more_linksWithTexts)
370
  return linksWithTexts, links, all_output
371
 
 
 
 
372
  # Main execution
373
  async def pipeline_with_gemini(accessions,stop_flag=None, save_df=None, niche_cases=None):
374
  # output: country, sample_type, ethnic, location, money_cost, time_cost, explain
 
504
  acc_score["file_all_output"] = str(all_filename)
505
  print("acc sscore for file all output: ", acc_score["file_all_output"])
506
  if len(acc_score["file_all_output"]) == 0 or doi!="unknown":
507
+ linksWithTexts, links, all_output = await extractSources(meta, linksWithTexts, links, all_output, acc, sample_folder_id, niche_cases)
508
  links = unique_preserve_order(links)
509
  print("this is links: ",links)
510
  acc_score["source"] = links
 
540
  temp_source = True
541
  if temp_source:
542
  print("temp source is true so have to try again search link")
543
+ linksWithTexts, links, all_output = await extractSources(meta, linksWithTexts, links, all_output, acc, sample_folder_id, niche_cases)
544
  links = unique_preserve_order(links)
545
  print("links: ", links)
546
  acc_score["source"] = links
547
  except:
548
  try:
549
  print("in the exception and start to get link")
550
+ linksWithTexts, links, all_output = await extractSources(meta, linksWithTexts, links, all_output, acc, sample_folder_id, niche_cases)
551
  links = unique_preserve_order(links)
552
  print("this is links: ",links)
553
  acc_score["source"] = links