VyLala committed on
Commit
43014a2
·
verified ·
1 Parent(s): 64f5cf1

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +10 -5
pipeline.py CHANGED
@@ -287,13 +287,18 @@ async def process_link_chunk_allOutput(link, iso, acc, saveLinkFolder, out_links
287
  asyncio.to_thread(data_preprocess.extract_table, link, saveLinkFolder),
288
  timeout=10
289
  )
 
 
290
  except Exception:
291
  tables_link = []
292
 
293
  # --- merge ---
294
  try:
295
  print("just merge text and tables")
296
- final_input_link = text_link + ", ".join(tables_link)
 
 
 
297
  except Exception:
298
  print("no succeed here in preprocess docu")
299
  final_input_link = ""
@@ -306,9 +311,9 @@ async def process_link_chunk_allOutput(link, iso, acc, saveLinkFolder, out_links
306
  if len(final_input_link) > 1000000:
307
  final_input_link = data_preprocess.normalize_for_overlap(final_input_link)
308
  if len(final_input_link) > 1000000:
309
- final_input_link = final_input_link[:100000]
310
 
311
- all_output = data_preprocess.normalize_for_overlap(all_output) + final_input_link
312
 
313
  return context, all_output, chunk
314
 
@@ -802,8 +807,8 @@ async def pipeline_with_gemini(accessions,stop_flag=None, niche_cases=None, save
802
 
803
  # combine results
804
  for context, new_all_output, new_chunk in results:
805
- all_output = new_all_output
806
- chunk = new_chunk
807
  # for link in links:
808
  # print(link)
809
  # # if len(all_output) > 1000*1000:
 
287
  asyncio.to_thread(data_preprocess.extract_table, link, saveLinkFolder),
288
  timeout=10
289
  )
290
+ print("this is table link: ", str(table_links))
291
+
292
  except Exception:
293
  tables_link = []
294
 
295
  # --- merge ---
296
  try:
297
  print("just merge text and tables")
298
+ try:
299
+ final_input_link = text_link + ", ".join(tables_link)
300
+ except:
301
+ final_input_link = str(text_link) + str(tables_link)
302
  except Exception:
303
  print("no succeed here in preprocess docu")
304
  final_input_link = ""
 
311
  if len(final_input_link) > 1000000:
312
  final_input_link = data_preprocess.normalize_for_overlap(final_input_link)
313
  if len(final_input_link) > 1000000:
314
+ final_input_link = final_input_link[:1000000]
315
 
316
+ all_output += data_preprocess.normalize_for_overlap(all_output) + final_input_link
317
 
318
  return context, all_output, chunk
319
 
 
807
 
808
  # combine results
809
  for context, new_all_output, new_chunk in results:
810
+ all_output += new_all_output
811
+ chunk += new_chunk
812
  # for link in links:
813
  # print(link)
814
  # # if len(all_output) > 1000*1000: