Spaces:

techconspartners
/

ConversAI

Sleeping

Rauhan committed on Aug 13, 2024

Commit

4a38803

1 Parent(s): 0ff268d

UPDATE: ThreadPoolExecutor

Files changed (1) hide show

functions.py CHANGED Viewed

@@ -292,13 +292,13 @@ def getLinks(url: str, timeout = 30):
     return list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
-def getText(image):
-  global reader
-  return "\n".join([text[1] for text in reader.readtext(np.array(image.resize((500, 500))), paragraph=True)])
 def getTextFromImagePDF(pdfBytes):
     allImages = convert_from_bytes(pdfBytes)
-    with ThreadPoolExecutor(max_workers = 25) as p:
         texts = list(p.map(getText, allImages))
     return "\n\n\n".join(texts)

     return list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
 def getTextFromImagePDF(pdfBytes):
+    def getText(image):
+        global reader
+        return "\n".join([text[1] for text in reader.readtext(np.array(image), paragraph=True)])
     allImages = convert_from_bytes(pdfBytes)
+    with ThreadPoolExecutor(max_workers = 32) as p:
         texts = list(p.map(getText, allImages))
     return "\n\n\n".join(texts)