Spaces:
Runtime error
Runtime error
Commit ·
4620265
1
Parent(s): 6cae73d
Update app.py
Browse files
app.py
CHANGED
|
@@ -38,12 +38,6 @@ import os
|
|
| 38 |
home_path = "/home/user/app/"
|
| 39 |
folder_path = "/home/user/app/doc/"
|
| 40 |
|
| 41 |
-
|
| 42 |
-
# os.environ["OPENAI_API_KEY"] = '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'
|
| 43 |
-
# os.environ["OPENAI_API_KEY"] = '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'
|
| 44 |
-
# os.environ["OPENAI_API_KEY"] = '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' # from CHGPT
|
| 45 |
-
# os.environ["OPENAI_API_KEY"] = '' # from CHGPT
|
| 46 |
-
|
| 47 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 48 |
|
| 49 |
# from gpt_index import SimpleDirectoryReader, GPTListIndex, GPTSimpleVectorIndex, LLMPredictor, PromptHelper
|
|
@@ -117,8 +111,8 @@ def extractScannedPDF(filePath, chainType):
|
|
| 117 |
for pil_im in images:
|
| 118 |
print('Page ' + str(counter))
|
| 119 |
counter += 1
|
| 120 |
-
if counter >= 3:
|
| 121 |
-
break
|
| 122 |
text += "\nPage " + str(counter) + "\n"
|
| 123 |
ocr_dict = pytesseract.image_to_data(pil_im, lang='eng', output_type=Output.DICT)
|
| 124 |
text += " ".join(ocr_dict['text']) + "\n"
|
|
|
|
| 38 |
home_path = "/home/user/app/"
|
| 39 |
folder_path = "/home/user/app/doc/"
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 42 |
|
| 43 |
# from gpt_index import SimpleDirectoryReader, GPTListIndex, GPTSimpleVectorIndex, LLMPredictor, PromptHelper
|
|
|
|
| 111 |
for pil_im in images:
|
| 112 |
print('Page ' + str(counter))
|
| 113 |
counter += 1
|
| 114 |
+
# if counter >= 3:
|
| 115 |
+
# break
|
| 116 |
text += "\nPage " + str(counter) + "\n"
|
| 117 |
ocr_dict = pytesseract.image_to_data(pil_im, lang='eng', output_type=Output.DICT)
|
| 118 |
text += " ".join(ocr_dict['text']) + "\n"
|