Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,16 +11,20 @@ import os
|
|
| 11 |
import nltk
|
| 12 |
nltk.download('punkt')
|
| 13 |
|
| 14 |
-
# Install Poppler in the runtime environment
|
| 15 |
-
os.system("apt-get update && apt-get install -y poppler-utils")
|
| 16 |
|
| 17 |
secret = os.getenv('Groq_api')
|
| 18 |
|
| 19 |
working_dir = os.path.dirname(os.path.abspath(__file__))
|
| 20 |
|
| 21 |
def load_documents(file_path):
|
| 22 |
-
# Specify poppler_path to ensure compatibility
|
| 23 |
-
loader = UnstructuredPDFLoader(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
documents = loader.load()
|
| 25 |
return documents
|
| 26 |
|
|
|
|
| 11 |
import nltk
|
| 12 |
nltk.download('punkt')
|
| 13 |
|
| 14 |
+
# Install Poppler and Tesseract in the runtime environment
|
| 15 |
+
os.system("apt-get update && apt-get install -y poppler-utils tesseract-ocr")
|
| 16 |
|
| 17 |
secret = os.getenv('Groq_api')
|
| 18 |
|
| 19 |
working_dir = os.path.dirname(os.path.abspath(__file__))
|
| 20 |
|
| 21 |
def load_documents(file_path):
|
| 22 |
+
# Specify poppler_path and tesseract_path to ensure compatibility
|
| 23 |
+
loader = UnstructuredPDFLoader(
|
| 24 |
+
file_path,
|
| 25 |
+
poppler_path="/usr/bin",
|
| 26 |
+
tesseract_path="/usr/bin/tesseract"
|
| 27 |
+
)
|
| 28 |
documents = loader.load()
|
| 29 |
return documents
|
| 30 |
|