Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,8 +16,13 @@ from pdf2image import convert_from_path
|
|
| 16 |
nltk.download('punkt')
|
| 17 |
nltk.download('stopwords')
|
| 18 |
|
| 19 |
-
#
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
# Function for PyMuPDF text extraction
|
| 23 |
def extract_text_with_pymupdf(pdf_file):
|
|
|
|
| 16 |
nltk.download('punkt')
|
| 17 |
nltk.download('stopwords')
|
| 18 |
|
| 19 |
+
# Load the spaCy model, downloading it first only if it is not already installed
|
| 20 |
+
try:
|
| 21 |
+
nlp = spacy.load("en_core_web_sm")
|
| 22 |
+
except OSError:
|
| 23 |
+
from spacy.cli import download
|
| 24 |
+
download("en_core_web_sm") # Downloads the model
|
| 25 |
+
nlp = spacy.load("en_core_web_sm") # Loads the model after downloading
|
| 26 |
|
| 27 |
# Function for PyMuPDF text extraction
|
| 28 |
def extract_text_with_pymupdf(pdf_file):
|