Spaces:

DreamStream-1
/

CDF-HR

Sleeping

DreamStream-1 committed on Nov 25, 2024

Commit

6ef0bb8

verified ·

1 Parent(s): 716287a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,8 +16,13 @@ from pdf2image import convert_from_path
 nltk.download('punkt')
 nltk.download('stopwords')
-# Load the SpaCy model
-nlp = spacy.load("en_core_web_sm")
 # Function for PyMuPDF text extraction
 def extract_text_with_pymupdf(pdf_file):

 nltk.download('punkt')
 nltk.download('stopwords')
+# Download and load the SpaCy model if not already available
+try:
+    nlp = spacy.load("en_core_web_sm")
+except OSError:
+    from spacy.cli import download
+    download("en_core_web_sm")  # Downloads the model
+    nlp = spacy.load("en_core_web_sm")  # Loads the model after downloading
 # Function for PyMuPDF text extraction
 def extract_text_with_pymupdf(pdf_file):