DreamStream-1 committed on
Commit
6ef0bb8
·
verified ·
1 Parent(s): 716287a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -2
app.py CHANGED
@@ -16,8 +16,13 @@ from pdf2image import convert_from_path
16
  nltk.download('punkt')
17
  nltk.download('stopwords')
18
 
19
- # Load the SpaCy model
20
- nlp = spacy.load("en_core_web_sm")
 
 
 
 
 
21
 
22
  # Function for PyMuPDF text extraction
23
  def extract_text_with_pymupdf(pdf_file):
 
16
  nltk.download('punkt')
17
  nltk.download('stopwords')
18
 
19
+ # Load the spaCy model, downloading it first if it is not already installed
20
+ try:
21
+ nlp = spacy.load("en_core_web_sm")
22
+ except OSError:
23
+ from spacy.cli import download
24
+ download("en_core_web_sm") # Downloads the model
25
+ nlp = spacy.load("en_core_web_sm") # Loads the model after downloading
26
 
27
  # Function for PyMuPDF text extraction
28
  def extract_text_with_pymupdf(pdf_file):