Spaces:
Sleeping
Sleeping
Update app.py
Browse files
Adding https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0.tar.gz to requirements
app.py
CHANGED
|
@@ -78,7 +78,15 @@ from transformers import AutoTokenizer, AutoModel
|
|
| 78 |
import torch
|
| 79 |
|
| 80 |
# Load SpaCy model
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
# Load Hugging Face Transformers model
|
| 84 |
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-mpnet-base-v2")
|
|
@@ -100,6 +108,9 @@ model = AutoModel.from_pretrained("sentence-transformers/all-mpnet-base-v2")
|
|
| 100 |
import re
|
| 101 |
from nltk.corpus import stopwords
|
| 102 |
from nltk.tokenize import word_tokenize
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
def combined_text_processing(text):
|
| 105 |
# Remove punctuation, numbers, URLs, and special characters
|
|
|
|
| 78 |
import torch
|
| 79 |
|
| 80 |
# Load SpaCy model
|
| 81 |
+
# Install the 'en_core_web_sm' model if it isn't already installed
|
| 82 |
+
try:
|
| 83 |
+
nlp = spacy.load('en_core_web_sm')
|
| 84 |
+
except OSError:
|
| 85 |
+
# Alternative to this try/except: add https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0.tar.gz to requirements.txt so the model is installed with the other dependencies and can be loaded directly
|
| 86 |
+
from spacy.cli import download
|
| 87 |
+
download('en_core_web_sm')
|
| 88 |
+
nlp = spacy.load('en_core_web_sm')
|
| 89 |
+
|
| 90 |
|
| 91 |
# Load Hugging Face Transformers model
|
| 92 |
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-mpnet-base-v2")
|
|
|
|
| 108 |
import re
|
| 109 |
from nltk.corpus import stopwords
|
| 110 |
from nltk.tokenize import word_tokenize
|
| 111 |
+
# Download necessary NLTK data
|
| 112 |
+
nltk.download('punkt')
|
| 113 |
+
nltk.download('stopwords')
|
| 114 |
|
| 115 |
def combined_text_processing(text):
|
| 116 |
# Remove punctuation, numbers, URLs, and special characters
|