Spaces:
Sleeping
Sleeping
Delete nltk_setup.py
Browse files
- nltk_setup.py +0 -56
nltk_setup.py
DELETED
|
@@ -1,56 +0,0 @@
|
|
| 1 |
-
import nltk
|
| 2 |
-
import os
|
| 3 |
-
|
| 4 |
-
def setup_nltk():
    """Make the required NLTK data available locally and smoke-test it.

    Ensures a ``nltk_data`` directory exists under the current working
    directory, registers it on ``nltk.data.path``, downloads every package
    in the list below that ``nltk.data.find`` cannot locate, and finally
    exercises the tokenizers, the POS tagger and the stopword corpus once.

    Progress and errors are reported with ``print``. No exception derived
    from ``Exception`` escapes this function.

    Returns:
        bool: ``True`` when every step succeeded; ``False`` when any step
        raised, in which case the error and the manual ``nltk.download``
        commands are printed.
    """
    # Defined before the ``try`` so the error handler can always list the
    # packages, even when the failure happens while preparing the data
    # directory (previously that path raised UnboundLocalError instead of
    # returning False).
    # Each entry is (download id, resource path checked by nltk.data.find).
    packages = [
        ('punkt', 'tokenizers/punkt'),
        ('stopwords', 'corpora/stopwords'),
        ('averaged_perceptron_tagger', 'taggers/averaged_perceptron_tagger'),
        ('averaged_perceptron_tagger_eng', 'taggers/averaged_perceptron_tagger_eng'),
        ('wordnet', 'corpora/wordnet'),
        ('omw-1.4', 'corpora/omw-1.4'),
    ]

    try:
        # Set NLTK data path to a local directory
        nltk_data = os.path.join(os.getcwd(), 'nltk_data')
        os.makedirs(nltk_data, exist_ok=True)
        # Avoid piling up duplicate entries when called more than once.
        if nltk_data not in nltk.data.path:
            nltk.data.path.append(nltk_data)

        print("\n=== Setting up NLTK data ===")

        # Download each package that is not already present
        for package, path in packages:
            try:
                nltk.data.find(path)
            except LookupError:
                print(f"Downloading {package}...")
                # nltk.download reports failure through its return value
                # rather than by raising, so check it explicitly instead of
                # claiming success unconditionally.
                if not nltk.download(package, download_dir=nltk_data):
                    raise RuntimeError(
                        f"Failed to download NLTK package '{package}'"
                    ) from None
                print(f"✓ Downloaded {package}")
            else:
                print(f"✓ {package} is already available")

        # Test NLTK components
        print("\n=== Testing NLTK Components ===")
        # Imported here, after the data path has been registered.
        from nltk.tokenize import sent_tokenize, word_tokenize
        from nltk.tag import pos_tag
        from nltk.corpus import stopwords

        # Results are discarded: the calls only need to run without raising.
        sent_tokenize("This is a test.")
        word_tokenize("This is a test.")
        pos_tag(["test", "this", "is", "a", "sentence"])
        stopwords.words('english')

        print("\n=== NLTK Setup Completed Successfully ===\n")
        return True

    except Exception as e:
        # Top-level boundary of the setup script: report the problem and
        # tell the user how to fetch the data manually instead of crashing.
        print(f"\n⚠ Error during NLTK setup: {e}")
        print("\nPlease try running these commands manually in a Python shell:")
        print("import nltk")
        for package, _ in packages:
            print(f"nltk.download('{package}')")
        return False
|
| 54 |
-
|
| 55 |
-
if __name__ == "__main__":
    # Run the setup only when this file is executed as a script; the
    # boolean result of setup_nltk() is not used here.
    setup_nltk()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|