Omnamdev02 committed on
Commit
cf698c8
·
unverified ·
1 Parent(s): 075f13d

Delete nltk_setup.py

Browse files
Files changed (1) hide show
  1. nltk_setup.py +0 -56
nltk_setup.py DELETED
@@ -1,56 +0,0 @@
1
- import nltk
2
- import os
3
-
4
def setup_nltk():
    """Make the NLTK resources this project needs available locally.

    Registers ``<cwd>/nltk_data`` as an NLTK data directory, downloads every
    listed package that ``nltk.data.find`` cannot locate, then smoke-tests
    sentence/word tokenization, POS tagging and the English stopword list.

    Returns:
        bool: ``True`` when every step (including the smoke test) completed
        without raising, ``False`` otherwise. On failure the manual
        ``nltk.download(...)`` commands are printed for the user.
    """
    # Defined before the ``try`` so the error handler can always list the
    # packages, even when the failure happens before any download starts
    # (e.g. the data directory cannot be created).
    packages = [
        ('punkt', 'tokenizers/punkt'),
        ('stopwords', 'corpora/stopwords'),
        ('averaged_perceptron_tagger', 'taggers/averaged_perceptron_tagger'),
        ('averaged_perceptron_tagger_eng', 'taggers/averaged_perceptron_tagger_eng'),
        ('wordnet', 'corpora/wordnet'),
        ('omw-1.4', 'corpora/omw-1.4'),
    ]

    try:
        # Set NLTK data path to a local directory
        nltk_data = os.path.join(os.getcwd(), 'nltk_data')
        os.makedirs(nltk_data, exist_ok=True)
        # Avoid registering the same directory again on repeated calls.
        if nltk_data not in nltk.data.path:
            nltk.data.path.append(nltk_data)

        print("\n=== Setting up NLTK data ===")

        # Download each package that is not already installed
        for package, path in packages:
            try:
                nltk.data.find(path)
                print(f"✓ {package} is already available")
            except LookupError:
                print(f"Downloading {package}...")
                # nltk.download reports failure through its return value
                # rather than by raising, so only claim success when it
                # actually succeeded. The smoke test below still decides
                # the overall result.
                if nltk.download(package, download_dir=nltk_data):
                    print(f"✓ Downloaded {package}")
                else:
                    print(f"⚠ Failed to download {package}")

        # Test NLTK components
        print("\n=== Testing NLTK Components ===")
        from nltk.tokenize import sent_tokenize, word_tokenize
        from nltk.tag import pos_tag
        from nltk.corpus import stopwords

        sent_tokenize("This is a test.")
        word_tokenize("This is a test.")
        pos_tag(["test", "this", "is", "a", "sentence"])
        stopwords.words('english')

        print("\n=== NLTK Setup Completed Successfully ===\n")
        return True

    except Exception as e:
        # Top-level boundary for a setup script: report the problem and tell
        # the user how to fetch the data by hand instead of crashing.
        print(f"\n⚠ Error during NLTK setup: {e}")
        print("\nPlease try running these commands manually in a Python shell:")
        print("import nltk")
        for package, _ in packages:
            print(f"nltk.download('{package}')")
        return False
54
-
55
# Script entry point: run the NLTK setup only when this file is executed
# directly, not when it is imported. The boolean result is not used here.
if __name__ == "__main__":
    setup_nltk()