File size: 1,234 Bytes
d5f197c
 
aed8ba9
d5f197c
aed8ba9
d5f197c
 
 
 
 
 
aed8ba9
d5f197c
 
 
 
aed8ba9
d5f197c
 
 
aed8ba9
d5f197c
 
 
aed8ba9
d5f197c
 
 
aed8ba9
d5f197c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# # nltk_setup.py
# import os

# nltk_data_path = "/home/user/app/nltk_data"

# # Clone the nltk_data repository
# if not os.path.exists(nltk_data_path):
#     print(f"Cloning nltk_data to {nltk_data_path}...")
#     os.system(f'git clone https://github.com/nltk/nltk_data.git --branch gh-pages {nltk_data_path}')
# else:
#     print(f"nltk_data already exists at {nltk_data_path}, skipping clone.")

# # Move the packages and unzip the required data
# packages_src = os.path.join(nltk_data_path, 'packages')
# tokenizers_dir = os.path.join(nltk_data_path, 'tokenizers')
# taggers_dir = os.path.join(nltk_data_path, 'taggers')

# if os.path.exists(packages_src):
#     print("Moving packages...")
#     os.system(f'mv {packages_src}/* {nltk_data_path}/')

# if os.path.exists(os.path.join(tokenizers_dir, 'punkt.zip')):
#     print("Unzipping punkt tokenizer data...")
#     os.system(f'unzip {os.path.join(tokenizers_dir, "punkt.zip")} -d {tokenizers_dir}')

# if os.path.exists(os.path.join(taggers_dir, 'averaged_perceptron_tagger.zip')):
#     print("Unzipping averaged_perceptron_tagger data...")
#     os.system(f'unzip {os.path.join(taggers_dir, "averaged_perceptron_tagger.zip")} -d {taggers_dir}')

# print("NLTK data setup completed.")