"""Set up a local NLTK data directory for the app.

Clones the ``gh-pages`` branch of the nltk_data repository into a fixed
path, flattens its ``packages/`` directory, and extracts the ``punkt``
tokenizer and ``averaged_perceptron_tagger`` archives so NLTK can find
them without downloading at runtime.
"""
import os
import shutil
import subprocess
import zipfile

NLTK_DATA_PATH = "/home/user/app/nltk_data"
NLTK_DATA_REPO = "https://github.com/nltk/nltk_data.git"


def clone_nltk_data(dest: str = NLTK_DATA_PATH) -> None:
    """Clone the nltk_data repo (gh-pages branch) into *dest*; idempotent.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` fails — the
        original ``os.system`` call silently ignored failures.
    """
    if os.path.exists(dest):
        print(f"nltk_data already exists at {dest}, skipping clone.")
        return
    print(f"Cloning nltk_data to {dest}...")
    # Argument list + check=True: no shell quoting/injection issues,
    # and a failed clone raises instead of being swallowed.
    subprocess.run(
        ["git", "clone", NLTK_DATA_REPO, "--branch", "gh-pages", dest],
        check=True,
    )


def flatten_packages(base: str = NLTK_DATA_PATH) -> None:
    """Move every entry of ``<base>/packages`` up into *base* itself.

    Uses shutil.move instead of ``mv <src>/*`` so dotfiles are included
    and the step works without a POSIX shell.
    """
    packages_src = os.path.join(base, "packages")
    if not os.path.isdir(packages_src):
        return
    print("Moving packages...")
    for entry in os.listdir(packages_src):
        shutil.move(os.path.join(packages_src, entry), base)


def _unzip_if_present(zip_path: str, dest_dir: str, label: str) -> None:
    """Extract *zip_path* into *dest_dir* when the archive exists."""
    if not os.path.exists(zip_path):
        return
    print(f"Unzipping {label}...")
    # zipfile replaces shelling out to `unzip` (may be absent on the host).
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(dest_dir)


def main() -> None:
    """Run the full setup: clone, flatten, then unpack required archives."""
    clone_nltk_data()
    flatten_packages()
    tokenizers_dir = os.path.join(NLTK_DATA_PATH, "tokenizers")
    taggers_dir = os.path.join(NLTK_DATA_PATH, "taggers")
    _unzip_if_present(
        os.path.join(tokenizers_dir, "punkt.zip"),
        tokenizers_dir,
        "punkt tokenizer data",
    )
    _unzip_if_present(
        os.path.join(taggers_dir, "averaged_perceptron_tagger.zip"),
        taggers_dir,
        "averaged_perceptron_tagger data",
    )
    print("NLTK data setup completed.")


if __name__ == "__main__":
    # Guarded entry point: importing this module no longer triggers a
    # network clone and filesystem mutation as a side effect.
    main()