Intern_streamlit_rag / nltk_setup.py
xiaoqianran
Add application file
d5f197c
# # nltk_setup.py
# import os
# nltk_data_path = "/home/user/app/nltk_data"
# # Clone the nltk_data repository
# if not os.path.exists(nltk_data_path):
#     print(f"Cloning nltk_data to {nltk_data_path}...")
#     os.system(f'git clone https://github.com/nltk/nltk_data.git --branch gh-pages {nltk_data_path}')
# else:
#     print(f"nltk_data already exists at {nltk_data_path}, skipping clone.")
# # Move the packages and unzip the required data
# packages_src = os.path.join(nltk_data_path, 'packages')
# tokenizers_dir = os.path.join(nltk_data_path, 'tokenizers')
# taggers_dir = os.path.join(nltk_data_path, 'taggers')
# if os.path.exists(packages_src):
#     print("Moving packages...")
#     os.system(f'mv {packages_src}/* {nltk_data_path}/')
# if os.path.exists(os.path.join(tokenizers_dir, 'punkt.zip')):
#     print("Unzipping punkt tokenizer data...")
#     os.system(f'unzip {os.path.join(tokenizers_dir, "punkt.zip")} -d {tokenizers_dir}')
# if os.path.exists(os.path.join(taggers_dir, 'averaged_perceptron_tagger.zip')):
#     print("Unzipping averaged_perceptron_tagger data...")
#     os.system(f'unzip {os.path.join(taggers_dir, "averaged_perceptron_tagger.zip")} -d {taggers_dir}')
# print("NLTK data setup completed.")