Spaces: Build error

xiaoqianran committed on
Commit · aed8ba9
1 Parent(s): e8eba5b
Add application file

Files changed:
- app.py +69 -3
- download_hf.py +14 -0
- environment_setup.py +34 -0
- nltk_setup.py +30 -0
- requirements.txt +10 -0
- test_internlm_api.py +28 -0
app.py
CHANGED
@@ -1,5 +1,71 @@
# llamaindex_rag.py
import os

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.settings import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.legacy.callbacks import CallbackManager
from llama_index.llms.openai_like import OpenAILike

# API key and base URL configuration (environment variables are recommended)
api_key = os.getenv('INTERNLM_API_KEY') or "YOUR_API_KEY_HERE"  # replace with your API key, or set the environment variable
api_base_url = os.environ.get("INTERNLM_BASE_URL") or "https://internlm-chat.intern-ai.org.cn/puyu/api/v1/"  # replace with your API base URL, or set the environment variable
model_name = "internlm2.5-latest"

if api_key == "YOUR_API_KEY_HERE":
    print("Warning: please configure your InternLM API key in the script or via an environment variable.")


# Download the embedding model (same steps as download_hf.py)
# Local path where the model is saved
model_dir = "/teamspace/studios/this_studio/model/sentence-transformer"

# Make sure the model directory exists
os.makedirs(model_dir, exist_ok=True)

# Download the model
command = f'huggingface-cli download --resume-download sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 --local-dir {model_dir}'
print(f"Downloading sentence-transformers model to {model_dir}...")
os.system(command)
print("Sentence-transformers model download completed.")


# Callback manager
callback_manager = CallbackManager()

# Initialize the LLM
llm = OpenAILike(
    model=model_name,
    api_base=api_base_url,
    api_key=api_key,
    is_chat_model=True,
    callback_manager=callback_manager
)

# Initialize the HuggingFace embedding model
embed_model = HuggingFaceEmbedding(
    model_name="/teamspace/studios/this_studio/model/sentence-transformer"  # make sure the model has been downloaded to this path
)
Settings.embed_model = embed_model
Settings.llm = llm

# Data directory (replace with the path to your data files)
data_dir = "/teamspace/studios/this_studio/data"  # assumes the data files live in this directory
documents = SimpleDirectoryReader(data_dir).load_data()

# Build the index
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

# Run a query
query_text = "燕知春和江若雪在什么地方认识,她们参加了什么比赛,创立了什么组织?"
response = query_engine.query(query_text)

print(response)
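A possible refinement, sketched below and not part of this commit: persist the vector index so the app does not re-embed every document on each restart. The persist_dir path is an assumption; the sketch relies on Settings.llm and Settings.embed_model already being configured as above, and uses the StorageContext / load_index_from_storage utilities exposed by llama_index.core.

# Hypothetical sketch: cache the index on disk and reuse it if present.
import os
from llama_index.core import (
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)

persist_dir = "/teamspace/studios/this_studio/index_storage"  # assumed writable path

if os.path.isdir(persist_dir):
    # Reload the previously persisted index instead of re-embedding the documents.
    index = load_index_from_storage(StorageContext.from_defaults(persist_dir=persist_dir))
else:
    documents = SimpleDirectoryReader("/teamspace/studios/this_studio/data").load_data()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=persist_dir)

query_engine = index.as_query_engine()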
download_hf.py
ADDED
@@ -0,0 +1,14 @@
# download_hf.py
import os

# Local path where the model is saved
model_dir = "/teamspace/studios/this_studio/model/sentence-transformer"

# Make sure the model directory exists
os.makedirs(model_dir, exist_ok=True)

# Download the model
command = f'huggingface-cli download --resume-download sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 --local-dir {model_dir}'
print(f"Downloading sentence-transformers model to {model_dir}...")
os.system(command)
print("Sentence-transformers model download completed.")
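The same download can also be done without shelling out to huggingface-cli; a minimal alternative using the huggingface_hub Python API is sketched below (illustrative only, not part of the commit).

# Sketch: equivalent download via the huggingface_hub Python API.
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    local_dir="/teamspace/studios/this_studio/model/sentence-transformer",
)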
environment_setup.py
ADDED
@@ -0,0 +1,34 @@
# environment_setup.py
import subprocess
import sys
import os

def install_packages():
    packages = [
        "einops==0.7.0",
        "protobuf==5.26.1",
        "llama-index==0.11.20",
        "llama-index-llms-replicate==0.3.0",
        "llama-index-llms-openai-like==0.2.0",
        "llama-index-embeddings-huggingface==0.3.1",
        "llama-index-embeddings-instructor==0.2.1",
        "torch==2.5.0",
        "torchvision==0.20.0",
        "torchaudio==2.5.0"
    ]
    index_url = "https://download.pytorch.org/whl/cu121"  # adjust to match your CUDA version

    for package in packages:
        try:
            if package.startswith("torch") or package.startswith("torchvision") or package.startswith("torchaudio"):
                subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--index-url", index_url, "-q"])
            else:
                subprocess.check_call([sys.executable, "-m", "pip", "install", package, "-q"])
            print(f"Successfully installed {package}")
        except subprocess.CalledProcessError as e:
            print(f"Error installing {package}: {e}")

if __name__ == "__main__":
    print("Starting to install required packages...")
    install_packages()
    print("Package installation completed.")
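A quick way to confirm the installation succeeded is to try importing the key packages afterwards; a minimal sketch follows (the module names are assumptions based on the pinned distributions).

# Sketch: post-install import check for the core packages.
import importlib

for module in ("einops", "torch", "llama_index"):
    try:
        importlib.import_module(module)
        print(f"{module}: OK")
    except ImportError as exc:
        print(f"{module}: not importable ({exc})")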
nltk_setup.py
ADDED
@@ -0,0 +1,30 @@
# nltk_setup.py
import os

nltk_data_path = "/teamspace/studios/this_studio/nltk_data"

# Clone the nltk_data repository
if not os.path.exists(nltk_data_path):
    print(f"Cloning nltk_data to {nltk_data_path}...")
    os.system(f'git clone https://github.com/nltk/nltk_data.git --branch gh-pages {nltk_data_path}')
else:
    print(f"nltk_data already exists at {nltk_data_path}, skipping clone.")

# Move the packages directory up and unzip the required data
packages_src = os.path.join(nltk_data_path, 'packages')
tokenizers_dir = os.path.join(nltk_data_path, 'tokenizers')
taggers_dir = os.path.join(nltk_data_path, 'taggers')

if os.path.exists(packages_src):
    print("Moving packages...")
    os.system(f'mv {packages_src}/* {nltk_data_path}/')

if os.path.exists(os.path.join(tokenizers_dir, 'punkt.zip')):
    print("Unzipping punkt tokenizer data...")
    os.system(f'unzip {os.path.join(tokenizers_dir, "punkt.zip")} -d {tokenizers_dir}')

if os.path.exists(os.path.join(taggers_dir, 'averaged_perceptron_tagger.zip')):
    print("Unzipping averaged_perceptron_tagger data...")
    os.system(f'unzip {os.path.join(taggers_dir, "averaged_perceptron_tagger.zip")} -d {taggers_dir}')

print("NLTK data setup completed.")
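The script prepares the data directory but does not itself point NLTK at it; a minimal sketch of wiring that up at runtime is below (the path mirrors nltk_data_path above).

# Sketch: make NLTK search the prepared directory and verify the punkt data is visible.
import nltk

nltk.data.path.append("/teamspace/studios/this_studio/nltk_data")
print(nltk.data.find("tokenizers/punkt"))  # raises LookupError if the data was not found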
requirements.txt
ADDED
@@ -0,0 +1,10 @@
einops==0.7.0
protobuf==5.26.1
llama-index==0.11.20
llama-index-llms-replicate==0.3.0
llama-index-llms-openai-like==0.2.0
llama-index-embeddings-huggingface==0.3.1
llama-index-embeddings-instructor==0.2.1
torch==2.5.0
torchvision==0.20.0
torchaudio==2.5.0
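These pins mirror the list in environment_setup.py; note that, unlike that script, installing from this file does not pass the --index-url for the CUDA wheels, so torch may resolve to the default PyPI build. If the file needs to be installed from Python rather than by the Space build, a minimal sketch:

# Sketch: programmatic equivalent of `pip install -r requirements.txt`.
import subprocess
import sys

subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt", "-q"])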
test_internlm_api.py
ADDED
@@ -0,0 +1,28 @@
# test_internlm_api.py
from openai import OpenAI
import os

# Replace with your InternLM API key and base URL, or set them as environment variables
api_key = os.getenv('INTERNLM_API_KEY') or "YOUR_API_KEY_HERE"  # environment variables are recommended
base_url = os.getenv('INTERNLM_BASE_URL') or "https://internlm-chat.intern-ai.org.cn/puyu/api/v1/"
model_name = "internlm2.5-latest"

if api_key == "YOUR_API_KEY_HERE":
    print("Warning: please configure your InternLM API key in the script or via an environment variable.")

client = OpenAI(
    api_key=api_key,
    base_url=base_url,
)

try:
    chat_rsp = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": "燕知春和江若雪在什么地方认识,她们参加了什么比赛,创立了什么组织?"}],
    )

    for choice in chat_rsp.choices:
        print(choice.message.content)

except Exception as e:
    print(f"API call failed: {e}")
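If the endpoint supports streaming (an assumption; OpenAI-compatible chat APIs usually accept stream=True), the same request can print tokens as they arrive:

# Sketch: streaming variant of the test call, assuming the endpoint supports stream=True.
stream = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": "Introduce yourself in one sentence."}],
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
print()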