xiaoqianran committed on
Commit
aed8ba9
·
1 Parent(s): e8eba5b

Add application file

Browse files
Files changed (6) hide show
  1. app.py +69 -3
  2. download_hf.py +14 -0
  3. environment_setup.py +34 -0
  4. nltk_setup.py +30 -0
  5. requirements.txt +10 -0
  6. test_internlm_api.py +28 -0
app.py CHANGED
@@ -1,5 +1,71 @@
 
 
 
1
 
2
- import streamlit as st
 
 
 
 
3
 
4
- x = st.slider('Select a value')
5
- st.write(x, 'squared is', x * x)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# llamaindex_rag.py
# Build a RAG query engine over local documents with LlamaIndex, using an
# InternLM chat model (OpenAI-compatible endpoint) for generation and a
# local sentence-transformers model for embeddings.
import os

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.settings import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.legacy.callbacks import CallbackManager
from llama_index.llms.openai_like import OpenAILike

# API key and base URL configuration (environment variables preferred).
api_key = os.getenv('INTERNLM_API_KEY') or "YOUR_API_KEY_HERE"  # set INTERNLM_API_KEY or replace placeholder
api_base_url = os.environ.get("INTERNLM_BASE_URL") or "https://internlm-chat.intern-ai.org.cn/puyu/api/v1/"
model_name = "internlm2.5-latest"

if api_key == "YOUR_API_KEY_HERE":
    print("警告:请在脚本中或环境变量中配置您的 InternLM API 密钥。")

# Local path where the embedding model is stored; download it if missing.
# NOTE: the original had a stray `print(c)` here that raised NameError
# (`c` was never defined) — removed.
model_dir = "/teamspace/studios/this_studio/model/sentence-transformer"
os.makedirs(model_dir, exist_ok=True)

# Resumable download of the sentence-transformers embedding model.
command = f'huggingface-cli download --resume-download sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 --local-dir {model_dir}'
print(f"Downloading sentence-transformers model to {model_dir}...")
os.system(command)
print("Sentence-transformers model download completed.")

# Callback manager for LlamaIndex instrumentation hooks.
callback_manager = CallbackManager()

# Initialize the LLM: InternLM exposed through an OpenAI-like chat API.
llm = OpenAILike(
    model=model_name,
    api_base=api_base_url,
    api_key=api_key,
    is_chat_model=True,
    callback_manager=callback_manager,
)

# Initialize the HuggingFace embedding model from the local download path.
embed_model = HuggingFaceEmbedding(
    model_name=model_dir  # must match the directory the model was downloaded to
)
Settings.embed_model = embed_model
Settings.llm = llm

# Directory containing the documents to index (replace with your data path).
data_dir = "/teamspace/studios/this_studio/data"
documents = SimpleDirectoryReader(data_dir).load_data()

# Build the vector index and wrap it in a query engine.
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

# Run a sample query against the indexed documents.
query_text = "燕知春和江若雪在什么地方认识,她们参加了什么比赛,创立了什么组织?"
response = query_engine.query(query_text)

print(response)
download_hf.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# download_hf.py
# Download the sentence-transformers embedding model to a local directory.
import os
import subprocess

# Local directory where the model will be stored.
model_dir = "/teamspace/studios/this_studio/model/sentence-transformer"

# Make sure the target directory exists.
os.makedirs(model_dir, exist_ok=True)

# Resumable download via huggingface-cli. An argv list (no shell) avoids
# shell interpretation of the path; os.system would also always print
# "completed" even when the download failed, so check the exit code.
print(f"Downloading sentence-transformers model to {model_dir}...")
result = subprocess.run([
    "huggingface-cli", "download", "--resume-download",
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    "--local-dir", model_dir,
])
if result.returncode == 0:
    print("Sentence-transformers model download completed.")
else:
    print(f"Model download failed with exit code {result.returncode}.")
environment_setup.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# environment_setup.py
# Install the Python packages required by the RAG demo, pulling the
# torch family from the CUDA-specific wheel index.
import subprocess
import sys
import os


def install_packages():
    """Install each required package with pip, reporting per-package status.

    Torch-family packages (torch/torchvision/torchaudio) are installed
    from the CUDA wheel index; everything else comes from PyPI. Failures
    are reported per package and do not abort the rest of the list.
    """
    packages = [
        "einops==0.7.0",
        "protobuf==5.26.1",
        "llama-index==0.11.20",
        "llama-index-llms-replicate==0.3.0",
        "llama-index-llms-openai-like==0.2.0",
        "llama-index-embeddings-huggingface==0.3.1",
        "llama-index-embeddings-instructor==0.2.1",
        "torch==2.5.0",
        "torchvision==0.20.0",
        "torchaudio==2.5.0",
    ]
    index_url = "https://download.pytorch.org/whl/cu121"  # adjust to your CUDA version

    for package in packages:
        # "torch" prefix also matches torchvision and torchaudio, so one
        # check replaces the original three-way startswith chain.
        cmd = [sys.executable, "-m", "pip", "install", package, "-q"]
        if package.startswith("torch"):
            cmd += ["--index-url", index_url]
        try:
            subprocess.check_call(cmd)
            print(f"Successfully installed {package}")
        except subprocess.CalledProcessError as e:
            print(f"Error installing {package}: {e}")


if __name__ == "__main__":
    print("Starting to install required packages...")
    install_packages()
    print("Package installation completed.")
nltk_setup.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# nltk_setup.py
# Fetch NLTK data (punkt tokenizer, averaged perceptron tagger) by cloning
# the nltk_data repository and unpacking the required archives. Uses the
# stdlib (shutil/zipfile) instead of shelling out to `mv`/`unzip`, which
# may not exist on the host and break on unusual paths.
import os
import shutil
import subprocess
import zipfile

nltk_data_path = "/teamspace/studios/this_studio/nltk_data"

# Clone the nltk_data repository (the gh-pages branch hosts the packages).
if not os.path.exists(nltk_data_path):
    print(f"Cloning nltk_data to {nltk_data_path}...")
    subprocess.run([
        "git", "clone", "https://github.com/nltk/nltk_data.git",
        "--branch", "gh-pages", nltk_data_path,
    ])
else:
    print(f"nltk_data already exists at {nltk_data_path}, skipping clone.")

# Move the contents of packages/ up one level, then unzip the needed data.
packages_src = os.path.join(nltk_data_path, 'packages')
tokenizers_dir = os.path.join(nltk_data_path, 'tokenizers')
taggers_dir = os.path.join(nltk_data_path, 'taggers')

if os.path.exists(packages_src):
    print("Moving packages...")
    for entry in os.listdir(packages_src):
        shutil.move(os.path.join(packages_src, entry), nltk_data_path)

punkt_zip = os.path.join(tokenizers_dir, 'punkt.zip')
if os.path.exists(punkt_zip):
    print("Unzipping punkt tokenizer data...")
    with zipfile.ZipFile(punkt_zip) as zf:
        zf.extractall(tokenizers_dir)

tagger_zip = os.path.join(taggers_dir, 'averaged_perceptron_tagger.zip')
if os.path.exists(tagger_zip):
    print("Unzipping averaged_perceptron_tagger data...")
    with zipfile.ZipFile(tagger_zip) as zf:
        zf.extractall(taggers_dir)

print("NLTK data setup completed.")
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ einops==0.7.0
2
+ protobuf==5.26.1
3
+ llama-index==0.11.20
4
+ llama-index-llms-replicate==0.3.0
5
+ llama-index-llms-openai-like==0.2.0
6
+ llama-index-embeddings-huggingface==0.3.1
7
+ llama-index-embeddings-instructor==0.2.1
8
+ torch==2.5.0
9
+ torchvision==0.20.0
10
+ torchaudio==2.5.0
test_internlm_api.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# test_internlm_api.py
# Smoke-test the InternLM OpenAI-compatible chat-completions endpoint.
from openai import OpenAI
import os

# Prefer environment variables for credentials; the placeholder is only
# for quick local testing.
api_key = os.getenv('INTERNLM_API_KEY') or "YOUR_API_KEY_HERE"
# BUG FIX: this previously read INTERNLM_API_KEY again, so a configured
# INTERNLM_BASE_URL was ignored and the secret key would have been used
# as the base URL.
base_url = os.getenv('INTERNLM_BASE_URL') or "https://internlm-chat.intern-ai.org.cn/puyu/api/v1/"
model_name = "internlm2.5-latest"

if api_key == "YOUR_API_KEY_HERE":
    print("警告:请在脚本中或环境变量中配置您的 InternLM API 密钥。")

client = OpenAI(
    api_key=api_key,
    base_url=base_url,
)

try:
    chat_rsp = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": "燕知春和江若雪在什么地方认识,她们参加了什么比赛,创立了什么组织?"}],
    )

    for choice in chat_rsp.choices:
        print(choice.message.content)

except Exception as e:
    # Broad catch is acceptable for a standalone smoke test: report and exit.
    print(f"API 调用失败: {e}")