Upload 9 files
Browse files- Dockerfile +20 -0
- README.md +2 -12
- chat.py +93 -0
- create_kb.py +134 -0
- docker +0 -0
- html_string.py +150 -0
- main.py +116 -0
- requirements.txt +25 -0
- upload_file.py +107 -0
Dockerfile
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use the official slim Python base image (3.10 here; adjust as needed).
FROM python:3.10-slim

# Set the working directory inside the container.
WORKDIR /app

# Install dependencies first so this layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the project into the working directory.
COPY . /app

# main.py lists these directories while building the UI, so they must exist
# before the app is imported.
RUN mkdir -p File/Structured File/Unstructured VectorStore

# Port the application listens on inside the container (adjust as needed).
EXPOSE 8000

# SECURITY: never bake DASHSCOPE_API_KEY into the image. Supply it at runtime:
#   docker run -e DASHSCOPE_API_KEY=<your key> -p 8000:8000 <image>
# Any key that was previously committed here must be treated as leaked and
# rotated.

# Start the FastAPI application with uvicorn.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
README.md
CHANGED
|
@@ -1,12 +1,2 @@
|
|
| 1 |
-
--
|
| 2 |
-
|
| 3 |
-
emoji: 📉
|
| 4 |
-
colorFrom: purple
|
| 5 |
-
colorTo: pink
|
| 6 |
-
sdk: docker
|
| 7 |
-
pinned: false
|
| 8 |
-
license: apache-2.0
|
| 9 |
-
short_description: rag
|
| 10 |
-
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
+
Run: uvicorn main:app --port 7866
|
| 2 |
+
Then visit 127.0.0.1:7866
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
chat.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from openai import OpenAI
|
| 3 |
+
from llama_index.core import StorageContext,load_index_from_storage,Settings
|
| 4 |
+
from llama_index.embeddings.dashscope import (
|
| 5 |
+
DashScopeEmbedding,
|
| 6 |
+
DashScopeTextEmbeddingModels,
|
| 7 |
+
DashScopeTextEmbeddingType,
|
| 8 |
+
)
|
| 9 |
+
from llama_index.postprocessor.dashscope_rerank import DashScopeRerank
|
| 10 |
+
from create_kb import *
|
| 11 |
+
DB_PATH = "VectorStore"
|
| 12 |
+
TMP_NAME = "tmp_abcd"
|
| 13 |
+
EMBED_MODEL = DashScopeEmbedding(
|
| 14 |
+
model_name=DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2,
|
| 15 |
+
text_type=DashScopeTextEmbeddingType.TEXT_TYPE_DOCUMENT,
|
| 16 |
+
)
|
| 17 |
+
# 若使用本地嵌入模型,请取消以下注释:
|
| 18 |
+
# from langchain_community.embeddings import ModelScopeEmbeddings
|
| 19 |
+
# from llama_index.embeddings.langchain import LangchainEmbedding
|
| 20 |
+
# embeddings = ModelScopeEmbeddings(model_id="modelscope/iic/nlp_gte_sentence-embedding_chinese-large")
|
| 21 |
+
# EMBED_MODEL = LangchainEmbedding(embeddings)
|
| 22 |
+
|
| 23 |
+
# 设置嵌入模型
|
| 24 |
+
Settings.embed_model = EMBED_MODEL
|
| 25 |
+
|
| 26 |
+
def get_model_response(multi_modal_input,history,model,temperature,max_tokens,history_round,db_name,similarity_threshold,chunk_cnt):
    """Stream an answer to the latest user turn, optionally grounded in retrieved chunks.

    Args:
        multi_modal_input: Dict from the chat input box; only its ``'files'``
            entry is read here.
        history: List of ``[user_text, assistant_text]`` pairs. The last pair
            is the in-flight turn; its assistant slot is updated in place.
        model: Model name passed to the chat-completions endpoint.
        temperature: Sampling temperature passed through to the endpoint.
        max_tokens: Maximum reply length passed through to the endpoint.
        history_round: Maximum number of earlier turns sent as context.
        db_name: Name of the persisted index under ``DB_PATH``. It is replaced
            by ``TMP_NAME`` when a temporary upload folder exists or files
            were attached to this message.
        similarity_threshold: Minimum score a chunk needs to be used.
        chunk_cnt: Number of chunks kept after reranking.

    Yields:
        ``(history, chunk_show)`` after every streamed chunk, where
        ``chunk_show`` is the human-readable list of retrieved chunks.
    """
    # The question comes from the in-flight history entry, not the input box.
    prompt = history[-1][0]
    tmp_files = multi_modal_input['files']
    # An existing temporary upload folder takes precedence over the selected
    # knowledge base; otherwise newly attached files are indexed first.
    if os.path.exists(os.path.join("File",TMP_NAME)):
        db_name = TMP_NAME
    elif tmp_files:
        create_tmp_kb(tmp_files)
        db_name = TMP_NAME
    print(f"prompt:{prompt},tmp_files:{tmp_files},db_name:{db_name}")

    # Fall back to the bare question unless retrieval succeeds below.
    prompt_template = prompt
    chunk_show = ""
    if db_name:
        try:
            dashscope_rerank = DashScopeRerank(top_n=chunk_cnt,return_documents=True)
            storage_context = StorageContext.from_defaults(
                persist_dir=os.path.join(DB_PATH,db_name)
            )
            index = load_index_from_storage(storage_context)
            print("index获取完成")
            retriever_engine = index.as_retriever(
                similarity_top_k=20,
            )
            retrieve_chunk = retriever_engine.retrieve(prompt)
            print(f"原始chunk为:{retrieve_chunk}")
            try:
                results = dashscope_rerank.postprocess_nodes(retrieve_chunk, query_str=prompt)
                print(f"rerank成功,重排后的chunk为:{results}")
            except Exception:
                # Reranking is best-effort: keep the retriever's own top hits.
                results = retrieve_chunk[:chunk_cnt]
                print(f"rerank失败,chunk为:{results}")
            text_parts = []
            show_parts = []
            for rank, node in enumerate(results, start=1):
                if node.score >= similarity_threshold:
                    text_parts.append(f"## {rank}:\n {node.text}\n")
                    show_parts.append(f"## {rank}:\n {node.text}\nscore: {round(node.score,2)}\n")
            chunk_text = "".join(text_parts)
            print(f"已获取chunk:{chunk_text}")
            prompt_template = f"请参考以下内容:{chunk_text},以合适的语气回答用户的问题:{prompt}。如果参考内容中有图片链接也请直接返回。"
            chunk_show = "".join(show_parts)
        except Exception as e:
            # Any retrieval failure degrades to a plain, ungrounded chat.
            print(f"异常信息:{e}")
            prompt_template = prompt
            chunk_show = ""

    history[-1][-1] = ""
    client = OpenAI(
        api_key=os.getenv("DASHSCOPE_API_KEY"),
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    )
    messages = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
    # Only completed turns are context. The in-flight turn (history[-1]) is
    # sent once, as the final user message, so it is excluded here.
    previous_turns = history[:-1]
    history_round = min(len(previous_turns),history_round)
    for user_text, assistant_text in previous_turns[len(previous_turns)-history_round:]:
        messages.append({'role': 'user', 'content': user_text})
        messages.append({'role': 'assistant', 'content': assistant_text or ""})
    messages.append({'role': 'user', 'content': prompt_template})

    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=True
    )
    assistant_response = ""
    for chunk in completion:
        # A stream chunk may carry no choices or a None delta (for example the
        # closing chunk); concatenating None would raise TypeError.
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        if delta_text:
            assistant_response += delta_text
        history[-1][-1] = assistant_response
        yield history,chunk_show
|
create_kb.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#####################################
|
| 2 |
+
###### 创建知识库 #######
|
| 3 |
+
#####################################
|
| 4 |
+
import gradio as gr
|
| 5 |
+
import os
|
| 6 |
+
import shutil
|
| 7 |
+
from llama_index.core import VectorStoreIndex,Settings,SimpleDirectoryReader
|
| 8 |
+
from llama_index.embeddings.dashscope import (
|
| 9 |
+
DashScopeEmbedding,
|
| 10 |
+
DashScopeTextEmbeddingModels,
|
| 11 |
+
DashScopeTextEmbeddingType,
|
| 12 |
+
)
|
| 13 |
+
from llama_index.core.schema import TextNode
|
| 14 |
+
from upload_file import *
|
| 15 |
+
DB_PATH = "VectorStore"
|
| 16 |
+
STRUCTURED_FILE_PATH = "File/Structured"
|
| 17 |
+
UNSTRUCTURED_FILE_PATH = "File/Unstructured"
|
| 18 |
+
TMP_NAME = "tmp_abcd"
|
| 19 |
+
EMBED_MODEL = DashScopeEmbedding(
|
| 20 |
+
model_name=DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2,
|
| 21 |
+
text_type=DashScopeTextEmbeddingType.TEXT_TYPE_DOCUMENT,
|
| 22 |
+
)
|
| 23 |
+
# 若使用本地嵌入模型,请取消以下注释:
|
| 24 |
+
# from langchain_community.embeddings import ModelScopeEmbeddings
|
| 25 |
+
# from llama_index.embeddings.langchain import LangchainEmbedding
|
| 26 |
+
# embeddings = ModelScopeEmbeddings(model_id="modelscope/iic/nlp_gte_sentence-embedding_chinese-large")
|
| 27 |
+
# EMBED_MODEL = LangchainEmbedding(embeddings)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# 设置嵌入模型
|
| 31 |
+
Settings.embed_model = EMBED_MODEL
|
| 32 |
+
# 刷新知识库
|
| 33 |
+
def refresh_knowledge_base():
    """Return the names of the entries currently found under ``DB_PATH``."""
    kb_names = os.listdir(DB_PATH)
    return kb_names
|
| 35 |
+
|
| 36 |
+
# 创建非结构化向量数据库
|
| 37 |
+
def create_unstructured_db(db_name:str,label_name:list):
    """Build and persist a vector index from the selected unstructured categories.

    Args:
        db_name: Name of the knowledge base; used as the folder name under
            ``DB_PATH``.
        label_name: Category folder names under ``UNSTRUCTURED_FILE_PATH``
            whose documents are indexed.

    Progress and validation messages are reported through ``gr.Info``;
    nothing is returned.
    """
    print(f"知识库名称为:{db_name},类目名称为:{label_name}")
    # Test truthiness rather than ``is None`` so an empty selection is
    # rejected too instead of producing an empty index.
    if not label_name:
        gr.Info("没有选择类目")
        return
    if not db_name:
        gr.Info("没有命名知识库")
        return
    # Refuse to overwrite a knowledge base that already exists.
    if db_name in os.listdir(DB_PATH):
        gr.Info("知识库已存在,请换个名字或删除原来知识库再创建")
        return

    gr.Info("正在创建知识库,请等待知识库创建成功信息显示后前往RAG问答")
    documents = []
    for label in label_name:
        label_path = os.path.join(UNSTRUCTURED_FILE_PATH,label)
        documents.extend(SimpleDirectoryReader(label_path).load_data())
    index = VectorStoreIndex.from_documents(
        documents
    )
    db_path = os.path.join(DB_PATH,db_name)
    os.makedirs(db_path, exist_ok=True)
    index.storage_context.persist(db_path)
    gr.Info("知识库创建成功,可前往RAG问答进行提问")
|
| 62 |
+
|
| 63 |
+
# 创建结构化向量数据库
|
| 64 |
+
def create_structured_db(db_name:str,data_table:list):
    """Build and persist a vector index with one node per line of the selected tables.

    Args:
        db_name: Name of the knowledge base; used as the folder name under
            ``DB_PATH``.
        data_table: Table folder names under ``STRUCTURED_FILE_PATH`` whose
            documents are indexed.

    Progress and validation messages are reported through ``gr.Info``;
    nothing is returned.
    """
    print(f"知识库名称为:{db_name},数据表名称为:{data_table}")
    # Test truthiness rather than ``is None`` so an empty selection is
    # rejected too instead of producing an empty index.
    if not data_table:
        gr.Info("没有选择数据表")
        return
    if not db_name:
        gr.Info("没有命名知识库")
        return
    # Refuse to overwrite a knowledge base that already exists.
    if db_name in os.listdir(DB_PATH):
        gr.Info("知识库已存在,请换个名字或删除原来知识库再创建")
        return

    gr.Info("正在创建知识库,请等待知识库创建成功信息显示后前往RAG问答")
    documents = []
    for label in data_table:
        label_path = os.path.join(STRUCTURED_FILE_PATH,label)
        documents.extend(SimpleDirectoryReader(label_path).load_data())

    # Each line of a document becomes its own node instead of relying on the
    # default document chunking.
    nodes = []
    for doc in documents:
        for chunk in doc.get_content().split('\n'):
            node = TextNode(text=chunk)
            node.metadata = {'source': doc.get_doc_id(),'file_name':doc.metadata['file_name']}
            nodes.append(node)
    index = VectorStoreIndex(nodes)
    db_path = os.path.join(DB_PATH,db_name)
    os.makedirs(db_path, exist_ok=True)
    index.storage_context.persist(db_path)
    gr.Info("知识库创建成功,可前往RAG问答进行提问")
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
# 删除指定名称知识库
|
| 98 |
+
def delete_db(db_name:str):
    """Remove the named knowledge-base folder under ``DB_PATH`` and report the outcome.

    Does nothing when ``db_name`` is ``None``.
    """
    if db_name is None:
        return
    folder_path = os.path.join(DB_PATH, db_name)
    if not os.path.exists(folder_path):
        gr.Info(f"{db_name}知识库不存在")
        print(f"{db_name}知识库不存在")
        return
    shutil.rmtree(folder_path)
    gr.Info(f"已成功删除{db_name}知识库")
    print(f"已成功删除{db_name}知识库")
|
| 108 |
+
|
| 109 |
+
# 实时更新知识库列表
|
| 110 |
+
def update_knowledge_base():
    """Return a Gradio update whose choices are the current entries of ``DB_PATH``."""
    current_choices = os.listdir(DB_PATH)
    return gr.update(choices=current_choices)
|
| 112 |
+
|
| 113 |
+
# 临时文件创建知识库
|
| 114 |
+
def create_tmp_kb(files):
    """Index files attached to a chat message into the temporary knowledge base.

    Args:
        files: Iterable of file paths. Each file is moved into
            ``File/<TMP_NAME>`` before indexing.

    The resulting index is persisted under ``DB_PATH/<TMP_NAME>``.
    """
    tmp_dir = os.path.join("File",TMP_NAME)
    # makedirs also creates a missing parent folder and tolerates the
    # directory appearing between the check and the creation.
    os.makedirs(tmp_dir, exist_ok=True)
    for file in files:
        file_name = os.path.basename(file)
        shutil.move(file,os.path.join(tmp_dir,file_name))
    documents = SimpleDirectoryReader(tmp_dir).load_data()
    index = VectorStoreIndex.from_documents(
        documents
    )
    db_path = os.path.join(DB_PATH,TMP_NAME)
    os.makedirs(db_path, exist_ok=True)
    index.storage_context.persist(db_path)
|
| 128 |
+
|
| 129 |
+
# 清除tmp文件夹下内容
|
| 130 |
+
def clear_tmp():
    """Delete the temporary upload folder and the temporary index, if present."""
    stale_dirs = (
        os.path.join("File",TMP_NAME),
        os.path.join(DB_PATH,TMP_NAME),
    )
    for stale_dir in stale_dirs:
        if os.path.exists(stale_dir):
            shutil.rmtree(stale_dir)
|
docker
ADDED
|
File without changes
|
html_string.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
main_html = """<!DOCTYPE html>
|
| 2 |
+
<html lang="zh">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>本地RAG测试工程</title>
|
| 7 |
+
<link rel="stylesheet" href="https://fonts.googleapis.com/icon?family=Material+Icons">
|
| 8 |
+
<style>
|
| 9 |
+
body {
|
| 10 |
+
font-family: Arial, sans-serif;
|
| 11 |
+
background-color: #f5f5f5;
|
| 12 |
+
margin: 0;
|
| 13 |
+
padding: 0;
|
| 14 |
+
display: flex;
|
| 15 |
+
flex-direction: column;
|
| 16 |
+
align-items: center;
|
| 17 |
+
}
|
| 18 |
+
header {
|
| 19 |
+
background-color: #2196f3;
|
| 20 |
+
color: white;
|
| 21 |
+
width: 100%;
|
| 22 |
+
padding: 1.5em;
|
| 23 |
+
text-align: center;
|
| 24 |
+
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
|
| 25 |
+
}
|
| 26 |
+
main {
|
| 27 |
+
margin: 2em;
|
| 28 |
+
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
|
| 29 |
+
background-color: white;
|
| 30 |
+
border-radius: 8px;
|
| 31 |
+
overflow: hidden;
|
| 32 |
+
width: 90%;
|
| 33 |
+
max-width: 800px;
|
| 34 |
+
padding: 2em;
|
| 35 |
+
}
|
| 36 |
+
h1 {
|
| 37 |
+
color: #333;
|
| 38 |
+
}
|
| 39 |
+
p {
|
| 40 |
+
color: #666;
|
| 41 |
+
font-size: 1.1em;
|
| 42 |
+
}
|
| 43 |
+
ul {
|
| 44 |
+
list-style-type: none;
|
| 45 |
+
padding: 0;
|
| 46 |
+
}
|
| 47 |
+
ul li {
|
| 48 |
+
background-color: #2196f3;
|
| 49 |
+
margin: 0.5em 0;
|
| 50 |
+
padding: 1em;
|
| 51 |
+
border-radius: 4px;
|
| 52 |
+
transition: background-color 0.3s;
|
| 53 |
+
}
|
| 54 |
+
ul li a {
|
| 55 |
+
color: white;
|
| 56 |
+
text-decoration: none;
|
| 57 |
+
display: flex;
|
| 58 |
+
align-items: center;
|
| 59 |
+
}
|
| 60 |
+
ul li:hover {
|
| 61 |
+
background-color: #1976d2;
|
| 62 |
+
}
|
| 63 |
+
.material-icons {
|
| 64 |
+
margin-right: 0.5em;
|
| 65 |
+
}
|
| 66 |
+
</style>
|
| 67 |
+
</head>
|
| 68 |
+
<body>
|
| 69 |
+
<header>
|
| 70 |
+
<h1>本地RAG测试工程</h1>
|
| 71 |
+
</header>
|
| 72 |
+
<main>
|
| 73 |
+
<p>如果您需要基于上传的文档与模型直接对话,请直接访问<a href="/chat">RAG问答</a>,并在输入框位置上传文件,就可以开始对话了。(此次上传的数据在页面刷新后无法保留,若您希望可以持久使用、维护知识库,请创建知识库)。</p>
|
| 74 |
+
<p>如果您需要创建或更新知识库,请按照<a href="/upload_data">上传数据</a>、<a href="/create_knowledge_base">创建知识库</a>操作,在<a href="/chat">RAG问答</a>中的“知识库选择”位置选择您需要使用的知识库。</p>
|
| 75 |
+
<p>如果您需要基于已创建好的知识库进行问答,请直接访问<a href="/chat">RAG问答</a>,在“加载知识库”处选择您已创建的知识库。</p>
|
| 76 |
+
<ul>
|
| 77 |
+
<li><a href="/upload_data"><span class="material-icons"></span> 1. 上传数据</a></li>
|
| 78 |
+
<li><a href="/create_knowledge_base"><span class="material-icons"></span> 2. 创建知识库</a></li>
|
| 79 |
+
<li><a href="/chat"><span class="material-icons"></span> 3. RAG问答</a></li>
|
| 80 |
+
</ul>
|
| 81 |
+
</main>
|
| 82 |
+
</body>
|
| 83 |
+
</html>"""
|
| 84 |
+
|
| 85 |
+
plain_html = """<!DOCTYPE html>
|
| 86 |
+
<html lang="zh">
|
| 87 |
+
<head>
|
| 88 |
+
<title>RAG问答</title>
|
| 89 |
+
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
|
| 90 |
+
<style>
|
| 91 |
+
.links-container {
|
| 92 |
+
display: flex;
|
| 93 |
+
justify-content: center; /* 在容器中居中分布子元素 */
|
| 94 |
+
list-style-type: none; /* 去掉ul默认的列表样式 */
|
| 95 |
+
padding: 0; /* 去掉ul默认的内边距 */
|
| 96 |
+
margin: 0; /* 去掉ul默认的外边距 */
|
| 97 |
+
}
|
| 98 |
+
.links-container li {
|
| 99 |
+
margin: 0 5px; /* 每个li元素的左右留出一些空间 */
|
| 100 |
+
padding: 10px 15px; /* 添加内边距 */
|
| 101 |
+
border: 1px solid #ccc; /* 添加边框 */
|
| 102 |
+
border-radius: 5px; /* 添加圆角 */
|
| 103 |
+
background-color: #f9f9f9; /* 背景颜色 */
|
| 104 |
+
transition: background-color 0.3s; /* 背景颜色变化的过渡效果 */
|
| 105 |
+
display: flex; /* 使用flex布局 */
|
| 106 |
+
align-items: center; /* 垂直居中对齐 */
|
| 107 |
+
height: 50px; /* 设置固定高度,确保一致 */
|
| 108 |
+
}
|
| 109 |
+
.links-container li:hover {
|
| 110 |
+
background-color: #e0e0e0; /* 悬停时的背景颜色 */
|
| 111 |
+
}
|
| 112 |
+
.links-container a {
|
| 113 |
+
text-decoration: none !important; /* 去掉链接的下划线 */
|
| 114 |
+
color: #333; /* 链接颜色 */
|
| 115 |
+
font-family: Arial, sans-serif; /* 字体 */
|
| 116 |
+
font-size: 14px; /* 字体大小 */
|
| 117 |
+
display: flex; /* 使用flex布局 */
|
| 118 |
+
align-items: center; /* 垂直居中对齐 */
|
| 119 |
+
height: 100%; /* 确保链接高度与父元素一致 */
|
| 120 |
+
}
|
| 121 |
+
.material-icons {
|
| 122 |
+
font-size: 20px; /* 图标大小 */
|
| 123 |
+
margin-right: 8px; /* 图标和文字间的间距 */
|
| 124 |
+
text-decoration: none; /* 确保图标没有下划线 */
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
/* 深色模式样式 */
|
| 128 |
+
@media (prefers-color-scheme: dark) {
|
| 129 |
+
.links-container li {
|
| 130 |
+
background-color: #333; /* 深色模式下的背景颜色 */
|
| 131 |
+
border-color: #555; /* 深色模式下的边框颜色 */
|
| 132 |
+
}
|
| 133 |
+
.links-container li:hover {
|
| 134 |
+
background-color: #555; /* 深色模式下悬停时的背景颜色 */
|
| 135 |
+
}
|
| 136 |
+
.links-container a {
|
| 137 |
+
color: #f9f9f9; /* 深色模式下的文字颜色 */
|
| 138 |
+
}
|
| 139 |
+
}
|
| 140 |
+
</style>
|
| 141 |
+
</head>
|
| 142 |
+
<body>
|
| 143 |
+
<ul class="links-container">
|
| 144 |
+
<li><a href="/"><span class="material-icons">home</span> 主页</a></li>
|
| 145 |
+
<li><a href="/upload_data"><span class="material-icons">cloud_upload</span> 上传数据</a></li>
|
| 146 |
+
<li><a href="/create_knowledge_base"><span class="material-icons">library_add</span> 创建知识库</a></li>
|
| 147 |
+
<li><a href="/chat"><span class="material-icons">question_answer</span> RAG问答</a></li>
|
| 148 |
+
</ul>
|
| 149 |
+
</body>
|
| 150 |
+
</html>"""
|
main.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from fastapi.responses import HTMLResponse
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import os
|
| 5 |
+
from html_string import main_html,plain_html
|
| 6 |
+
from upload_file import *
|
| 7 |
+
from create_kb import *
|
| 8 |
+
from chat import get_model_response
|
| 9 |
+
def user(user_message, history):
    """Append the submitted text to the chat history and clear the input text.

    Args:
        user_message: Dict with ``'text'`` and ``'files'`` entries.
        history: Existing list of ``[user_text, assistant_text]`` pairs.

    Returns:
        A tuple of the input-box value (text emptied, files kept) and a new
        history list ending with ``[text, None]``.
    """
    print(user_message)
    cleared_input = {'text': '', 'files': user_message['files']}
    pending_turn = [user_message['text'], None]
    return cleared_input, history + [pending_turn]
|
| 12 |
+
|
| 13 |
+
#####################################
|
| 14 |
+
###### gradio界面 #######
|
| 15 |
+
#####################################
|
| 16 |
+
|
| 17 |
+
def get_chat_block():
    """Build the Gradio Blocks page for RAG chat and return it.

    The page holds the chatbot and multimodal input on the left and the
    knowledge-base, model and retrieval settings on the right.
    """
    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from a listing that had lost its indentation — confirm
    # against the rendered page.
    # Gradio's root element carries the class "gradio-container" (hyphen); the
    # previous underscore selector matched nothing.
    with gr.Blocks(theme=gr.themes.Base(),css=".gradio-container { background-color: #f0f0f0; }") as chat:
        gr.HTML(plain_html)
        with gr.Row():
            with gr.Column(scale=10):
                chatbot = gr.Chatbot(label="Chatbot",height=750,avatar_images=("images/user.jpeg","images/tongyi.png"))
                with gr.Row():
                    input_message = gr.MultimodalTextbox(label="请输入",file_types=[".xlsx",".csv",".docx",".pdf",".txt"],scale=7)
                    # Pass the components to clear as a single list.
                    gr.ClearButton([chatbot,input_message],scale=1)
            # Model and knowledge-base parameters.
            with gr.Column(scale=5):
                knowledge_base =gr.Dropdown(choices=os.listdir(DB_PATH),label="加载知识库",interactive=True,scale=2)
                with gr.Accordion(label="召回文本段",open=False):
                    chunk_text = gr.Textbox(label="召回文本段",interactive=False,scale=5,lines=10)
                with gr.Accordion(label="模型设置",open=True):
                    model =gr.Dropdown(choices=['qwen-max','qwen-plus','qwen-turbo'],label="选择模型",interactive=True,value="qwen-max",scale=2)
                    temperature = gr.Slider(maximum=2,minimum=0,interactive=True,label="温度参数",step=0.01,value=0.85,scale=2)
                    max_tokens = gr.Slider(maximum=2000,minimum=0,interactive=True,label="最大回复长度",step=50,value=1024,scale=2)
                    history_round = gr.Slider(maximum=30,minimum=1,interactive=True,label="携带上下文轮数",step=1,value=3,scale=2)
                with gr.Accordion(label="RAG参数设置",open=True):
                    chunk_cnt = gr.Slider(maximum=20,minimum=1,interactive=True,label="选择召回片段数",step=1,value=5,scale=2)
                    similarity_threshold = gr.Slider(maximum=1,minimum=0,interactive=True,label="相似度阈值",step=0.01,value=0.2,scale=2)
        # First record the user turn, then stream the model response into it.
        input_message.submit(fn=user,inputs=[input_message,chatbot],outputs=[input_message,chatbot],queue=False).then(
            fn=get_model_response,inputs=[input_message,chatbot,model,temperature,max_tokens,history_round,knowledge_base,similarity_threshold,chunk_cnt],outputs=[chatbot,chunk_text]
        )
        # On page load: refresh the knowledge-base list and drop temporary data.
        chat.load(update_knowledge_base,[],knowledge_base)
        chat.load(clear_tmp)
    return chat
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def get_upload_block():
    """Build the Gradio Blocks page for uploading and managing source files and return it.

    One tab handles unstructured documents (grouped into categories) and one
    handles structured spreadsheets (grouped into data tables).
    """
    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from a listing that had lost its indentation — confirm
    # against the rendered page.
    with gr.Blocks(theme=gr.themes.Base()) as upload:
        gr.HTML(plain_html)
        with gr.Tab("非结构化数据"):
            with gr.Accordion(label="新建类目",open=True):
                with gr.Column(scale=2):
                    # Extensions need the leading dot, as in the chat input's
                    # file_types list.
                    unstructured_file = gr.Files(file_types=[".pdf",".docx",".txt"])
                    with gr.Row():
                        new_label = gr.Textbox(label="类目名称",placeholder="请输入类目名称",scale=5)
                        create_label_btn = gr.Button("新建类目",variant="primary",scale=1)
            with gr.Accordion(label="管理类目",open=False):
                with gr.Row():
                    data_label =gr.Dropdown(choices=os.listdir(UNSTRUCTURED_FILE_PATH),label="管理类目",interactive=True,scale=8,multiselect=True)
                    delete_label_btn = gr.Button("删除类目",variant="stop",scale=1)
        with gr.Tab("结构化数据"):
            with gr.Accordion(label="新建数据表",open=True):
                with gr.Column(scale=2):
                    structured_file = gr.Files(file_types=[".xlsx",".csv"])
                    with gr.Row():
                        new_label_1 = gr.Textbox(label="数据表名称",placeholder="请输入数据表名称",scale=5)
                        create_label_btn_1 = gr.Button("新建数据表",variant="primary",scale=1)
            with gr.Accordion(label="管理数据表",open=False):
                with gr.Row():
                    data_label_1 =gr.Dropdown(choices=os.listdir(STRUCTURED_FILE_PATH),label="管理数据表",interactive=True,scale=8,multiselect=True)
                    delete_data_table_btn = gr.Button("删除数据表",variant="stop",scale=1)
        # Every create/delete action is followed by a refresh of its dropdown.
        delete_label_btn.click(delete_label,inputs=[data_label]).then(fn=update_label,outputs=[data_label])
        create_label_btn.click(fn=upload_unstructured_file,inputs=[unstructured_file,new_label]).then(fn=update_label,outputs=[data_label])
        delete_data_table_btn.click(delete_data_table,inputs=[data_label_1]).then(fn=update_datatable,outputs=[data_label_1])
        create_label_btn_1.click(fn=upload_structured_file,inputs=[structured_file,new_label_1]).then(fn=update_datatable,outputs=[data_label_1])
        # Refresh both dropdowns when the page loads.
        upload.load(update_label,[],data_label)
        upload.load(update_datatable,[],data_label_1)
    return upload
|
| 80 |
+
|
| 81 |
+
def get_knowledge_base_block():
    """Build the Gradio Blocks page for creating and deleting knowledge bases and return it."""
    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from a listing that had lost its indentation — confirm
    # against the rendered page.
    with gr.Blocks(theme=gr.themes.Base()) as knowledge:
        gr.HTML(plain_html)
        # Knowledge base built from unstructured data.
        with gr.Tab("非结构化数据"):
            with gr.Row():
                label_picker = gr.Dropdown(
                    choices=os.listdir(UNSTRUCTURED_FILE_PATH),
                    label="选择类目",
                    interactive=True,
                    scale=2,
                    multiselect=True,
                )
                unstructured_kb_name = gr.Textbox(label="知识库名称",placeholder="请输入知识库名称",scale=2)
                create_unstructured_btn = gr.Button("确认创建知识库",variant="primary",scale=1)
        # Knowledge base built from structured data.
        with gr.Tab("结构化数据"):
            with gr.Row():
                table_picker = gr.Dropdown(
                    choices=os.listdir(STRUCTURED_FILE_PATH),
                    label="选择数据表",
                    interactive=True,
                    scale=2,
                    multiselect=True,
                )
                structured_kb_name = gr.Textbox(label="知识库名称",placeholder="请输入知识库名称",scale=2)
                create_structured_btn = gr.Button("确认创建知识库",variant="primary",scale=1)
        with gr.Row():
            kb_picker = gr.Dropdown(
                choices=os.listdir(DB_PATH),
                label="管理知识库",
                interactive=True,
                scale=4,
            )
            delete_db_btn = gr.Button("删除知识库",variant="stop",scale=1)
        # Each create/delete action is followed by a refresh of the knowledge-base list.
        create_unstructured_btn.click(
            fn=create_unstructured_db,
            inputs=[unstructured_kb_name,label_picker],
        ).then(update_knowledge_base,outputs=[kb_picker])
        delete_db_btn.click(delete_db,inputs=[kb_picker]).then(update_knowledge_base,outputs=[kb_picker])
        create_structured_btn.click(
            fn=create_structured_db,
            inputs=[structured_kb_name,table_picker],
        ).then(update_knowledge_base,outputs=[kb_picker])
        # Refresh all three dropdowns when the page loads.
        knowledge.load(update_knowledge_base,[],kb_picker)
        knowledge.load(update_label,[],label_picker)
        knowledge.load(update_datatable,[],table_picker)
    return knowledge
|
| 106 |
+
|
| 107 |
+
app = FastAPI()
|
| 108 |
+
@app.get("/", response_class=HTMLResponse)
def read_main():
    """Serve the landing page HTML at the site root."""
    return HTMLResponse(content=main_html)
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
app = gr.mount_gradio_app(app, get_chat_block(), path="/chat")
|
| 115 |
+
app = gr.mount_gradio_app(app, get_upload_block(), path="/upload_data")
|
| 116 |
+
app = gr.mount_gradio_app(app, get_knowledge_base_block(), path="/create_knowledge_base")
|
requirements.txt
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==4.32.0
|
| 2 |
+
faiss-cpu==1.8.0.post1
|
| 3 |
+
dashscope==1.20.4
|
| 4 |
+
openai==1.55.3
|
| 5 |
+
httpx==0.27.0
|
| 6 |
+
llama-index-vector-stores-faiss==0.1.2
|
| 7 |
+
llama-index-embeddings-dashscope==0.1.4
|
| 8 |
+
llama-index-readers-file==0.1.33
|
| 9 |
+
matplotlib==3.9.3
|
| 10 |
+
docx2txt==0.8
|
| 11 |
+
openpyxl==3.1.5
|
| 12 |
+
llama-index-core==0.10.67
|
| 13 |
+
uvicorn==0.30.6
|
| 14 |
+
fastapi==0.112.0
|
| 15 |
+
llama-index-postprocessor-dashscope-rerank-custom==0.1.0
|
| 16 |
+
simplejson==3.19.3
|
| 17 |
+
# modelscope==1.18.0
|
| 18 |
+
# langchain_community==0.2.16
|
| 19 |
+
# transformers==4.44.2
|
| 20 |
+
# llama_index.embeddings.huggingface==0.2.3
|
| 21 |
+
# llama-index-embeddings-langchain==0.1.2
|
| 22 |
+
# datasets==2.21.0
|
| 23 |
+
# oss2==2.19.0
|
| 24 |
+
# sortedcontainers==2.4.0
|
| 25 |
+
# addict==2.4.0
|
upload_file.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#####################################
|
| 2 |
+
####### 上传文件 #######
|
| 3 |
+
#####################################
|
| 4 |
+
import gradio as gr
|
| 5 |
+
import os
|
| 6 |
+
import shutil
|
| 7 |
+
import pandas as pd
|
| 8 |
+
STRUCTURED_FILE_PATH = "File/Structured"
|
| 9 |
+
UNSTRUCTURED_FILE_PATH = "File/Unstructured"
|
| 10 |
+
# 刷新非结构化类目
|
| 11 |
+
def refresh_label():
    """Return the names of the entries currently found under ``UNSTRUCTURED_FILE_PATH``."""
    label_names = os.listdir(UNSTRUCTURED_FILE_PATH)
    return label_names
|
| 13 |
+
|
| 14 |
+
# 刷新结构化数据表
|
| 15 |
+
def refresh_data_table():
    """Return the names of the entries currently found under ``STRUCTURED_FILE_PATH``."""
    table_names = os.listdir(STRUCTURED_FILE_PATH)
    return table_names
|
| 17 |
+
|
| 18 |
+
# 上传非结构化数据
|
| 19 |
+
def upload_unstructured_file(files,label_name):
    """Move uploaded documents into a new category folder.

    Args:
        files: Uploaded file objects; each one's ``.name`` is used as the
            source path.
        label_name: Name of the category folder to create under
            ``UNSTRUCTURED_FILE_PATH``.

    Validation and outcome messages are reported through ``gr.Info``;
    nothing is returned.
    """
    # Truthiness also rejects an empty list of files.
    if not files:
        gr.Info("请上传文件")
        return
    if not label_name:
        gr.Info("请输入类目名称")
        return
    # Refuse to reuse an existing category name.
    if label_name in os.listdir(UNSTRUCTURED_FILE_PATH):
        gr.Info(f"{label_name}类目已存在")
        return

    label_dir = os.path.join(UNSTRUCTURED_FILE_PATH,label_name)
    try:
        os.makedirs(label_dir, exist_ok=True)
        for file in files:
            print(file)
            file_path = file.name
            file_name = os.path.basename(file_path)
            shutil.move(file_path,os.path.join(label_dir,file_name))
        gr.Info(f"文件已上传至{label_name}类目中,请前往创建知识库")
    except Exception as exc:
        # Log the real cause instead of swallowing it; the user-facing
        # message is unchanged.
        print(f"异常信息:{exc}")
        gr.Info(f"请勿重复上传")
|
| 40 |
+
|
| 41 |
+
# 上传结构化数据
|
| 42 |
+
def upload_structured_file(files,label_name):
    """Convert uploaded spreadsheets into line-per-row text files in a new data-table folder.

    Args:
        files: Uploaded file objects; each one's ``.name`` is used as the
            source path. ``.xlsx`` and ``.csv`` files are supported.
        label_name: Name of the data-table folder to create under
            ``STRUCTURED_FILE_PATH``.

    Each row is written as ``【col:value,col:value,...】`` on its own line of
    a ``.txt`` file named after the upload, and the moved spreadsheet is then
    deleted. Validation and outcome messages are reported through
    ``gr.Info``; nothing is returned.
    """
    # Truthiness also rejects an empty list of files.
    if not files:
        gr.Info("请上传文件")
        return
    if not label_name:
        gr.Info("请输入数据表名称")
        return
    # Refuse to reuse an existing data-table name.
    if label_name in os.listdir(STRUCTURED_FILE_PATH):
        gr.Info(f"{label_name}数据表已存在")
        return

    table_dir = os.path.join(STRUCTURED_FILE_PATH,label_name)
    try:
        os.makedirs(table_dir, exist_ok=True)
        for file in files:
            file_path = file.name
            file_name = os.path.basename(file_path)
            destination_file_path = os.path.join(table_dir,file_name)
            shutil.move(file_path,destination_file_path)
            # Compare the extension case-insensitively and fail explicitly on
            # anything else, rather than hitting an unbound ``df`` below.
            extension = os.path.splitext(destination_file_path)[1].lower()
            if extension == ".xlsx":
                df = pd.read_excel(destination_file_path)
            elif extension == ".csv":
                df = pd.read_csv(destination_file_path)
            else:
                raise ValueError(f"unsupported file type: {extension}")
            txt_file_name = os.path.splitext(file_name)[0]+'.txt'
            columns = df.columns
            row_lines = []
            for _, row in df.iterrows():
                infos = ",".join(f"{col}:{row[col]}" for col in columns)
                row_lines.append(f"【{infos}】")
            # Write UTF-8 explicitly so the non-ASCII brackets and cell values
            # do not depend on the platform's default encoding.
            with open(os.path.join(table_dir,txt_file_name),"w",encoding="utf-8") as out:
                out.write("\n".join(row_lines))
            os.remove(destination_file_path)
        gr.Info(f"文件已上传至{label_name}数据表中,请前往创建知识库")
    except Exception as exc:
        # Log the real cause instead of swallowing it; the user-facing
        # message is unchanged.
        print(f"异常信息:{exc}")
        gr.Info(f"请勿重复上传")
|
| 81 |
+
|
| 82 |
+
# 实时更新结构化数据表
|
| 83 |
+
def update_datatable():
    """Return a Gradio update whose choices are the current entries of ``STRUCTURED_FILE_PATH``."""
    current_tables = os.listdir(STRUCTURED_FILE_PATH)
    return gr.update(choices=current_tables)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# 实时更新非结构化类目
|
| 88 |
+
def update_label():
    """Return a Gradio update whose choices are the current entries of ``UNSTRUCTURED_FILE_PATH``."""
    current_labels = os.listdir(UNSTRUCTURED_FILE_PATH)
    return gr.update(choices=current_labels)
|
| 90 |
+
|
| 91 |
+
# 删除类目
|
| 92 |
+
def delete_label(label_name):
    """Delete each selected category folder under ``UNSTRUCTURED_FILE_PATH``.

    Does nothing when ``label_name`` is ``None``; names whose folder does not
    exist are skipped.
    """
    # NOTE(review): the listing had lost its indentation; the notification is
    # assumed to sit inside the existence check — confirm.
    if label_name is None:
        return
    for label in label_name:
        folder_path = os.path.join(UNSTRUCTURED_FILE_PATH,label)
        if not os.path.exists(folder_path):
            continue
        shutil.rmtree(folder_path)
        gr.Info(f"{label}类目已删除")
|
| 99 |
+
|
| 100 |
+
# 删除数据表
|
| 101 |
+
def delete_data_table(table_name):
    """Delete each selected data-table folder under ``STRUCTURED_FILE_PATH``.

    Does nothing when ``table_name`` is ``None``; names whose folder does not
    exist are skipped.
    """
    # NOTE(review): the listing had lost its indentation; the notification is
    # assumed to sit inside the existence check — confirm.
    if table_name is None:
        return
    for table in table_name:
        folder_path = os.path.join(STRUCTURED_FILE_PATH,table)
        if not os.path.exists(folder_path):
            continue
        shutil.rmtree(folder_path)
        gr.Info(f"{table}数据表已删除")
|