|
|
import streamlit as st |
|
|
from PIL import Image |
|
|
import base64 |
|
|
import requests |
|
|
import json |
|
|
from voice_toolkit import voice_toolkit |
|
|
import traceback |
|
|
import streamlit as st |
|
|
from PIL import Image |
|
|
import base64 |
|
|
import numpy as np |
|
|
import faiss |
|
|
from sentence_transformers import SentenceTransformer |
|
|
from docx import Document |
|
|
import os |
|
|
from openai import OpenAI |
|
|
|
|
|
|
|
|
# Page icon: loaded once as a PIL image (for st.set_page_config) and once
# as base64 (for inline HTML rendering in the sidebar header).
icon_path = "images/院徽.ico"

ICON = Image.open(icon_path)
with open(icon_path, "rb") as icon_file:
    ICON_base64 = base64.b64encode(icon_file.read()).decode()

st.set_page_config(
    page_title="ikun-对话",
    layout="centered",
    page_icon=ICON,
    menu_items={'About': '广西警察学院'},
)
|
|
|
|
|
with st.sidebar:
    # Render the school crest and the app name, centered, as raw HTML.
    icon_text = f"""
<div class="icon-text-container" style="text-align: center;">
    <img src='data:image/png;base64,{ICON_base64}' alt='icon' style='width: 70px; height: 70px; margin: 0 auto; display: block;'>
    <span style='font-size: 24px;'>课程助手--ikun</span>
</div>
"""
    st.markdown(icon_text, unsafe_allow_html=True)

# Input-mode selector: typed text vs. speech recognition.
st.sidebar.title('输入')
option2 = st.sidebar.selectbox('方式', ['键盘', '语音'])
|
|
|
|
|
|
|
|
st.sidebar.title('参数')
with st.sidebar.expander("内容生成"):
    # Seed the generation defaults once per session; "max_new_tokens"
    # doubles as the "already initialised" sentinel for all four keys.
    if "max_new_tokens" not in st.session_state:
        st.session_state["max_new_tokens"] = 800
        st.session_state["top_p"] = 0.9
        st.session_state["temperature"] = 0.2
        st.session_state["repetition_penalty"] = 1.1

    new_tokens = st.slider('max_new_tokens', min_value=50, max_value=1000,
                           value=st.session_state.max_new_tokens, step=50)
    top_p = st.slider('top_p', min_value=0.5, max_value=0.95,
                      value=st.session_state.top_p, step=0.01)
    temperature = st.slider('temperature', min_value=0.1, max_value=3.0,
                            value=st.session_state.temperature, step=0.1)
    rep_penalty = st.slider('repetition_penalty', min_value=0.5, max_value=5.0,
                            value=st.session_state.repetition_penalty, step=0.1)

# Persist the current slider positions so they survive reruns.
st.session_state["max_new_tokens"] = new_tokens
st.session_state["top_p"] = top_p
st.session_state["temperature"] = temperature
st.session_state["repetition_penalty"] = rep_penalty
|
|
|
|
|
st.title("🪶 智课灵犀")
st.caption("🌈 由广西警察学院开发(声明:因校园网络波动,可能暂时无法连接到服务器,请稍后再试)")

# Force this page's chat type; another page may have set a different one.
if "chat_type" not in st.session_state or st.session_state["chat_type"] != "chat":
    st.session_state["chat_type"] = "chat"

# One-time session-state defaults for the input widgets and voice capture.
for _key, _default in (
    ("is_recording", False),
    ("user_input_area", ""),
    ("user_voice_value", ""),
    ("voice_flag", ""),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Seed the conversation with a greeting from the assistant.
if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant", "message": "你好,我是广西警察学院课程知识答疑小助手“ikun”。"}]

# Replay the whole transcript on every rerun.
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["message"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# RAG assets: the source Word document and the FAISS artefacts cached on
# disk after the first run.
WORD_DOC_PATH = "知识库.docx"
VECTOR_INDEX_PATH = "faiss_index.index"
TEXT_DATA_PATH = "text_data.npy"
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# SECURITY: the API key was hard-coded here. Prefer the environment
# variable; the literal is kept only as a backward-compatible fallback and
# should be rotated and removed from source control.
client = OpenAI(
    base_url='https://api-inference.modelscope.cn/v1/',
    api_key=os.environ.get('MODELSCOPE_API_KEY',
                           '7ed44f86-e2c6-4b85-9c4a-26eacfc2e5ee'),
)

# Sentence embedder used both to index the knowledge base and to embed queries.
embedder = SentenceTransformer(EMBEDDING_MODEL)
|
|
|
|
|
|
|
|
|
|
|
def create_vector_store():
    """Build and persist the FAISS index from the knowledge-base .docx.

    Paragraphs are grouped into chunks: a paragraph that looks like a
    statute heading (starts with "第" and contains "条") closes the current
    chunk and opens a new one. Chunk texts go to TEXT_DATA_PATH and their
    embeddings to VECTOR_INDEX_PATH. No-op if the index already exists.
    """
    if os.path.exists(VECTOR_INDEX_PATH):
        return

    doc = Document(WORD_DOC_PATH)
    chunks = []
    current_chunk = []

    for para in doc.paragraphs:
        text = para.text.strip()
        if text:
            # "第X条" heading: flush the previous chunk, start a new one.
            if text.startswith("第") and "条" in text:
                if current_chunk:
                    chunks.append(" ".join(current_chunk))
                current_chunk = []
            current_chunk.append(text)

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    # BUGFIX: an empty document yields no chunks; encode() would then
    # return a 1-D array and embeddings.shape[1] raises IndexError.
    if not chunks:
        return

    embeddings = np.array(
        embedder.encode(chunks, convert_to_tensor=False)
    ).astype('float32')

    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    faiss.write_index(index, VECTOR_INDEX_PATH)
    np.save(TEXT_DATA_PATH, np.array(chunks))
|
|
|
|
|
|
|
|
# Build the vector store at startup (no-op when already cached on disk).
create_vector_store()
|
|
|
|
|
|
|
|
def search_knowledge(query, top_k=6):
    """Return the top_k knowledge chunks most similar to *query*, joined
    with newlines.

    Loads the FAISS index and the chunk texts from disk on every call,
    embeds the query, and performs an L2 nearest-neighbour search.
    """
    index = faiss.read_index(VECTOR_INDEX_PATH)
    text_data = np.load(TEXT_DATA_PATH, allow_pickle=True)

    query_embedding = np.array(
        embedder.encode([query], convert_to_tensor=False)
    ).astype('float32')

    distances, indices = index.search(query_embedding, top_k)
    # BUGFIX: FAISS pads the result with -1 when the index holds fewer
    # than top_k vectors; without the filter, text_data[-1] (the last
    # chunk) would be silently returned for each missing hit.
    return "\n".join(text_data[i] for i in indices[0] if i != -1)
|
|
|
|
|
|
|
|
|
|
|
def generate_response(prompt):
    """Stream a RAG answer for *prompt* into the page and return it.

    Retrieves knowledge-base context, renders it immediately, then streams
    the DeepSeek-R1 reasoning ("深度思考") followed by the final answer
    ("最终答案") into a single placeholder. Returns the full rendered text
    (context + reasoning + answer), which is what gets stored as the
    assistant message.
    """
    context = search_knowledge(prompt)

    messages = [
        {"role": "system", "content": f"基于以下知识回答问题,如果不知道就说不知道:\n{context}"},
        {"role": "user", "content": prompt}
    ]

    # Single placeholder that is re-rendered as chunks arrive.
    response_container = st.empty()

    knowledge_content = f"🔍 知识库相关内容:\n{context}\n\n💡 深度思考:\n"
    response_container.markdown(knowledge_content)

    stream = client.chat.completions.create(
        model='deepseek-ai/DeepSeek-R1',
        messages=messages,
        stream=True,
        max_tokens=st.session_state.max_new_tokens,
        temperature=st.session_state.temperature,
        top_p=st.session_state.top_p
    )

    # thinking_done flips once the model stops emitting reasoning_content
    # and starts emitting the answer proper.
    thinking_done = False
    for chunk in stream:
        content = chunk.choices[0].delta.content or ""
        # reasoning_content is a DeepSeek-R1 extension; absent on other models.
        reasoning = getattr(chunk.choices[0].delta, "reasoning_content", "") or ""

        if reasoning:
            knowledge_content += reasoning
            response_container.markdown(knowledge_content + "▌")

        if content and not reasoning:
            if not thinking_done:
                knowledge_content += "\n\n✅ 最终答案:\n"
                thinking_done = True
            knowledge_content += content
            response_container.markdown(knowledge_content + "▌")

    # Final render without the streaming cursor.
    response_container.markdown(knowledge_content)
    return knowledge_content
|
|
|
|
|
|
|
|
|
|
|
def send_message():
    """Generate a reply to the most recent *user* message.

    The callers append the user message and then an empty assistant
    placeholder before invoking this. BUGFIX: the original prompted the
    model with st.session_state.messages[-1]["message"], i.e. the EMPTY
    placeholder, not the user's text; we now take the last user-role
    message. The placeholder (last entry) is overwritten with the streamed
    reply, which is also returned for convenience.
    """
    user_messages = [m["message"] for m in st.session_state.messages
                     if m["role"] == "user"]
    full_response = generate_response(user_messages[-1])
    st.session_state.messages[-1] = {"role": "assistant", "message": full_response}
    return full_response
|
|
|
|
|
|
|
|
|
|
|
if option2 == "键盘":
    # Typed input: echo the user message, reserve an assistant slot in the
    # transcript, generate the reply, then rerun to redraw everything.
    prompt = st.chat_input(placeholder="输入...")
    if prompt:
        st.session_state.messages.append({"role": "user", "message": prompt})
        st.chat_message("user").write(prompt)

        st.session_state.messages.append({"role": "assistant", "message": ""})
        send_message()
        st.rerun()
|
|
elif option2 == "语音": |
|
|
|
|
|
with st.form("input_form", clear_on_submit=True): |
|
|
prompt = st.text_area( |
|
|
"**输入:**", |
|
|
key="user_input_area", |
|
|
value=st.session_state.user_voice_value, |
|
|
help="在此输入文本或通过语音输入。" |
|
|
) |
|
|
submitted = st.form_submit_button("确认提交") |
|
|
|
|
|
|
|
|
if submitted: |
|
|
st.session_state.messages.append({"role": "user", "message": prompt}) |
|
|
st.chat_message("user").write(prompt) |
|
|
answer = send_message() |
|
|
st.session_state.messages.append({"role": "assistant", "message": answer["response_text"]}) |
|
|
st.chat_message("assistant").write(answer["response_text"]) |
|
|
|
|
|
|
|
|
|
|
|
st.session_state.user_voice_value = "" |
|
|
st.rerun() |
|
|
|
|
|
vocie_result = voice_toolkit() |
|
|
|
|
|
if ( |
|
|
vocie_result and vocie_result["voice_result"]["flag"] == "interim" |
|
|
) or st.session_state["voice_flag"] == "interim": |
|
|
st.session_state["voice_flag"] = "interim" |
|
|
st.session_state["user_voice_value"] = vocie_result["voice_result"]["value"] |
|
|
if vocie_result["voice_result"]["flag"] == "final": |
|
|
st.session_state["voice_flag"] = "final" |
|
|
st.rerun() |
|
|
|