"""Streamlit chat page that answers questions with retrieval-augmented generation.

The script builds (once) a FAISS index over the paragraphs of a Word
document, retrieves the chunks closest to the user's question, and streams a
chat completion that is grounded on those chunks. Input comes either from the
chat box or from a form that can be filled by ``voice_toolkit``.
"""

import base64
import json
import os
import traceback

import faiss
import numpy as np
import requests
import streamlit as st
from docx import Document
from openai import OpenAI
from PIL import Image
from sentence_transformers import SentenceTransformer
from voice_toolkit import voice_toolkit

icon_path = "images/院徽.ico"
ICON = Image.open(icon_path)
with open(icon_path, "rb") as img_file:
    ICON_base64 = base64.b64encode(img_file.read()).decode()

st.set_page_config(
    page_title="ikun-对话",
    layout="centered",
    page_icon=ICON,
    menu_items={
        'About': '广西警察学院'
    }
)

with st.sidebar:
    # NOTE(review): ICON_base64 is computed above but never interpolated here;
    # this markup presumably lost an <img> tag at some point -- confirm.
    icon_text = f"""
icon 课程助手--ikun
"""
    st.markdown(
        icon_text,
        unsafe_allow_html=True,
    )

st.sidebar.title('输入')
option2 = st.sidebar.selectbox('方式', ['键盘', '语音'])

# Generation-parameter sliders.
st.sidebar.title('参数')
with st.sidebar.expander("内容生成"):
    # Seed the defaults only on the first run of a session.
    if "max_new_tokens" not in st.session_state:
        st.session_state["max_new_tokens"] = 800
        st.session_state["top_p"] = 0.9
        st.session_state["temperature"] = 0.2
        st.session_state["repetition_penalty"] = 1.1
    parameter_1 = st.slider('max_new_tokens', min_value=50, max_value=1000,
                            value=st.session_state.max_new_tokens, step=50)
    parameter_2 = st.slider('top_p', min_value=0.5, max_value=0.95,
                            value=st.session_state.top_p, step=0.01)
    parameter_3 = st.slider('temperature', min_value=0.1, max_value=3.0,
                            value=st.session_state.temperature, step=0.1)
    parameter_4 = st.slider('repetition_penalty', min_value=0.5, max_value=5.0,
                            value=st.session_state.repetition_penalty, step=0.1)
    st.session_state["max_new_tokens"] = parameter_1
    st.session_state["top_p"] = parameter_2
    st.session_state["temperature"] = parameter_3
    st.session_state["repetition_penalty"] = parameter_4

st.title("🪶 智课灵犀")
st.caption("🌈 由广西警察学院开发(声明:因校园网络波动,可能暂时无法连接到服务器,请稍后再试)")

# Session state initialisation.
if "chat_type" not in st.session_state or st.session_state["chat_type"] != "chat":
    st.session_state["chat_type"] = "chat"
if "is_recording" not in st.session_state:
    st.session_state.is_recording = False
if "user_input_area" not in st.session_state:
    st.session_state.user_input_area = ""
if "user_voice_value" not in st.session_state:
    st.session_state.user_voice_value = ""
if "voice_flag" not in st.session_state:
    st.session_state["voice_flag"] = ""
if "messages" not in st.session_state:
    st.session_state["messages"] = [
        {"role": "assistant", "message": "你好,我是广西警察学院课程知识答疑小助手“ikun”。"}
    ]

# Replay the conversation so far.
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["message"])

# RAG configuration.
WORD_DOC_PATH = "知识库.docx"
VECTOR_INDEX_PATH = "faiss_index.index"
TEXT_DATA_PATH = "text_data.npy"
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

client = OpenAI(
    base_url='https://api-inference.modelscope.cn/v1/',
    # NOTE(security): the literal below is a credential committed to source.
    # Set MODELSCOPE_API_KEY in the environment, rotate this key, and then
    # delete the fallback; it is kept only so existing deployments keep working.
    api_key=os.environ.get('MODELSCOPE_API_KEY', '7ed44f86-e2c6-4b85-9c4a-26eacfc2e5ee'),
)


@st.cache_resource
def _load_embedder():
    """Load the sentence-embedding model once per server process.

    Streamlit re-executes this script on every interaction; caching avoids
    re-instantiating the model on each rerun.
    """
    return SentenceTransformer(EMBEDDING_MODEL)


embedder = _load_embedder()


def create_vector_store():
    """Build the FAISS index and the chunk-text file if either one is missing.

    Paragraphs of ``WORD_DOC_PATH`` are grouped into chunks: a paragraph that
    starts with "第" and contains "条" opens a new chunk, and every other
    non-empty paragraph is appended to the current one.

    Raises:
        ValueError: if the document yields no non-empty paragraphs, since an
            index cannot be built from zero vectors.
    """
    # Both files are needed by search_knowledge(); rebuild if either is gone so
    # an interrupted earlier build cannot leave a half-usable store behind.
    if os.path.exists(VECTOR_INDEX_PATH) and os.path.exists(TEXT_DATA_PATH):
        return

    doc = Document(WORD_DOC_PATH)
    chunks = []
    current_chunk = []
    for para in doc.paragraphs:
        text = para.text.strip()
        if not text:
            continue
        if text.startswith("第") and "条" in text and current_chunk:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
        current_chunk.append(text)
    if current_chunk:
        chunks.append(" ".join(current_chunk))

    if not chunks:
        raise ValueError(f"No text found in {WORD_DOC_PATH!r}; cannot build the vector index.")

    embeddings = embedder.encode(chunks, convert_to_tensor=False)
    embeddings = np.array(embeddings).astype('float32')

    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)

    faiss.write_index(index, VECTOR_INDEX_PATH)
    np.save(TEXT_DATA_PATH, np.array(chunks))


create_vector_store()


def search_knowledge(query, top_k=6):
    """Return the stored chunks nearest to ``query``, joined by newlines.

    Args:
        query: The question text to embed and search with.
        top_k: Maximum number of chunks to return.

    Returns:
        A single string with up to ``top_k`` chunks, nearest first.
    """
    index = faiss.read_index(VECTOR_INDEX_PATH)
    # NOTE(review): allow_pickle=True will execute pickled payloads if this
    # file is ever replaced by an untrusted one; the array written by
    # create_vector_store() is a plain string array -- consider disabling it.
    text_data = np.load(TEXT_DATA_PATH, allow_pickle=True)

    query_embedding = embedder.encode([query], convert_to_tensor=False)
    query_embedding = np.array(query_embedding).astype('float32')

    distances, indices = index.search(query_embedding, top_k)
    # FAISS pads the result with -1 when fewer than top_k vectors exist; a
    # negative index would otherwise silently pick chunks from the end.
    return "\n".join(text_data[i] for i in indices[0] if i >= 0)


def generate_response(prompt):
    """Retrieve context for ``prompt`` and stream the model's answer to the page.

    The retrieved chunks are shown first, followed by the model's reasoning
    stream (if the delta carries ``reasoning_content``) and then the answer.

    Args:
        prompt: The user's question.

    Returns:
        The full text that was rendered (context, reasoning and answer).
    """
    # Retrieve the related knowledge.
    context = search_knowledge(prompt)

    # Build the chat messages.
    messages = [
        {"role": "system", "content": f"基于以下知识回答问题,如果不知道就说不知道:\n{context}"},
        {"role": "user", "content": prompt}
    ]

    response_container = st.empty()

    # Show the retrieved knowledge before the model output arrives.
    knowledge_content = f"🔍 知识库相关内容:\n{context}\n\n💡 深度思考:\n"
    response_container.markdown(knowledge_content)

    stream = client.chat.completions.create(
        model='deepseek-ai/DeepSeek-R1',
        messages=messages,
        stream=True,
        max_tokens=st.session_state.max_new_tokens,
        temperature=st.session_state.temperature,
        top_p=st.session_state.top_p
    )

    # Consume the stream, rendering a cursor while text is still arriving.
    thinking_done = False
    for chunk in stream:
        if not chunk.choices:
            # Some stream chunks carry no choices; there is nothing to render.
            continue
        delta = chunk.choices[0].delta
        content = delta.content or ""
        reasoning = getattr(delta, "reasoning_content", "") or ""

        if reasoning:
            knowledge_content += reasoning
            response_container.markdown(knowledge_content + "▌")

        if content and not reasoning:
            if not thinking_done:
                # First answer token: close the reasoning section.
                knowledge_content += "\n\n✅ 最终答案:\n"
                thinking_done = True
            knowledge_content += content
            response_container.markdown(knowledge_content + "▌")

    response_container.markdown(knowledge_content)
    return knowledge_content


def send_message():
    """Answer the most recent user message and record the reply in the history.

    Returns:
        The assistant's reply text, or an empty string when the history holds
        no user message.
    """
    history = st.session_state.messages
    # Look the question up by role rather than by position so the call does
    # not depend on what the caller appended last.
    question = next(
        (item["message"] for item in reversed(history) if item["role"] == "user"),
        None,
    )
    if question is None:
        return ""

    full_response = generate_response(question)
    history.append({"role": "assistant", "message": full_response})
    return full_response


if option2 == "键盘":
    if prompt := st.chat_input(placeholder="输入..."):
        st.session_state.messages.append({"role": "user", "message": prompt})
        st.chat_message("user").write(prompt)
        # send_message() streams the reply and appends it to the history.
        send_message()
        st.rerun()

elif option2 == "语音":
    # Text form; its initial value is the latest speech-recognition result.
    with st.form("input_form", clear_on_submit=True):
        prompt = st.text_area(
            "**输入:**",
            key="user_input_area",
            value=st.session_state.user_voice_value,
            help="在此输入文本或通过语音输入。"
        )
        submitted = st.form_submit_button("确认提交")

    # Handle a submission; blank input is ignored instead of being sent.
    if submitted and prompt.strip():
        st.session_state.messages.append({"role": "user", "message": prompt})
        st.chat_message("user").write(prompt)
        send_message()
        st.session_state.user_voice_value = ""
        st.rerun()

    # Voice input. The component keeps returning its last result.
    voice_result = voice_toolkit()
    if voice_result:
        recognised = voice_result["voice_result"]
        if recognised["flag"] == "interim" or st.session_state["voice_flag"] == "interim":
            st.session_state["voice_flag"] = "interim"
            st.session_state["user_voice_value"] = recognised["value"]
            if recognised["flag"] == "final":
                st.session_state["voice_flag"] = "final"
                st.rerun()