File size: 8,660 Bytes
2909463
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
import streamlit as st
from PIL import Image
import base64
import requests
import json
from voice_toolkit import voice_toolkit
import traceback
import streamlit as st
from PIL import Image
import base64
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from docx import Document
import os
from openai import OpenAI


# The institute crest is needed in two forms: base64 text (for embedding in
# HTML) and a PIL image (for the browser-tab icon).
icon_path = "images/院徽.ico"

with open(icon_path, "rb") as img_file:
    ICON_base64 = base64.b64encode(img_file.read()).decode()
ICON = Image.open(icon_path)

# Page-level settings: tab title, centered layout, tab icon and the text shown
# under the "About" menu entry.
st.set_page_config(
    page_icon=ICON,
    page_title="ikun-对话",
    layout="centered",
    menu_items={'About': '广西警察学院'},
)

# Sidebar header: the crest (inlined as a base64 data URI) above the
# assistant's name. Raw HTML is required for the centering/size styles, hence
# unsafe_allow_html.
icon_text = f"""
        <div class="icon-text-container" style="text-align: center;">
            <img src='data:image/png;base64,{ICON_base64}' alt='icon' style='width: 70px; height: 70px; margin: 0 auto; display: block;'>
            <span style='font-size: 24px;'>课程助手--ikun</span>
        </div>
        """
st.sidebar.markdown(icon_text, unsafe_allow_html=True)

# Input-mode selector: keyboard ('键盘') or voice ('语音').
st.sidebar.title('输入')
option2 = st.sidebar.selectbox('方式', ['键盘', '语音'])

# Generation-parameter sliders
st.sidebar.title('参数')
with st.sidebar.expander("内容生成"):
    # Seed all four defaults together the first time the script runs in a
    # session ("max_new_tokens" acts as the marker for "already seeded").
    if "max_new_tokens" not in st.session_state:
        for _name, _default in (
            ("max_new_tokens", 800),
            ("top_p", 0.9),
            ("temperature", 0.2),
            ("repetition_penalty", 1.1),
        ):
            st.session_state[_name] = _default

    # Each slider starts from the stored value and writes the chosen value
    # straight back into the session state.
    st.session_state["max_new_tokens"] = parameter_1 = st.slider(
        'max_new_tokens',
        min_value=50,
        max_value=1000,
        value=st.session_state.max_new_tokens,
        step=50,
    )
    st.session_state["top_p"] = parameter_2 = st.slider(
        'top_p',
        min_value=0.5,
        max_value=0.95,
        value=st.session_state.top_p,
        step=0.01,
    )
    st.session_state["temperature"] = parameter_3 = st.slider(
        'temperature',
        min_value=0.1,
        max_value=3.0,
        value=st.session_state.temperature,
        step=0.1,
    )
    st.session_state["repetition_penalty"] = parameter_4 = st.slider(
        'repetition_penalty',
        min_value=0.5,
        max_value=5.0,
        value=st.session_state.repetition_penalty,
        step=0.1,
    )

st.title("🪶 智课灵犀")
st.caption("🌈 由广西警察学院开发(声明:因校园网络波动,可能暂时无法连接到服务器,请稍后再试)")

# Session state
# This page always runs in "chat" mode, whatever was stored before.
st.session_state["chat_type"] = "chat"

# Every other key is only created when it is missing, so values set on earlier
# runs of the script are kept.
for _key, _initial in (
    ("is_recording", False),
    ("user_input_area", ""),
    ("user_voice_value", ""),
    ("voice_flag", ""),
    ("messages", [{"role": "assistant", "message": "你好,我是广西警察学院课程知识答疑小助手“ikun”。"}]),
):
    if _key not in st.session_state:
        st.session_state[_key] = _initial

# Replay the stored conversation, one chat bubble per message.
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.write(msg["message"])




# RAG configuration and shared clients
WORD_DOC_PATH = "知识库.docx"            # source document for the knowledge base
VECTOR_INDEX_PATH = "faiss_index.index"  # FAISS index written by create_vector_store()
TEXT_DATA_PATH = "text_data.npy"         # chunk texts, row-aligned with the index
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

client = OpenAI(
    base_url='https://api-inference.modelscope.cn/v1/',
    # SECURITY NOTE(review): a credential is committed in source. Prefer the
    # MODELSCOPE_API_KEY environment variable; the literal fallback is kept
    # only so existing deployments keep working and should be rotated and
    # removed.
    api_key=os.environ.get("MODELSCOPE_API_KEY", '7ed44f86-e2c6-4b85-9c4a-26eacfc2e5ee'),
)


@st.cache_resource
def _load_embedder(model_name):
    """Return the sentence-embedding model, constructed once and then reused.

    Streamlit re-executes this script on every interaction; without caching
    the model would be re-instantiated on each rerun.
    """
    return SentenceTransformer(model_name)


embedder = _load_embedder(EMBEDDING_MODEL)


# Build the vector store (index + chunk texts) when it is not on disk yet
def _split_into_chunks(paragraph_texts):
    """Group paragraph texts into chunks, one chunk per article.

    Blank paragraphs are skipped. A paragraph that starts with "第" and
    contains "条" begins a new chunk; the paragraphs collected so far are
    joined with single spaces and emitted first.
    """
    chunks = []
    current_chunk = []
    for raw_text in paragraph_texts:
        text = raw_text.strip()
        if not text:
            continue
        if current_chunk and text.startswith("第") and "条" in text:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
        current_chunk.append(text)

    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks


def create_vector_store():
    """Create the FAISS index and the chunk-text file from the Word document.

    Does nothing when both VECTOR_INDEX_PATH and TEXT_DATA_PATH already exist.
    Both are checked because search_knowledge() reads both files; if either
    one is missing the pair is rebuilt so they stay aligned.

    Raises:
        ValueError: if the document contains no non-empty paragraphs, since
            an index cannot be built from zero chunks.
    """
    if os.path.exists(VECTOR_INDEX_PATH) and os.path.exists(TEXT_DATA_PATH):
        return

    doc = Document(WORD_DOC_PATH)
    chunks = _split_into_chunks(para.text for para in doc.paragraphs)
    if not chunks:
        raise ValueError(f"No text found in {WORD_DOC_PATH!r}; cannot build the vector store")

    embeddings = embedder.encode(chunks, convert_to_tensor=False)
    embeddings = np.array(embeddings).astype('float32')

    # One row per chunk; the second axis is the embedding dimension.
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)

    faiss.write_index(index, VECTOR_INDEX_PATH)
    np.save(TEXT_DATA_PATH, np.array(chunks))


create_vector_store()


def search_knowledge(query, top_k=6):
    """Return the stored chunks closest to ``query``, joined by newlines.

    Args:
        query: Text to embed and look up in the FAISS index.
        top_k: Maximum number of chunks to return.

    Returns:
        The matching chunk texts in the order FAISS returns them, separated by
        "\n"; an empty string when nothing can be retrieved.
    """
    index = faiss.read_index(VECTOR_INDEX_PATH)
    # NOTE(review): allow_pickle=True will unpickle arbitrary objects from this
    # file; acceptable only while the file is produced locally by
    # create_vector_store().
    text_data = np.load(TEXT_DATA_PATH, allow_pickle=True)

    # Asking FAISS for more neighbours than it holds pads the result with -1,
    # and text_data[-1] would then silently return the last chunk.
    k = min(top_k, index.ntotal, len(text_data))
    if k <= 0:
        return ""

    query_embedding = embedder.encode([query], convert_to_tensor=False)
    query_embedding = np.array(query_embedding).astype('float32')

    _, indices = index.search(query_embedding, k)
    return "\n".join(
        str(text_data[i]) for i in indices[0] if 0 <= i < len(text_data)
    )


# Retrieval-augmented answer generation
def generate_response(prompt):
    """Stream a knowledge-grounded answer to ``prompt`` into the page.

    The retrieved knowledge is shown first, followed by the model's reasoning
    text and then its final answer, all rendered progressively in a single
    placeholder.

    Args:
        prompt: The user's question.

    Returns:
        The full displayed text: retrieved knowledge, reasoning and answer.
    """
    # Retrieve the relevant knowledge
    context = search_knowledge(prompt)

    # Build the conversation: the retrieved text goes into the system message
    messages = [
        {"role": "system", "content": f"基于以下知识回答问题,如果不知道就说不知道:\n{context}"},
        {"role": "user", "content": prompt}
    ]

    response_container = st.empty()

    # Show the retrieved knowledge before the model starts answering
    knowledge_content = f"🔍 知识库相关内容:\n{context}\n\n💡 深度思考:\n"
    response_container.markdown(knowledge_content)

    # Request a streamed completion using the sidebar parameters
    stream = client.chat.completions.create(
        model='deepseek-ai/DeepSeek-R1',
        messages=messages,
        stream=True,
        max_tokens=st.session_state.max_new_tokens,
        temperature=st.session_state.temperature,
        top_p=st.session_state.top_p
    )

    # Consume the stream
    thinking_done = False
    for chunk in stream:
        # A streamed chunk may carry no choices; skip it instead of raising
        # IndexError on choices[0].
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        content = delta.content or ""
        reasoning = getattr(delta, "reasoning_content", "") or ""

        if reasoning:
            knowledge_content += reasoning
            response_container.markdown(knowledge_content + "▌")

        # Answer text is appended even when the same chunk also carried
        # reasoning, so no part of the answer is dropped.
        if content:
            if not thinking_done:
                # First answer token: close the reasoning section
                knowledge_content += "\n\n✅ 最终答案:\n"
                thinking_done = True
            knowledge_content += content
            response_container.markdown(knowledge_content + "▌")

    # Final render without the typing cursor
    response_container.markdown(knowledge_content)
    return knowledge_content


# Message sending
def send_message():
    """Answer the most recent user message and store the reply in the history.

    The prompt is the latest entry in ``st.session_state.messages`` whose role
    is "user", not simply the last entry, which may be an empty assistant
    placeholder. The reply replaces a trailing empty assistant placeholder if
    there is one; otherwise it is appended, so the user's message is never
    overwritten.

    Returns:
        The generated reply text, or None when the history holds no user
        message.
    """
    history = st.session_state.messages
    prompt = next(
        (entry["message"] for entry in reversed(history) if entry["role"] == "user"),
        None,
    )
    if prompt is None:
        return None

    full_response = generate_response(prompt)
    reply = {"role": "assistant", "message": full_response}
    last = history[-1]
    if last["role"] == "assistant" and not last["message"]:
        history[-1] = reply
    else:
        history.append(reply)
    return full_response


def _submit_prompt(prompt):
    """Record and display a user prompt, then generate and store the reply."""
    st.session_state.messages.append({"role": "user", "message": prompt})
    st.chat_message("user").write(prompt)
    # No placeholder is appended: send_message() appends the reply itself, so
    # a failure during generation leaves no empty assistant bubble behind.
    send_message()


# Input handling
if option2 == "键盘":
    if prompt := st.chat_input(placeholder="输入..."):
        _submit_prompt(prompt)
        # Rerun so the conversation is redrawn from the stored history
        st.rerun()
elif option2 == "语音":
    # Text form; the text area is pre-filled with the latest recognised speech
    # NOTE(review): "user_input_area" is also initialised in session state
    # earlier in the script while value= is passed here — confirm which one
    # Streamlit uses.
    with st.form("input_form", clear_on_submit=True):
        prompt = st.text_area(
            "**输入:**",
            key="user_input_area",
            value=st.session_state.user_voice_value,
            help="在此输入文本或通过语音输入。"
        )
        submitted = st.form_submit_button("确认提交")

    # Handle submission; blank submissions are ignored
    if submitted and prompt.strip():
        _submit_prompt(prompt)
        st.session_state.user_voice_value = ""
        st.rerun()

    # Voice input
    voice_result = voice_toolkit()
    # Nothing to read when the component returned no result, even if an
    # earlier run left voice_flag at "interim".
    voice_payload = voice_result["voice_result"] if voice_result else None
    if voice_payload and (
            voice_payload["flag"] == "interim"
            or st.session_state["voice_flag"] == "interim"
    ):
        st.session_state["voice_flag"] = "interim"
        st.session_state["user_voice_value"] = voice_payload["value"]
        if voice_payload["flag"] == "final":
            # Recognition finished: rerun so the text area shows the final text
            st.session_state["voice_flag"] = "final"
            st.rerun()