Spaces:

baidu
/

knowledge_retrieval_demo

Running

App Files Files Community

maxiaolong03 committed on Jun 27, 2025

Commit

98d3121

1 Parent(s): 3a5faf4

add files

Browse files

Files changed (2) hide show

app.py +247 -164
bot_requests.py +40 -38

app.py CHANGED Viewed

@@ -12,11 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""This script provides a simple web interface that allows users to interact with"""
 import argparse
 import base64
 from collections import namedtuple
 from functools import partial
 import hashlib
 import json
@@ -25,12 +28,12 @@ import faiss
 import os
 from argparse import ArgumentParser
 import textwrap
 import gradio as gr
 import numpy as np
 from bot_requests import BotClient
-# from faiss_text_database import FaissTextDatabase
 os.environ["NO_PROXY"] = "localhost,127.0.0.1"  # Disable proxy
@@ -44,89 +47,105 @@ RELEVANT_PASSAGE_DEFAULT = textwrap.dedent("""\
 )
 QUERY_REWRITE_PROMPT = textwrap.dedent("""\
-    你是一个擅长问答系统和信息检索的大模型助手。
-    请根据用户提出的问题，判断是否需要调用文档检索系统来获取答案：
-    - 若问题属于常识性、定义性或答案明确，不依赖外部资料，请标记为 "is_search": false；
-    - 若问题涉及事实查证、具体数据、文档内容等，必须依赖资料检索，请标记为 "is_search": true，并将问题拆解成多个可用于检索的子问题。
-    要求：
-    1. 子问题应语义清晰、独立，适合用于检索；
-    2. 只在**确有必要**的情况下拆解，最多不超过 5 个，不要为了凑满数量而输出冗余子问题；
-    3. 输出为严格的 JSON 格式，无多余注释。
-    【用户当前问题】：
-    {query}
-    【输出格式】：
-    请仅输出如下格式的内容（符合 JSON 规范，无多余注释）：
-    ```
     {{
-        "is_search": true 或 false,
-        "sub_query_list": ["子问题1","子问题2","..."]
-    }}
-    ```"""
 )
 ANSWER_PROMPT = textwrap.dedent(
     """\
-    你是一个乐于助人且信息丰富的机器人，使用下面提供的参考段落中的文本来回答问题。
-    请务必用完整的句子回答，内容要全面，包括所有相关的背景信息。
-    然而，你的对话对象是非技术人员，所以请务必分解复杂的概念，并使用友好和对话式的语气。
-    如果段落与答案无关，你可以忽略它。
-    问题：'{query}'
-    段落：'{relevant_passage}'
-    答案："""
 )
 QUERY_DEFAULT = "1675 年时，英格兰有多少家咖啡馆？"
 def get_args() -> argparse.Namespace:
     """
-    Parse and return command line arguments for the ERNIE models web chat demo.
-    Configures server settings, model endpoints, and document processing parameters.
     Returns:
-        argparse.Namespace: Parsed command line arguments containing:
-        - server_port: Demo server port (default: 8333)
-        - server_name: Demo server host (default: "0.0.0.0")
-        - model_urls: Endpoints for ERNIE and Qianfan models
-        - document_processing: Chunk size, FAISS index and text DB paths
     """
     parser = ArgumentParser(description="ERNIE models web chat demo.")
     parser.add_argument(
-        "--server-port", type=int, default=7860, help="Demo server port."
     )
     parser.add_argument(
         "--server-name", type=str, default="0.0.0.0", help="Demo server name."
     )
     parser.add_argument(
-        "--max_char", type=int, default=8000, help="Maximum character limit for messages."
     )
     parser.add_argument(
         "--max_retry_num", type=int, default=3, help="Maximum retry number for request."
     )
     parser.add_argument(
-        "--eb45t_model_url",
         type=str,
-        default="https://qianfan.baidubce.com/v2",
-        help="Model URL for multimodal model."
     )
     parser.add_argument(
-        "--qianfan_url",
         type=str,
         default="https://qianfan.baidubce.com/v2",
-        help="Qianfan URL."
     )
     parser.add_argument(
         "--qianfan_api_key",
         type=str,
         default=os.environ.get("API_KEY"),
-        help="Qianfan API key."
     )
     parser.add_argument(
         "--embedding_model",
@@ -134,12 +153,24 @@ def get_args() -> argparse.Namespace:
         default="embedding-v1",
         help="Embedding model name."
     )
     parser.add_argument(
         "--chunk_size",
         type=int,
         default=512,
         help="Chunk size for splitting long documents."
     )
     parser.add_argument(
         "--faiss_index_path",
         type=str,
@@ -154,15 +185,24 @@ def get_args() -> argparse.Namespace:
     )
     args = parser.parse_args()
     return args
 class FaissTextDatabase:
     """
-    A vector database for text retrieval using FAISS (Facebook AI Similarity Search).
     Provides efficient similarity search and document management capabilities.
     """
-    def __init__(self, args, bot_client: BotClient, embedding_dim: int=384):
         """
         Initialize the FaissTextDatabase.
@@ -174,9 +214,11 @@ class FaissTextDatabase:
         self.logger = logging.getLogger(__name__)
         self.bot_client = bot_client
         self.faiss_index_path = getattr(args, "faiss_index_path", "data/faiss_index")
         self.text_db_path = getattr(args, "text_db_path", "data/text_db.jsonl")
-        self.embedding_dim = embedding_dim
         # If faiss_index_path exists, load it and text_db_path
         if os.path.exists(self.faiss_index_path) and os.path.exists(self.text_db_path):
@@ -216,7 +258,8 @@ class FaissTextDatabase:
         file_md5 = self.calculate_md5(file_path)
         return file_md5 in self.text_db["file_md5s"]
-    def add_embeddings(self, file_path: str, segments: list[str], progress_bar: gr.Progress=None) -> bool:
         """
         Stores document embeddings in FAISS database after checking for duplicates.
         Generates embeddings for each text segment, updates the FAISS index and metadata database,
@@ -241,8 +284,9 @@ class FaissTextDatabase:
         # Generate embeddings
         vectors = []
         file_name = os.path.basename(file_path)
         for i, segment in  enumerate(segments):
-            vectors.append(self.bot_client.embed_fn(segment))
             if progress_bar is not None:
                 progress_bar((i + 1) / len(segments), desc=file_name + " Processing...")
         vectors = np.array(vectors)
@@ -252,43 +296,87 @@ class FaissTextDatabase:
         for i, text in enumerate(segments):
             self.text_db["chunks"].append({
                 "file_md5": file_md5,
                 "text": text,
                 "vector_id": start_id + i
             })
         self.text_db["file_md5s"].append(file_md5)
-        self.save()
         return True
-    def search_with_context(self, query: str, context_size: int=2) -> str:
         """
-        Finds the most relevant text chunk for a query and includes surrounding context.
-        Uses FAISS to find the closest matching embedding, then retrieves adjacent chunks
         from the same source document to provide better context understanding.
         Args:
-            query: the input query string
-            context_size: the number of surrounding chunks to include
         Returns:
-            str: the relevant chunk with context
-        """
-        query_vector = np.array([self.bot_client.embed_fn(query)]).astype('float32')
-        distances, indices = self.index.search(query_vector, 1)
-        target_idx = indices[0][0]
-        target_chunk = self.text_db["chunks"][target_idx]
-        target_file_md5 = target_chunk["file_md5"]
-        self.logger.info("Similarity: {}".format(distances[0][0]))
-        self.logger.info("Target Chunk: {}".format(self.text_db["chunks"][target_idx]["text"]))
-        # Get the context
-        start = max(0, target_idx - context_size)
-        end = min(len(self.text_db["chunks"]) - 1, target_idx + context_size)
         result = ""
-        for pos in range(start, end + 1):
-            if self.text_db["chunks"][pos]["file_md5"] == target_file_md5:
-                result += self.text_db["chunks"][pos]["text"] + "\n"
         return result
@@ -305,13 +393,35 @@ class GradioEvents(object):
     Manages event handling and UI interactions for Gradio applications.
     Provides methods to process user inputs, trigger callbacks, and update interface components.
     """
     @staticmethod
     def chat_stream(
         query: str,
         task_history: list,
         model: str,
-        bot_client: BotClient,
         faiss_db: FaissTextDatabase,
     ) -> dict:
         """
         Streams chatbot responses by processing queries with context from history and FAISS database.
@@ -328,23 +438,29 @@ class GradioEvents(object):
         Yields:
             dict: A dictionary containing the event type and its corresponding content.
         """
-        search_info_result = GradioEvents.get_sub_query(query, model, bot_client)
-        if search_info_result.get("is_search", False) and search_info_result.get("sub_query_list", []):
-            relevant_passage = GradioEvents.get_relevant_passage(
-                search_info_result["sub_query_list"],
-                faiss_db
             )
-            yield {"type": "relevant_passage", "content": relevant_passage}
-            input = ANSWER_PROMPT.format(query=query, relevant_passage=relevant_passage)
-        else:
-            input = query
-        conversation = []
-        for query_h, response_h in task_history:
-            conversation.append({"role": "user", "content": query_h})
-            conversation.append({"role": "assistant", "content": response_h})
-        conversation.append({"role": "user", "content": input})
         try:
             req_data = {"messages": conversation}
             for chunk in bot_client.process_stream(model, req_data):
@@ -353,10 +469,7 @@ class GradioEvents(object):
                 message = chunk.get("choices", [{}])[0].get("delta", {})
                 content = message.get("content", "")
-                reasoning_content = message.get("reasoning_content", "")
-                if reasoning_content:
-                    yield {"type": "thinking", "content": reasoning_content}
                 if content:
                     yield {"type": "answer", "content": content}
@@ -369,8 +482,8 @@ class GradioEvents(object):
         chatbot: list,
         task_history: list,
         model: str,
         bot_client: BotClient,
-        faiss_db: FaissTextDatabase
     ) -> tuple:
         """
         Generates streaming responses by combining model predictions with knowledge retrieval.
@@ -400,12 +513,11 @@ class GradioEvents(object):
             query,
             task_history,
             model,
-            bot_client,
             faiss_db,
         )
-        reasoning_content = ""
         response = ""
-        has_thinking = False
         current_relevant_passage = None
         for new_text in new_texts:
             if not isinstance(new_text, dict):
@@ -419,27 +531,15 @@ class GradioEvents(object):
                 current_relevant_passage = new_text["content"]
                 yield chatbot, current_relevant_passage
                 continue
-            elif new_text.get("type") == "thinking":
-                has_thinking = True
-                reasoning_content += new_text["content"]
             elif new_text.get("type") == "answer":
                 response += new_text["content"]
-            # Remove previous thinking message if exists
             if chatbot[-1].get("role") == "assistant":
                 chatbot.pop(-1)
-            content = ""
-            if has_thinking:
-                content = "**思考过程：**<br>{}<br>".format(reasoning_content)
             if response:
-                if has_thinking:
-                    content += "<br><br>**最终回答：**<br>{}".format(response)
-                else:
-                    content = response
-            if content:
-                chatbot.append({"role": "assistant", "content": content})
                 yield chatbot, current_relevant_passage
         logging.info("History: {}".format(task_history))
@@ -451,8 +551,8 @@ class GradioEvents(object):
         chatbot: list,
         task_history: list,
         model: str,
         bot_client: BotClient,
-        faiss_db: FaissTextDatabase
     ) -> tuple:
         """
         Regenerate the chatbot's response based on the latest user query
@@ -481,8 +581,8 @@ class GradioEvents(object):
             chatbot,
             task_history,
             model,
             bot_client,
-            faiss_db
         ):
             yield chunk, relevant_passage
@@ -548,44 +648,20 @@ class GradioEvents(object):
         return url
     @staticmethod
-    def get_relevant_passage(
-        sub_query_list: list,
-        faiss_db: FaissTextDatabase
-    ) -> str:
-        """
-        Retrieve the relevant passage from the database based on the query.
-        Args:
-            sub_query_list (list): List of sub-queries.
-            faiss_db (FaissTextDatabase): The FAISS database instance.
-        Returns:
-            str: The relevant passage.
-        """
-        relevant_passages = ""
-        for idx, query_item in enumerate(sub_query_list):
-            relevant_passage = faiss_db.search_with_context(query_item)
-            relevant_passages += "\n段落{idx}:\n{relevant_passage}".format(idx=idx + 1, relevant_passage=relevant_passage)
-        return relevant_passages
-    @staticmethod
-    def get_sub_query(query: str, model_name: str, bot_client: BotClient) -> dict:
         """
         Enhances user queries by generating alternative phrasings using language models.
         Creates semantically similar variations of the original query to improve retrieval accuracy.
         Returns structured dictionary containing both original and rephrased queries.
         Args:
-            query (str): The query to rephrase.
             model_name (str): The name of the model to use for rephrasing.
             bot_client (BotClient): The bot client instance.
         Returns:
             dict: The rephrased query.
         """
-        query = QUERY_REWRITE_PROMPT.format(query=query)
-        conversation = [{"role": "user", "content": query}]
         req_data = {"messages": conversation}
         try:
             response = bot_client.process(model_name, req_data)
@@ -600,7 +676,8 @@ class GradioEvents(object):
                 search_info_res["sub_query_list"] = unique_list
             return search_info_res
         except Exception:
-            raise gr.Error("Error: Model output is not a valid JSON")
     @staticmethod
     def split_oversized_line(line: str, chunk_size: int) -> tuple:
@@ -615,7 +692,7 @@ class GradioEvents(object):
         Returns:
             tuple: Two strings, the first part of the original line and the rest of the line.
         """
-        PUNCTUATIONS = [".", "。", "!", "！", "?", "？", ",", "，", ";", "；", ":", "："]
         if len(line) <= chunk_size:
             return line, ""
@@ -636,28 +713,33 @@ class GradioEvents(object):
         return line[:split_pos], line[split_pos:]
     @staticmethod
-    def split_text_into_chunks(text: str, chunk_size: int) -> list:
         """
-        Split text into chunks of a specified size while respecting natural language boundaries
         and avoiding mid-word splits whenever possible.
         Args:
-            text (str): The text to split.
             chunk_size (int): The maximum length of each chunk.
         Returns:
             list: A list of strings, where each element represents a chunk of the original text.
         """
-        lines = [line.strip() for line in text.split('\n') if line.strip()]
         chunks = []
         current_chunk = []
         current_length = 0
         for line in lines:
             # If adding this line would exceed chunk size (and we have content)
             if current_length + len(line) > chunk_size and current_chunk:
-                chunks.append(" ".join(current_chunk))
                 current_chunk = []
                 current_length = 0
@@ -672,7 +754,7 @@ class GradioEvents(object):
                 current_length += len(line) + 1
         if current_chunk:
-            chunks.append(" ".join(current_chunk))
         return chunks
     @staticmethod
@@ -706,7 +788,8 @@ class GradioEvents(object):
         yield gr.update(visible=False)
     @staticmethod
-    def save_file_to_db(file_url: str, chunk_size: int, faiss_db: FaissTextDatabase, progress_bar: gr.Progress=None):
         """
         Processes and indexes document content into FAISS database with semantic-aware chunking.
         Handles file validation, text segmentation, embedding generation and storage operations.
@@ -720,14 +803,16 @@ class GradioEvents(object):
         Returns:
             bool: True if the file was saved successfully, otherwise False.
         """
         file_name = os.path.basename(file_url)
         if not faiss_db.is_file_processed(file_url):
             logging.info("{} not processed yet, processing now...".format(file_url))
             try:
-                with open(file_url, "r", encoding="utf-8") as f:
-                    text = f.read()
-                segments = GradioEvents.split_text_into_chunks(text, chunk_size)
-                faiss_db.add_embeddings(file_url, segments, progress_bar)
                 logging.info("{} processed successfully.".format(file_url))
                 return True
@@ -740,7 +825,7 @@ class GradioEvents(object):
             return False
-def launch_demo(args: argparse.Namespace, bot_client: BotClient, faiss_db: FaissTextDatabase):
     """
     Launch demo program
@@ -770,7 +855,8 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient, faiss_db: Faiss
     }
     """
     with gr.Blocks(css=css) as demo:
-        model_name = gr.State("eb-45t")
         logo_url = GradioEvents.get_image_url("assets/logo.png")
         gr.Markdown("""\
@@ -816,35 +902,32 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient, faiss_db: Faiss
         predict_with_clients = partial(
             GradioEvents.predict_stream,
-            bot_client=bot_client,
-            faiss_db=faiss_db
         )
         regenerate_with_clients = partial(
             GradioEvents.regenerate,
-            bot_client=bot_client,
-            faiss_db=faiss_db
         )
         file_upload_with_clients = partial(
             GradioEvents.file_upload,
-            faiss_db=faiss_db
         )
         chunk_size = gr.State(args.chunk_size)
         file_btn.change(
             fn=file_upload_with_clients,
-            inputs=[file_btn, chunk_size],
             outputs=[progress_bar],
         )
         query.submit(
             predict_with_clients,
-            inputs=[query, chatbot, task_history, model_name],
             outputs=[chatbot, relevant_passage],
             show_progress=True
         )
         query.submit(GradioEvents.reset_user_input, [], [query])
         submit_btn.click(
             predict_with_clients,
-            inputs=[query, chatbot, task_history, model_name],
             outputs=[chatbot, relevant_passage],
             show_progress=True,
         )
@@ -855,7 +938,7 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient, faiss_db: Faiss
         )
         regen_btn.click(
             regenerate_with_clients,
-            inputs=[chatbot, task_history, model_name],
             outputs=[chatbot, relevant_passage],
             show_progress=True
         )
@@ -873,7 +956,7 @@ def main():
     faiss_db = FaissTextDatabase(args, bot_client)
     # Run file upload function to save default knowledge base.
-    GradioEvents.save_file_to_db(FILE_URL_DEFAULT, args.chunk_size, faiss_db)
     launch_demo(args, bot_client, faiss_db)

 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+This script provides a Gradio interface for interacting with a chatbot based on Retrieval-Augmented Generation.
+"""
 import argparse
 import base64
 from collections import namedtuple
+from datetime import datetime
 from functools import partial
 import hashlib
 import json
 import os
 from argparse import ArgumentParser
 import textwrap
+import copy
 import gradio as gr
 import numpy as np
 from bot_requests import BotClient
 os.environ["NO_PROXY"] = "localhost,127.0.0.1"  # Disable proxy
 )
 QUERY_REWRITE_PROMPT = textwrap.dedent("""\
+    【当前时间】
+    {TIMESTAMP}
+    【对话内容】
+    {CONVERSATION}
+    你的任务是根据上面user与assistant的对话内容，理解user意图，改写user的最后一轮对话，以便更高效地从知识库查找相关知识。具体的改写要求如下：
+    1. 如果user的问题包括几个小问题，请将它们分成多个单独的问题。
+    2. 如果user的问题涉及到之前对话的信息，请将这些信息融入问题中，形成一个不需要上下文就可以理解的完整问题。
+    3. 如果user的问题是在比较或关联多个事物时，先将其拆分为单个事物的问题，例如‘A与B比起来怎么样’，拆分为：‘A怎么样’以及‘B怎么样’。
+    4. 如果user的问题中描述事物的限定词有多个，请将多个限定词拆分成单个限定词。
+    5. 如果user的问题具有**时效性（需要包含当前时间信息，才能得到正确的回复）**的时候，需要将当前时间信息添加到改写的query中；否则不加入当前时间信息。
+    6. 只在**确有必要**的情况下改写，不需要改写时query输出[]。输出不超过 5 个改写问题，不要为了凑满数量而输出冗余问题。
+    【输出格式】只输出 JSON ，不要给出多余内容
+    ```json
     {{
+    "query": ["改写问题1", "改写问题2"...]
+    }}```
+    """
 )
 ANSWER_PROMPT = textwrap.dedent(
     """\
+    你是阅读理解问答专家。
+    【文档知识】
+    {DOC_CONTENT}
+    你的任务是根据对话内容，理解用户需求，参考文档知识回答用户问题，知识参考详细原则如下：
+    - 对于同一信息点，如文档知识与模型通用知识均可支撑，应优先以文档知识为主，并对信息进行验证和综合。
+    - 如果文档知识不足或信息冲突，必须指出“根据资料无法确定”或“不同资料存在矛盾”，不得引入文档知识与通识之外的主观推测。
+    同时，回答问题需要综合考虑规则要求中的各项内容，详细要求如下：
+    【规则要求】
+    * 回答问题时，应优先参考与问题紧密相关的文档知识，不要在答案中引入任何与问题无关的文档内容。
+    * 回答中不可以让用户知道你查询了相关文档。
+    * 回复答案不要出现'根据文档知识'，'根据当前时间'等表述。
+    * 论述突出重点内容，以分点条理清晰的结构化格式输出。
+    【当前时间】
+    {TIMESTAMP}
+    【对话内容】
+    {CONVERSATION}
+    直接输出回复内容即可。
+    """
 )
 QUERY_DEFAULT = "1675 年时，英格兰有多少家咖啡馆？"
 def get_args() -> argparse.Namespace:
     """
+    Parse and return command line arguments for the ERNIE models chat demo.
+    Configures server settings, model endpoint, and document processing parameters.
     Returns:
+        argparse.Namespace: Parsed command line arguments containing all the above settings.
     """
     parser = ArgumentParser(description="ERNIE models web chat demo.")
     parser.add_argument(
+        "--server-port", type=int, default=8686, help="Demo server port."
     )
     parser.add_argument(
         "--server-name", type=str, default="0.0.0.0", help="Demo server name."
     )
     parser.add_argument(
+        "--max_char", type=int, default=20000, help="Maximum character limit for messages."
     )
     parser.add_argument(
         "--max_retry_num", type=int, default=3, help="Maximum retry number for request."
     )
     parser.add_argument(
+        "--model_map",
         type=str,
+        default="{\"ernie-4.5-turbo-vl-32k\": \"https://qianfan.baidubce.com/v2\"}",
+        help="""JSON string defining model name to endpoint mappings.
+            Required Format:
+            {"ERNIE-4.5": "http://localhost:port/v1"}
+            Note:
+            - Endpoints must be valid HTTP URL
+            - Specify ONE model endpoint in JSON format.
+            - Prefix determines model capabilities:
+            * ERNIE-4.5: Text-only model
+            """
     )
     parser.add_argument(
+        "--embedding_service_url",
         type=str,
         default="https://qianfan.baidubce.com/v2",
+        help="Embedding service url."
     )
     parser.add_argument(
         "--qianfan_api_key",
         type=str,
         default=os.environ.get("API_KEY"),
+        help="Qianfan API key.",
     )
     parser.add_argument(
         "--embedding_model",
         default="embedding-v1",
         help="Embedding model name."
     )
+    parser.add_argument(
+        "--embedding_dim",
+        type=int,
+        default=384,
+        help="Dimension of the embedding vector."
+    )
     parser.add_argument(
         "--chunk_size",
         type=int,
         default=512,
         help="Chunk size for splitting long documents."
     )
+    parser.add_argument(
+        "--top_k",
+        type=int,
+        default=3,
+        help="Top k results to retrieve."
+    )
     parser.add_argument(
         "--faiss_index_path",
         type=str,
     )
     args = parser.parse_args()
+    try:
+        args.model_map = json.loads(args.model_map)
+        # Validation: Check at least one model exists
+        if len(args.model_map) < 1:
+            raise ValueError("model_map must contain at least one model configuration")
+    except json.JSONDecodeError as e:
+        raise ValueError("Invalid JSON format for --model-map") from e
     return args
 class FaissTextDatabase:
     """
+    A vector database for text retrieval using FAISS.
     Provides efficient similarity search and document management capabilities.
     """
+    def __init__(self, args, bot_client: BotClient):
         """
         Initialize the FaissTextDatabase.
         self.logger = logging.getLogger(__name__)
         self.bot_client = bot_client
+        self.embedding_dim = getattr(args, "embedding_dim", 384)
+        self.top_k = getattr(args, "top_k", 3)
+        self.context_size = getattr(args, "context_size", 2)
         self.faiss_index_path = getattr(args, "faiss_index_path", "data/faiss_index")
         self.text_db_path = getattr(args, "text_db_path", "data/text_db.jsonl")
         # If faiss_index_path exists, load it and text_db_path
         if os.path.exists(self.faiss_index_path) and os.path.exists(self.text_db_path):
         file_md5 = self.calculate_md5(file_path)
         return file_md5 in self.text_db["file_md5s"]
+    def add_embeddings(self, file_path: str, segments: list[str], progress_bar: gr.Progress=None, \
+            save_file: bool=False) -> bool:
         """
         Stores document embeddings in FAISS database after checking for duplicates.
         Generates embeddings for each text segment, updates the FAISS index and metadata database,
         # Generate embeddings
         vectors = []
         file_name = os.path.basename(file_path)
+        file_txt = "".join(file_name.split(".")[:-1])[:30]
         for i, segment in  enumerate(segments):
+            vectors.append(self.bot_client.embed_fn(file_txt + "\n" + segment))
             if progress_bar is not None:
                 progress_bar((i + 1) / len(segments), desc=file_name + " Processing...")
         vectors = np.array(vectors)
         for i, text in enumerate(segments):
             self.text_db["chunks"].append({
                 "file_md5": file_md5,
+                "file_name": file_name,
+                "file_txt": file_txt,
                 "text": text,
                 "vector_id": start_id + i
             })
         self.text_db["file_md5s"].append(file_md5)
+        if save_file:
+            self.save()
         return True
+    def search_with_context(self, query_list: list) -> str:
         """
+        Finds the most relevant text chunks for multiple queries and includes surrounding context.
+        Uses FAISS to find the closest matching embeddings, then retrieves adjacent chunks
         from the same source document to provide better context understanding.
         Args:
+            query_list: list of input query strings
         Returns:
+            str: the concatenated output string
+        """
+        # Step 1: Retrieve top_k results for each query and collect all indices
+        all_indices = []
+        for query in query_list:
+            query_vector = np.array([self.bot_client.embed_fn(query)]).astype('float32')
+            _, indices = self.index.search(query_vector, self.top_k)
+            all_indices.extend(indices[0].tolist())
+        # Step 2: Remove duplicate indices
+        unique_indices = sorted(list(set(all_indices)))
+        self.logger.info(f"Retrieved indices: {all_indices}")
+        self.logger.info(f"Unique indices after deduplication: {unique_indices}")
+        # Step 3: Expand each index with context (within same file boundaries)
+        expanded_indices = set()
+        file_boundaries = {}  # {file_md5: (start_idx, end_idx)}
+        for target_idx in unique_indices:
+            target_chunk = self.text_db["chunks"][target_idx]
+            target_file_md5 = target_chunk["file_md5"]
+            if target_file_md5 not in file_boundaries:
+                file_start = target_idx
+                while file_start > 0 and self.text_db["chunks"][file_start - 1]["file_md5"] == target_file_md5:
+                    file_start -= 1
+                file_end = target_idx
+                while (file_end < len(self.text_db["chunks"]) - 1 and
+                    self.text_db["chunks"][file_end + 1]["file_md5"] == target_file_md5):
+                    file_end += 1
+            else:
+                file_start, file_end = file_boundaries[target_file_md5]
+            # Calculate context range within file boundaries
+            start = max(file_start, target_idx - self.context_size)
+            end = min(file_end, target_idx + self.context_size)
+            for pos in range(start, end + 1):
+                expanded_indices.add(pos)
+        # Step 4: Sort and merge continuous chunks
+        sorted_indices = sorted(list(expanded_indices))
+        groups = []
+        current_group = [sorted_indices[0]]
+        for i in range(1, len(sorted_indices)):
+            if (sorted_indices[i] == sorted_indices[i - 1] + 1 and
+                self.text_db["chunks"][sorted_indices[i]]["file_md5"] ==
+                self.text_db["chunks"][sorted_indices[i - 1]]["file_md5"]):
+                current_group.append(sorted_indices[i])
+            else:
+                groups.append(current_group)
+                current_group = [sorted_indices[i]]
+        groups.append(current_group)
+        # Step 5: Create merged text for each group
         result = ""
+        for idx, group in enumerate(groups):
+            result += "\n段落{idx}:\n{title}\n".format(idx=idx + 1, title=self.text_db["chunks"][group[0]]["file_txt"])
+            for idx in group:
+                result += self.text_db["chunks"][idx]["text"] + "\n"
+            self.logger.info(f"Merged chunk range: {group[0]}-{group[-1]}")
         return result
     Manages event handling and UI interactions for Gradio applications.
     Provides methods to process user inputs, trigger callbacks, and update interface components.
     """
+    @staticmethod
+    def get_history_conversation(task_history: list) -> tuple:
+        """
+        Converts task history into conversation format for model processing.
+        Transforms query-response pairs into structured message history and plain text.
+        Args:
+            task_history (list): List of tuples containing queries and responses.
+        Returns:
+            tuple: Tuple containing two elements:
+                - conversation (list): List of dictionaries representing the conversation history.
+                - conversation_str (str): String representation of the conversation history.
+        """
+        conversation = []
+        conversation_str = ""
+        for query_h, response_h in task_history:
+            conversation.append({"role": "user", "content": query_h})
+            conversation.append({"role": "assistant", "content": response_h})
+            conversation_str += "user:\n{query}\n assistant:\n{response}\n ".format(query=query_h, response=response_h)
+        return conversation, conversation_str
     @staticmethod
     def chat_stream(
         query: str,
         task_history: list,
         model: str,
         faiss_db: FaissTextDatabase,
+        bot_client: BotClient,
     ) -> dict:
         """
         Streams chatbot responses by processing queries with context from history and FAISS database.
         Yields:
             dict: A dictionary containing the event type and its corresponding content.
         """
+        conversation, conversation_str = GradioEvents.get_history_conversation(task_history)
+        conversation_str += "user:\n{query}\n".format(query=query)
+        search_info_message = QUERY_REWRITE_PROMPT.format(
+            TIMESTAMP=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            CONVERSATION=conversation_str
+        )
+        search_conversation = [{"role": "user", "content": search_info_message}]
+        search_info_result = GradioEvents.get_sub_query(search_conversation, model, bot_client)
+        if search_info_result is None:
+            search_info_result = {"query": [query]}
+        if search_info_result.get("query", []):
+            relevant_passages = faiss_db.search_with_context(search_info_result["query"])
+            yield {"type": "relevant_passage", "content": relevant_passages}
+            query = ANSWER_PROMPT.format(
+                DOC_CONTENT=relevant_passages,
+                TIMESTAMP=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                CONVERSATION=conversation_str
             )
+        conversation.append({"role": "user", "content": query})
         try:
             req_data = {"messages": conversation}
             for chunk in bot_client.process_stream(model, req_data):
                 message = chunk.get("choices", [{}])[0].get("delta", {})
                 content = message.get("content", "")
                 if content:
                     yield {"type": "answer", "content": content}
         chatbot: list,
         task_history: list,
         model: str,
+        faiss_db: FaissTextDatabase,
         bot_client: BotClient,
     ) -> tuple:
         """
         Generates streaming responses by combining model predictions with knowledge retrieval.
             query,
             task_history,
             model,
             faiss_db,
+            bot_client,
         )
         response = ""
         current_relevant_passage = None
         for new_text in new_texts:
             if not isinstance(new_text, dict):
                 current_relevant_passage = new_text["content"]
                 yield chatbot, current_relevant_passage
                 continue
             elif new_text.get("type") == "answer":
                 response += new_text["content"]
+            # Remove previous message if exists
             if chatbot[-1].get("role") == "assistant":
                 chatbot.pop(-1)
             if response:
+                chatbot.append({"role": "assistant", "content": response})
                 yield chatbot, current_relevant_passage
         logging.info("History: {}".format(task_history))
         chatbot: list,
         task_history: list,
         model: str,
+        faiss_db: FaissTextDatabase,
         bot_client: BotClient,
     ) -> tuple:
         """
         Regenerate the chatbot's response based on the latest user query
             chatbot,
             task_history,
             model,
+            faiss_db,
             bot_client,
         ):
             yield chunk, relevant_passage
         return url
     @staticmethod
+    def get_sub_query(conversation: list, model_name: str, bot_client: BotClient) -> dict:
         """
         Enhances user queries by generating alternative phrasings using language models.
         Creates semantically similar variations of the original query to improve retrieval accuracy.
         Returns structured dictionary containing both original and rephrased queries.
         Args:
+            conversation (list): The conversation history.
             model_name (str): The name of the model to use for rephrasing.
             bot_client (BotClient): The bot client instance.
         Returns:
             dict: The rephrased query.
         """
         req_data = {"messages": conversation}
         try:
             response = bot_client.process(model_name, req_data)
                 search_info_res["sub_query_list"] = unique_list
             return search_info_res
         except Exception:
+            logging.error("Error: Model output is not a valid JSON")
+            return None
     @staticmethod
     def split_oversized_line(line: str, chunk_size: int) -> tuple:
         Returns:
             tuple: Two strings, the first part of the original line and the rest of the line.
         """
+        PUNCTUATIONS = {".", "。", "!", "！", "?", "？", ",", "，", ";", "；", ":", "："}
         if len(line) <= chunk_size:
             return line, ""
         return line[:split_pos], line[split_pos:]
     @staticmethod
+    def split_text_into_chunks(file_url: str, chunk_size: int) -> list:
         """
+        Split file text into chunks of a specified size while respecting natural language boundaries
         and avoiding mid-word splits whenever possible.
         Args:
+            file_url (str): The file URL.
             chunk_size (int): The maximum length of each chunk.
         Returns:
             list: A list of strings, where each element represents a chunk of the original text.
         """
+        with open(file_url, "r", encoding="utf-8") as f:
+            text = f.read()
+        if not text:
+            logging.error("Error: File is empty")
+            return []
+        lines = [line.strip() for line in text.split("\n") if line.strip()]
         chunks = []
         current_chunk = []
         current_length = 0
         for line in lines:
             # If adding this line would exceed chunk size (and we have content)
             if current_length + len(line) > chunk_size and current_chunk:
+                chunks.append("\n".join(current_chunk))
                 current_chunk = []
                 current_length = 0
                 current_length += len(line) + 1
         if current_chunk:
+            chunks.append("\n".join(current_chunk))
         return chunks
     @staticmethod
         yield gr.update(visible=False)
     @staticmethod
+    def save_file_to_db(file_url: str, chunk_size: int, faiss_db: FaissTextDatabase, \
+                                    progress_bar: gr.Progress=None, save_file: bool=False):
         """
         Processes and indexes document content into FAISS database with semantic-aware chunking.
         Handles file validation, text segmentation, embedding generation and storage operations.
         Returns:
             bool: True if the file was saved successfully, otherwise False.
         """
+        if not os.path.exists(file_url):
+            logging.error("File not found: {}".format(file_url))
+            return False
         file_name = os.path.basename(file_url)
         if not faiss_db.is_file_processed(file_url):
             logging.info("{} not processed yet, processing now...".format(file_url))
             try:
+                segments = GradioEvents.split_text_into_chunks(file_url, chunk_size)
+                faiss_db.add_embeddings(file_url, segments, progress_bar, save_file)
                 logging.info("{} processed successfully.".format(file_url))
                 return True
             return False
+def launch_demo(args: argparse.Namespace, bot_client: BotClient, faiss_db_template: FaissTextDatabase):
     """
     Launch demo program
     }
     """
     with gr.Blocks(css=css) as demo:
+        model_name = gr.State(list(args.model_map.keys())[0])
+        faiss_db = gr.State(copy.deepcopy(faiss_db_template))
         logo_url = GradioEvents.get_image_url("assets/logo.png")
         gr.Markdown("""\
         predict_with_clients = partial(
             GradioEvents.predict_stream,
+            bot_client=bot_client
         )
         regenerate_with_clients = partial(
             GradioEvents.regenerate,
+            bot_client=bot_client
         )
         file_upload_with_clients = partial(
             GradioEvents.file_upload,
         )
         chunk_size = gr.State(args.chunk_size)
         file_btn.change(
             fn=file_upload_with_clients,
+            inputs=[file_btn, chunk_size, faiss_db],
             outputs=[progress_bar],
         )
         query.submit(
             predict_with_clients,
+            inputs=[query, chatbot, task_history, model_name, faiss_db],
             outputs=[chatbot, relevant_passage],
             show_progress=True
         )
         query.submit(GradioEvents.reset_user_input, [], [query])
         submit_btn.click(
             predict_with_clients,
+            inputs=[query, chatbot, task_history, model_name, faiss_db],
             outputs=[chatbot, relevant_passage],
             show_progress=True,
         )
         )
         regen_btn.click(
             regenerate_with_clients,
+            inputs=[chatbot, task_history, model_name, faiss_db],
             outputs=[chatbot, relevant_passage],
             show_progress=True
         )
     faiss_db = FaissTextDatabase(args, bot_client)
     # Run file upload function to save default knowledge base.
+    GradioEvents.save_file_to_db(FILE_URL_DEFAULT, args.chunk_size, faiss_db, save_file=True)
     launch_demo(args, bot_client, faiss_db)

bot_requests.py CHANGED Viewed

@@ -22,7 +22,7 @@ import json
 import jieba
 from openai import OpenAI
-from appbuilder.mcp_server.client import MCPClient
 class BotClient(object):
     """Client for interacting with various AI models."""
@@ -41,15 +41,16 @@ class BotClient(object):
         self.max_retry_num = getattr(args, 'max_retry_num', 3)
         self.max_char = getattr(args, 'max_char', 8000)
-        self.eb45t_model_url = getattr(args, 'eb45t_model_url', 'eb45t_model_url')
-        self.x1_model_url = getattr(args, 'x1_model_url', 'x1_model_url')
         self.api_key = os.environ.get("API_KEY")
-        self.qianfan_url = getattr(args, 'qianfan_url', 'qianfan_url')
-        self.qianfan_api_key = getattr(args, 'qianfan_api_key', 'qianfan_api_key')
         self.embedding_model = getattr(args, 'embedding_model', 'embedding_model')
-        self.ai_search_service_url = getattr(args, 'ai_search_service_url', 'ai_search_service_url')
     def call_back(self, host_url: str, req_data: dict) -> dict:
         """
@@ -130,14 +131,9 @@ class BotClient(object):
         Returns:
             dict: Dictionary containing the model's processing results.
         """
-        model_map = {
-            "eb-45t": self.eb45t_model_url,
-            "eb-x1": self.x1_model_url
-        }
-        model_url = model_map[model_name]
-        req_data["model"] = "ernie-4.5-turbo-32k" if "eb-45t" == model_name else "ernie-x1-turbo-32k"
         req_data["max_tokens"] = max_tokens
         req_data["temperature"] = temperature
         req_data["top_p"] = top_p
@@ -157,7 +153,6 @@ class BotClient(object):
                 res = {}
             if len(res) != 0 and "error" not in res:
                 break
-        self.logger.info(json.dumps(res, ensure_ascii=False))
         return res
@@ -183,13 +178,8 @@ class BotClient(object):
         Yields:
             dict: Dictionary containing the model's processing results.
         """
-        model_map = {
-            "eb-45t": self.eb45t_model_url,
-            "eb-x1": self.x1_model_url
-        }
-        model_url = model_map[model_name]
-        req_data["model"] = "ernie-4.5-turbo-32k" if "eb-45t" == model_name else "ernie-x1-turbo-32k"
         req_data["max_tokens"] = max_tokens
         req_data["temperature"] = temperature
         req_data["top_p"] = top_p
@@ -282,7 +272,7 @@ class BotClient(object):
         to_remove = total_units - self.max_char
         # 1. Truncate historical messages
-        for i in range(1, len(processed) - 1):
             if to_remove <= 0:
                 break
@@ -362,27 +352,39 @@ class BotClient(object):
         Returns:
             list: A list of floats representing the embedding.
         """
-        client = OpenAI(base_url=self.qianfan_url, api_key=self.qianfan_api_key)
         response = client.embeddings.create(input=[text], model=self.embedding_model)
         return response.data[0].embedding
-    async def get_ai_search_res(self, query_list: list) -> list:
         """
-        Get AI search results for the given queries using the MCPClient.
         Args:
-            query_list (list): List of queries to search for.
         Returns:
-            list: List of search results as strings.
         """
-        try:
-            client = MCPClient()
-            await client.connect_to_server(service_url=self.ai_search_service_url)
-            result = []
-            for query in query_list:
-                response = await client.call_tool("AIsearch", {"query": query})
-                result.append(response.content[0].text)
-        finally:
-            await client.cleanup()
-        return result

 import jieba
 from openai import OpenAI
+import requests
 class BotClient(object):
     """Client for interacting with various AI models."""
         self.max_retry_num = getattr(args, 'max_retry_num', 3)
         self.max_char = getattr(args, 'max_char', 8000)
+        self.model_map = getattr(args, 'model_map', {})
         self.api_key = os.environ.get("API_KEY")
+        self.embedding_service_url = getattr(args, 'embedding_service_url', 'embedding_service_url')
         self.embedding_model = getattr(args, 'embedding_model', 'embedding_model')
+        self.web_search_service_url = getattr(args, 'web_search_service_url', 'web_search_service_url')
+        self.max_search_results_num = getattr(args, 'max_search_results_num', 15)
+        self.qianfan_api_key = os.environ.get("API_KEY")
     def call_back(self, host_url: str, req_data: dict) -> dict:
         """
         Returns:
             dict: Dictionary containing the model's processing results.
         """
+        model_url = self.model_map[model_name]
+        req_data["model"] = model_name
         req_data["max_tokens"] = max_tokens
         req_data["temperature"] = temperature
         req_data["top_p"] = top_p
                 res = {}
             if len(res) != 0 and "error" not in res:
                 break
         return res
         Yields:
             dict: Dictionary containing the model's processing results.
         """
+        model_url = self.model_map[model_name]
+        req_data["model"] = model_name
         req_data["max_tokens"] = max_tokens
         req_data["temperature"] = temperature
         req_data["top_p"] = top_p
         to_remove = total_units - self.max_char
         # 1. Truncate historical messages
+        for i in range(len(processed) - 1, 1):
             if to_remove <= 0:
                 break
         Returns:
             list: A list of floats representing the embedding.
         """
+        client = OpenAI(base_url=self.embedding_service_url, api_key=self.qianfan_api_key)
         response = client.embeddings.create(input=[text], model=self.embedding_model)
         return response.data[0].embedding
+    def get_web_search_res(self, query_list: list) -> list:
         """
+        Send a request to the AI Search service using the provided API key and service URL.
         Args:
+            query_list (list): List of queries to send to the AI Search service.
         Returns:
+            list: List of responses from the AI Search service.
         """
+        headers = {
+            "Authorization": "Bearer " + self.qianfan_api_key,
+            "Content-Type": "application/json"
+        }
+        results = []
+        top_k = self.max_search_results_num // len(query_list)
+        for query in query_list:
+            payload = {
+                "messages": [{"role": "user", "content": query}],
+                "resource_type_filter": [{"type": "web", "top_k": top_k}]
+            }
+            response = requests.post(self.web_search_service_url, headers=headers, json=payload)
+            if response.status_code == 200:
+                response = response.json()
+                self.logger.info(response)
+                results.append(response["references"])
+            else:
+                self.logger.info(f"请求失败，状态码: {response.status_code}")
+                self.logger.info(response.text)
+        return results