wangrongsheng commited on
Commit
8c6097b
·
verified ·
1 Parent(s): f74a3e0

Upload folder using huggingface_hub

Browse files
Files changed (43) hide show
  1. .gitattributes +5 -0
  2. LICENSE +34 -0
  3. Open Source Software Notice +218 -0
  4. README.md +554 -6
  5. README_EN.md +554 -0
  6. checklist.chk +14 -0
  7. config.json +31 -0
  8. configuration_openpangu_dense.py +56 -0
  9. deepdiver_v2/cli/README.md +238 -0
  10. deepdiver_v2/cli/demo.py +668 -0
  11. deepdiver_v2/cli/run_demo.sh +171 -0
  12. deepdiver_v2/config/config.py +239 -0
  13. deepdiver_v2/env.template +44 -0
  14. deepdiver_v2/requirements.txt +8 -0
  15. deepdiver_v2/src/__init__.py +11 -0
  16. deepdiver_v2/src/agents/__init__.py +62 -0
  17. deepdiver_v2/src/agents/base_agent.py +692 -0
  18. deepdiver_v2/src/agents/objective_information_seeker.py +428 -0
  19. deepdiver_v2/src/agents/planner_agent.py +1203 -0
  20. deepdiver_v2/src/agents/subjective_information_seeker.py +417 -0
  21. deepdiver_v2/src/agents/writer_agent.py +477 -0
  22. deepdiver_v2/src/tools/__init__.py +36 -0
  23. deepdiver_v2/src/tools/mcp_client.py +814 -0
  24. deepdiver_v2/src/tools/mcp_server_standard.py +1751 -0
  25. deepdiver_v2/src/tools/mcp_tools.py +0 -0
  26. deepdiver_v2/src/tools/server_config.yaml +73 -0
  27. deepdiver_v2/src/utils/__init__.py +8 -0
  28. deepdiver_v2/src/utils/status_codes.py +12 -0
  29. deepdiver_v2/src/workspace/__init__.py +26 -0
  30. deepdiver_v2/src/workspace/local_workspace_manager.py +420 -0
  31. docs/openpangu-deepdiver-v2-tech-report.pdf +3 -0
  32. generation_config.json +11 -0
  33. model-00001-of-00004.safetensors +3 -0
  34. model-00002-of-00004.safetensors +3 -0
  35. model-00003-of-00004.safetensors +3 -0
  36. model-00004-of-00004.safetensors +3 -0
  37. model.safetensors.index.json +486 -0
  38. modeling_openpangu_dense.py +585 -0
  39. modular_openpangu_dense.py +149 -0
  40. special_tokens_map.json +30 -0
  41. tokenization_openpangu.py +273 -0
  42. tokenizer.model +3 -0
  43. tokenizer_config.json +1 -0
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.pdf filter=lfs diff=lfs merge=lfs -text
37
+ model-00004-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text
38
+ model-00001-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text
39
+ model-00002-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text
40
+ model-00003-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OPENPANGU MODEL LICENSE AGREEMENT VERSION 1.0
2
+
3
+ This OPENPANGU MODEL LICENSE AGREEMENT VERSION 1.0 (the "Agreement") is a legal agreement between You and Huawei Technologies Co., Ltd. ("Huawei", "We" or "Us"), and it governs Your reproducing, use, modification, and distribution of openPangu as made available by Huawei under this Agreement.
4
+
5
+ By using, reproducing, modifying, distributing, performing or displaying any portion or element of openPangu, or otherwise accepting the terms of this Agreement, You agree to be bound by this Agreement.
6
+
7
+ 1. Definitions.
8
+ 1.1. “openPangu” or “Model” means openPangu large language models and software, including trained model weights, parameters (including optimizer states), accompanying source code and scripts released under this Agreement.
9
+ 1.2. “Derivative Model” means all (1) modifications to the Model, (2) works based on the Model, and (3) any other derivative works of the Model. For clarity, information or content results from operating or otherwise using the Model is not a Derivative Model.
10
+ 1.3. “You” or “Your” means an individual or Legal Entity exercising permissions granted by this Agreement and/or using the Model for any purpose.
11
+ 1.4. “Third Party” or “Third Parties” means individuals or legal entities that are not under common control with Us or You.
12
+
13
+ 2. License Grant. Subject to Your full compliance with the terms and conditions of this Agreement, We hereby grant to You a perpetual, worldwide, non-exclusive, non-transferable, no-charge, royalty-free license (except as stated in Section 3) to use, reproduce, modify, and distribute the Model.
14
+
15
+ 3. Conditions for License Grant. You represent and warrant that You will not access, download, install, run, deploy, integrate, modify, or otherwise use the Model, directly or indirectly, within the European Union.
16
+
17
+
18
+ 4. Redistribution.
19
+ 4.1. If You distribute the Model or Derivative Model, You shall retain in Your distribution (1) a copy of this agreement, and (2) all copyright notices and other notices of origin included in the Model that are applicable to Your distribution.
20
+ 4.2. Further, if You distribute or make available to Third Parties a product or service (including another AI model) based on the Model, You are required to (1) display the acknowledgement “Powered by openPangu” and (2) include a trademark notice “openPangu is a trademark of Huawei Technologies Co., Ltd.” on related webpages, user manuals, product documentations or other advertising materials mentioning features of the Model.
21
+ 4.3. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for Derivative Model made by You as a whole, provided Your use, reproduction, and distribution of the Model otherwise complies with the terms and conditions of this Agreement.
22
+
23
+ 5. Ownership. We do not claim ownership to any information or content generated using the Model or Derivative Model that are made by You. You are solely responsible for evaluating the accuracy and appropriateness of such information or content for Your use case.
24
+
25
+ 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of Huawei, except as required for complying with Section 4.2.
26
+
27
+ 7. Indemnity. You will indemnify and hold harmless Huawei from and against any claim by any third party arising out of or related to Your use or distribution of the Model or Derivative Model made by You (e.g. a violation against Section 3). For avoidance of doubt, “third party” in this clause includes supervisory authorities.
28
+
29
+ 8. THE MODEL IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE, NONINFRINGEMENT, ACCURACY, OR THE ABSENCE OF LATENT OR OTHER DEFECTS OR ERRORS, WHETHER OR NOT DISCOVERABLE, ALL TO THE GREATEST EXTENT PERMISSIBLE UNDER APPLICABLE LAW.
30
+
31
+ 9. IN NO EVENT SHALL WE BE LIABLE TO YOU FOR ANY DAMAGES, INCLUDING, BUT NOT LIMITED TO ANY DIRECT, OR INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING FROM YOUR USE OR INABILITY TO USE THE MODEL, IN WHOLE OR IN PART, NO MATTER HOW IT’S CAUSED OR THE LEGAL THEORY IT IS BASED ON, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
32
+
33
+
34
+ END OF THE TERMS AND CONDITIONS
Open Source Software Notice ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OPEN SOURCE SOFTWARE NOTICE
2
+
3
+ Please note we provide an open source software notice along with this product and/or this product firmware (in the following just “this product”). The open source software licenses are granted by the respective right holders. And the open source licenses prevail all other license information with regard to the respective open source software contained in the product, including but not limited to End User Software Licensing Agreement. This notice is provided on behalf of Huawei Technologies Co. Ltd. and any of its local subsidiaries which may have provided this product to you in your local country.
4
+
5
+ Warranty Disclaimer
6
+ THE OPEN SOURCE SOFTWARE IN THIS PRODUCT IS DISTRIBUTED IN THE HOPE THAT IT WILL BE USEFUL, BUT WITHOUT ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. SEE THE APPLICABLE LICENSES FOR MORE DETAILS.
7
+
8
+ Copyright Notice and License Texts
9
+
10
+ Software: transformers 4.53.2
11
+ Copyright notice:
12
+ Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
13
+
14
+ License Text:
15
+ ----------------------------------------
16
+
17
+ Apache License
18
+ Version 2.0, January 2004
19
+ http://www.apache.org/licenses/
20
+
21
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
22
+
23
+ 1. Definitions.
24
+
25
+ "License" shall mean the terms and conditions for use, reproduction,
26
+ and distribution as defined by Sections 1 through 9 of this document.
27
+
28
+ "Licensor" shall mean the copyright owner or entity authorized by
29
+ the copyright owner that is granting the License.
30
+
31
+ "Legal Entity" shall mean the union of the acting entity and all
32
+ other entities that control, are controlled by, or are under common
33
+ control with that entity. For the purposes of this definition,
34
+ "control" means (i) the power, direct or indirect, to cause the
35
+ direction or management of such entity, whether by contract or
36
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
37
+ outstanding shares, or (iii) beneficial ownership of such entity.
38
+
39
+ "You" (or "Your") shall mean an individual or Legal Entity
40
+ exercising permissions granted by this License.
41
+
42
+ "Source" form shall mean the preferred form for making modifications,
43
+ including but not limited to software source code, documentation
44
+ source, and configuration files.
45
+
46
+ "Object" form shall mean any form resulting from mechanical
47
+ transformation or translation of a Source form, including but
48
+ not limited to compiled object code, generated documentation,
49
+ and conversions to other media types.
50
+
51
+ "Work" shall mean the work of authorship, whether in Source or
52
+ Object form, made available under the License, as indicated by a
53
+ copyright notice that is included in or attached to the work
54
+ (an example is provided in the Appendix below).
55
+
56
+ "Derivative Works" shall mean any work, whether in Source or Object
57
+ form, that is based on (or derived from) the Work and for which the
58
+ editorial revisions, annotations, elaborations, or other modifications
59
+ represent, as a whole, an original work of authorship. For the purposes
60
+ of this License, Derivative Works shall not include works that remain
61
+ separable from, or merely link (or bind by name) to the interfaces of,
62
+ the Work and Derivative Works thereof.
63
+
64
+ "Contribution" shall mean any work of authorship, including
65
+ the original version of the Work and any modifications or additions
66
+ to that Work or Derivative Works thereof, that is intentionally
67
+ submitted to Licensor for inclusion in the Work by the copyright owner
68
+ or by an individual or Legal Entity authorized to submit on behalf of
69
+ the copyright owner. For the purposes of this definition, "submitted"
70
+ means any form of electronic, verbal, or written communication sent
71
+ to the Licensor or its representatives, including but not limited to
72
+ communication on electronic mailing lists, source code control systems,
73
+ and issue tracking systems that are managed by, or on behalf of, the
74
+ Licensor for the purpose of discussing and improving the Work, but
75
+ excluding communication that is conspicuously marked or otherwise
76
+ designated in writing by the copyright owner as "Not a Contribution."
77
+
78
+ "Contributor" shall mean Licensor and any individual or Legal Entity
79
+ on behalf of whom a Contribution has been received by Licensor and
80
+ subsequently incorporated within the Work.
81
+
82
+ 2. Grant of Copyright License. Subject to the terms and conditions of
83
+ this License, each Contributor hereby grants to You a perpetual,
84
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
85
+ copyright license to reproduce, prepare Derivative Works of,
86
+ publicly display, publicly perform, sublicense, and distribute the
87
+ Work and such Derivative Works in Source or Object form.
88
+
89
+ 3. Grant of Patent License. Subject to the terms and conditions of
90
+ this License, each Contributor hereby grants to You a perpetual,
91
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
92
+ (except as stated in this section) patent license to make, have made,
93
+ use, offer to sell, sell, import, and otherwise transfer the Work,
94
+ where such license applies only to those patent claims licensable
95
+ by such Contributor that are necessarily infringed by their
96
+ Contribution(s) alone or by combination of their Contribution(s)
97
+ with the Work to which such Contribution(s) was submitted. If You
98
+ institute patent litigation against any entity (including a
99
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
100
+ or a Contribution incorporated within the Work constitutes direct
101
+ or contributory patent infringement, then any patent licenses
102
+ granted to You under this License for that Work shall terminate
103
+ as of the date such litigation is filed.
104
+
105
+ 4. Redistribution. You may reproduce and distribute copies of the
106
+ Work or Derivative Works thereof in any medium, with or without
107
+ modifications, and in Source or Object form, provided that You
108
+ meet the following conditions:
109
+
110
+ (a) You must give any other recipients of the Work or
111
+ Derivative Works a copy of this License; and
112
+
113
+ (b) You must cause any modified files to carry prominent notices
114
+ stating that You changed the files; and
115
+
116
+ (c) You must retain, in the Source form of any Derivative Works
117
+ that You distribute, all copyright, patent, trademark, and
118
+ attribution notices from the Source form of the Work,
119
+ excluding those notices that do not pertain to any part of
120
+ the Derivative Works; and
121
+
122
+ (d) If the Work includes a "NOTICE" text file as part of its
123
+ distribution, then any Derivative Works that You distribute must
124
+ include a readable copy of the attribution notices contained
125
+ within such NOTICE file, excluding those notices that do not
126
+ pertain to any part of the Derivative Works, in at least one
127
+ of the following places: within a NOTICE text file distributed
128
+ as part of the Derivative Works; within the Source form or
129
+ documentation, if provided along with the Derivative Works; or,
130
+ within a display generated by the Derivative Works, if and
131
+ wherever such third-party notices normally appear. The contents
132
+ of the NOTICE file are for informational purposes only and
133
+ do not modify the License. You may add Your own attribution
134
+ notices within Derivative Works that You distribute, alongside
135
+ or as an addendum to the NOTICE text from the Work, provided
136
+ that such additional attribution notices cannot be construed
137
+ as modifying the License.
138
+
139
+ You may add Your own copyright statement to Your modifications and
140
+ may provide additional or different license terms and conditions
141
+ for use, reproduction, or distribution of Your modifications, or
142
+ for any such Derivative Works as a whole, provided Your use,
143
+ reproduction, and distribution of the Work otherwise complies with
144
+ the conditions stated in this License.
145
+
146
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
147
+ any Contribution intentionally submitted for inclusion in the Work
148
+ by You to the Licensor shall be under the terms and conditions of
149
+ this License, without any additional terms or conditions.
150
+ Notwithstanding the above, nothing herein shall supersede or modify
151
+ the terms of any separate license agreement you may have executed
152
+ with Licensor regarding such Contributions.
153
+
154
+ 6. Trademarks. This License does not grant permission to use the trade
155
+ names, trademarks, service marks, or product names of the Licensor,
156
+ except as required for reasonable and customary use in describing the
157
+ origin of the Work and reproducing the content of the NOTICE file.
158
+
159
+ 7. Disclaimer of Warranty. Unless required by applicable law or
160
+ agreed to in writing, Licensor provides the Work (and each
161
+ Contributor provides its Contributions) on an "AS IS" BASIS,
162
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
163
+ implied, including, without limitation, any warranties or conditions
164
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
165
+ PARTICULAR PURPOSE. You are solely responsible for determining the
166
+ appropriateness of using or redistributing the Work and assume any
167
+ risks associated with Your exercise of permissions under this License.
168
+
169
+ 8. Limitation of Liability. In no event and under no legal theory,
170
+ whether in tort (including negligence), contract, or otherwise,
171
+ unless required by applicable law (such as deliberate and grossly
172
+ negligent acts) or agreed to in writing, shall any Contributor be
173
+ liable to You for damages, including any direct, indirect, special,
174
+ incidental, or consequential damages of any character arising as a
175
+ result of this License or out of the use or inability to use the
176
+ Work (including but not limited to damages for loss of goodwill,
177
+ work stoppage, computer failure or malfunction, or any and all
178
+ other commercial damages or losses), even if such Contributor
179
+ has been advised of the possibility of such damages.
180
+
181
+ 9. Accepting Warranty or Additional Liability. While redistributing
182
+ the Work or Derivative Works thereof, You may choose to offer,
183
+ and charge a fee for, acceptance of support, warranty, indemnity,
184
+ or other liability obligations and/or rights consistent with this
185
+ License. However, in accepting such obligations, You may act only
186
+ on Your own behalf and on Your sole responsibility, not on behalf
187
+ of any other Contributor, and only if You agree to indemnify,
188
+ defend, and hold each Contributor harmless for any liability
189
+ incurred by, or claims asserted against, such Contributor by reason
190
+ of your accepting any such warranty or additional liability.
191
+
192
+ END OF TERMS AND CONDITIONS
193
+
194
+ APPENDIX: How to apply the Apache License to your work.
195
+
196
+ To apply the Apache License to your work, attach the following
197
+ boilerplate notice, with the fields enclosed by brackets "[]"
198
+ replaced with your own identifying information. (Don't include
199
+ the brackets!) The text should be enclosed in the appropriate
200
+ comment syntax for the file format. We also recommend that a
201
+ file or class name and description of purpose be included on the
202
+ same "printed page" as the copyright notice for easier
203
+ identification within third-party archives.
204
+
205
+ Copyright [yyyy] [name of copyright owner]
206
+
207
+ Licensed under the Apache License, Version 2.0 (the "License");
208
+ you may not use this file except in compliance with the License.
209
+ You may obtain a copy of the License at
210
+
211
+ http://www.apache.org/licenses/LICENSE-2.0
212
+
213
+ Unless required by applicable law or agreed to in writing, software
214
+ distributed under the License is distributed on an "AS IS" BASIS,
215
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
216
+ See the License for the specific language governing permissions and
217
+ limitations under the License.
218
+
README.md CHANGED
@@ -1,6 +1,554 @@
1
- ---
2
- license: other
3
- license_name: openpangu-model-license-agreement-version-1.0
4
- license_link: >-
5
- https://ai.gitcode.com/ascend-tribe/openPangu-Embedded-7B-DeepDiver/blob/main/LICENSE
6
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 开源盘古 Embedded-7B-DeepDiver
2
+ 中文 | [English](README_EN.md)
3
+ 📑[技术报告](https://ai.gitcode.com/ascend-tribe/openPangu-Embedded-7B-DeepDiver/blob/main/docs/openpangu-deepdiver-v2-tech-report.pdf)
4
+
5
+
6
+ ## 1. 简介
7
+ DeepDiver是openPangu系列中定位于深度信息获取与处理的Agent,支持原生 Multi-Agent System(MAS),用于复杂知识问答与长文调研报告写作。
8
+
9
+ ### 特性
10
+ - 🔍 支持QA模式:回答100步+复杂知识性问题
11
+ - ✍️ 支持长文写作模式:撰写3w+字文章与报告
12
+ - 🔄 支持自适应模式:根据用户问题自动选择知识问答模式或长文写作模式
13
+
14
+ ## 2. 评测结果
15
+
16
+ | 测评集 | 测评指标 | openPangu-7B-DeepDiver|
17
+ | :------------: | :-----------------: | :--------: |
18
+ | **BrowseComp-zh** | Acc | 18.3 |
19
+ | **BrowseComp-en** | Acc | 8.3 |
20
+ |**XBench-DeepSearch** | Acc | 39.0 |
21
+
22
+ 注:上表仅展示复杂问答的结果,长文调研的评测结果请参考[技术报告](https://ai.gitcode.com/ascend-tribe/openPangu-Embedded-7B-DeepDiver/blob/main/docs/openpangu-deepdiver-v2-tech-report.pdf)
23
+
24
+ ## 3. 快速部署
25
+
26
+ ### 3.1 环境准备
27
+
28
+ ```bash
29
+ # 克隆并安装
30
+ git clone <repository-url>
31
+ cd deepdiver_v2
32
+ pip install -r requirements.txt
33
+ ```
34
+
35
+ ### 3.2 部署推理服务
36
+
37
+ #### 拉取镜像
38
+
39
+ ```
40
+ docker pull quay.io/ascend/vllm-ascend:v0.9.2rc1
41
+ ```
42
+
43
+ 或按照[官方文档](https://vllm-ascend.readthedocs.io/en/stable/installation.html)手动构建 docker 容器。
44
+
45
+ #### 运行容器
46
+
47
+ ```
48
+ docker run -itd --name vllm-deepdiver \
49
+ --network host \
50
+ --device /dev/davinci0 \
51
+ --device /dev/davinci1 \
52
+ --device /dev/davinci2 \
53
+ --device /dev/davinci3 \
54
+ --device /dev/davinci4 \
55
+ --device /dev/davinci5 \
56
+ --device /dev/davinci6 \
57
+ --device /dev/davinci7 \
58
+ -u root \
59
+ --device /dev/davinci_manager \
60
+ --device /dev/devmm_svm \
61
+ --device /dev/hisi_hdc \
62
+ -v /usr/local/dcmi:/usr/local/dcmi:ro \
63
+ -v /usr/local/Ascend/driver/tools/hccn_tool:/usr/local/Ascend/driver/tools/hccn_tool:ro \
64
+ -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi:ro \
65
+ -v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/:ro \
66
+ -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info:ro \
67
+ -v /etc/ascend_install.info:/etc/ascend_install.info:ro \
68
+ -v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware:ro \
69
+ -v /data:/data:ro \
70
+ -v /home/work:/home/work \ # 配置一个可读写的工作目录
71
+ quay.io/ascend/vllm-ascend:v0.9.2rc1
72
+ ```
73
+
74
+ #### 进入容器
75
+
76
+
77
+ ```
78
+ docker exec -itu root vllm-deepdiver bash
79
+ ```
80
+
81
+ 注意:必须使用 `-itu root`。
82
+
83
+ #### 复制 Pangu 的 modeling 文件
84
+
85
+ `open_pangu.py` 和 `__init__.py` 可以在[这里](https://ai.gitcode.com/ascend-tribe/openpangu-embedded-7b-model/tree/main/inference/vllm_ascend/models)找到。
86
+
87
+ ```
88
+ cp ./vllm_ascend/open_pangu.py /vllm-workspace/vllm-ascend/vllm_ascend/models/
89
+ cp ./vllm_ascend/__init__.py /vllm-workspace/vllm-ascend/vllm_ascend/models/
90
+ ```
91
+
92
+ #### 启动部署
93
+
94
+ ```
95
+ PRECHECKPOINT_PATH="path/to/deepdiver_model"
96
+
97
+ export VLLM_USE_V1=1
98
+
99
+ export VLLM_WORKER_MULTIPROC_METHOD=fork
100
+ # export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
101
+
102
+ vllm serve $PRECHECKPOINT_PATH \
103
+ --served-model-name ${SERVED_MODEL_NAME:=pangu_auto} \
104
+ --tensor-parallel-size ${tensor_parallel_size:=8} \
105
+ --trust-remote-code \
106
+ --host 127.0.0.1 \
107
+ --port 8888 \
108
+ --max-num-seqs 256 \
109
+ --max-model-len ${MAX_MODEL_LEN:=131072} \
110
+ --max-num-batched-tokens ${MAX_NUM_BATCHED_TOKENS:=4096} \
111
+ --tokenizer-mode "slow" \
112
+ --dtype bfloat16 \
113
+ --distributed-executor-backend mp \
114
+ --gpu-memory-utilization 0.93 \
115
+ ```
116
+
117
+ #### 测试部署
118
+
119
+ ```
120
+ curl -X POST http://127.0.0.1:8888/v1/completions -H "Content-Type: application/json" -d '{
121
+ "model": "pangu_auto",
122
+ "prompt": ["Tell me who you are?"],
123
+ "max_tokens": 50
124
+ }'
125
+ ```
126
+
127
+ ### 3.3 实现所需工具
128
+
129
+ 在启动服务器前,你需要为 web search 与 URL 抓取工具实现自定义逻辑。
130
+
131
+ #### Web Search(`_generic_search`)
132
+
133
+ 位置:`src/tools/mcp_tools.py` - `_generic_search` 方法
134
+
135
+ 将 `NotImplementedError` 替换为你的搜索工具实现:
136
+
137
+ ```python
138
+ def _generic_search(self, query: str, max_results: int, config: Dict[str, Any]) -> MCPToolResult:
139
+ """Your custom search implementation - based on the commented code example"""
140
+ try:
141
+ # Example implementation for search API:
142
+ url = config.get('base_url', 'https://api.search-provider.com/search')
143
+ payload = json.dumps({"q": query, "num": max_results})
144
+ api_keys = config.get('api_keys', [])
145
+ headers = {
146
+ 'X-API-KEY': random.choice(api_keys),
147
+ 'Content-Type': 'application/json'
148
+ }
149
+
150
+ response = requests.post(url, data=payload, headers=headers)
151
+ response.raise_for_status()
152
+
153
+ # Transform your API response to required format
154
+ search_results = {
155
+ "organic": [
156
+ {
157
+ "title": result["title"],
158
+ "link": result["link"],
159
+ "snippet": result["snippet"],
160
+ "date": result.get("date", "unknown")
161
+ }
162
+ for result in response.json().get("organic", [])
163
+ ]
164
+ }
165
+
166
+ return MCPToolResult(success=True, data=search_results)
167
+
168
+ except Exception as e:
169
+ return MCPToolResult(success=False, error=f"Generic search failed: {e}")
170
+ ```
171
+
172
+ #### URL Crawler(`url_crawler` 与 `_content_extractor`)
173
+
174
+ 位置:`src/tools/mcp_tools.py` - `_content_extractor`
175
+
176
+ 将 `NotImplementedError` 部分替换为你的网页抓取工具实现:
177
+
178
+ ```python
179
+ # Example implementation for content extractor:
180
+ crawler_url = f"{crawler_config.get('base_url', 'https://api.content-extractor.com')}/{url}"
181
+ response = requests.get(crawler_url, headers=headers, timeout=crawler_config.get('timeout', 30))
182
+ response.raise_for_status()
183
+
184
+ content = response.text
185
+
186
+ # Truncate if needed
187
+ if max_tokens and len(content.split()) > max_tokens:
188
+ words = content.split()[:max_tokens]
189
+ content = ' '.join(words) + '...'
190
+
191
+ return MCPToolResult(success=True, data=content)
192
+ ```
193
+
194
+ #### ⚠️ 第三方服务提示
195
+
196
+ 重要:搜索与抓取工具使用外部 API 由用户自行选择和实现。我们不对以下情况负责:
197
+ - 与第三方服务相关的隐私/安全问题
198
+ - 搜索/抓取活动的合规性
199
+ - 内容准确性或版权问题
200
+ - API 停机或变更
201
+
202
+ 使用这些服务需自担风险。请查看其条款与隐私政策。
203
+
204
+ ### 3.4 必要配置
205
+
206
+ #### 配置 .env 文件
207
+ 复制 `env.template` 到 `config/.env` 并配置如下选项:
208
+
209
+ ```bash
210
+ # LLM Service
211
+ MODEL_REQUEST_URL=http://localhost:8888/v1/chat/completions # 你的 LLM endpoint
212
+
213
+ # Agent 限制
214
+ PLANNER_MODE=auto # 在 auto、writing 或 qa 模式间切换
215
+
216
+ # 外部 API(先实现函数)
217
+ SEARCH_ENGINE_BASE_URL= # 搜索 API endpoint
218
+ SEARCH_ENGINE_API_KEYS= # 搜索 API keys
219
+ URL_CRAWLER_BASE_URL= # URL Crawler API endpoint
220
+ URL_CRAWLER_API_KEYS= # URL Crawler API keys
221
+ ```
222
+
223
+ ⚠️ 注意:
224
+ - 请将上一步部署的推理服务 URL 配置到 `MODEL_REQUEST_URL`
225
+ - 在 `PLANNER_MODE` 中指定模式。`auto` 会自动决策回答复杂问题或生成长文;若希望优先长文写作,可设置为 `writing`;若希望专注解决高难度问题,可设置为 `qa`
226
+
227
+ ### 3.5 启动工具服务
228
+
229
+ ```bash
230
+ python src/tools/mcp_server_standard.py
231
+ ```
232
+
233
+ ### 3.6 运行Demo
234
+
235
+ ```bash
236
+ # 交互模式
237
+ python cli/demo.py
238
+
239
+ # 单次查询
240
+ python cli/demo.py -q "$your_query"
241
+ ```
242
+
243
+ 基于上述步骤可以快速运行DeepDiver,如果需要二次开发,可以参考[章节4](#4-自定义工具开发指南)和[5](#5-个性化配置)
244
+
245
+ ## 4. 自定义工具开发指南
246
+
247
+ 当前工具主要分为内置工具和外部MCP工具,内置工具主要包括分发任务,思考/反思等,外部MCP工具则是一些延伸LLM能力的工具,如搜索互联网,爬取链接,下载和读写文件等。
248
+
249
+ ### 4.1 已实现的工具类别
250
+
251
+ #### A. 外部MCP工具
252
+ Web Search 与数据采集:
253
+ - `batch_web_search`:多查询 web 搜索
254
+ - `url_crawler`:从 URL 抽取内容
255
+ - `download_files`:从 URL 下载文件
256
+
257
+ 文件操作:
258
+ - `file_read`、`file_write`:基础文件 I/O
259
+ - `list_workspace`:目录列表
260
+
261
+ 文档处理与内容创作:
262
+ - `document_qa`:针对特定文档问答
263
+ - `document_extract`:多格式文本抽取
264
+ - `section_writer`:结构化内容生成
265
+
266
+ #### B. 内置工具
267
+ - `think`、`reflect`:推理与规划
268
+ - `task_done`:任务完成汇报
269
+ - `assign_task_xxx`: 分发任务并创建子智能体
270
+
271
+ ### 4.2 开发并集成新的外部MCP工具
272
+
273
+ #### A. 实现新的MCP工具
274
+ 位置:`src/tools/mcp_tools.py` - 在 `MCPTools` 类中添加方法
275
+
276
+ ```python
277
+ def your_new_tool(self, param1: str, param2: int) -> MCPToolResult:
278
+ """
279
+ Description of what your tool does.
280
+
281
+ Args:
282
+ param1: Description of parameter 1
283
+ param2: Description of parameter 2
284
+
285
+ Returns:
286
+ MCPToolResult: Standardized result format
287
+ """
288
+ try:
289
+ # Your tool implementation here
290
+ result_data = {
291
+ "output": "Tool result",
292
+ "processed_items": param2
293
+ }
294
+
295
+ return MCPToolResult(
296
+ success=True,
297
+ data=result_data,
298
+ metadata={"tool_name": "your_new_tool"}
299
+ )
300
+
301
+ except Exception as e:
302
+ logger.error(f"Tool execution failed: {e}")
303
+ return MCPToolResult(
304
+ success=False,
305
+ error=f"Tool failed: {str(e)}"
306
+ )
307
+ ```
308
+
309
+ #### B. 在服务器中注册工具
310
+
311
+ ##### 添加工具 Schema
312
+ 位置:`src/tools/mcp_tools.py` - 添加到 `MCP_TOOL_SCHEMAS` 字典
313
+
314
+ ```python
315
+ MCP_TOOL_SCHEMAS = {
316
+ # ... existing tools ...
317
+
318
+ "your_new_tool": {
319
+ "name": "your_new_tool",
320
+ "description": "Brief description of what your tool does",
321
+ "inputSchema": {
322
+ "type": "object",
323
+ "properties": {
324
+ "param1": {
325
+ "type": "string",
326
+ "description": "Description of parameter 1"
327
+ },
328
+ "param2": {
329
+ "type": "integer",
330
+ "default": 10,
331
+ "description": "Description of parameter 2"
332
+ }
333
+ },
334
+ "required": ["param1"]
335
+ }
336
+ }
337
+ }
338
+ ```
339
+
340
+ ##### 注册工具函数
341
+ 位置:`src/tools/mcp_server_standard.py` - 添加到 `get_tool_function()`
342
+
343
+ ```python
344
+ def get_tool_function(tool_name: str):
345
+ """Get the actual function for a tool"""
346
+ tool_map = {
347
+ # ... existing tools ...
348
+ "your_new_tool": lambda tools, **kwargs: tools.your_new_tool(**kwargs),
349
+ }
350
+ return tool_map.get(tool_name)
351
+ ```
352
+
353
+ #### C. 让特定智能体可使用工具
354
+ 工具对各智能体的可见性由 MCP client 中的预定义工具集控制。
355
+
356
+ 位置:`src/tools/mcp_client.py` - 修改各智能体的工具集
357
+
358
+ ```python
359
+ # Define which MCP server tools each agent can access
360
+ PLANNER_AGENT_TOOLS = [
361
+ "download_files",
362
+ "document_qa",
363
+ "file_read",
364
+ "file_write",
365
+ "str_replace_based_edit_tool",
366
+ "list_workspace",
367
+ "file_find_by_name",
368
+ "your_new_tool", # Add your new tool here
369
+ ]
370
+
371
+ INFORMATION_SEEKER_TOOLS = [
372
+ "batch_web_search",
373
+ "url_crawler",
374
+ "document_extract",
375
+ "document_qa",
376
+ "download_files",
377
+ "file_read",
378
+ "file_write",
379
+ "str_replace_based_edit_tool",
380
+ "list_workspace",
381
+ "file_find_by_name",
382
+ "your_new_tool", # Add your new tool here if needed
383
+ ]
384
+
385
+ WRITER_AGENT_TOOLS = [
386
+ "file_read",
387
+ "list_workspace",
388
+ "file_find_by_name",
389
+ "search_result_classifier",
390
+ "section_writer",
391
+ "concat_section_files",
392
+ # Add your tool if the writer agent needs it
393
+ ]
394
+ ```
395
+
396
+ ### 4.3 添加内置智能体工具/函数
397
+
398
+ #### A. 带有真实返回的工具/函数
399
+ DeepDiver中的agent,如planner,集成了`assign_subjective_task_to_writer`, `assign_multi_objective_tasks_to_info_seeker` 等内置函数作为工具, 这类函数除了具体实现之外,还需要使用`_build_agent_specific_tool_schemas()` 添加专属的tool schema。
400
+
401
+ 位置:`src/agents/your_agent.py`
402
+
403
+ ```python
404
+ def _build_agent_specific_tool_schemas(self) -> List[Dict[str, Any]]:
405
+ """Add built-in agent functions (not MCP server tools)"""
406
+
407
+ # Get base schemas from MCP server via client
408
+ schemas = super()._build_agent_specific_tool_schemas()
409
+
410
+ # Add agent-specific built-in functions like task assignment, completion reporting
411
+ builtin_functions = [
412
+ {
413
+ "type": "function",
414
+ "function": {
415
+ "name": "agent_specific_task_done",
416
+ "description": "Report task completion for this agent",
417
+ "parameters": {
418
+ "type": "object",
419
+ "properties": {
420
+ "result": {"type": "string", "description": "Task result"},
421
+ "status": {"type": "string", "description": "Completion status"}
422
+ },
423
+ "required": ["result", "status"]
424
+ }
425
+ }
426
+ }
427
+ ]
428
+
429
+ schemas.extend(builtin_functions)
430
+ return schemas
431
+ ```
432
+
433
+ #### B. 带有伪返回的内置工具
434
+ DeepDiver中的cognitive tools,比如think和reflect等,这些工具实际没有具体实现,agent在调用这些工具时通过生成工具入参,就已经完成了工具的调用。可以直接在模型生成完入参后,使用类似以下方法进行返回,继续让模型完成后续工作 (参考`planner_agent.py` 中`_execute_react_loop()`的实现):
435
+
436
+ ```python
437
+ if tool_call["name"] in ["think", "reflect"]:
438
+ tool_result = {"tool_results": "You can proceed to invoke other tools if needed. "}
439
+ ```
440
+
441
+ 同理,这种内置工具也需要使用`_build_agent_specific_tool_schemas()` 添加专属的tool schema。
442
+
443
+
444
+ ## 5. 个性化配置
445
+
446
+ ### 5.1 Client 配置
447
+
448
+ 复制 `env.template` 到 `config/.env` 并配置如下选项:
449
+
450
+ ```bash
451
+ # LLM Service
452
+ MODEL_REQUEST_URL=http://localhost:8000 # 你的 LLM endpoint
453
+ MODEL_REQUEST_TOKEN=your-token # LLM auth token
454
+ MODEL_NAME=pangu_auto # 模型名
455
+ MODEL_TEMPERATURE=0.3 # 随机度(0.0-1.0)
456
+ MODEL_MAX_TOKENS=8192 # 最大回复长度
457
+ MODEL_REQUEST_TIMEOUT=60 # 请求超时(秒)
458
+
459
+ # Agent 限制
460
+ PLANNER_MAX_ITERATION=40 # Planner 最大 ReAct 步数
461
+ INFORMATION_SEEKER_MAX_ITERATION=30 # 信息搜集最大 ReAct 步数
462
+ WRITER_MAX_ITERATION=40 # Writer 最大 ReAct 步数
463
+ PLANNER_MODE=auto # auto / 长文优先 / qa 优先
464
+
465
+ # MCP Server
466
+ MCP_SERVER_URL=http://localhost:6274/mcp # MCP server endpoint
467
+ MCP_USE_STDIO=false # 使用 stdio 或 HTTP
468
+
469
+ # 外部 API(先实现函数)
470
+ SEARCH_ENGINE_BASE_URL= # 搜索 API endpoint
471
+ SEARCH_ENGINE_API_KEYS= # 搜索 API keys
472
+ URL_CRAWLER_BASE_URL= # URL Crawler API endpoint
473
+ URL_CRAWLER_API_KEYS= # URL Crawler API keys
474
+ URL_CRAWLER_MAX_TOKENS=100000 # URL Crawler 内容最大长度
475
+
476
+ # 存储路径
477
+ TRAJECTORY_STORAGE_PATH=./workspace # Agent工作目录
478
+ REPORT_OUTPUT_PATH=./report # 报告输出目录
479
+ DOCUMENT_ANALYSIS_PATH=./doc_analysis # 文档分析目录
480
+
481
+ # 系统
482
+ DEBUG_MODE=false # 是否开启调试日志
483
+ MAX_RETRIES=3 # API 重试次数
484
+ TIMEOUT=30 # 通用超时(秒)
485
+ ```
486
+
487
+ ### 5.2 Server 配置(server_config.yaml)
488
+
489
+ `server_config.yaml` 控制服务器行为、工具限流与运行设置:
490
+
491
+ #### 核心服务器设置
492
+
493
+ ```yaml
494
+ server:
495
+ host: "127.0.0.1" # 服务器绑定地址
496
+ port: 6274 # 端口
497
+ debug_mode: false # 调试日志
498
+ session_ttl_seconds: 21600 # 会话过期(6小时)
499
+ max_sessions: 1000 # 并发会话上限
500
+ ```
501
+
502
+ #### 工具限流
503
+
504
+ 对所有会话的外部 API 使用进行控制:
505
+
506
+ ```yaml
507
+ tool_rate_limits:
508
+ batch_web_search:
509
+ requests_per_minute: 9000 # 每分钟限制
510
+ burst_limit: 35 # 短时突发
511
+
512
+ url_crawler:
513
+ requests_per_minute: 9000
514
+ burst_limit: 60
515
+ ```
516
+
517
+ #### 会话管理
518
+
519
+ ```yaml
520
+ server:
521
+ cleanup_interval_seconds: 600 # 清理过期会话(10分钟)
522
+ enable_session_keepalive: true # 长时操作期间保活
523
+ keepalive_touch_interval: 300 # 保活触发间隔(秒)
524
+ ```
525
+
526
+ #### 安全与性能
527
+
528
+ ```yaml
529
+ server:
530
+ request_timeout_seconds: 1800 # 请求超时
531
+ max_request_size_mb: 1000 # 最大请求体
532
+ rate_limit_requests_per_minute: 300000 # 每 IP 限流
533
+ ```
534
+
535
+ 配置文件包含对每项设置的详细注释。请根据你的部署需求与外部 API 限额进行调整。
536
+
537
+ ## 6. 模型许可证
538
+
539
+ 除文件中对开源许可证另有约定外,openPangu-Embedded-7B-DeepDiver 模型根据 OPENPANGU MODEL LICENSE AGREEMENT VERSION 1.0 授权,旨在允许使用并促进人工智能技术的进一步发展。有关详细信息,请参阅模型存储库根目录中的 [LICENSE](LICENSE) 文件。
540
+
541
+ ## 7. 安全提示与免责声明
542
+ 由于 openPangu-Embedded-7B-DeepDiver 模型和框架所依赖的技术固有的技术限制,以及人工智能生成的内容是由盘古自动生成的,华为无法对以下事项做出任何保证:
543
+
544
+ - 尽管该模型的输出由 AI 算法生成,但不能排除某些信息可能存在缺陷、不合理或引起不适的可能性,生成的内容不代表华为的态度或立场;
545
+ - 无法保证该模型 100% 准确、可靠、功能齐全、及时、安全、无错误、不间断、持续稳定或无任何故障;
546
+ - 该模型的输出内容不构成任何建议或决策,也不保证生成的内容的真实性、完整性、准确性、及时性、合法性、功能性或实用性。生成的内容不能替代医疗、法律等领域的专业人士回答您的问题。生成的内容仅供参考,不代表华为的任何态度、立场或观点。您需要根据实际情况做出独立判断,华为不承担任何责任;
547
+ - DeepDiver MAS系统的组件间通信不包含内置的数据加密或认证(如 tokens、签名)。你需要自行评估安全需求并实施相应防护(例如运行在加密网络中、加入 SSL/TLS、强制组件身份校验);
548
+ - 由于缺乏加密/认证导致的任何安全事件(数据泄露、未授权访问、业务损失)由使用方自行承担。项目开发者不承担责任。
549
+
550
+ ## 8. 反馈
551
+
552
+ 如果有任何意见和建议,请提交issue或联系 openPangu@huawei.com。
553
+
554
+ ---
README_EN.md ADDED
@@ -0,0 +1,554 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # openPangu-Embedded-7B-DeepDiver
2
+ [中文](README.md) | English
3
+ 📑[Technical Report](https://ai.gitcode.com/ascend-tribe/openPangu-Embedded-7B-DeepDiver/blob/main/docs/openpangu-deepdiver-v2-tech-report.pdf)
4
+
5
+ ## 1. Introduction
6
+ DeepDiver is an agentic solution within openPangu series aimed at deep information seeking and processing, which natively supports the Multi-Agent System (MAS) and is designed for complex question answering and long-form report writing.
7
+
8
+ ### Features
9
+ - 🔍 Supports QA Mode: Capable of answering 100+ steps of complex knowledge-based questions.
10
+ - ✍️ Supports Long-form Writing Mode: Enables the creation of articles and reports with 30,000+ words.
11
+ - 🔄 Supports Adaptive Mode: Automatically selects between QA Mode and Long-form Writing Mode based on user queries.
12
+
13
+ ## 2. Results
14
+
15
+ | Benchmark | Metric | openPangu-7B-DeepDiver|
16
+ | :------------: | :-----------------: | :--------: |
17
+ | **BrowseComp-zh** | Acc | 18.3 |
18
+ | **BrowseComp-en** | Acc | 8.3 |
19
+ | **XBench-DeepSearch** | Acc | 39.0 |
20
+
21
+ Note: The table above only displays the results of complex QA. For the evaluation results of long-form report writing, please refer to the [technical report](https://ai.gitcode.com/ascend-tribe/openPangu-Embedded-7B-DeepDiver/blob/main/docs/openpangu-deepdiver-v2-tech-report.pdf)
22
+
23
+ ## 3. Quick Start
24
+
25
+ ### 3.1 Setup
26
+
27
+ ```bash
28
+ # Clone and install
29
+ git clone <repository-url>
30
+ cd deepdiver_v2
31
+ pip install -r requirements.txt
32
+ ```
33
+
34
+ ### 3.2 Deployment of the Inference Service
35
+
36
+ #### Pull Images
37
+
38
+ ```
39
+ docker pull quay.io/ascend/vllm-ascend:v0.9.2rc1
40
+ ```
41
+
42
+ Or follow the [official documentation](https://vllm-ascend.readthedocs.io/en/stable/installation.html) to build the docker container manually.
43
+
44
+ #### Run Docker Container
45
+
46
+ ```
47
+ docker run -itd --name vllm-deepdiver \
48
+ --network host \
49
+ --device /dev/davinci0 \
50
+ --device /dev/davinci1 \
51
+ --device /dev/davinci2 \
52
+ --device /dev/davinci3 \
53
+ --device /dev/davinci4 \
54
+ --device /dev/davinci5 \
55
+ --device /dev/davinci6 \
56
+ --device /dev/davinci7 \
57
+ -u root \
58
+ --device /dev/davinci_manager \
59
+ --device /dev/devmm_svm \
60
+ --device /dev/hisi_hdc \
61
+ -v /usr/local/dcmi:/usr/local/dcmi:ro \
62
+ -v /usr/local/Ascend/driver/tools/hccn_tool:/usr/local/Ascend/driver/tools/hccn_tool:ro \
63
+ -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi:ro \
64
+ -v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/:ro \
65
+ -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info:ro \
66
+ -v /etc/ascend_install.info:/etc/ascend_install.info:ro \
67
+ -v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware:ro \
68
+ -v /data:/data:ro \
69
+ -v /home/work:/home/work \ # set a working dir
70
+ quay.io/ascend/vllm-ascend:v0.9.2rc1
71
+ ```
72
+
73
+ #### Enter the Container
74
+
75
+ ```
76
+ docker exec -itu root vllm-deepdiver bash
77
+ ```
78
+ Note that `-itu root` is necessary.
79
+
80
+ #### Copy Pangu's Modeling Files
81
+
82
+ `open_pangu.py` and `__init__.py` can be found at [here](https://ai.gitcode.com/ascend-tribe/openpangu-embedded-7b-model/tree/main/inference/vllm_ascend/models)
83
+
84
+ ```
85
+ cp ./vllm_ascend/open_pangu.py /vllm-workspace/vllm-ascend/vllm_ascend/models/
86
+ cp ./vllm_ascend/__init__.py /vllm-workspace/vllm-ascend/vllm_ascend/models/
87
+ ```
88
+
89
+ #### Start Deployment
90
+
91
+ ```
92
+ PRECHECKPOINT_PATH="path/to/deepdiver_model"
93
+
94
+ export VLLM_USE_V1=1
95
+
96
+ export VLLM_WORKER_MULTIPROC_METHOD=fork
97
+ # export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
98
+
99
+ vllm serve $PRECHECKPOINT_PATH \
100
+ --served-model-name ${SERVED_MODEL_NAME:=pangu_auto} \
101
+ --tensor-parallel-size ${tensor_parallel_size:=8} \
102
+ --trust-remote-code \
103
+ --host 127.0.0.1 \
104
+ --port 8888 \
105
+ --max-num-seqs 256 \
106
+ --max-model-len ${MAX_MODEL_LEN:=131072} \
107
+ --max-num-batched-tokens ${MAX_NUM_BATCHED_TOKENS:=4096} \
108
+ --tokenizer-mode "slow" \
109
+ --dtype bfloat16 \
110
+ --distributed-executor-backend mp \
111
+ --gpu-memory-utilization 0.93 \
112
+ ```
113
+
114
+ #### Test Deployment
115
+
116
+ ```
117
+ curl -X POST http://127.0.0.1:8888/v1/completions -H "Content-Type: application/json" -d '{
118
+ "model": "pangu_auto",
119
+ "prompt": ["Tell me who you are?"],
120
+ "max_tokens": 50
121
+ }'
122
+ ```
123
+
124
+ ### 3.3 Implement Required Tools
125
+
126
+ Before starting the server, you must implement custom logic for web search and URL crawling tools.
127
+
128
+ #### Web Search (`_generic_search`)
129
+
130
+ **Location**: `src/tools/mcp_tools.py` - `_generic_search` method
131
+
132
+ Replace the `NotImplementedError` with your search API integration:
133
+
134
+ ```python
135
+ def _generic_search(self, query: str, max_results: int, config: Dict[str, Any]) -> MCPToolResult:
136
+ """Your custom search implementation - based on the commented code example"""
137
+ try:
138
+ # Example implementation for search API:
139
+ url = config.get('base_url', 'https://api.search-provider.com/search')
140
+ payload = json.dumps({"q": query, "num": max_results})
141
+ api_keys = config.get('api_keys', [])
142
+ headers = {
143
+ 'X-API-KEY': random.choice(api_keys),
144
+ 'Content-Type': 'application/json'
145
+ }
146
+
147
+ response = requests.post(url, data=payload, headers=headers)
148
+ response.raise_for_status()
149
+
150
+ # Transform your API response to required format
151
+ search_results = {
152
+ "organic": [
153
+ {
154
+ "title": result["title"],
155
+ "link": result["link"],
156
+ "snippet": result["snippet"],
157
+ "date": result.get("date", "unknown")
158
+ }
159
+ for result in response.json().get("organic", [])
160
+ ]
161
+ }
162
+
163
+ return MCPToolResult(success=True, data=search_results)
164
+
165
+ except Exception as e:
166
+ return MCPToolResult(success=False, error=f"Generic search failed: {e}")
167
+ ```
168
+
169
+ #### URL Crawler (`url_crawler` and `_content_extractor`)
170
+
171
+ **Location**: `src/tools/mcp_tools.py` - `_content_extractor`
172
+
173
+ Replace the `NotImplementedError` section with your crawler API integration:
174
+
175
+ ```python
176
+ # Example implementation for content extractor:
177
+ crawler_url = f"{crawler_config.get('base_url', 'https://api.content-extractor.com')}/{url}"
178
+ response = requests.get(crawler_url, headers=headers, timeout=crawler_config.get('timeout', 30))
179
+ response.raise_for_status()
180
+
181
+ content = response.text
182
+
183
+ # Truncate if needed
184
+ if max_tokens and len(content.split()) > max_tokens:
185
+ words = content.split()[:max_tokens]
186
+ content = ' '.join(words) + '...'
187
+
188
+ return MCPToolResult(success=True, data=content)
189
+ ```
190
+
191
+ #### ⚠️ **Third-Party Service Notice**
192
+
193
+ **Important**: Search and crawler tools use external APIs (your choice). We're not responsible for:
194
+ - Privacy/security issues with third-party services
195
+ - Legal compliance with search/crawling activities
196
+ - Content accuracy or copyright issues
197
+ - API downtime or changes
198
+
199
+ Use these services at your own risk. Check their terms and privacy policies.
200
+
201
+ ### 3.4 Mandatory Configuration
202
+
203
+ #### Configure the .env file
204
+ Copy `env.template` to `config/.env` and configure these options:
205
+
206
+ ```bash
207
+ # LLM Service
208
+ MODEL_REQUEST_URL=http://localhost:8888/v1/chat/completions # Your LLM endpoint
209
+
210
+ # Agent Limits
211
+ PLANNER_MODE=auto # Switching between the auto mode, writing mode, or qa mode.
212
+
213
+ # External APIs (implement functions first)
214
+ SEARCH_ENGINE_BASE_URL= # Search API endpoint
215
+ SEARCH_ENGINE_API_KEYS= # Search API keys
216
+ URL_CRAWLER_BASE_URL= # Crawler API endpoint
217
+ URL_CRAWLER_API_KEYS= # Crawler API keys
218
+ ```
219
+
220
+ **⚠️ Important:**
221
+ - Please configure the URL for deploying the inference service from the previous step in `MODEL_REQUEST_URL`
222
+ - Specify the mode in `PLANNER_MODE`. The `auto` mode is designed to automatically determine whether to answer complex questions or generate long-form reports. However, if you wish to prioritize long-form writing, you can set the PLANNER_MODE to ```writing```. Alternatively, if you want to focus solely on solving highly complex problems, configure the mode as ```qa```
223
+
224
+ ### 3.5 Start the Tool Server
225
+
226
+ ```bash
227
+ python src/tools/mcp_server_standard.py
228
+ ```
229
+
230
+ ### 3.6 Run the Demo
231
+
232
+ ```bash
233
+ # Interactive mode
234
+ python cli/demo.py
235
+
236
+ # Single query
237
+ python cli/demo.py -q "$your_query"
238
+ ```
239
+
240
+ Based on the above steps, DeepDiver can be quickly executed. If further development is required, you can refer to [Section 4](#4-customized-tool-development-guide) and [5](#5-customized-configuration).
241
+
242
+
243
+ ## 4. Customized Tool Development Guide
244
+ Currently, tools are mainly categorized into Built-in Tools and External MCP Tools. Built-in Tools primarily include task assignment, think/reflect, etc. External MCP Tools are extensions that enhance LLM capabilities, such as web search, url crawl, file download, read, and write.
245
+
246
+ ### 4.1 Implemented Tool Categories
247
+
248
+ #### A. External MCP Tools
249
+ Web Search and Data Collection:
250
+ - `batch_web_search`: Multi-query web search
251
+ - `url_crawler`: Extract content from URLs
252
+ - `download_files`: Download files from URLs
253
+
254
+ File Operations:
255
+ - `file_read`, `file_write`: Basic file I/O
256
+ - `list_workspace`: Directory listing
257
+
258
+ Document Processing and Content Creation:
259
+ - `document_qa`: Question-answering on documents
260
+ - `document_extract`: Extract text from various formats
261
+ - `section_writer`: Structured content generation
262
+
263
+
264
+ #### B. Built-in Tools
265
+ - `think`, `reflect`: Reasoning and planning
266
+ - `task_done`: Task completion reporting
267
+ - `assign_task_xxx`: Assign tasks and create sub-agents
268
+
269
+
270
+ ### 4.2 Develop and Integrate New External MCP Tools
271
+
272
+ #### A. Implementing a New MCP Tool
273
+ Location: `src/tools/mcp_tools.py` - Add a method to the `MCPTools` class
274
+
275
+ ```python
276
+ def your_new_tool(self, param1: str, param2: int) -> MCPToolResult:
277
+ """
278
+ Description of what your tool does.
279
+
280
+ Args:
281
+ param1: Description of parameter 1
282
+ param2: Description of parameter 2
283
+
284
+ Returns:
285
+ MCPToolResult: Standardized result format
286
+ """
287
+ try:
288
+ # Your tool implementation here
289
+ result_data = {
290
+ "output": "Tool result",
291
+ "processed_items": param2
292
+ }
293
+
294
+ return MCPToolResult(
295
+ success=True,
296
+ data=result_data,
297
+ metadata={"tool_name": "your_new_tool"}
298
+ )
299
+
300
+ except Exception as e:
301
+ logger.error(f"Tool execution failed: {e}")
302
+ return MCPToolResult(
303
+ success=False,
304
+ error=f"Tool failed: {str(e)}"
305
+ )
306
+ ```
307
+
308
+ #### B. Registering the Tool on the Server
309
+
310
+ ##### Adding Tool Schema
311
+ Location: `src/tools/mcp_tools.py` - Add to the `MCP_TOOL_SCHEMAS` dictionary
312
+
313
+ ```python
314
+ MCP_TOOL_SCHEMAS = {
315
+ # ... existing tools ...
316
+
317
+ "your_new_tool": {
318
+ "name": "your_new_tool",
319
+ "description": "Brief description of what your tool does",
320
+ "inputSchema": {
321
+ "type": "object",
322
+ "properties": {
323
+ "param1": {
324
+ "type": "string",
325
+ "description": "Description of parameter 1"
326
+ },
327
+ "param2": {
328
+ "type": "integer",
329
+ "default": 10,
330
+ "description": "Description of parameter 2"
331
+ }
332
+ },
333
+ "required": ["param1"]
334
+ }
335
+ }
336
+ }
337
+ ```
338
+
339
+ ##### Registering the Tool Function
340
+ Location: `src/tools/mcp_server_standard.py` - Add to `get_tool_function()`
341
+
342
+ ```python
343
+ def get_tool_function(tool_name: str):
344
+ """Get the actual function for a tool"""
345
+ tool_map = {
346
+ # ... existing tools ...
347
+ "your_new_tool": lambda tools, **kwargs: tools.your_new_tool(**kwargs),
348
+ }
349
+ return tool_map.get(tool_name)
350
+ ```
351
+
352
+ #### C. Making the Tool Accessible to Specific Agents
353
+ The visibility of tools to each agent is controlled by the predefined tool sets in the MCP client.
354
+
355
+ Location: `src/tools/mcp_client.py` - Modify the tool sets for each agent
356
+
357
+ ```python
358
+ # Define which MCP server tools each agent can access
359
+ PLANNER_AGENT_TOOLS = [
360
+ "download_files",
361
+ "document_qa",
362
+ "file_read",
363
+ "file_write",
364
+ "str_replace_based_edit_tool",
365
+ "list_workspace",
366
+ "file_find_by_name",
367
+ "your_new_tool", # Add your new tool here
368
+ ]
369
+
370
+ INFORMATION_SEEKER_TOOLS = [
371
+ "batch_web_search",
372
+ "url_crawler",
373
+ "document_extract",
374
+ "document_qa",
375
+ "download_files",
376
+ "file_read",
377
+ "file_write",
378
+ "str_replace_based_edit_tool",
379
+ "list_workspace",
380
+ "file_find_by_name",
381
+ "your_new_tool", # Add your new tool here if needed
382
+ ]
383
+
384
+ WRITER_AGENT_TOOLS = [
385
+ "file_read",
386
+ "list_workspace",
387
+ "file_find_by_name",
388
+ "search_result_classifier",
389
+ "section_writer",
390
+ "concat_section_files",
391
+ # Add your tool if the writer agent needs it
392
+ ]
393
+ ```
394
+
395
+
396
+ ### 4.3 Adding Built-in Agent Tools/Functions
397
+
398
+ #### A. Tools/Functions with Actual Return Values
399
+ Agents in DeepDiver (e.g., the Planner) integrate built-in functions as tools, such as `assign_subjective_task_to_writer` and `assign_multi_objective_tasks_to_info_seeker`. In addition to their specific implementations, these functions require adding **agent-specific tool schemas** using `_build_agent_specific_tool_schemas()`.
400
+
401
+ Location: `src/agents/your_agent.py`
402
+
403
+ ```python
404
+ def _build_agent_specific_tool_schemas(self) -> List[Dict[str, Any]]:
405
+ """Add built-in agent functions (not MCP server tools)"""
406
+
407
+ # Get base schemas from MCP server via client
408
+ schemas = super()._build_agent_specific_tool_schemas()
409
+
410
+ # Add agent-specific built-in functions like task assignment, completion reporting
411
+ builtin_functions = [
412
+ {
413
+ "type": "function",
414
+ "function": {
415
+ "name": "agent_specific_task_done",
416
+ "description": "Report task completion for this agent",
417
+ "parameters": {
418
+ "type": "object",
419
+ "properties": {
420
+ "result": {"type": "string", "description": "Task result"},
421
+ "status": {"type": "string", "description": "Completion status"}
422
+ },
423
+ "required": ["result", "status"]
424
+ }
425
+ }
426
+ }
427
+ ]
428
+
429
+ schemas.extend(builtin_functions)
430
+ return schemas
431
+ ```
432
+
433
+
434
+ #### B. Built-in Tools with Pseudo Return Values
435
+ Cognitive tools in DeepDiver (e.g., `think` and `reflect`) have no specific implementation. When an agent calls these tools, the tool invocation is considered complete once the agent generates the tool's input parameters. You can directly return a result after the model generates the input parameters, allowing the model to continue with subsequent tasks (refer to the implementation of `_execute_react_loop()` in `planner_agent.py`):
436
+
437
+ ```python
438
+ if tool_call["name"] in ["think", "reflect"]:
439
+ tool_result = {"tool_results": "You can proceed to invoke other tools if needed. "}
440
+ ```
441
+
442
+ Similarly, such built-in tools also require adding their exclusive tool schemas using `_build_agent_specific_tool_schemas()`.
443
+
444
+ ## 5. Customized Configuration
445
+
446
+ ### 5.1 Client Configuration
447
+
448
+ Copy `env.template` to `config/.env` and configure these options:
449
+
450
+ ```bash
451
+ # LLM Service
452
+ MODEL_REQUEST_URL=http://localhost:8000 # Your LLM endpoint
453
+ MODEL_REQUEST_TOKEN=your-token # Auth token
454
+ MODEL_NAME=pangu_auto # Model name
455
+ MODEL_TEMPERATURE=0.3 # Response randomness (0.0-1.0)
456
+ MODEL_MAX_TOKENS=8192 # Max response length
457
+ MODEL_REQUEST_TIMEOUT=60 # Request timeout (seconds)
458
+
459
+ # Agent Limits
460
+ PLANNER_MAX_ITERATION=40 # Planner maximum ReAct steps
461
+ INFORMATION_SEEKER_MAX_ITERATION=30 # Info seeker maximum ReAct steps
462
+ WRITER_MAX_ITERATION=40 # Writer maximum ReAct steps
463
+ PLANNER_MODE=auto # Switching between the auto mode, long-form writing - priority mode, or the qa - priority mode.
464
+
465
+ # MCP Server
466
+ MCP_SERVER_URL=http://localhost:6274/mcp # MCP server endpoint
467
+ MCP_USE_STDIO=false # Use stdio vs HTTP
468
+
469
+ # External APIs (implement functions first)
470
+ SEARCH_ENGINE_BASE_URL= # Search API endpoint
471
+ SEARCH_ENGINE_API_KEYS= # Search API keys
472
+ URL_CRAWLER_BASE_URL= # Crawler API endpoint
473
+ URL_CRAWLER_API_KEYS= # Crawler API keys
474
+ URL_CRAWLER_MAX_TOKENS=100000 # Max crawled content length
475
+
476
+ # Storage Paths
477
+ TRAJECTORY_STORAGE_PATH=./workspace # Agent work directory
478
+ REPORT_OUTPUT_PATH=./report # Report output directory
479
+ DOCUMENT_ANALYSIS_PATH=./doc_analysis # Document analysis directory
480
+
481
+ # System
482
+ DEBUG_MODE=false # Enable debug logging
483
+ MAX_RETRIES=3 # API retry attempts
484
+ TIMEOUT=30 # General timeout (seconds)
485
+ ```
486
+
487
+ ### 5.2 Server Configuration (server_config.yaml)
488
+
489
+ The `server_config.yaml` file controls server behavior, tool rate limiting, and operational settings:
490
+
491
+ #### Core Server Settings
492
+
493
+ ```yaml
494
+ server:
495
+ host: "127.0.0.1" # Server bind address
496
+ port: 6274 # Server port
497
+ debug_mode: false # Enable debug logging
498
+ session_ttl_seconds: 21600 # Session timeout (6 hours)
499
+ max_sessions: 1000 # Max concurrent sessions
500
+ ```
501
+
502
+ #### Tool Rate Limiting
503
+
504
+ Controls external API usage across all sessions:
505
+
506
+ ```yaml
507
+ tool_rate_limits:
508
+ batch_web_search:
509
+ requests_per_minute: 9000 # Per-minute limit
510
+ burst_limit: 35 # Short-term burst allowance
511
+
512
+ url_crawler:
513
+ requests_per_minute: 9000
514
+ burst_limit: 60
515
+ ```
516
+
517
+ #### Session Management
518
+
519
+ ```yaml
520
+ server:
521
+ cleanup_interval_seconds: 600 # Clean expired sessions (10 min)
522
+ enable_session_keepalive: true # Keep sessions alive during long operations
523
+ keepalive_touch_interval: 300 # Touch session every N seconds
524
+ ```
525
+
526
+ #### Security & Performance
527
+
528
+ ```yaml
529
+ server:
530
+ request_timeout_seconds: 1800 # Request timeout
531
+ max_request_size_mb: 1000 # Maximum request size
532
+ rate_limit_requests_per_minute: 300000 # Requests per IP
533
+ ```
534
+
535
+ The configuration file includes detailed comments explaining each setting. Modify values based on your deployment requirements and external API limits.
536
+
537
+ ## 6. Model License
538
+
539
+ Unless otherwise noted, openPangu-Embedded-7B-DeepDiver model is licensed under the terms and conditions of OPENPANGU MODEL LICENSE AGREEMENT VERSION 1.0, which is intended to be used permissively and enable the further development of artificial intelligence technologies. Please refer to the [LICENSE](LICENSE) file located in the root directory of the model repository for details.
540
+
541
+ ## 7. Security Notice and Disclaimer
542
+
543
+ Due to the inherent technical limitations of the technologies relied upon by the openPangu-Embedded-7B-DeepDiver model and its framework, as well as the fact that AI-generated content is automatically produced by Pangu, Huawei cannot make any warranties regarding the following matters:
544
+
545
+ - The output of this Model is automatically generated via AI algorithms, it does not rule out the possibility that some of the information may be flawed, unreasonable, or cause discomfort, and the generated content does not represent Huawei's attitude or standpoint;
546
+ - There is no guarantee that this Model is 100% accurate, reliable, functional, timely, secure and safe, error-free, uninterrupted, continuously stable, or free of any faults;
547
+ - The output of this Model does not constitute any advice or decisions for you, and it does not guarantee the authenticity, completeness, accuracy, timeliness, legality, functionality, or practicality of the generated content. The generated content cannot replace professionals in medical, legal, and other fields in answering your questions. The generated content is for your reference only and does not represent any attitude, standpoint, or position of Huawei. You need to make independent judgments based on your actual situation, and Huawei does not assume any responsibilities;
548
+ - The inter-component communication of the DeepDiver MAS system does not include built-in data encryption or authentication mechanisms (e.g., tokens, signatures). You shall independently assess your security requirements and implement corresponding protective measures (such as deploying the system in an encrypted network, integrating SSL/TLS protocols, and enforcing component identity verification);
549
+ - Any security incidents (including but not limited to data leakage, unauthorized access, and business losses) arising from the lack of encryption/authentication mechanisms shall be borne by the user of the system. Huawei shall bear no responsibility therefor.
550
+
551
+ ## 8. Contact Us
552
+ If you have any comments or suggestions, please submit an issue or contact openPangu@huawei.com.
553
+
554
+ ---
checklist.chk ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 880418a2e195ea5221f700fabc446de4af401c777ea66288eedcfe3ca7861a58 *./config.json
2
+ 7302a0fdc386e723b16ad5860787b50a7bff39d30549dae986e073b193f2beb4 *./configuration_openpangu_dense.py
3
+ 5cbfc09f10ae85f0e9bebc1281541dcc7107d86e34282839277782cbb146117d *./generation_config.json
4
+ 9bf645e8399be6d99000eae64bd172b5c457d6d2c44d2257b47eb97a3c41aeda *./model.safetensors.index.json
5
+ f15eaf322af8a0b0f16b26795eb68af836179413d3dbfa4dc44505db6c8b0d6f *./modeling_openpangu_dense.py
6
+ 7b8ec6cd94b1921560d37755c7c0c08280c1f9123195d14d352ad0607788f7f6 *./model-00001-of-00004.safetensors
7
+ fc05d80f52ce44d1433a942e867bf61ea49eb1eebb0700312f76d6b3a3dee917 *./model-00002-of-00004.safetensors
8
+ 1ed37f38214c755b51bea06a71e154c9ea27670eb3b8506c06addcfbea2066f2 *./model-00003-of-00004.safetensors
9
+ 0145e255ba965ed0e75164a037b9a0137c5e5c12ffc42463ff82568054fe0186 *./model-00004-of-00004.safetensors
10
+ c1f2d87f855b994039c52b1e83c8a7f3d71a2d1eb52946c4a2e862e99f19d8b3 *./modular_openpangu_dense.py
11
+ b34cf5e7c7660889303b6e2d0a346c440356385c9db551d06f6615cf9fc600d1 *./special_tokens_map.json
12
+ 6b16f1558c0cd4ae6ef1a2c605713be0a514f50e1ce2d2c878979ce988c148ec *./tokenizer.model
13
+ acb88eac57f8765fedf34e9c10bc16d55c46f0902b0fea74fbf041daca2667ae *./tokenizer_config.json
14
+ c98602d6d1f61792a8bd3393972bbbe7409a205c0bb6299394c74287c26bd723 *./tokenization_openpangu.py
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "PanguEmbeddedForCausalLM"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_openpangu_dense.PanguEmbeddedConfig",
7
+ "AutoModel": "modeling_openpangu_dense.PanguEmbeddedModel",
8
+ "AutoModelForCausalLM": "modeling_openpangu_dense.PanguEmbeddedForCausalLM"
9
+ },
10
+ "bias": true,
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 1,
13
+ "pad_token_id": 0,
14
+ "eos_token_id": 45892,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 4096,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 12800,
19
+ "max_position_embeddings": 147456,
20
+ "model_type": "PanguEmbedded",
21
+ "num_attention_heads": 32,
22
+ "num_hidden_layers": 34,
23
+ "num_key_value_heads": 8,
24
+ "rms_norm_eps": 1e-05,
25
+ "rope_theta": 16000000.0,
26
+ "tie_word_embeddings": false,
27
+ "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.53.2",
29
+ "use_cache": true,
30
+ "vocab_size": 153376
31
+ }
configuration_openpangu_dense.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
3
+
4
+ from transformers.utils import logging
5
+ from transformers.configuration_utils import PretrainedConfig
6
+
7
+
8
+ logger = logging.get_logger(__name__)
9
+
10
+
11
class PanguEmbeddedConfig(PretrainedConfig):
    """Configuration for the PanguEmbedded (openPangu-Embedded) dense model.

    Stores the hyperparameters that define the model architecture
    (vocabulary, hidden sizes, layer/head counts, RoPE settings, etc.)
    and forwards the special-token / tying options to ``PretrainedConfig``.
    """

    model_type = "PanguEmbedded"
    _auto_class = "AutoConfig"

    def __init__(
        self,
        vocab_size=153376,
        hidden_size=4096,
        intermediate_size=12800,
        num_hidden_layers=34,
        num_attention_heads=32,
        num_key_value_heads=8,
        hidden_act="silu",
        max_position_embeddings=147456,
        initializer_range=0.02,
        rms_norm_eps=1e-5,
        use_cache=True,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=45892,
        tie_word_embeddings=False,
        rope_theta=16000000.0,
        bias=True,
        **kwargs,
    ):
        # Embedding / vocabulary sizes.
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size

        # Transformer depth and attention layout (grouped-query attention:
        # num_key_value_heads < num_attention_heads).
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads

        # Activation, positions, and normalization.
        self.hidden_act = hidden_act
        self.max_position_embeddings = max_position_embeddings
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps

        # Runtime / architectural switches.
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.bias = bias

        # Special tokens and weight-tying are handled by the base class.
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
deepdiver_v2/cli/README.md ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CLI Demo for DeepDiver Long Writer Multi-Agent System
2
+
3
+ This CLI demo showcases the multi-agent system that coordinates between PlannerAgent, InformationSeekerAgent, and WriterAgent to handle complex queries and generate comprehensive long-form content.
4
+
5
+ ## Features
6
+
7
+ - 🧠 **PlannerAgent**: Orchestrates the entire process and coordinates sub-agents
8
+ - 🔍 **InformationSeekerAgent**: Performs web research and gathers information
9
+ - ✍️ **WriterAgent**: Creates comprehensive long-form content
10
+ - 📊 **Real-time Visualization**: Shows tool calls, reasoning traces, and sub-agent responses
11
+ - ⚙️ **Configuration Management**: Loads settings from .env files
12
+
13
+ ## Setup
14
+
15
+ ### 1. Install Dependencies
16
+
17
+ ```bash
18
+ cd deepdiver_v2
19
+ pip install -r requirements.txt
20
+ ```
21
+
22
+ ### 2. Configure Environment
23
+
24
+ Create a `.env` file in the `config/` directory:
25
+
26
+ ```bash
27
+ # From the project root
28
+ cp env.template config/.env
29
+ ```
30
+
31
+ Then edit `config/.env` with your settings:
32
+
33
+ ```bash
34
+ # Custom LLM Service Configuration
35
+ MODEL_REQUEST_URL=http://your-llm-service-endpoint/v1/chat/completions
36
+ MODEL_REQUEST_TOKEN=your-service-token
37
+ MODEL_NAME=pangu_auto
38
+
39
+ # MCP Server Configuration
40
+ MCP_SERVER_URL=http://localhost:6274/mcp
41
+ MCP_AUTH_TOKEN=
42
+ MCP_USE_STDIO=true
43
+
44
+ # Agent Iteration Limits
45
+ PLANNER_MAX_ITERATION=20
46
+ INFORMATION_SEEKER_MAX_ITERATION=30
47
+ WRITER_MAX_ITERATION=20
48
+
49
+ # Mode
50
+ PLANNER_MODE=auto # auto, writing, qa
51
+
52
+ # Other settings...
53
+ ```
54
+
55
+ ### 3. Start Required Services
56
+
57
+ Make sure your MCP server is running:
58
+
59
+ ```bash
60
+ # Start MCP server (if needed)
61
+ python src/tools/mcp_server_standard.py
62
+ ```
63
+
64
+ ## Usage
65
+
66
+ ### Interactive Mode (Recommended)
67
+
68
+ ```bash
69
+ python cli/demo.py
70
+ ```
71
+
72
+ This will start an interactive session where you can enter queries and see the full execution flow.
73
+
74
+ ### Single Query Mode
75
+
76
+ ```bash
77
+ python cli/demo.py -q "Write a comprehensive analysis of artificial intelligence trends in 2024"
78
+ ```
79
+
80
+ ### Configuration Only
81
+
82
+ ```bash
83
+ python cli/demo.py --config-only
84
+ ```
85
+
86
+ ### Debug Mode (Verbose Logging)
87
+
88
+ ```bash
89
+ python cli/demo.py --debug -q "Debug a specific query"
90
+ ```
91
+
92
+ ### Quiet Mode (Clean Output)
93
+
94
+ ```bash
95
+ python cli/demo.py --quiet -q "Run with minimal output"
96
+ ```
97
+
98
+ ### Create Sample Configuration
99
+
100
+ ```bash
101
+ python cli/demo.py --create-env
102
+ ```
103
+
104
+ ## Example Queries
105
+
106
+ ### For Information Seeking Tasks:
107
+ - "What are the latest developments in quantum computing?"
108
+ - "Research the current state of renewable energy adoption globally"
109
+ - "Find information about recent AI breakthroughs in healthcare"
110
+
111
+ ### For Long-form Writing Tasks:
112
+ - "Write a comprehensive report on the impact of AI on education"
113
+ - "Create an in-depth analysis of climate change mitigation strategies"
114
+ - "Generate a detailed guide on sustainable business practices"
115
+
116
+ ## Demo Flow Visualization
117
+
118
+ The demo provides rich visual feedback showing:
119
+
120
+ 1. **🚀 Task Initiation**: Shows the user query and planner startup
121
+ 2. **🧠 Agent Reasoning**: Displays the planner's reasoning at each step
122
+ 3. **🔧 Tool Calls**: Shows what tools are being called with their arguments
123
+ 4. **📋 Tool Results**: Displays the results from each tool execution
124
+ 5. **🤝 Sub-Agent Execution**: Shows when sub-agents (InformationSeeker, Writer) are invoked
125
+ 6. **📊 Sub-Agent Results**: Displays results from sub-agent executions
126
+ 7. **🏁 Final Result**: Shows the complete execution summary
127
+ 8. **🔍 Execution Trace**: Detailed step-by-step trace of the entire process
128
+
129
+ ## Output Modes
130
+
131
+ The CLI demo supports different output modes for different use cases:
132
+
133
+ ### Default Mode
134
+ Shows the full rich interface with welcome screen, progress bars, and detailed visualization of all agent interactions.
135
+
136
+ ### Quiet Mode (`--quiet`)
137
+ Suppresses all non-essential output, showing only final results. Useful for:
138
+ - Integration with scripts or automation
139
+ - Focusing on results without process details
140
+ - Running in environments where rich output isn't needed
141
+
142
+ ### Debug Mode (`--debug`)
143
+ Enables verbose logging with timestamps, showing all internal system messages. Useful for:
144
+ - Troubleshooting configuration issues
145
+ - Understanding detailed agent behavior
146
+ - Development and debugging
147
+
148
+ ```bash
149
+ # Examples of different modes
150
+ python cli/demo.py --query "Test query" # Default rich mode
151
+ python cli/demo.py --quiet --query "Test query" # Minimal output
152
+ python cli/demo.py --debug --query "Test query" # Verbose debugging
153
+ ```
154
+
155
+ ## Troubleshooting
156
+
157
+ ### Configuration Issues
158
+
159
+ If you see configuration errors:
160
+
161
+ 1. Ensure `config/.env` exists and is properly formatted
162
+ 2. Check that all required environment variables are set
163
+ 3. Verify your LLM service endpoint is accessible
164
+ 4. Confirm MCP server is running and reachable
165
+ 5. Use `--debug` mode to see detailed error messages
166
+
167
+ ### Agent Initialization Issues
168
+
169
+ If agent initialization fails:
170
+
171
+ 1. Check MCP server connectivity
172
+ 2. Verify model configuration is correct
173
+ 3. Ensure required permissions for workspace directories
174
+ 4. Check log output for specific error messages
175
+
176
+ ### Tool Execution Issues
177
+
178
+ If tool calls fail:
179
+
180
+ 1. Verify MCP server is running and has the required tools
181
+ 2. Check network connectivity for web search/crawler tools
182
+ 3. Ensure workspace directories exist and are writable
183
+ 4. Review tool arguments for correctness
184
+
185
+ ## Advanced Usage
186
+
187
+ ### Custom Sub-Agent Configurations
188
+
189
+ You can customize sub-agent behavior by modifying the configurations in the demo script:
190
+
191
+ ```python
192
+ sub_agent_configs = {
193
+ "information_seeker": {
194
+ "model": "your-model",
195
+ "max_iterations": 30,
196
+ },
197
+ "writer": {
198
+ "model": "your-model",
199
+ "max_iterations": 20,
200
+ "temperature": 0.3,
201
+ "max_tokens": 16384
202
+ }
203
+ }
204
+ ```
205
+
206
+ ### Monitoring and Debugging
207
+
208
+ Enable debug mode in your `.env` file:
209
+
210
+ ```bash
211
+ DEBUG_MODE=true
212
+ ```
213
+
214
+ This will provide more detailed logging and error information.
215
+
216
+ ## Architecture Overview
217
+
218
+ The demo showcases a sophisticated multi-agent architecture:
219
+
220
+ ```
221
User Query
    ↓
PlannerAgent (Coordinator)
    ↓
├── InformationSeekerAgent (Research)
│   ├── Web Search Tools
│   ├── URL Crawling Tools
│   ├── Document Analysis Tools
│   └── File Management Tools
│
└── WriterAgent (Content Generation)
232
+ ├── File Reading Tools
233
+ ├── Document QA Tools
234
+ ├── Content Synthesis
235
+ └── Long-form Writing
236
+ ```
237
+
238
+ Each agent follows the ReAct pattern (Reasoning + Acting) with iterative refinement until task completion.
deepdiver_v2/cli/demo.py ADDED
@@ -0,0 +1,668 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
3
+ """
4
+ CLI Demo for DeepDiver Long Writer Multi-Agent System
5
+
6
+ This demo showcases the multi-agent system that includes:
7
+ - PlannerAgent: Coordinates and orchestrates the entire process
8
+ - InformationSeekerAgent: Gathers and researches information
9
+ - WriterAgent: Creates long-form content
10
+
11
+ Features:
12
+ - Loads configuration from config/.env file
13
+ - Shows real-time tool calls and reasoning traces
14
+ - Displays sub-agent responses and interactions
15
+ - Visualizes the complete execution flow
16
+ - Query preprocessing for safety and task suitability check
17
+ """
18
+
19
+ import os
20
+ import sys
21
+ import json
22
+ import time
23
+ import logging
24
+ import argparse
25
+ import requests
26
+ from pathlib import Path
27
+ from typing import Dict, Any, List, Optional
28
+ from rich.console import Console
29
+ from rich.table import Table
30
+ from rich.panel import Panel
31
+ from rich.syntax import Syntax
32
+ from rich.markdown import Markdown
33
+
34
+ # Add project root to Python path
35
+ project_root = Path(__file__).parent.parent
36
+ sys.path.insert(0, str(project_root))
37
+
38
+
39
+ # Configure logging to keep the CLI clean
40
def setup_clean_logging(debug_mode: bool = False):
    """Configure logging to show only relevant information for the demo.

    In debug mode every record is logged verbosely with timestamps; otherwise
    a handful of chatty third-party/internal loggers are raised to ERROR so
    the CLI output stays readable.
    """
    if debug_mode:
        # Verbose mode: timestamped records at DEBUG level for everything.
        logging.basicConfig(
            level=logging.DEBUG,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            datefmt='%H:%M:%S',
        )
        return

    # Clean demo mode: silence the loggers that are known to be noisy.
    for noisy_name in (
        'httpx',
        'httpcore',
        'urllib3',
        'src.tools.mcp_client',
        # 'src.agents.base_agent',
        'config.config',  # also suppress config messages in quiet mode
    ):
        logging.getLogger(noisy_name).setLevel(logging.ERROR)
64
+
65
# Set up default clean logging before any imports
# (re-configured later by main() if --debug is passed).
setup_clean_logging(debug_mode=False)

# Import the multi-agent system components
from config.config import get_config, reload_config
from src.agents.planner_agent import create_planner_agent
from src.agents.base_agent import AgentResponse

# Shared rich console used by all demo output helpers in this module.
console = Console()
74
+
75
+
76
class DemoVisualizer:
    """Visualizes the execution of the multi-agent system.

    Renders rich panels/tables for each stage of a planner run (reasoning,
    tool calls, sub-agent activity, final result).  In quiet mode only
    output explicitly forced via ``_should_display(force=True)`` is printed.
    """

    def __init__(self, quiet_mode: bool = False):
        # Shared module-level rich console; all panels/tables go through it.
        self.console = console
        # Not written by this class itself; reserved for callers to record events.
        self.execution_log = []
        # When True, only forced output is displayed (see _should_display).
        self.quiet_mode = quiet_mode

    def _should_display(self, force: bool = False) -> bool:
        """Check if output should be displayed based on quiet mode"""
        return not self.quiet_mode or force

    def show_welcome(self):
        """Display welcome message and system info"""
        if not self._should_display():
            return

        welcome_text = """
# 🤖 DeepDiver Long Writer Multi-Agent System Demo

This demo showcases an advanced multi-agent system for research and long-form content generation.

## System Components:
- **🧠 PlannerAgent**: Orchestrates the entire process and coordinates sub-agents
- **🔍 InformationSeekerAgent**: Performs web research and gathers information
- **✍️ WriterAgent**: Creates comprehensive long-form content

## Features:
- Real-time tool execution visualization
- Sub-agent response tracking
- Complete reasoning trace display
- Configuration management
- Query safety and suitability pre-check
"""
        self.console.print(Panel(Markdown(welcome_text), title="[bold blue]Welcome", border_style="blue"))

    def show_config(self, config):
        """Display current configuration as a two-column table."""
        if not self._should_display():
            return

        config_table = Table(title="📋 System Configuration", show_header=True, header_style="bold magenta")
        config_table.add_column("Setting", style="cyan", no_wrap=True)
        config_table.add_column("Value", style="green")

        # Safe config display (hide sensitive values)
        safe_config = config.to_dict()
        for key, value in safe_config.items():
            if value is not None and str(value) != "None":
                display_value = str(value)
                if len(display_value) > 60:
                    # Truncate long values so the table stays readable.
                    display_value = display_value[:57] + "..."
                config_table.add_row(key, display_value)

        self.console.print(config_table)

    def show_planner_start(self, query: str):
        """Show planner starting execution"""
        # NOTE(review): unlike show_welcome/show_config, this does not check
        # _should_display(), so it prints even in quiet mode — confirm intent.
        self.console.print(Panel(
            f"[bold yellow]User Query:[/bold yellow] {query}\n\n"
            f"[bold green]🚀 Starting PlannerAgent execution...[/bold green]",
            title="[bold blue]Task Initiation",
            border_style="green"
        ))

    def show_reasoning_step(self, iteration: int, reasoning: str):
        """Display reasoning step"""
        self.console.print(Panel(
            Markdown(f"**Iteration {iteration} - Reasoning:**\n\n{reasoning}"),
            title=f"[bold yellow]🧠 Agent Reasoning (Step {iteration})",
            border_style="yellow"
        ))

    def show_tool_call(self, iteration: int, tool_name: str, arguments: Dict[str, Any]):
        """Display tool call"""
        args_json = json.dumps(arguments, indent=2, ensure_ascii=False)

        # NOTE(review): embedding a rich Syntax object inside an f-string
        # stringifies it instead of rendering highlighted JSON — verify the
        # resulting output is what was intended.
        self.console.print(Panel(
            f"[bold cyan]Tool:[/bold cyan] {tool_name}\n\n"
            f"[bold cyan]Arguments:[/bold cyan]\n{Syntax(args_json, 'json', theme='monokai', line_numbers=True)}",
            title=f"[bold cyan]🔧 Tool Call (Step {iteration})",
            border_style="cyan"
        ))

    def show_tool_result(self, iteration: int, tool_name: str, result: Dict[str, Any]):
        """Display tool result"""
        # Missing "success" key is treated as success.
        success = result.get("success", True)
        status_icon = "✅" if success else "❌"
        status_color = "green" if success else "red"

        # Format result for display
        if success and "data" in result:
            display_result = result["data"]
        elif "error" in result:
            display_result = {"error": result["error"]}
        else:
            display_result = result

        result_text = json.dumps(display_result, indent=2, ensure_ascii=False)
        if len(result_text) > 1000:
            # Cap the JSON dump so one result cannot flood the terminal.
            result_text = result_text[:997] + "..."

        # NOTE(review): Syntax object inside an f-string — same caveat as
        # show_tool_call above.
        self.console.print(Panel(
            f"[bold {status_color}]Status:[/bold {status_color}] {status_icon} {'Success' if success else 'Failed'}\n\n"
            f"[bold {status_color}]Result:[/bold {status_color}]\n{Syntax(result_text, 'json', theme='monokai', line_numbers=True)}",
            title=f"[bold {status_color}]📋 Tool Result: {tool_name} (Step {iteration})",
            border_style=status_color
        ))

    def show_sub_agent_execution(self, agent_name: str, task_content: str):
        """Show sub-agent starting execution"""
        self.console.print(Panel(
            f"[bold magenta]Agent:[/bold magenta] {agent_name}\n\n"
            f"[bold magenta]Task:[/bold magenta] {task_content[:500]}{'...' if len(task_content) > 500 else ''}",
            title="[bold magenta]🤝 Sub-Agent Execution",
            border_style="magenta"
        ))

    def show_sub_agent_result(self, agent_name: str, result: Dict[str, Any]):
        """Show sub-agent execution result"""
        success = result.get("success", True)
        status_icon = "✅" if success else "❌"
        status_color = "green" if success else "red"

        # Extract key information
        iterations = result.get("iterations", 0)
        execution_time = result.get("execution_time", 0)

        summary = f"[bold {status_color}]Status:[/bold {status_color}] {status_icon} {'Success' if success else 'Failed'}\n"
        summary += f"[bold blue]Iterations:[/bold blue] {iterations}\n"
        summary += f"[bold blue]Execution Time:[/bold blue] {execution_time:.2f}s\n\n"

        if success and "data" in result:
            data = result["data"]
            if isinstance(data, dict):
                for key, value in data.items():
                    # Long string values are truncated to keep the panel compact.
                    if isinstance(value, str) and len(value) > 200:
                        summary += f"[bold blue]{key}:[/bold blue] {value[:197]}...\n"
                    else:
                        summary += f"[bold blue]{key}:[/bold blue] {value}\n"
        elif "error" in result:
            summary += f"[bold red]Error:[/bold red] {result['error']}\n"

        self.console.print(Panel(
            summary,
            title=f"[bold {status_color}]📊 Sub-Agent Result: {agent_name}",
            border_style=status_color
        ))

    def show_final_result(self, response: AgentResponse):
        """Display final execution result"""
        # Always show final results, even in quiet mode
        # NOTE(review): _should_display(force=True) always returns True, so
        # this guard is unreachable; kept for symmetry with other methods.
        if not self._should_display(force=True):
            return

        success = response.success
        status_icon = "✅" if success else "❌"
        status_color = "green" if success else "red"

        summary = f"[bold {status_color}]Final Status:[/bold {status_color}] {status_icon} {'Completed Successfully' if success else 'Failed'}\n"
        summary += f"[bold blue]Total Iterations:[/bold blue] {response.iterations}\n"
        summary += f"[bold blue]Total Execution Time:[/bold blue] {response.execution_time:.2f}s\n"
        summary += f"[bold blue]Agent:[/bold blue] {response.agent_name}\n\n"

        if success and response.result:
            if isinstance(response.result, dict):
                for key, value in response.result.items():
                    # Final answers may be very long; truncate above 3000 chars.
                    if isinstance(value, str) and len(value) > 3000:
                        summary += f"[bold blue]{key}:[/bold blue] {value[:2997]}...\n\n"
                    else:
                        summary += f"[bold blue]{key}:[/bold blue] {value}\n\n"
        elif response.error:
            summary += f"[bold red]Error:[/bold red] {response.error}\n"

        self.console.print(Panel(
            summary,
            title=f"[bold {status_color}]🏁 Final Result",
            border_style=status_color
        ))

    def show_reasoning_trace(self, trace: List[Dict[str, Any]]):
        """Display detailed reasoning trace"""
        if not trace:
            return

        trace_table = Table(title="🔍 Detailed Execution Trace", show_header=True, header_style="bold cyan")
        trace_table.add_column("Step", style="cyan", width=8)
        trace_table.add_column("Type", style="magenta", width=12)
        trace_table.add_column("Details", style="white")

        for i, step in enumerate(trace, 1):
            step_type = step.get("type", "unknown")

            if step_type == "reasoning":
                content = step.get("content", "")[:100] + ("..." if len(step.get("content", "")) > 100 else "")
                trace_table.add_row(str(i), "🧠 Reasoning", content)

            elif step_type == "action":
                tool = step.get("tool", "")
                result_status = "✅" if step.get("result", {}).get("success", True) else "❌"
                trace_table.add_row(str(i), "🔧 Tool Call", f"{result_status} {tool}")

            elif step_type == "error":
                error = step.get("error", "")[:100] + ("..." if len(step.get("error", "")) > 100 else "")
                trace_table.add_row(str(i), "❌ Error", error)
            # NOTE(review): steps of any other type are silently skipped.

        self.console.print(trace_table)

    def show_unsupported_response(self):
        """Display the fixed response for unsupported queries"""
        # Always show this response, even in quiet mode
        self.console.print(Panel(
            "Sorry, your question is not within the current scope of tasks for DeepDiver-V2. Please try asking a question related to long-form writing or complex knowledge Q&A instead.",
            title="[bold yellow]❌ Unsupported Query",
            border_style="yellow"
        ))
292
+
293
+
294
class AgentExecutionMonitor:
    """Monitors agent execution and provides real-time feedback.

    Forwards planner callbacks to a DemoVisualizer, additionally surfacing
    sub-agent assignment tool calls and their results.
    """

    def __init__(self, visualizer: DemoVisualizer):
        self.visualizer = visualizer
        self.current_iteration = 0

    @staticmethod
    def _is_assignment_tool(tool_name: str) -> bool:
        # Sub-agent assignment tools contain both "assign_" and "task".
        return "assign_" in tool_name and "task" in tool_name

    def on_reasoning_step(self, iteration: int, reasoning: str):
        """Called when agent performs reasoning"""
        self.visualizer.show_reasoning_step(iteration, reasoning)

    def on_tool_call(self, iteration: int, tool_name: str, arguments: Dict[str, Any]):
        """Called when agent makes a tool call"""
        self.visualizer.show_tool_call(iteration, tool_name, arguments)

        if not self._is_assignment_tool(tool_name):
            return
        # A batch "tasks" list is routed to the InformationSeeker; a single
        # "task_content" argument is routed to the Writer.
        if "tasks" in arguments:
            for task in arguments.get("tasks", []):
                self.visualizer.show_sub_agent_execution(
                    "InformationSeeker", task.get("task_content", ""))
        elif "task_content" in arguments:
            self.visualizer.show_sub_agent_execution(
                "Writer", arguments.get("task_content", ""))

    def on_tool_result(self, iteration: int, tool_name: str, result: Dict[str, Any]):
        """Called when tool execution completes"""
        self.visualizer.show_tool_result(iteration, tool_name, result)

        if not self._is_assignment_tool(tool_name):
            return
        # Mirror the dispatch used in on_tool_call for the result side.
        if "data" in result and "tasks" in result["data"]:
            for task_result in result["data"]["tasks"]:
                self.visualizer.show_sub_agent_result(
                    task_result.get("agent_name", "InformationSeeker"), task_result)
        elif "data" in result:
            self.visualizer.show_sub_agent_result(
                result["data"].get("agent_name", "Writer"), result["data"])
332
+
333
+
334
def classify_query(query: str, config) -> Dict[str, Any]:
    """
    Classify user query into one of three categories using LLM:
    1. SAFE_SENSITIVE: Contains unsafe content (insults, political risks, etc.)
    2. NON_KNOWLEDGE: Non-knowledge intensive (no need for research, e.g., greetings, simple calculations)
    3. NORMAL: Requires processing (long-form writing or complex knowledge Q&A)

    Falls back to NORMAL (with an explanatory reason) when the model endpoint
    is not configured or classification fails after all retries.

    Returns:
        Dict with 'category' (str) and 'reasoning' (str)
    """
    logger = logging.getLogger(__name__)

    # Get model configuration; environment variables serve as fallback.
    model_config = config.get_custom_llm_config()
    pangu_url = model_config.get('url') or os.getenv('MODEL_REQUEST_URL', '')
    model_token = model_config.get('token') or os.getenv('MODEL_REQUEST_TOKEN', '')

    # Validate model configuration
    if not pangu_url:
        logger.error("Model URL not configured for query classification")
        # Fallback to NORMAL category if model config is missing
        return {
            "category": "NORMAL",
            "reasoning": "模型配置不完整,跳过分类检查,默认按正常任务处理"
        }

    headers = {'Content-Type': 'application/json', 'csb-token': model_token}

    # Classification prompt (detailed instructions for accurate categorization)
    prompt_template = """
你是一个Query分类器,需要将用户输入的查询分为以下三类,并给出明确的分类理由:

1. 【SAFE_SENSITIVE - 安全敏感内容】:包含以下任何一种情况的查询
- 辱骂、侮辱性语言(如脏话、人身攻击)
- 涉及政治敏感内容(如国家领导人、敏感政治事件、舆情风险话题)
- 违法违规内容(如暴力、色情、恐怖主义相关)
- 歧视性言论(种族、性别、宗教等歧视)

2. 【NON_KNOWLEDGE - 非知识密集型任务】:不需要进行信息搜索的简单查询
- 问候语(如"你好"、"早上好"、"嗨")
- 简单计算(如"1+1等于几"、"25乘以4是多少")
- 基础闲聊(如"你是谁")
- 指令性语句(如"退出"、"帮助"、"开始")
- 不需要信息收集的简单问题

3. 【NORMAL - 正常任务】:不包含安全敏感内容,需要进行信息搜索或长文写作的任务
- 简单的信息收集任务 (如"华为成立时间是什么时候")
- 复杂知识问答(如"ACL2025举办地有什么美食推荐")
- 长文写作任务(如"写一篇关于气候变化影响的5000字报告")
- 需要数据支持的分析(如"2023年全球经济增长数据及分析")
- 专业领域研究(如"机器学习在医疗诊断中的应用案例")

分类要求:
- 严格按照上述定义进行分类,不要遗漏任何关键特征
- 优先判断是否为SAFE_SENSITIVE,其次判断是否为NON_KNOWLEDGE,最后才是NORMAL
- 必须提供清晰的分类理由,说明为什么属于该类别
- 输出格式必须严格遵循:先输出分类理由的思考,然后换行输出分类结果(SAFE_SENSITIVE/NON_KNOWLEDGE/NORMAL)

示例1(SAFE_SENSITIVE):
该查询包含辱骂性语言"XXX",符合安全敏感内容的定义,属于需要拦截的内容
SAFE_SENSITIVE

示例2(NON_KNOWLEDGE):
该查询是简单的问候语"你好",不需要进行信息搜索,属于非知识密集型任务
NON_KNOWLEDGE

示例3(NORMAL):
该查询不包含安全敏感内容,要求撰写关于"区块链技术在金融领域的应用"的长文,需要进行信息收集、案例研究和深度分析,属于正常的长文写作任务
NORMAL

用户输入query:$query"""

    # Prepare conversation history ("/no_think" disables the model's
    # thinking mode for this fast classification call).
    conversation_history = [
        {"role": "user", "content": prompt_template.replace("$query", query) + " /no_think"}
    ]

    try:
        # Call LLM with retry logic
        retry_num = 1
        max_retry_num = 3
        while retry_num <= max_retry_num:
            try:
                response = requests.post(
                    url=pangu_url,
                    headers=headers,
                    json={
                        "model": config.model_name,
                        "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<s>[unused9]系统:[unused10]' }}{% endif %}{% if message['role'] == 'system' %}{{'<s>[unused9]系统:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'assistant' %}{{'[unused9]助手:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'tool' %}{{'[unused9]工具:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'function' %}{{'[unused9]方法:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'user' %}{{'[unused9]用户:' + message['content'] + '[unused10]'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '[unused9]助手:' }}{% endif %}",
                        "spaces_between_special_tokens": False,
                        "messages": conversation_history,
                        "temperature": 0.1,  # Low temperature for deterministic classification
                        "max_tokens": 5000,
                    },
                    timeout=model_config.get("timeout", 60)
                )
                # Surface HTTP-level failures explicitly so the retry loop
                # logs the status instead of an opaque JSON decode error.
                response.raise_for_status()

                response_json = response.json()
                logger.debug(f"Classification API response: {json.dumps(response_json, indent=2)}")

                # Extract and parse result: first line is the reasoning,
                # the remainder is the category label.
                assistant_message = response_json["choices"][0]["message"]["content"].strip()
                lines = assistant_message.split('\n', 1)

                if len(lines) < 2:
                    raise ValueError(f"Invalid response format: {assistant_message}")

                reasoning = lines[0].strip()
                category = lines[1].strip()

                # Validate category.  Bug fix: capture the raw value before
                # replacing it — previously `category` was overwritten with
                # "NORMAL" first, so the warning and fallback reasoning always
                # reported 'NORMAL' instead of the actual invalid label.
                valid_categories = ["SAFE_SENSITIVE", "NON_KNOWLEDGE", "NORMAL"]
                if category not in valid_categories:
                    raw_category = category
                    logger.warning(f"Invalid category '{raw_category}', using fallback NORMAL")
                    category = "NORMAL"
                    reasoning = f"模型返回无效分类 '{raw_category}',默认按正常任务处理。原始理由:{reasoning}"

                return {
                    "category": category,
                    "reasoning": reasoning
                }

            except Exception as e:
                logger.error(f"Classification attempt {retry_num} failed: {str(e)}")
                if retry_num == max_retry_num:
                    raise
                time.sleep(2)  # Wait before retry
                retry_num += 1

    except Exception as e:
        logger.error(f"Query classification failed: {str(e)}")
        # Fallback to NORMAL category if classification fails
        return {
            "category": "NORMAL",
            "reasoning": f"分类服务暂时不可用(错误:{str(e)[:100]}...),默认按正常任务处理"
        }
470
+
471
+
472
def load_environment_config(quiet: bool = False):
    """Load configuration from the config/.env file.

    Returns the loaded config object, or None when the .env file is missing
    or loading fails.  Console messages are suppressed when *quiet* is True.
    """
    try:
        # Check for .env file in config directory
        env_file = Path(__file__).parent.parent / "config" / ".env"

        if not env_file.exists():
            if not quiet:
                console.print(f"[yellow]⚠️ No .env file found at {env_file}[/yellow]")
                console.print(f"[yellow]💡 Please copy env.template to config/.env and configure your settings[/yellow]")
            return None

        # Reload configuration to pick up .env file
        reload_config()
        loaded = get_config()

        if not quiet:
            console.print("[green]✅ Configuration loaded successfully[/green]")
        return loaded

    except Exception as e:
        if not quiet:
            console.print(f"[red]❌ Failed to load configuration: {e}[/red]")
        return None
497
+
498
+
499
def create_sample_env_file():
    """Create a sample .env file for demo purposes.

    Copies env.template into config/.env; an existing .env is never touched.
    """
    project_dir = Path(__file__).parent.parent
    env_file = project_dir / "config" / ".env"

    # Never overwrite an existing configuration.
    if env_file.exists():
        return

    # Copy from template
    template_file = project_dir / "env.template"
    if not template_file.exists():
        console.print(f"[red]❌ Could not find env.template to copy[/red]")
        return

    import shutil
    shutil.copy2(template_file, env_file)
    console.print(f"[green]✅ Created .env file from template at {env_file}[/green]")
    console.print("[yellow]⚠️ Please edit the .env file with your actual configuration values[/yellow]")
516
+
517
+
518
def run_demo_query(planner, query: str, visualizer: DemoVisualizer, config) -> Optional[AgentResponse]:
    """Run a demo query through the planner with preprocessing.

    Flow: show the query, classify it, refuse unsupported categories, and
    otherwise hand it to the planner, displaying the final result and (when
    present) the detailed reasoning trace.  Returns the planner response, or
    None when the query was refused or execution failed.
    """
    # Step 1: show what we are about to run.
    visualizer.show_planner_start(query)

    # Step 2/3: pre-check — refuse unsafe or non-knowledge-intensive queries.
    classification_result = classify_query(query, config)
    if classification_result["category"] in ("SAFE_SENSITIVE", "NON_KNOWLEDGE"):
        visualizer.show_unsupported_response()
        return None

    # Step 4: normal query — let the planner do the work.
    try:
        with console.status("[bold green]Executing planner task...", spinner="dots"):
            response = planner.execute_task(query)

        visualizer.show_final_result(response)

        # Show detailed trace if available
        trace = getattr(response, 'reasoning_trace', None)
        if trace:
            visualizer.show_reasoning_trace(trace)

        return response

    except Exception as e:
        console.print(f"[red]❌ Error during execution: {e}[/red]")
        return None
552
+
553
+
554
def main():
    """Main CLI demo function.

    Parses command-line arguments, loads configuration, builds the
    PlannerAgent, and either runs a single query (--query) or an interactive
    loop.

    Returns:
        Process exit code: 0 on success, 1 on configuration or
        initialization failure (passed to sys.exit by the __main__ guard).
    """
    parser = argparse.ArgumentParser(description="DeepDiver Multi-Agent System Demo")
    parser.add_argument("--query", "-q", type=str, help="Query to execute (interactive mode if not provided)")
    parser.add_argument("--config-only", "-c", action="store_true", help="Only show configuration and exit")
    parser.add_argument("--create-env", "-e", action="store_true", help="Create sample .env file from template")
    parser.add_argument("--debug", "-d", action="store_true", help="Enable debug mode with verbose logging")
    # Bug fix: --quiet previously lacked action="store_true", so argparse
    # required a value ("--quiet --query ..." swallowed "--query" as the
    # value).  README and run_demo.sh document it as a boolean flag.
    parser.add_argument("--quiet", action="store_true", help="Suppress all non-essential output")

    args = parser.parse_args()

    # Setup logging based on arguments (re-configure if debug mode is requested)
    if args.debug:
        setup_clean_logging(debug_mode=True)

    # Initialize visualizer
    visualizer = DemoVisualizer(quiet_mode=args.quiet)
    if not args.quiet:
        visualizer.show_welcome()

    # Create sample .env file if requested
    if args.create_env:
        create_sample_env_file()
        return 0

    # Load configuration
    config = load_environment_config(quiet=args.quiet)
    if not config:
        if not args.quiet:
            console.print("[red]❌ Cannot proceed without valid configuration[/red]")
            console.print("[yellow]💡 Use --create-env to create a sample configuration file[/yellow]")
        return 1

    # Show configuration (show_config itself respects quiet mode)
    visualizer.show_config(config)

    if args.config_only:
        return 0

    # Initialize planner agent
    try:
        if not args.quiet:
            console.print("[blue]🔄 Initializing PlannerAgent...[/blue]")

        # Create planner with sub-agent configurations
        sub_agent_configs = {
            "information_seeker": {
                "model": config.model_name,
                "max_iterations": config.information_seeker_max_iterations or 30,
            },
            "writer": {
                "model": config.model_name,
                "max_iterations": config.writer_max_iterations or 30,
                "temperature": config.model_temperature,
                "max_tokens": config.model_max_tokens
            }
        }

        planner = create_planner_agent(
            model=config.model_name,
            max_iterations=config.planner_max_iterations or 40,
            sub_agent_configs=sub_agent_configs
        )

        if not args.quiet:
            console.print("[green]✅ PlannerAgent initialized successfully[/green]")

    except Exception as e:
        if not args.quiet:
            console.print(f"[red]❌ Failed to initialize PlannerAgent: {e}[/red]")
        return 1

    # Handle query execution
    if args.query:
        # Single query mode
        run_demo_query(planner, args.query, visualizer, config)
    else:
        # Interactive mode: loop until quit/exit, Ctrl-C, or EOF.
        if not args.quiet:
            console.print("\n[bold blue]🎯 Interactive Mode[/bold blue]")
            console.print("Enter your queries below. Type 'quit' or 'exit' to leave.")

        while True:
            try:
                prompt_text = "\n[bold cyan]Enter your query:[/bold cyan] " if not args.quiet else "Query: "
                query = console.input(prompt_text).strip()

                if query.lower() in ['quit', 'exit', 'q']:
                    if not args.quiet:
                        console.print("[green]👋 Goodbye![/green]")
                    break

                if not query:
                    continue

                if not args.quiet:
                    console.print("\n" + "="*80 + "\n")
                run_demo_query(planner, query, visualizer, config)
                if not args.quiet:
                    console.print("\n" + "="*80 + "\n")

            except KeyboardInterrupt:
                if not args.quiet:
                    console.print("\n[yellow]⚠️ Interrupted by user[/yellow]")
                break
            except EOFError:
                if not args.quiet:
                    console.print("\n[green]👋 Goodbye![/green]")
                break

    return 0


if __name__ == "__main__":
    sys.exit(main())
deepdiver_v2/cli/run_demo.sh ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # DeepDiver Multi-Agent System CLI Demo Runner
4
+ # This script makes it easier to run the CLI demo with different options
5
+
6
+ set -e
7
+
8
# Colors for output
# ANSI escape sequences used by the status helpers below.
# (BLUE is declared but not currently referenced — kept for future use.)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Function to print colored output
# print_status MESSAGE — informational message in green.
print_status() {
    echo -e "${GREEN}[INFO]${NC} $1"
}

# print_warning MESSAGE — warning message in yellow.
print_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

# print_error MESSAGE — error message in red.
print_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}
27
+
28
# Get script directory.
# Resolve the absolute directory of this script (robust to being sourced or
# invoked through a relative path), then derive the project root as its parent
# so the demo can be launched from any working directory.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
31
+
32
# Function to show help.
# Prints usage, option descriptions, and invocation examples to stdout.
show_help() {
    echo "DeepDiver Multi-Agent System CLI Demo Runner"
    echo ""
    echo "Usage: $0 [OPTIONS] [QUERY]"
    echo ""
    echo "Options:"
    echo " -h, --help Show this help message"
    echo " -i, --interactive Start interactive mode (default)"
    echo " -c, --config-only Show configuration and exit"
    echo " -e, --create-env Create sample .env file from template"
    echo " -q, --query \"QUERY\" Execute a specific query"
    echo " -d, --debug Enable debug mode with verbose logging"
    echo " --quiet Suppress all non-essential output"
    echo " --setup Install dependencies and setup"
    echo ""
    echo "Examples:"
    echo " $0 --interactive"
    echo " $0 --query \"Research the latest trends in AI\""
    echo " $0 --config-only"
    echo " $0 --debug --query \"Debug a specific query\""
    echo " $0 --quiet --query \"Run quietly\""
    echo " $0 --setup"
    echo ""
}
57
+
58
# Function to setup the demo.
# Installs Python dependencies, creates config/.env from the template when
# missing, and marks the demo entry point as executable.
setup_demo() {
    print_status "Setting up DeepDiver CLI Demo..."

    # Check if we're in the right directory
    if [ ! -f "$PROJECT_ROOT/cli/demo.py" ]; then
        print_error "Cannot find demo.py. Please run this script from the CLI directory or project root."
        exit 1
    fi

    # Install dependencies
    print_status "Installing Python dependencies..."
    cd "$PROJECT_ROOT"

    # NOTE(review): requirements.txt lives at the project root in this repo
    # layout; confirm whether cli/requirements.txt is the intended path.
    if [ -f "cli/requirements.txt" ]; then
        pip install -r cli/requirements.txt
        print_status "Dependencies installed successfully"
    else
        print_warning "requirements.txt not found, skipping dependency installation"
    fi

    # Check for .env file
    if [ ! -f "config/.env" ]; then
        print_warning "No .env file found in config/ directory"
        print_status "Creating sample .env file from template..."

        if [ -f "env.template" ]; then
            # Ensure the target directory exists first: under `set -e` a
            # failing cp into a missing config/ would abort the whole setup.
            mkdir -p config
            cp env.template config/.env
            print_status "Sample .env file created at config/.env"
            print_warning "Please edit config/.env with your actual configuration values"
        else
            print_error "No env.template found. Please create config/.env manually"
        fi
    else
        print_status ".env file found at config/.env"
    fi

    # Make demo script executable
    chmod +x "$PROJECT_ROOT/cli/demo.py"
    print_status "Made demo.py executable"

    print_status "Setup complete! You can now run the demo with:"
    echo " $0 --interactive"
}
102
+
103
# Function to run the demo.
# Changes into the project root and launches the Python CLI, forwarding every
# argument this function receives straight through to demo.py.
run_demo() {
    cd "$PROJECT_ROOT"
    print_status "Starting DeepDiver CLI Demo..."
    python cli/demo.py "$@"
}
113
+
114
# Parse command line arguments.
# Translates the wrapper's options into the flag list handed to demo.py.
# --help and --setup are handled locally and exit; everything else is
# accumulated into DEMO_ARGS. A bare non-flag argument is treated as a query.
DEMO_ARGS=()

while [[ $# -gt 0 ]]; do
    case $1 in
        -h|--help)
            show_help
            exit 0
            ;;
        --setup)
            setup_demo
            exit 0
            ;;
        -c|--config-only)
            DEMO_ARGS+=("--config-only")
            shift
            ;;
        -e|--create-env)
            DEMO_ARGS+=("--create-env")
            shift
            ;;
        -q|--query)
            # --query requires a value; ${2:-} avoids an unbound-variable
            # error when no second argument was supplied.
            if [ -z "${2:-}" ]; then
                print_error "Query argument is required with --query option"
                show_help
                exit 1
            fi
            DEMO_ARGS+=("--query" "$2")
            shift 2
            ;;
        -d|--debug)
            DEMO_ARGS+=("--debug")
            shift
            ;;
        --quiet)
            DEMO_ARGS+=("--quiet")
            shift
            ;;
        -i|--interactive)
            # Interactive is default, no need to add args
            shift
            ;;
        *)
            # If it's not a flag, treat it as a query
            if [[ "$1" != -* ]]; then
                DEMO_ARGS+=("--query" "$1")
                shift
            else
                print_error "Unknown option: $1"
                show_help
                exit 1
            fi
            ;;
    esac
done

# Run the demo with collected arguments
run_demo "${DEMO_ARGS[@]}"
deepdiver_v2/config/config.py ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
2
+ import os
3
+ from typing import Optional, Dict, Any
4
+ from dataclasses import dataclass
5
+ import logging
6
+ from pathlib import Path
7
+ from dotenv import load_dotenv
8
+
9
+
10
+ load_dotenv()
11
+
12
+ logging.basicConfig(level=logging.INFO)
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
@dataclass
class APIConfig:
    """Configuration class for API keys and settings.

    Field defaults act as fallbacks: ``__post_init__`` immediately calls
    :meth:`load_from_env`, which overwrites each field from its environment
    variable when that variable is set.
    """

    # Custom LLM Service Configuration
    # Your own deployed LLM service accessed via requests
    model_request_url: Optional[str] = None
    model_request_token: Optional[str] = None
    model_name: str = "pangu_auto"  # Default model name

    # Custom Planner Mode
    planner_mode: str = "auto"  # Default planner mode

    # MCP Server Configuration
    mcp_server_url: Optional[str] = None
    mcp_auth_token: Optional[str] = None
    mcp_use_stdio: bool = True  # Default to stdio for backward compatibility

    # Search Engine Configuration (Generic)
    search_engine_base_url: Optional[str] = None
    search_engine_api_keys: Optional[str] = None  # Can be comma-separated for rotation

    # URL Crawler Configuration (Generic)
    url_crawler_base_url: Optional[str] = None
    url_crawler_api_keys: Optional[str] = None  # Can be comma-separated for rotation
    url_crawler_max_tokens: int = 100000

    # Model Interaction Configuration
    model_temperature: float = 0.3
    model_max_tokens: int = 8192
    model_request_timeout: int = 180

    # Tool Trajectory and Output Configuration
    trajectory_storage_path: str = "./workspace"
    report_output_path: str = "./report"
    document_analysis_path: str = "./doc_analysis"

    # Per-agent iteration controls (optional; resolved by agent factories)
    planner_max_iterations: Optional[int] = None
    information_seeker_max_iterations: Optional[int] = None
    writer_max_iterations: Optional[int] = None

    # General Settings
    debug_mode: bool = False
    max_retries: int = 3
    timeout: int = 30

    def __post_init__(self):
        """Load configuration from environment variables."""
        self.load_from_env()

    def load_from_env(self):
        """Load API keys and settings from environment variables.

        Unset variables leave the corresponding field at its current
        (default) value, except the plain ``os.getenv`` lookups which
        reset their field to None when the variable is absent.
        """
        # Custom LLM Service
        self.model_request_url = os.getenv("MODEL_REQUEST_URL")
        self.model_request_token = os.getenv("MODEL_REQUEST_TOKEN")
        # BUGFIX: the fallback used to be the hyphenated "pangu-auto",
        # which disagreed with both the field default and env.template
        # ("pangu_auto"); fall back to the declared field default instead.
        self.model_name = os.getenv("MODEL_NAME", self.model_name)

        # Custom Planner Mode
        self.planner_mode = os.getenv("PLANNER_MODE", self.planner_mode)

        # MCP Server
        self.mcp_server_url = os.getenv("MCP_SERVER_URL")
        self.mcp_auth_token = os.getenv("MCP_AUTH_TOKEN")
        self.mcp_use_stdio = os.getenv("MCP_USE_STDIO", "true").lower() == "true"

        # Search Engine Configuration
        self.search_engine_base_url = os.getenv("SEARCH_ENGINE_BASE_URL")
        self.search_engine_api_keys = os.getenv("SEARCH_ENGINE_API_KEYS")

        # URL Crawler Configuration
        self.url_crawler_base_url = os.getenv("URL_CRAWLER_BASE_URL")
        self.url_crawler_api_keys = os.getenv("URL_CRAWLER_API_KEYS")
        self.url_crawler_max_tokens = int(os.getenv("URL_CRAWLER_MAX_TOKENS", self.url_crawler_max_tokens))

        # Model Interaction Configuration
        self.model_temperature = float(os.getenv("MODEL_TEMPERATURE", self.model_temperature))
        self.model_max_tokens = int(os.getenv("MODEL_MAX_TOKENS", self.model_max_tokens))
        self.model_request_timeout = int(os.getenv("MODEL_REQUEST_TIMEOUT", self.model_request_timeout))

        # Tool Trajectory and Output Configuration
        self.trajectory_storage_path = os.getenv("TRAJECTORY_STORAGE_PATH", self.trajectory_storage_path)
        self.report_output_path = os.getenv("REPORT_OUTPUT_PATH", self.report_output_path)
        self.document_analysis_path = os.getenv("DOCUMENT_ANALYSIS_PATH", self.document_analysis_path)

        # Per-agent iteration controls (None when the variable is unset,
        # letting the agent factories apply their own defaults)
        self.planner_max_iterations = (
            int(os.getenv("PLANNER_MAX_ITERATION")) if os.getenv("PLANNER_MAX_ITERATION") else None
        )
        self.information_seeker_max_iterations = (
            int(os.getenv("INFORMATION_SEEKER_MAX_ITERATION")) if os.getenv("INFORMATION_SEEKER_MAX_ITERATION") else None
        )
        self.writer_max_iterations = (
            int(os.getenv("WRITER_MAX_ITERATION")) if os.getenv("WRITER_MAX_ITERATION") else None
        )

        # General Settings
        self.debug_mode = os.getenv("DEBUG_MODE", "false").lower() == "true"
        self.max_retries = int(os.getenv("MAX_RETRIES", self.max_retries))
        self.timeout = int(os.getenv("TIMEOUT", self.timeout))

    def get_custom_llm_config(self) -> Dict[str, Any]:
        """Get configuration for custom LLM service."""
        return {
            "url": self.model_request_url,
            "token": self.model_request_token,
            "model": self.model_name,
            "temperature": self.model_temperature,
            "max_tokens": self.model_max_tokens,
            "timeout": self.model_request_timeout,
            "base_url": self.model_request_url  # For backward compatibility with model_config.get('base_url')
        }

    def get_available_search_providers(self) -> list:
        """Get list of available search providers based on API keys."""
        providers = []
        if self.search_engine_api_keys:
            providers.append("custom")
        return providers

    def to_dict(self) -> Dict[str, Any]:
        """Convert config to dictionary (excluding sensitive data).

        Masks values whose attribute name contains "api_key"/"password"
        or ends with "_token" — previously model_request_token and
        mcp_auth_token leaked in plain text. The "_token" suffix check
        deliberately leaves numeric *_tokens settings (e.g.
        model_max_tokens) unmasked.
        """
        config_dict = {}
        for key, value in self.__dict__.items():
            lowered = key.lower()
            is_secret = (
                "api_key" in lowered
                or "password" in lowered
                or lowered.endswith("_token")
            )
            if is_secret:
                config_dict[key] = "***" if value else None
            else:
                config_dict[key] = value
        return config_dict
145
+
146
+ # Global configuration instance
147
+ config = APIConfig()
148
+
149
+
150
def get_config() -> APIConfig:
    """Return the module-level :class:`APIConfig` singleton.

    Note: callers that hold this reference across a :func:`reload_config`
    call keep the stale instance, because reload rebinds the module global
    rather than mutating the object in place.
    """
    return config
153
+
154
+
155
def reload_config():
    """Reload configuration from environment variables.

    Rebinds the module-level ``config`` global to a fresh
    :class:`APIConfig`, whose ``__post_init__`` re-reads the environment.
    """
    global config
    config = APIConfig()
    logger.info("Configuration reloaded")
160
+
161
+
162
def validate_api_key(api_key: Optional[str], service_name: str) -> bool:
    """Return True when *api_key* is a non-empty, non-blank string.

    Logs an error naming *service_name* when the key is missing or blank.
    """
    if api_key and api_key.strip():
        return True
    logger.error(f"Missing or empty API key for {service_name}")
    return False
168
+
169
+
170
def get_url_crawler_config() -> Dict[str, Any]:
    """Get generic URL crawler configuration.

    Returns an empty dict when no API keys are configured; otherwise a
    dict with the parsed key list (comma-separated keys are split for
    rotation), base URL, token limit, and timeout.
    """
    raw_keys = config.url_crawler_api_keys
    if not raw_keys:
        return {}

    # A comma-separated string becomes a rotation list; anything else is
    # wrapped in a single-element list as-is.
    if isinstance(raw_keys, str):
        key_list = [key.strip() for key in raw_keys.split(",")]
    else:
        key_list = [raw_keys]

    return {
        "api_keys": key_list,
        "base_url": config.url_crawler_base_url,
        "max_tokens": config.url_crawler_max_tokens,
        "timeout": config.timeout,
    }
187
+
188
+
189
def get_search_engine_config() -> Dict[str, Any]:
    """Get generic search engine configuration.

    Returns an empty dict when no API keys are configured; otherwise a
    dict with the parsed key list (comma-separated keys are split for
    rotation), base URL, and timeout.
    """
    raw_keys = config.search_engine_api_keys
    if not raw_keys:
        return {}

    # A comma-separated string becomes a rotation list; anything else is
    # wrapped in a single-element list as-is.
    if isinstance(raw_keys, str):
        key_list = [key.strip() for key in raw_keys.split(",")]
    else:
        key_list = [raw_keys]

    return {
        "api_keys": key_list,
        "base_url": config.search_engine_base_url,
        "timeout": config.timeout,
    }
205
+
206
+
207
def get_model_config() -> Dict[str, Any]:
    """Get model interaction configuration for custom LLM service.

    Thin wrapper over :meth:`APIConfig.get_custom_llm_config` on the
    global config instance.
    """
    return config.get_custom_llm_config()
210
+
211
+
212
def get_storage_config() -> Dict[str, Any]:
    """Get storage and trajectory configuration.

    Returns the three output path settings (trajectory workspace, report
    output, document analysis) from the global config.
    """
    return {
        "trajectory_storage_path": config.trajectory_storage_path,
        "report_output_path": config.report_output_path,
        "document_analysis_path": config.document_analysis_path
    }
219
+
220
+
221
def get_mcp_config() -> Dict[str, Any]:
    """Get MCP server specific configuration.

    Bundles the MCP endpoint, auth token, transport choice (stdio vs
    HTTP), and the general request timeout from the global config.
    """
    return {
        "server_url": config.mcp_server_url,
        "auth_token": config.mcp_auth_token,
        "use_stdio": config.mcp_use_stdio,
        "timeout": config.timeout
    }
229
+
230
+
231
# Example usage and testing: when run directly, print a summary of the
# resolved configuration (to_dict() masks values it treats as sensitive).
if __name__ == "__main__":
    print("=== Multi Agent System Configuration ===")
    print(f"Debug Mode: {config.debug_mode}")
    print(f"Custom LLM Service URL: {config.model_request_url}")
    print(f"Available Search Providers: {config.get_available_search_providers()}")
    print("\nConfiguration Summary:")
    for key, value in config.to_dict().items():
        print(f" {key}: {value}")
deepdiver_v2/env.template ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ===================================
2
+ # DeepDiver Configuration Template
3
+ # ===================================
4
+ # Copy this file to .env and fill in your values
5
+
6
+ # Custom LLM Service Configuration
7
+ # Your own deployed LLM service endpoint
8
+
9
+ MODEL_REQUEST_URL=
10
+ MODEL_REQUEST_TOKEN=
11
+ MODEL_NAME=pangu_auto
12
+ PLANNER_MAX_ITERATION=40
13
+ INFORMATION_SEEKER_MAX_ITERATION=30
14
+ WRITER_MAX_ITERATION=40
15
+ PLANNER_MODE=auto # auto, writing, qa
16
+
17
+ # Model Interaction Settings
18
+ MODEL_TEMPERATURE=0.6
19
+ MODEL_MAX_TOKENS=8192
20
+ MODEL_REQUEST_TIMEOUT=180
21
+
22
+ # MCP Server Configuration
23
+ MCP_SERVER_URL=http://localhost:6274/mcp
24
+ MCP_AUTH_TOKEN=
25
+ MCP_USE_STDIO=false
26
+
27
+ # Search Engine Configuration
28
+ SEARCH_ENGINE_BASE_URL=
29
+ SEARCH_ENGINE_API_KEYS=
30
+
31
+ # URL Crawler Configuration
32
+ URL_CRAWLER_BASE_URL=
33
+ URL_CRAWLER_API_KEYS=
34
+ URL_CRAWLER_MAX_TOKENS=100000
35
+
36
+ # Tool Trajectory and Output Paths
37
+ TRAJECTORY_STORAGE_PATH=./workspace
38
+ REPORT_OUTPUT_PATH=./report
39
+ DOCUMENT_ANALYSIS_PATH=./doc_analysis
40
+
41
+ # General Settings
42
+ DEBUG_MODE=false
43
+ MAX_RETRIES=3
44
+ TIMEOUT=30
deepdiver_v2/requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ beautifulsoup4==4.13.5
2
+ httpx[http2]==0.28.1
3
+ python-dotenv==1.1.1
4
+ python_dateutil==2.9.0.post0
5
+ Requests==2.32.5
6
+ rich==14.1.0
7
+ starlette==0.47.3
8
+ uvicorn==0.35.0
deepdiver_v2/src/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
2
+ """
3
+ DeepDiver Multi-Agent System
4
+
5
+ A comprehensive multi-agent system with MCP integration, local workspace management,
6
+ and advanced knowledge management capabilities.
7
+ """
8
+
9
+ __version__ = "2.0.0"
10
+ __author__ = "DeepDiver Team"
11
+ __description__ = "Multi-Agent System with MCP and Local Workspace Integration"
deepdiver_v2/src/agents/__init__.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
2
+ """
3
+ Multi-Agent System - Agent Module
4
+
5
+ This module provides the core agents for the multi-agent system:
6
+ - BaseAgent: Abstract base class with common functionality
7
+ - InformationSeekerAgent: Research and information gathering
8
+ - WriterAgent: Content creation and writing
9
+ - PlannerAgent: Top-level orchestrator
10
+
11
+ All agents follow the ReAct pattern and use standardized TaskInput format.
12
+ """
13
+
14
+ from .base_agent import (
15
+ BaseAgent,
16
+ AgentConfig,
17
+ AgentResponse,
18
+ TaskInput,
19
+ create_agent_config
20
+ )
21
+
22
+ from .subjective_information_seeker import (
23
+ InformationSeekerAgent,
24
+ create_subjective_information_seeker
25
+ )
26
+
27
+ from .objective_information_seeker import (
28
+ InformationSeekerAgent,
29
+ create_objective_information_seeker
30
+ )
31
+
32
+ from .writer_agent import (
33
+ WriterAgent,
34
+ create_writer_agent
35
+ )
36
+
37
+ from .planner_agent import (
38
+ PlannerAgent,
39
+ create_planner_agent
40
+ )
41
+
42
+ __all__ = [
43
+ # Base classes
44
+ "BaseAgent",
45
+ "AgentConfig",
46
+ "AgentResponse",
47
+ "TaskInput",
48
+ "create_agent_config",
49
+
50
+ # Specific agents
51
+ "InformationSeekerAgent",
52
+ "create_subjective_information_seeker",
53
+ "create_objective_information_seeker",
54
+ "WriterAgent",
55
+ "create_writer_agent",
56
+ "PlannerAgent",
57
+ "create_planner_agent"
58
+ ]
59
+
60
+ # Version info
61
+ __version__ = "0.1.0"
62
+ __author__ = "DeepDiver Multi-Agent System"
deepdiver_v2/src/agents/base_agent.py ADDED
@@ -0,0 +1,692 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
2
+ import logging
3
+ import time
4
+ from abc import ABC, abstractmethod
5
+ from typing import Dict, Any, List, Optional
6
+ from dataclasses import dataclass, field
7
+
8
+ # Import MCP client availability flag without binding unused symbols
9
+ try:
10
+ from ..tools import mcp_client as _mcp_client_module # noqa: F401
11
+ MCP_CLIENT_AVAILABLE = True
12
+ except ImportError:
13
+ MCP_CLIENT_AVAILABLE = False
14
+
15
+
16
@dataclass
class AgentConfig:
    """Configuration for agents - session management handled entirely by MCP server"""
    # Used for logging and for tool filtering (see BaseAgent._get_agent_type)
    agent_name: str = "base_agent"
    # Planner operating mode; presumably one of auto/writing/qa — confirm against env.template
    planner_mode: str = "auto"
    # LLM model identifier; None defers to the caller/factory defaults
    model: Optional[str] = None
    # Upper bound on reasoning iterations for the agent loop
    max_iterations: int = 10
    # Sampling temperature; None defers to the shared model config
    temperature: Optional[float] = None
    # Completion token limit; None defers to the shared model config
    max_tokens: Optional[int] = None
    # Paths used by writer and other agents
    trajectory_storage_path: Optional[str] = None
    report_output_path: Optional[str] = None
    document_analysis_path: Optional[str] = None
29
+
30
+
31
@dataclass
class AgentResponse:
    """Standardized response format for all agents"""
    # True when the agent finished its task without a fatal error
    success: bool
    # Agent-specific payload, populated on success
    result: Optional[Dict[str, Any]] = None
    # Human-readable failure description, populated on failure
    error: Optional[str] = None
    # Number of reasoning iterations actually executed
    iterations: int = 0
    # Step-by-step trace entries accumulated during the run
    reasoning_trace: List[Dict[str, Any]] = field(default_factory=list)
    # Name of the agent that produced this response
    agent_name: str = ""
    # Run duration in seconds (presumably wall-clock; set by the agent runner)
    execution_time: float = 0.0
41
+
42
+
43
@dataclass
class TaskInput:
    """Standardized task input format for all agents."""
    task_content: str  # The specific task content
    task_steps_for_reference: Optional[str] = None  # Reference steps for execution
    deliverable_contents: Optional[str] = None  # Format of final deliverable
    current_task_status: Optional[str] = None  # Description of current task status
    task_executor: str = "info_seeker"  # Name of task executor (info_seeker, writer)
    workspace_id: Optional[str] = None  # Workspace ID for stored files and memory
    acceptance_checking_criteria: Optional[str] = None  # Criteria for determining task completion and quality

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this task input into a plain dictionary."""
        return {
            name: getattr(self, name)
            for name in (
                "task_content",
                "task_steps_for_reference",
                "deliverable_contents",
                "current_task_status",
                "task_executor",
                "workspace_id",
                "acceptance_checking_criteria",
            )
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'TaskInput':
        """Build a TaskInput from a dictionary, applying field defaults."""
        return cls(
            task_content=data.get("task_content", ""),
            task_steps_for_reference=data.get("task_steps_for_reference"),
            deliverable_contents=data.get("deliverable_contents"),
            current_task_status=data.get("current_task_status"),
            task_executor=data.get("task_executor", "info_seeker"),
            workspace_id=data.get("workspace_id"),
            acceptance_checking_criteria=data.get("acceptance_checking_criteria"),
        )

    def format_for_prompt(self) -> str:
        """Render the populated fields as a prompt-ready text block.

        Optional sections are emitted only when set; the executor line is
        always present, followed by the workspace id when available.
        """
        parts = [f"Task Content:\n{self.task_content}\n\n"]

        if self.task_steps_for_reference:
            parts.append(f"Task Steps for Reference:\n{self.task_steps_for_reference}\n\n")
        if self.deliverable_contents:
            parts.append(f"Deliverable Contents:\n{self.deliverable_contents}\n\n")
        if self.current_task_status:
            parts.append(f"Current Task Status:\n{self.current_task_status}\n\n")
        if self.acceptance_checking_criteria:
            parts.append(f"Acceptance Checking Criteria:\n{self.acceptance_checking_criteria}\n\n")

        parts.append(f"Task Executor: {self.task_executor}\n")
        if self.workspace_id:
            parts.append(f"Workspace ID: {self.workspace_id}\n")

        return "".join(parts)
101
+
102
+
103
class SectionWriterTaskInput(TaskInput):
    """
    Specialized TaskInput for section writing tasks.

    Only stores the essential parameters. The section_writer agent
    will handle prompt assembly internally.

    Extra attributes (outline, chapter context, key files) are plain
    instance attributes and are NOT dataclass fields, so they do not
    appear in ``to_dict()``.
    """

    def __init__(
        self,
        task_content: str,
        user_query: str,
        write_file_path: str,
        overall_outline: str,
        current_chapter_outline: str,
        key_files: List[Dict[str, Any]],
        written_chapters: str = "",
        workspace_id: Optional[str] = None
    ):
        """Store section-writer context, then delegate to TaskInput.

        Args:
            task_content: The specific writing task description.
            user_query: Original user request the report answers.
            write_file_path: Destination path for the written section.
            overall_outline: Outline of the whole report.
            current_chapter_outline: Outline of the chapter being written.
            key_files: File descriptors the writer should draw on.
            written_chapters: Previously completed chapter text, if any.
            workspace_id: Workspace holding stored files and memory.
        """
        # Store the section writer specific parameters
        self.write_file_path = write_file_path
        self.user_query = user_query
        self.current_chapter_outline = current_chapter_outline
        self.key_files = key_files
        self.written_chapters = written_chapters
        self.overall_outline = overall_outline

        # Initialize parent TaskInput with minimal required fields
        super().__init__(
            task_content=task_content,
            task_executor="section_writer",
            workspace_id=workspace_id,
        )
136
+
137
+
138
class WriterAgentTaskInput(TaskInput):
    """
    Specialized TaskInput for writer-agent tasks.

    Only stores the essential parameters (task content, user query, key
    files, workspace); the writer agent handles prompt assembly
    internally. The extra attributes are plain instance attributes, not
    dataclass fields, so they do not appear in ``to_dict()``.
    """

    def __init__(
        self,
        task_content: str,
        user_query: str,
        key_files: List[Dict[str, Any]],
        workspace_id: Optional[str] = None
    ):
        """Store writer-agent context, then delegate to TaskInput.

        Args:
            task_content: The specific writing task description.
            user_query: Original user request being answered.
            key_files: File descriptors the writer should draw on.
            workspace_id: Workspace holding stored files and memory.
        """
        # Store the writer agent specific parameters
        self.user_query = user_query
        self.key_files = key_files

        # Initialize parent TaskInput with minimal required fields
        super().__init__(
            task_content=task_content,
            task_executor="writer_agent",
            workspace_id=workspace_id,
        )
163
+
164
+
165
+ class BaseAgent(ABC):
166
+ """
167
+ Base class for all agents with MCP server-managed sessions.
168
+
169
+ Session management is now entirely handled by the MCP server:
170
+ - Server assigns session IDs on connection
171
+ - Server creates workspace folders with UUID names
172
+ - All tool operations are performed in server-managed workspaces
173
+ """
174
+
175
    def __init__(self, config: AgentConfig, shared_mcp_client=None):
        """Create the agent and wire it to MCP tooling.

        Args:
            config: Agent settings (name, model, iteration limits, paths).
            shared_mcp_client: Optional already-connected MCP client to
                reuse; when None a standalone filtered client is created.
        """
        self.execution_stats = None
        self.reasoning_trace = None
        self.config = config
        self.logger = logging.getLogger(f"{__name__}.{config.agent_name}")

        # Session info is populated by the MCP server
        self.session_info = None

        # Tool management
        self.mcp_tools = None
        self.available_tools = {}

        # Reset per-run trace state (reset_trace is defined elsewhere in
        # this class; presumably initializes execution_stats/reasoning_trace)
        self.reset_trace()

        # Initialize MCP tools (server will handle session creation or use shared client)
        self._initialize(shared_mcp_client)
192
+
193
    def _initialize(self, shared_mcp_client=None):
        """Initialize agent with MCP server connection or shared client.

        When a shared client is supplied, wrap it in an agent-type-filtered
        adapter; otherwise create a standalone filtered client. Then discover
        the tools the server exposes and build their calling schemas.

        Raises:
            Exception: re-raised from any failing initialization step after
                logging it.
        """
        try:
            self.logger.info(f"Initializing agent {self.config.agent_name}")

            if shared_mcp_client:
                # Use shared MCP client with agent-specific tool filtering
                agent_type = self._get_agent_type()
                self.mcp_tools = self._create_filtered_mcp_tools(shared_mcp_client, agent_type)
                self.logger.info(f"Agent {self.config.agent_name} using shared MCP client with {agent_type} tools")
            else:
                # Create MCP tools with agent-specific filtering (no more unfiltered access)
                self.mcp_tools = self._create_filtered_mcp_tools_standalone()

            # Discover available tools
            self.available_tools = self._discover_mcp_tools()

            # Build tool schemas for function calling
            # (_build_tool_schemas is defined elsewhere in this class)
            self.tool_schemas = self._build_tool_schemas()

            self.logger.info(f"Agent {self.config.agent_name} initialized successfully")
            self.logger.info(f"Available tools: {list(self.available_tools.keys())}")

        except Exception as e:
            self.logger.error(f"Failed to initialize agent {self.config.agent_name}: {e}")
            raise
218
+ raise
219
+
220
    def _discover_mcp_tools(self) -> Dict[str, Any]:
        """Discover available tools from MCP server or fallback tools.

        Returns:
            Mapping of tool name to the server-provided tool info
            (preferred path) or, on fallback, to a bound callable taken
            from the adapter via reflection. Empty dict when nothing is
            discoverable.
        """
        available_tools = {}

        # Try to get tools from MCP client first
        if hasattr(self.mcp_tools, 'get_available_tools'):
            try:
                mcp_tools_dict = self.mcp_tools.get_available_tools()
                for tool_name, tool_info in mcp_tools_dict.items():
                    # For proper MCP architecture, store tool info for direct client calls
                    # instead of creating wrapper lambda functions
                    available_tools[tool_name] = tool_info

                if available_tools:
                    self.logger.info(f"Discovered {len(available_tools)} tools from MCP server")
                    return available_tools
            except Exception as e:
                self.logger.warning(f"Failed to discover MCP tools: {e}")

        # Fallback: if MCP client not available, use direct method access
        # This should rarely be needed with proper MCP setup
        # NOTE(review): this reflects over every public callable on the
        # adapter, so non-tool helper methods may be picked up as "tools".
        if hasattr(self.mcp_tools, '__dict__'):
            for attr_name in dir(self.mcp_tools):
                if not attr_name.startswith('_') and callable(getattr(self.mcp_tools, attr_name)):
                    available_tools[attr_name] = getattr(self.mcp_tools, attr_name)

        return available_tools
247
+
248
+ def _get_agent_type(self) -> str:
249
+ """Get agent type for tool filtering"""
250
+ agent_name = self.config.agent_name.lower()
251
+ if "planner" in agent_name:
252
+ return "planner"
253
+ elif "information" in agent_name or "seeker" in agent_name:
254
+ return "information_seeker"
255
+ elif "writer" in agent_name:
256
+ return "writer"
257
+ else:
258
+ # Default to planner tools for unknown agent types
259
+ return "planner"
260
+
261
    def _create_filtered_mcp_tools(self, shared_client, agent_type: str):
        """Create filtered MCP tools adapter using shared client.

        Args:
            shared_client: An already-connected MCP client to wrap.
            agent_type: Filtering category from :meth:`_get_agent_type`.

        Returns:
            A filtered adapter when available; otherwise a bare
            MCPToolsAdapter constructed without __init__ (no filtering),
            sharing the given client.
        """
        try:
            from src.tools.mcp_client import create_filtered_mcp_tools_adapter
            return create_filtered_mcp_tools_adapter(shared_client, agent_type)
        except ImportError:
            # Fallback if FilteredMCPToolsAdapter not available
            self.logger.warning("FilteredMCPToolsAdapter not available, using regular adapter")
            from src.tools.mcp_client import MCPToolsAdapter
            # __new__ skips MCPToolsAdapter.__init__ so the shared client
            # can be attached without opening a new connection
            adapter = MCPToolsAdapter.__new__(MCPToolsAdapter)
            adapter.client = shared_client
            return adapter
273
+
274
    def _create_filtered_mcp_tools_standalone(self):
        """Create filtered MCP tools adapter with its own client connection.

        Returns:
            A filtered adapter wrapping a freshly created MCP client.

        Raises:
            RuntimeError: when client creation or filtering fails.
        """
        try:
            # Get agent type for filtering
            agent_type = self._get_agent_type()

            # Create a new MCP client
            client = self._create_new_mcp_client()

            # Apply filtering based on agent type
            from src.tools.mcp_client import create_filtered_mcp_tools_adapter
            filtered_adapter = create_filtered_mcp_tools_adapter(client, agent_type)

            self.logger.info(f"Agent {self.config.agent_name} created filtered MCP adapter with {agent_type} tools")
            return filtered_adapter

        except Exception as e:
            self.logger.error(f"Failed to create filtered MCP tools: {e}")
            raise RuntimeError(f"Failed to create filtered MCP client for {self.config.agent_name}: {e}")
293
+
294
    def _create_new_mcp_client(self):
        """Create a new MCP client connection.

        Uses the HTTP endpoint from get_mcp_config() when one is
        configured and stdio is disabled; otherwise falls back to the
        default local HTTP MCP server (http://localhost:6274/mcp). Note
        that even with use_stdio=True the fallback is still HTTP.

        Returns:
            A connected MCPClient instance.

        Raises:
            RuntimeError: when the client cannot be created.
        """
        try:
            # Get MCP configuration
            from config.config import get_mcp_config
            mcp_config = get_mcp_config()

            # Create MCP client
            from src.tools.mcp_client import MCPClient

            if mcp_config.get("server_url") and not mcp_config.get("use_stdio", True):
                # HTTP-based MCP server
                client = MCPClient(server_url=mcp_config["server_url"])
                self.logger.info(
                    f"Agent {self.config.agent_name} connected to HTTP MCP server: {mcp_config['server_url']}")
            else:
                # Default to the expected HTTP MCP server on port 6274
                client = MCPClient(server_url="http://localhost:6274/mcp")
                self.logger.info(
                    f"Agent {self.config.agent_name} connected to default HTTP MCP server: http://localhost:6274/mcp")

            return client

        except Exception as e:
            self.logger.error(f"Failed to create MCP client: {e}")
            raise RuntimeError(f"MCP client creation failed for {self.config.agent_name}: {e}")
320
+
321
+ # NOTE: _create_mcp_tools() method removed to prevent unfiltered tool access.
322
+ # All agents now use _create_filtered_mcp_tools_standalone() or _create_filtered_mcp_tools()
323
+ # to ensure proper tool isolation and security.
324
+
325
def get_session_info(self) -> Optional[Dict[str, Any]]:
    """Get information about the current server-managed session.

    Tries, in order: the adapter's own ``get_session_info``; the adapter's
    underlying client (``_session_id`` + ``is_connected``); session fields
    stored directly on the adapter; finally a minimal info dict. Never
    raises — failures are logged and reported via the returned dict.

    Returns:
        A dict with at least ``session_id``, ``server_managed``,
        ``agent_name`` and ``connected`` keys.
    """
    try:
        # First try the adapter's get_session_info method if available
        if hasattr(self.mcp_tools, 'get_session_info'):
            session_info = self.mcp_tools.get_session_info()
            if session_info:
                # Add agent-specific information
                session_info.update({
                    "server_managed": True,
                    "agent_name": self.config.agent_name
                })
                return session_info

        # Fallback: Check if we have an MCP tools adapter with a client
        if hasattr(self.mcp_tools, 'client'):
            client = self.mcp_tools.client

            # Check if client has session ID and connection status
            if hasattr(client, '_session_id') and hasattr(client, 'is_connected'):
                return {
                    "session_id": client._session_id,
                    "server_managed": True,
                    "agent_name": self.config.agent_name,
                    "connected": client.is_connected()
                }

        # Fallback: check if mcp_tools has session info directly
        if hasattr(self.mcp_tools, '_session_id'):
            return {
                "session_id": self.mcp_tools._session_id,
                "server_managed": True,
                "agent_name": self.config.agent_name,
                # NOTE(review): default here is "connected" (lambda: True),
                # while the branch below defaults to False — confirm intended.
                "connected": getattr(self.mcp_tools, 'is_connected', lambda: True)()
            }

        # If no session info available, return basic info
        return {
            "session_id": None,
            "server_managed": True,
            "agent_name": self.config.agent_name,
            "connected": hasattr(self.mcp_tools, 'client') and getattr(self.mcp_tools.client, 'is_connected',
                                                                       lambda: False)()
        }

    except Exception as e:
        self.logger.warning(f"Failed to get session info: {e}")
        return {
            "session_id": None,
            "server_managed": True,
            "agent_name": self.config.agent_name,
            "connected": False,
            "error": str(e)
        }
379
+
380
+ def _build_tool_schemas(self) -> List[Dict[str, Any]]:
381
+ """Build tool schemas for function calling"""
382
+ schemas = []
383
+
384
+ # Get agent-specific tool schemas
385
+ agent_schemas = self._build_agent_specific_tool_schemas()
386
+ schemas.extend(agent_schemas)
387
+
388
+ return schemas
389
+
390
+ def _build_agent_specific_tool_schemas(self) -> List[Dict[str, Any]]:
391
+ """
392
+ Build agent-specific tool schemas using proper MCP architecture.
393
+ Schemas come from MCP server via client, not direct imports.
394
+ """
395
+ schemas = []
396
+
397
+ # Proper MCP way: Get schemas from MCP client (which got them from server)
398
+ try:
399
+ if hasattr(self.mcp_tools, 'get_tool_schemas'):
400
+ # Use the MCP client to get schemas (proper MCP architecture)
401
+ schemas = self.mcp_tools.get_tool_schemas()
402
+ self.logger.info(f"Retrieved {len(schemas)} tool schemas from MCP server")
403
+ else:
404
+ # Fallback for adapters that don't have the new method yet
405
+ self.logger.warning("MCP adapter doesn't support get_tool_schemas, using fallback")
406
+ schemas = self._build_fallback_schemas()
407
+ except Exception as e:
408
+ self.logger.warning(f"Failed to get schemas from MCP client: {e}, using fallback")
409
+ schemas = self._build_fallback_schemas()
410
+
411
+ return schemas
412
+
413
+ def _build_fallback_schemas(self) -> List[Dict[str, Any]]:
414
+ """Fallback schema building if MCP client method fails"""
415
+ schemas = []
416
+
417
+ # Try to get tool info from MCP client
418
+ if hasattr(self.mcp_tools, 'get_available_tools'):
419
+ try:
420
+ available_tools = self.mcp_tools.get_available_tools()
421
+ for tool_name, tool_info in available_tools.items():
422
+ schema = {
423
+ "type": "function",
424
+ "function": {
425
+ "name": tool_name,
426
+ "description": getattr(tool_info, 'description', f"Tool: {tool_name}"),
427
+ "parameters": getattr(tool_info, 'input_schema', {"type": "object", "properties": {}, "required": []})
428
+ }
429
+ }
430
+ schemas.append(schema)
431
+ self.logger.info(f"Built {len(schemas)} schemas using fallback method")
432
+ except Exception as e:
433
+ self.logger.warning(f"Fallback schema building failed: {e}")
434
+
435
+ return schemas
436
+
437
def execute_tool_call(self, tool_call) -> Dict[str, Any]:
    """Execute a tool call and return results using proper MCP architecture.

    Routing: built-in tools (callables registered in ``self.available_tools``)
    run locally; otherwise the call is delegated to the MCP client. Results
    are normalized to a dict with ``success``/``data``/``error``/``metadata``.

    Args:
        tool_call: Mapping with ``name`` and ``arguments`` keys.

    Returns:
        A result dict; never raises — failures are reported via ``success``.
    """
    tool_name = tool_call["name"]

    try:
        # Parse arguments
        arguments = tool_call["arguments"]

        # Check if tool is available
        if tool_name not in self.available_tools:
            return {
                "success": False,
                "error": f"Tool '{tool_name}' not available for this agent"
            }

        # Route tool execution based on tool type
        # Built-in tools (like assign_task_to_*) are callable methods, not MCP server tools
        if callable(self.available_tools[tool_name]):
            # Built-in tool: execute locally
            tool_function = self.available_tools[tool_name]
            result = tool_function(**arguments)

            # Convert result to standard format
            if hasattr(result, 'to_dict'):
                return result.to_dict()
            elif isinstance(result, dict):
                return result
            else:
                # Bare return values are wrapped as a successful payload.
                return {
                    "success": True,
                    "data": result,
                    "error": None,
                    "metadata": {}
                }

        elif hasattr(self.mcp_tools, 'client') and hasattr(self.mcp_tools.client, 'call_tool'):
            # MCP server tool: execute via client
            result = self.mcp_tools.client.call_tool(tool_name, arguments)

            # Convert MCPClientResult to standard format
            if hasattr(result, 'success'):
                return {
                    "success": result.success,
                    "data": result.data,
                    "error": result.error,
                    "metadata": getattr(result, 'metadata', {})
                }
            else:
                # Client already returned a plain dict-like result.
                return result
        else:
            return {
                "success": False,
                "error": f"Tool '{tool_name}' is not executable (neither built-in nor MCP)"
            }

    except Exception as e:
        self.logger.error(f"Error executing tool {tool_name}: {e}")
        return {
            "success": False,
            "error": f"Tool execution failed: {str(e)}"
        }
497
+ }
498
+
499
def log_reasoning(self, iteration: int, reasoning: str):
    """Record a reasoning step in the trace and update step counters."""
    entry = {
        "type": "reasoning",
        "iteration": iteration,
        "content": reasoning,
        "timestamp": time.time()
    }
    self.reasoning_trace.append(entry)
    for key in ("reasoning_steps", "total_steps"):
        self.execution_stats[key] += 1
    # Only the first 100 characters are logged to keep log lines short.
    self.logger.info(f"Reasoning (Iter {iteration}): {reasoning[:100]}...")
510
+
511
def log_action(self, iteration: int, tool: str, arguments: Dict[str, Any], result: Dict[str, Any]):
    """Record a tool invocation and its outcome in the reasoning trace."""
    self.reasoning_trace.append({
        "type": "action",
        "iteration": iteration,
        "tool": tool,
        "arguments": arguments,
        "result": result,
        "timestamp": time.time()
    })
    for key in ("action_steps", "total_steps"):
        self.execution_stats[key] += 1

    # Results without an explicit success flag are treated as successful.
    status = "Success" if result.get("success", True) else "Failed"
    self.logger.info(f"Action (Iter {iteration}): {tool} -> {status} -> {str(arguments)[:400]}...")
528
+
529
def log_error(self, iteration: int, error: str):
    """Record an error event in the reasoning trace and update counters."""
    self.reasoning_trace.append({
        "type": "error",
        "iteration": iteration,
        "error": error,
        "timestamp": time.time()
    })
    for key in ("error_steps", "total_steps"):
        self.execution_stats[key] += 1
    self.logger.error(f"Error (Iter {iteration}): {error}")
540
+
541
def reset_trace(self):
    """Clear the reasoning trace and reset execution statistics for a new task."""
    self.reasoning_trace = []
    fresh_stats = {
        "total_steps": 0,
        "reasoning_steps": 0,
        "action_steps": 0,
        "error_steps": 0,
        "tool_usage": {},
        "success_rate": 1.0
    }
    self.execution_stats = fresh_stats
552
+
553
def get_execution_stats(self) -> Dict[str, Any]:
    """Return a copy of the execution statistics with a refreshed success rate.

    The success rate is recomputed from the trace: any recorded action whose
    result explicitly reports ``success: False`` counts as a failure.
    """
    actions = self.execution_stats["action_steps"]
    if actions > 0:
        failures = 0
        for step in self.reasoning_trace:
            if step.get("type") == "action" and not step.get("result", {}).get("success", True):
                failures += 1
        self.execution_stats["success_rate"] = (actions - failures) / actions

    # Copy so callers cannot mutate the live stats dict.
    return self.execution_stats.copy()
566
+
567
def create_response(self, success: bool, result: Dict[str, Any] = None,
                    error: str = None, iterations: int = 0,
                    execution_time: float = 0.0) -> AgentResponse:
    """Package execution results into a standardized AgentResponse.

    The reasoning trace is snapshotted (copied) so later mutations of the
    agent's live trace do not affect the returned response.
    """
    trace_snapshot = self.reasoning_trace.copy()
    return AgentResponse(
        success=success,
        result=result,
        error=error,
        iterations=iterations,
        reasoning_trace=trace_snapshot,
        agent_name=self.config.agent_name,
        execution_time=execution_time
    )
580
+
581
def validate_config(self) -> bool:
    """Check that the agent configuration carries sane required values.

    Returns True only when the name and model are non-empty, iteration and
    token limits are positive, and temperature lies within [0.0, 2.0].
    Any attribute/type error during the checks counts as invalid.
    """
    try:
        cfg = self.config
        return all((
            bool(cfg.agent_name),
            bool(cfg.model),
            cfg.max_iterations > 0,
            0.0 <= cfg.temperature <= 2.0,
            cfg.max_tokens > 0,
        ))
    except Exception:
        # Missing attributes or incomparable types mean a broken config.
        return False
599
+
600
@abstractmethod
def execute_task(self, task_input: TaskInput) -> AgentResponse:
    """
    Execute a task using the standardized TaskInput format.

    Implemented by concrete agent subclasses.

    Args:
        task_input: TaskInput object with standardized task information

    Returns:
        AgentResponse with results and process trace
    """
    pass
612
+
613
@abstractmethod
def _build_system_prompt(self) -> str:
    """Build the system prompt for this agent.

    Implemented by concrete agent subclasses; the returned string is used
    as the system message for the agent's LLM conversation.
    """
    pass
617
+
618
+
619
+ # Simple factory function for creating agent configurations
620
+
621
def create_agent_config(
    agent_name: str,
    model: Optional[str] = None,
    max_iterations: Optional[int] = None,
    temperature: Optional[float] = None,
    max_tokens: Optional[int] = None
) -> AgentConfig:
    """
    Create an AgentConfig instance for server-managed sessions.

    Explicit arguments win; anything left as None is resolved from the
    env-backed global configuration, and a ValueError is raised when no
    value can be resolved at all.

    Args:
        agent_name: Name of the agent (also used to pick the per-agent
            max-iterations default: planner / writer / information-seeker)
        model: LLM model to use
        max_iterations: Maximum number of iterations
        temperature: LLM temperature setting
        max_tokens: Maximum tokens for LLM response

    Returns:
        Configured AgentConfig instance

    Raises:
        ValueError: If the global config cannot load, or a required value
            is neither passed nor available from the environment.
    """
    # Load env-backed defaults
    try:
        from config.config import get_config
        api_cfg = get_config()
    except Exception as e:
        raise ValueError(f"Failed to load global configuration: {e}")

    planner_mode = getattr(api_cfg, "planner_mode", "auto")

    # `is None` checks (not truthiness) so explicit 0/0.0 overrides survive.
    resolved_model = model if model is not None else getattr(api_cfg, "model_name", None)
    if not resolved_model:
        raise ValueError("Model is not specified and MODEL_NAME is not set in environment")

    resolved_temperature = temperature if temperature is not None else getattr(api_cfg, "model_temperature", None)
    if resolved_temperature is None:
        raise ValueError("Temperature is not specified and MODEL_TEMPERATURE is not set in environment")

    resolved_max_tokens = max_tokens if max_tokens is not None else getattr(api_cfg, "model_max_tokens", None)
    if resolved_max_tokens is None:
        raise ValueError("Max tokens is not specified and MODEL_MAX_TOKENS is not set in environment")

    # Optional paths used by writer and others
    trajectory_storage_path = getattr(api_cfg, "trajectory_storage_path", None)
    report_output_path = getattr(api_cfg, "report_output_path", None)
    document_analysis_path = getattr(api_cfg, "document_analysis_path", None)

    # Resolve max_iterations per agent type (matched by substring of the name)
    if max_iterations is None:
        agent_lower = (agent_name or "").lower()
        resolved_max_iterations = None
        if "planner" in agent_lower:
            resolved_max_iterations = getattr(api_cfg, "planner_max_iterations", None)
        elif "writer" in agent_lower:
            resolved_max_iterations = getattr(api_cfg, "writer_max_iterations", None)
        elif "information" in agent_lower or "seeker" in agent_lower:
            resolved_max_iterations = getattr(api_cfg, "information_seeker_max_iterations", None)
        # if not found in env, raise
        if resolved_max_iterations is None:
            raise ValueError("Max iterations not specified and no env override (PLANNER_MAX_ITERATION/WRITER_MAX_ITERATION/INFORMATION_SEEKER_MAX_ITERATION)")
        max_iterations = resolved_max_iterations

    return AgentConfig(
        agent_name=agent_name,
        planner_mode=planner_mode,
        model=resolved_model,
        max_iterations=int(max_iterations),
        temperature=resolved_temperature,
        max_tokens=resolved_max_tokens,
        trajectory_storage_path=trajectory_storage_path,
        report_output_path=report_output_path,
        document_analysis_path=document_analysis_path
    )
deepdiver_v2/src/agents/objective_information_seeker.py ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
2
+ import json
3
+ from typing import Dict, Any, List
4
+ import time
5
+ import requests
6
+ import os
7
+ from .base_agent import BaseAgent, AgentConfig, AgentResponse, TaskInput
8
+
9
+
10
+
11
class InformationSeekerAgent(BaseAgent):
    """
    Information Seeker Agent that follows ReAct pattern (Reasoning + Acting)

    This agent takes decomposed sub-questions or tasks from parent agents,
    thinks interleaved (reasoning -> action -> reasoning -> action),
    uses MCP tools to gather information, and returns structured results.
    """

    def __init__(self, config: AgentConfig = None, shared_mcp_client=None):
        # Set default agent name if not specified
        if config is None:
            config = AgentConfig(agent_name="InformationSeekerAgent")
        elif config.agent_name == "base_agent":
            config.agent_name = "InformationSeekerAgent"

        super().__init__(config, shared_mcp_client)

    def _build_system_prompt(self) -> str:
        """Build the system prompt for the ReAct agent.

        Serializes the agent's tool schemas into the prompt so the model can
        emit [unused11]/[unused12]-delimited tool calls.
        """
        tool_schemas_str = json.dumps(self.tool_schemas, ensure_ascii=False)
        system_prompt_template = """You are an Information Seeker Agent that follows the ReAct pattern (Reasoning + Acting).

Your role is to:
1. Take decomposed sub-questions or tasks from parent agents
2. Think step-by-step through reasoning
3. Use available tools to gather information when needed
4. Continue reasoning based on tool results
5. Repeat this process until you have sufficient information
6. Call info_seeker_objective_task_done to provide a structured summary

### Optimized Workflow:
Follow this optimized workflow for information gathering:

1. INITIAL RESEARCH:
- Use `batch_web_search` to find relevant URLs for your queries. When calling the search statement, consider the language of the user's question. For example, for a Chinese question, generate a part of the search statement in Chinese.
- Analyze the search results (titles, snippets, URLs) to identify promising sources

2. CONTENT EXTRACTION:
- For important URLs, use `url_crawler` to:
a) Extract full content from the webpage
b) Save the content to a file in the workspace
- Store results with meaningful file paths (e.g., \"research/ai_trends_2024.txt\")

3. CONTENT ANALYSIS:
- Use `document_qa` to ask specific questions about the saved files:
a) Formulate focused questions to extract key insights
b) Use answers to deepen your understanding
- You can ask multiple questions about the same file

4. FILE MANAGEMENT:
- Use `file_write` to save important findings or summaries
- For reviewing saved content:
a) Prefer `document_qa` to ask specific questions about the content
b) Use `file_read` ONLY for small files (<1000 tokens) when you need the entire content
c) Avoid reading large files directly as it may exceed context limits

5. TASK COMPLETION:
- When ready to report, call `info_seeker_objective_task_done` with:
a) Comprehensive markdown summary of your process and findings
b) List of key files created with descriptions

### Usage of Systematic Tool:
- `think` is a systematic tool. After receiving the response from the complex tool or before invoking any other tools, you must **first invoke the `think` tool**: to deeply reflect on the results of previous tool invocations (if any), and to thoroughly consider and plan the user's task. The `think` tool does not acquire new information; it only saves your thoughts into memory.
- `reflect` is a systematic tool. When encountering a failure in tool execution, it is necessary to invoke the reflect tool to conduct a review and revise the task plan. It does not acquire new information; it only saves your thoughts into memory.

Always provide clear reasoning for your actions and synthesize information effectively.

Below, within the <tools></tools> tags, are the descriptions of each tool and the required fields for invocation:
<tools>
$tool_schemas
</tools>
For each function call, return a JSON object placed within the [unused11][unused12] tags, which includes the function name and the corresponding function arguments:
[unused11][{\"name\": <function name>, \"arguments\": <args json object>}][unused12]
"""
        return system_prompt_template.replace("$tool_schemas", tool_schemas_str)

    @staticmethod
    def _build_initial_message_from_task_input(task_input: TaskInput) -> str:
        """Build the initial user message from TaskInput."""
        message = task_input.format_for_prompt()

        message += "\nPlease analyze this task and start your ReAct process:\n"
        message += "1. Reason about what information you need to gather\n"
        message += "2. Use appropriate tools to get that information\n"
        message += "3. Continue reasoning and acting until you have sufficient information\n"
        message += "4. Call task_done when ready to provide your complete findings\n\n"
        message += "Begin with your initial reasoning about the task."

        return message

    def execute_task(self, task_input: TaskInput) -> AgentResponse:
        """
        Execute a task using ReAct pattern (Reasoning + Acting)

        Args:
            task_input: TaskInput object with standardized task information

        Returns:
            AgentResponse with results and process trace
        """
        start_time = time.time()

        try:
            self.logger.info(f"Starting information seeker task: {task_input.task_content}")

            # Reset trace for new task
            self.reset_trace()

            # Initialize conversation history
            conversation_history = []

            # Build initial system prompt for ReAct
            system_prompt = self._build_system_prompt()

            # Build initial user message from TaskInput
            user_message = self._build_initial_message_from_task_input(task_input)

            # Add to conversation
            conversation_history.append({"role": "system", "content": system_prompt})
            conversation_history.append({"role": "user", "content": user_message+" /no_think"})

            iteration = 0
            task_completed = False
            # Get model endpoint configuration from env-backed config
            from config.config import get_config
            config = get_config()
            model_config = config.get_custom_llm_config()

            pangu_url = model_config.get('url') or os.getenv('MODEL_REQUEST_URL', '')
            model_token = model_config.get('token') or os.getenv('MODEL_REQUEST_TOKEN', '')
            headers = {'Content-Type': 'application/json', 'csb-token': model_token}

            # ReAct Loop: Reasoning -> Acting -> Reasoning -> Acting...
            while iteration < self.config.max_iterations and not task_completed:
                iteration += 1
                self.logger.info(f"Planning iteration {iteration}")

                try:
                    # Get LLM response (reasoning + potential tool calls)
                    # NOTE(review): starting at 1 with `<` allows at most 9
                    # attempts, not 10 — confirm intended.
                    retry_num = 1
                    max_retry_num = 10
                    while retry_num < max_retry_num:
                        try:
                            response = requests.post(
                                url=pangu_url,
                                headers=headers,
                                json={
                                    "model": self.config.model,
                                    "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<s>[unused9]系统:[unused10]' }}{% endif %}{% if message['role'] == 'system' %}{{'<s>[unused9]系统:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'assistant' %}{{'[unused9]助手:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'tool' %}{{'[unused9]工具:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'function' %}{{'[unused9]方法:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'user' %}{{'[unused9]用户:' + message['content'] + '[unused10]'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '[unused9]助手:' }}{% endif %}",
                                    "messages": conversation_history,
                                    "temperature": self.config.temperature,
                                    "spaces_between_special_tokens": False,
                                    "max_tokens": self.config.max_tokens,
                                },
                                timeout=model_config.get("timeout", 180)
                            )
                            response = response.json()
                            self.logger.debug(f"API response received")
                            break
                        except Exception as e:
                            # Back off briefly, then retry; last failure re-raises.
                            time.sleep(3)
                            retry_num += 1
                            if retry_num == max_retry_num:
                                raise ValueError(str(e))
                            continue

                    assistant_message = response["choices"][0]["message"]

                    # Log the reasoning
                    try:
                        # Reasoning text is delimited by [unused16]/[unused17] markers.
                        if assistant_message["content"]:
                            reasoning_content = assistant_message["content"].split("[unused16]")[-1].split("[unused17]")[0]
                            if len(reasoning_content) > 0:
                                self.log_reasoning(iteration, reasoning_content)
                    except Exception as e:
                        self.logger.warning(f"Tool call parsing error: {e}")
                        # Parse error, rerun
                        followup_prompt = f"There is a problem with the format of model generation: {e}. Please try again."
                        conversation_history.append({"role": "user", "content": followup_prompt + " /no_think"})
                        continue

                    def extract_tool_calls(content):
                        # Pull the first [unused11]...[unused12] span and parse it
                        # as JSON; any parse failure yields "no tool calls".
                        # NOTE(review): bare `except` swallows all JSON errors and
                        # the parsed value is assumed to be a list — confirm.
                        import re
                        if not content:
                            return []
                        tool_call_str = re.findall(r"\[unused11\]([\s\S]*?)\[unused12\]", content)
                        if len(tool_call_str) > 0:
                            try:
                                tool_calls = json.loads(tool_call_str[0].strip())
                            except:
                                return []
                        else:
                            return []
                        return tool_calls

                    # Add assistant message to conversation
                    conversation_history.append({
                        "role": "assistant",
                        "content": assistant_message["content"]
                    })

                    tool_calls = extract_tool_calls(assistant_message["content"])

                    # Execute tool calls if any (Acting phase)

                    for tool_call in tool_calls:
                        arguments = tool_call["arguments"]

                        # Check if planning is complete
                        if tool_call["name"] in ["info_seeker_objective_task_done"]:
                            task_completed = True
                            # The task_done arguments double as the recorded result.
                            self.log_action(iteration, tool_call["name"], arguments, arguments)
                            break
                        if tool_call["name"] in ["think", "reflect"]:
                            # Systematic tools are no-ops server-side; acknowledge only.
                            tool_result = {"tool_results": "You can proceed to invoke other tools if needed."}
                        else:
                            tool_result = self.execute_tool_call(tool_call)

                        # Log the action using base class method
                        self.log_action(iteration, tool_call["name"], arguments, tool_result)

                        # Add tool result to conversation
                        conversation_history.append({
                            "role": "tool",
                            "content": json.dumps(tool_result, ensure_ascii=False, indent=2) + " /no_think"
                        })

                    # If no tool calls, encourage continued planning
                    if len(tool_calls) == 0:
                        # Add follow-up prompt to encourage action or completion
                        followup_prompt = (
                            "Continue your planning process. Use available tools to assign tasks to agents, "
                            "search for information, or coordinate work. When you have a complete answer, "
                            "call info_seeker_objective_task_done. /no_think"
                        )
                        conversation_history.append({"role": "user", "content": followup_prompt})
                    if iteration == self.config.max_iterations-3:
                        # Force wrap-up shortly before the iteration budget runs out.
                        followup_prompt = "Due to length and number of rounds restrictions, you must now call the `info_seeker_objective_task_done` tool to report the completion of your task. /no_think"
                        conversation_history.append({"role": "user", "content": followup_prompt})

                except Exception as e:
                    error_msg = f"Error in planning iteration {iteration}: {e}"
                    self.log_error(iteration, error_msg)
                    break

            execution_time = time.time() - start_time
            # Extract final result
            if task_completed:
                # Find the info_seeker_objective_task_done result in the trace
                task_done_result = None
                for step in reversed(self.reasoning_trace):
                    if step.get("type") == "action" and step.get("tool") == "info_seeker_objective_task_done":
                        task_done_result = step.get("result")
                        break

                return self.create_response(
                    success=True,
                    result=task_done_result,
                    iterations=iteration,
                    execution_time=execution_time
                )
            else:
                return self.create_response(
                    success=False,
                    error=f"Task not completed within {self.config.max_iterations} iterations",
                    iterations=iteration,
                    execution_time=execution_time
                )

        except Exception as e:
            execution_time = time.time() - start_time
            self.logger.error(f"Error in execute_task: {e}")
            return self.create_response(
                success=False,
                error=str(e),
                iterations=iteration if 'iteration' in locals() else 0,
                execution_time=execution_time
            )

    def _build_agent_specific_tool_schemas(self) -> List[Dict[str, Any]]:
        """
        Build tool schemas for InformationSeekerAgent using proper MCP architecture.
        Schemas come from MCP server via client, not direct imports.
        """

        # Get MCP tool schemas from server via client (proper MCP architecture)
        schemas = super()._build_agent_specific_tool_schemas()

        # Add schemas for built-in task assignment tools
        builtin_assignment_schemas = [
            {
                "type": "function",
                "function": {
                    "name": "think",
                    "description": "Use the tool to think about something. It will not obtain new information or make any changes to the repository, but just log the thought. Use it when complex reasoning or brainstorming is needed.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "thought": {
                                "type": "string",
                                "description": "Your thoughts."
                            }
                        },
                        "required": ["thought"]
                    }
                }
            },
            {
                "type": "function",
                "function": {
                    "name": "reflect",
                    "description": "When multiple attempts yield no progress, use this tool to reflect on previous reasoning and planning, considering possible overlooked clues and exploring more possibilities. It will not obtain new information or make any changes to the repository.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "reflect": {
                                "type": "string",
                                "description": "The specific content of your reflection"
                            }
                        },
                        "required": ["reflect"]
                    }
                }
            },
            {
                "type": "function",
                "function": {
                    "name": "info_seeker_objective_task_done",
                    "description": "Structured reporting of task completion details including summary, decisions, outputs, and status",
                    # NOTE(review): this entry uses "inputSchema" while the sibling
                    # tools above use "parameters" — confirm the prompt consumer
                    # accepts both keys.
                    "inputSchema": {
                        "type": "object",
                        "properties": {
                            "task_summary": {
                                "type": "string",
                                "description": "Comprehensive markdown covering what the agent was asked to do, steps taken, tools used, key findings, files created, challenges, and final deliverables.",
                                "format": "markdown"
                            },
                            "task_name": {
                                "type": "string",
                                "description": "The name of the task currently assigned to the agent, usually with underscores (e.g., 'web_research_ai_trends')"
                            },
                            "key_files": {
                                "type": "array",
                                "items": {
                                    "type": "object",
                                    "properties": {
                                        "file_path": {
                                            "type": "string",
                                            "description": "Relative path to created/modified file"
                                        },
                                        "desc": {
                                            "type": "string",
                                            "description": "File contents and creation purpose"
                                        },
                                        "is_final_output_file": {
                                            "type": "boolean",
                                            "description": "Whether file is primary deliverable"
                                        }
                                    },
                                    "required": ["file_path", "desc", "is_final_output_file"]
                                },
                                "description": "List of key files generated or modified during the task, with their details."
                            },
                            "completion_status": {
                                "type": "string",
                                "enum": ["completed", "partial", "failed"],
                                "description": "Final task status"
                            }
                        },
                        "required": ["task_summary", "task_name", "key_files", "completion_status"]
                    }
                }
            },
        ]

        schemas.extend(builtin_assignment_schemas)

        return schemas
393
+
394
+
395
+ # Factory function for creating the agent
396
def create_objective_information_seeker(
    model: Any = None,
    max_iterations: Any = None,
    shared_mcp_client=None,
    **kwargs
) -> InformationSeekerAgent:
    """
    Create an InformationSeekerAgent instance with server-managed sessions.

    Args:
        model: The LLM model to use (None resolves from environment config)
        max_iterations: Maximum number of iterations (None resolves from config)
        shared_mcp_client: Optional shared MCP client from parent agent
            (prevents extra sessions)
        **kwargs: Additional configuration options forwarded to create_agent_config

    Returns:
        Configured InformationSeekerAgent instance with appropriate tools
    """
    # Resolve configuration via the shared env-backed factory.
    from ..agents.base_agent import create_agent_config

    agent_config = create_agent_config(
        agent_name="InformationSeekerAgent",
        model=model,
        max_iterations=max_iterations,
        **kwargs
    )

    # Filtered MCP tools are wired up inside the agent constructor.
    return InformationSeekerAgent(config=agent_config, shared_mcp_client=shared_mcp_client)
deepdiver_v2/src/agents/planner_agent.py ADDED
@@ -0,0 +1,1203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
2
+ """
3
+ Planner Agent for Multi-Agent Task Coordination
4
+
5
+ This agent serves as a coordinator for complex tasks that require multiple agents
6
+ working together. It implements the ReAct pattern for reasoning and action.
7
+ """
8
+ import time
9
+ import json
10
+ import requests
11
+ import os
12
+ from typing import Dict, Any, List
13
+ from concurrent.futures import ThreadPoolExecutor
14
+
15
+ # Base imports
16
+ from .base_agent import BaseAgent, AgentConfig, AgentResponse, WriterAgentTaskInput
17
+ # Import agent creators for built-in task assignment
18
+ from .writer_agent import create_writer_agent
19
+
20
+
21
+ class PlannerAgent(BaseAgent):
22
+ """
23
+ PlannerAgent coordinates multiple agents to handle complex user queries.
24
+
25
+ The agent uses the ReAct pattern (Reasoning + Acting) to analyze user requests,
26
+ break them down into manageable tasks, and coordinate the appropriate agents
27
+ to complete the work.
28
+ """
29
+
30
+ def __init__(self, config: AgentConfig = None, shared_mcp_client=None):
31
+ # Set default agent name if not specified
32
+ if config and not config.agent_name:
33
+ config.agent_name = "PlannerAgent"
34
+ elif not config:
35
+ config = AgentConfig(agent_name="PlannerAgent")
36
+
37
+ super().__init__(config, shared_mcp_client)
38
+
39
+ # Planner-specific state
40
+ self.execution_plan = []
41
+ self.task_queue = []
42
+
43
+ # Add built-in task assignment methods to available tools
44
+ self._add_builtin_assignment_tools()
45
+
46
+ # Regenerate tool schemas with built-in assignment tools
47
+ self.tool_schemas = self._build_tool_schemas()
48
+
49
+ self.sub_agent_configs = {}
50
+
51
+ def _add_builtin_assignment_tools(self):
52
+ """Add built-in task assignment methods as available tools"""
53
+ # Add assignment methods that share the MCP client connection
54
+ self.available_tools.update({
55
+ "assign_subjective_task_to_writer": self.assign_subjective_task_to_writer, # assign_subjective_task_to_writer
56
+ "assign_multi_objective_tasks_to_info_seeker": self.assign_multi_objective_tasks_to_info_seeker,
57
+ "assign_multi_subjective_tasks_to_info_seeker": self.assign_multi_subjective_tasks_to_info_seeker
58
+ })
59
+
60
    def assign_multi_objective_tasks_to_info_seeker(
        self,
        tasks: List[Dict[str, str]],
        max_workers: int = 5
    ) -> Dict[str, Any]:
        """
        Fan out multiple objective research tasks to InformationSeekerAgent
        instances and execute them concurrently in a thread pool.

        Each task dict is converted into a TaskInput and run by a freshly
        created objective information seeker that shares this planner's MCP
        client, so all sub-agents operate in the same server-managed session.

        Args:
            tasks: List of task dictionaries with the following keys:
                - task_content (required): The specific task content
                - task_steps_for_reference: Optional reference steps for execution
                - deliverable_contents: Format of expected deliverable
                - acceptance_checking_criteria: Criteria for task completion and quality
                - current_task_status: Description of current task status
                (any workspace_id key is ignored here: the session/workspace is
                managed by the server, so TaskInput.workspace_id is set to None)
            max_workers: Maximum concurrent threads (default=5)

        Returns:
            Dict with overall "success", per-task results under data["tasks"],
            an "error" message when any task failed, and success/failure
            counts in "metadata".
        """
        try:
            # Validate task count: exactly 1 to 5 tasks per dispatch.
            if not (1 <= len(tasks) <= 5):
                return {
                    "success": False,
                    "error": f"Invalid task count ({len(tasks)}). Must assign 1~5 tasks. Please re-plan the task execution schedule or re-decompose the task."
                }

            # Import here to avoid circular imports; the fallback covers both
            # flat-script and package-relative execution layouts.
            try:
                from agents import TaskInput, create_objective_information_seeker
            except ImportError:
                from ..agents import TaskInput, create_objective_information_seeker

            results = []
            import threading
            # Guards `results`, which worker threads append to concurrently.
            lock = threading.Lock()

            def process_task(task: Dict[str, str]):
                """Run one task in a worker thread; record its outcome in `results`."""
                try:

                    # Create TaskInput object
                    task_input = TaskInput(
                        task_content=task["task_content"],
                        task_steps_for_reference=task.get("task_steps_for_reference"),
                        deliverable_contents=task.get("deliverable_contents"),
                        current_task_status=task.get("current_task_status"),
                        workspace_id=None,  # Session/workspace is managed by the server; no need to set explicitly
                        acceptance_checking_criteria=task.get("acceptance_checking_criteria")
                    )

                    # Create the info seeker with any sub-agent overrides, and
                    # share the MCP client so it joins this planner's session.
                    info_seeker_config = getattr(self, 'sub_agent_configs', {}).get('information_seeker', {})
                    info_seeker = create_objective_information_seeker(
                        model=info_seeker_config.get('model', self.config.model),
                        max_iterations=info_seeker_config.get('max_iterations', 30),
                        shared_mcp_client=self.mcp_tools.client if hasattr(self.mcp_tools, 'client') else self.mcp_tools
                    )

                    self.logger.info(f"Assigning task to InformationSeekerAgent: {task['task_content'][:8000]}...")

                    # Execute the task
                    response = info_seeker.execute_task(task_input)

                    if response.success:
                        response_data = {
                            "task_content": task.get("task_content", "Unknown task"),
                            "success": True,
                            "data": response.result,
                            "agent_name": response.agent_name,
                            "iterations": response.iterations,
                            "execution_time": response.execution_time,
                            # "reasoning_trace": response.reasoning_trace
                        }
                    else:
                        response_data = {
                            "task_content": task.get("task_content", "Unknown task"),
                            "success": False,
                            "error": response.error,
                            "agent_name": response.agent_name
                        }

                    # Thread-safe result collection
                    with lock:
                        results.append(response_data)

                    return response_data

                except Exception as e:
                    # Per-task failures are recorded, not raised, so one bad
                    # task cannot abort the other parallel tasks.
                    error_msg = f"Task processing failed: {str(e)}"
                    self.logger.error(error_msg)
                    with lock:
                        results.append({
                            "task_content": task.get("task_content", "Unknown task"),
                            "success": False,
                            "error": error_msg
                        })
                    return None

            # Execute tasks in parallel with a thread pool.
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                futures = [executor.submit(process_task, task) for task in tasks]
                # Wait for all tasks to complete (process_task swallows its own
                # exceptions, so result() is effectively a join here).
                for future in futures:
                    future.result()  # Raise exceptions if any

            # Overall success requires every individual task to succeed.
            all_success = all(task_result.get("success", False) for task_result in results)

            return {
                "success": all_success,
                "data": {"tasks": results},
                "error": None if all_success else "Some tasks failed",
                "metadata": {
                    "tool_name": "assign_multi_objective_tasks_to_info_seeker",
                    "task_count": len(tasks),
                    "success_count": sum(1 for r in results if r.get("success")),
                    "failure_count": sum(1 for r in results if not r.get("success"))
                }
            }

        except Exception as e:
            self.logger.error(f"Multi-task assignment failed: {e}")
            return {
                "success": False,
                "error": f"Multi-task assignment failed: {str(e)}"
            }
193
+
194
+
195
    def assign_multi_subjective_tasks_to_info_seeker(
        self,
        tasks: List[Dict[str, str]],
        max_workers: int = 5
    ) -> Dict[str, Any]:
        """
        Fan out multiple subjective (long-form-writing oriented) research tasks
        to InformationSeekerAgent instances and run them concurrently.

        Unlike the objective variant, each TaskInput carries this planner's
        session id as its workspace_id, so the sub-agents write into the same
        workspace the writer will later read from.

        Args:
            tasks: List of task dictionaries with the following keys:
                - task_content (required): The specific task content
                - task_steps_for_reference: Optional reference steps for execution
                - deliverable_contents: Format of expected deliverable
                - acceptance_checking_criteria: Criteria for task completion and quality
                - current_task_status: Description of current task status
            max_workers: Maximum concurrent threads (default=5)

        Returns:
            Dict with overall "success", per-task results under data["tasks"],
            an "error" message when any task failed, and success/failure
            counts in "metadata".
        """
        try:
            # Validate task count: exactly 1 to 6 tasks per dispatch.
            if not (1 <= len(tasks) <= 6):
                return {
                    "success": False,
                    "error": f"Invalid task count ({len(tasks)}). Must assign 1-6 tasks."
                }

            # Import here to avoid circular imports; the fallback covers both
            # flat-script and package-relative execution layouts.
            try:
                from agents import TaskInput, create_subjective_information_seeker
            except ImportError:
                from ..agents import TaskInput, create_subjective_information_seeker

            results = []
            import threading
            # Guards `results`, which worker threads append to concurrently.
            lock = threading.Lock()

            def process_task(task: Dict[str, str]):
                """Run one task in a worker thread; record its outcome in `results`."""
                try:
                    # Create TaskInput object
                    task_input = TaskInput(
                        task_content=task["task_content"],
                        task_steps_for_reference=task.get("task_steps_for_reference"),
                        deliverable_contents=task.get("deliverable_contents"),
                        current_task_status=task.get("current_task_status"),
                        workspace_id=self.get_session_info()["session_id"],  # reuse the planner's session id so sub-agents share its workspace
                        acceptance_checking_criteria=task.get("acceptance_checking_criteria")
                    )

                    # Create the info seeker with any sub-agent overrides, and
                    # share the MCP client so it joins this planner's session.
                    info_seeker_config = getattr(self, 'sub_agent_configs', {}).get('information_seeker', {})
                    info_seeker = create_subjective_information_seeker(
                        model=info_seeker_config.get('model', self.config.model),
                        max_iterations=info_seeker_config.get('max_iterations', 30),
                        shared_mcp_client=self.mcp_tools.client if hasattr(self.mcp_tools, 'client') else self.mcp_tools
                    )

                    self.logger.info(f"Assigning task to InformationSeekerAgent: {task['task_content'][:8000]}...")

                    # Execute the task
                    response = info_seeker.execute_task(task_input)

                    if response.success:
                        response_data = {
                            "task_content": task.get("task_content", "Unknown task"),
                            "success": True,
                            "data": response.result,
                            "agent_name": response.agent_name,
                            "iterations": response.iterations,
                            "execution_time": response.execution_time,
                            # "reasoning_trace": response.reasoning_trace
                        }
                    else:
                        response_data = {
                            "task_content": task.get("task_content", "Unknown task"),
                            "success": False,
                            "error": response.error,
                            "agent_name": response.agent_name
                        }

                    # Thread-safe result collection
                    with lock:
                        results.append(response_data)

                    return response_data

                except Exception as e:
                    # Per-task failures are recorded, not raised, so one bad
                    # task cannot abort the other parallel tasks.
                    error_msg = f"Task processing failed: {str(e)}"
                    self.logger.error(error_msg)
                    with lock:
                        results.append({
                            "task_content": task.get("task_content", "Unknown task"),
                            "success": False,
                            "error": error_msg
                        })
                    return None

            # Execute tasks in parallel with a thread pool.
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                futures = [executor.submit(process_task, task) for task in tasks]
                # Wait for all tasks to complete (process_task swallows its own
                # exceptions, so result() is effectively a join here).
                for future in futures:
                    future.result()  # Raise exceptions if any

            # Overall success requires every individual task to succeed.
            all_success = all(task_result.get("success", False) for task_result in results)

            return {
                "success": all_success,
                "data": {"tasks": results},
                "error": None if all_success else "Some tasks failed",
                "metadata": {
                    "tool_name": "assign_multi_subjective_tasks_to_info_seeker",
                    "task_count": len(tasks),
                    "success_count": sum(1 for r in results if r.get("success")),
                    "failure_count": sum(1 for r in results if not r.get("success"))
                }
            }

        except Exception as e:
            self.logger.error(f"Multi-task assignment failed: {e}")
            return {
                "success": False,
                "error": f"Multi-task assignment failed: {str(e)}"
            }
325
+
326
+ def assign_subjective_task_to_writer(
327
+ self,
328
+ task_content: str,
329
+ user_query: str,
330
+ key_files: List[Dict[str, str]]
331
+ ) -> Dict[str, Any]:
332
+ """
333
+ Assign a writing or content creation task to the WriterAgent
334
+
335
+ Args:
336
+ task_content: Detailed description of the writing task to be performed
337
+ user_query: List storing previous information seeker subtask summaries intact to preserve information from each completed research task
338
+ key_files: Curated list of relevant files with file_path and desc for each file
339
+
340
+ Returns:
341
+ Dictionary with task assignment results
342
+ """
343
+ try:
344
+
345
+ self.logger.info("Assigning task to WriterAgent")
346
+
347
+ # Create task input
348
+ task_input = WriterAgentTaskInput(
349
+ task_content=task_content,
350
+ user_query=user_query,
351
+ key_files=key_files,
352
+ workspace_id=self.get_session_info()["session_id"],
353
+ )
354
+
355
+ # Create writer agent with shared MCP client and sub-agent configuration
356
+ writer_config = getattr(self, 'sub_agent_configs', {}).get('writer', {})
357
+ writer = create_writer_agent(
358
+ shared_mcp_client=self.mcp_tools.client,
359
+ model=writer_config.get('model', self.config.model),
360
+ max_iterations=writer_config.get('max_iterations', 20),
361
+ temperature=writer_config.get('temperature', 0.3),
362
+ max_tokens=writer_config.get('max_tokens', 16384)
363
+ )
364
+
365
+ self.logger.info(f"Assigning task to WriterAgent: {task_content[:800]}...")
366
+
367
+ # Execute the task with shared connection
368
+ response = writer.execute_task(task_input)
369
+
370
+ if response.success:
371
+ return {
372
+ "success": True,
373
+ "data": response.result,
374
+ "agent_name": response.agent_name,
375
+ "iterations": response.iterations,
376
+ "execution_time": response.execution_time,
377
+ # "reasoning_trace": response.reasoning_trace
378
+ }
379
+ else:
380
+ return {
381
+ "success": False,
382
+ "error": response.error,
383
+ "agent_name": response.agent_name
384
+ }
385
+
386
+ except Exception as e:
387
+ self.logger.error(f"Failed to assign task to WriterAgent: {e}")
388
+ return {
389
+ "success": False,
390
+ "error": f"Task assignment failed: {str(e)}"
391
+ }
392
+
393
+ def _build_system_prompt(self) -> str:
394
+ """Build the system prompt for the planner agent"""
395
+ tool_schemas_str = json.dumps(self.tool_schemas, ensure_ascii=False)
396
+
397
+ auto_system_prompt_template = """# PlannerAgent: Multi-Agent Task Coordinator
398
+ **Role:** Analyze complex queries, first distinguish query type (long-form writing type/objective question type), then create structured plans, and coordinate specialized agents to deliver comprehensive solutions—call corresponding tools based on query type, and only invoke writer for long-form writing type queries.
399
+
400
+ #### Available Sub-Agents:
401
+ - **`information_seeker`**: Research, data gathering, web search (supports single/parallel multi-task; long-form writing type uses assign_multi_subjective_tasks_to_info_seeker, other types use assign_multi_objective_tasks_to_info_seeker)
402
+ - **`writer`**: Only invoke this sub-agent when long-form writing is required.
403
+
404
+ ---
405
+
406
+ ## Optimized Workflow
407
+ ### 1. Query Type Judgment & Analysis & Planning Phase
408
+ **Goal:** Use the `think` tool to analyze the problem and determine whether it is a simple task (refers to tasks that do not require calling the information search agent or tool) or a complex task (requires calling info seeker). If it is a complex task, it is necessary to further analyze whether it is a objective question(do not require calling the writer agent)or a long-form writing question (requires long-form expression and need to call the writer agent later).
409
+ - **Simple Tasks:** For simple tasks that do not require info seeker invocation, you can directly call the `planner_objective_task_done` tool and write the answer in `final_answer` field without creating a todo.md file.
410
+ - **Complex Tasks:**
411
+ - For objective tasks, must use `assign_multi_objective_tasks_to_info_seeker`
412
+ - For long-form writing tasks, must use `assign_multi_subjective_tasks_to_info_seeker`, and call the writer agent to integrate the collected information to generate a very long text
413
+ - **Task Decomposition Rules:**
414
+ - Construct a task tree with a tree-like structure, where the root node represents the user's input query. Each subtask is marked with its depth in the task tree, and the entire task tree is executed from shallow to deep. Tasks at the same depth in the task tree must be independent and can be executed in parallel (via `assign_multi_xxx_tasks_to_info_seeker`) without mutual dependencies.
415
+ - At the first level of the task tree, it is essential to thoroughly design subtasks that can be executed in parallel to explore various potential background information, thereby providing more specific clues for the next step of planning.
416
+ - Competitive Redundancy Mechanism:
417
+ - For key subtasks that have a significant impact on subsequent reasoning and planning, a redundancy mechanism should be established. This involves duplicating the task at the same depth level in the task tree, enabling the parallel execution of nearly identical tasks to enhance the completion rate and robustness of the task execution.
418
+ - **Task Parallel Sending Requirements:**
419
+ - When using `assign_multi_xxx_tasks_to_info_seeker`, all parallel-sent subtasks must be independent of each other; the description of each subtask must not contain any mutual references or dependency requirements for other subtasks.
420
+ - There is no sequential execution relationship among all parallel-sent subtasks.
421
+
422
+ - **Mandatory Documentation:** Create and write `todo.md` (e.g., `todo_v1.md`) with fields:
423
+ ```markdown
424
+ # Task Planning Document
425
+ ## task_name: [Clear identifier]
426
+ ## task_desc: [Detailed requirements - focus on WHAT not HOW]
427
+ ## deliverable_contents: [Exact output format specs]
428
+ ## success_criteria: [Measurable 100% completion metrics]
429
+ ## context: [Background, constraints, prior results]
430
+ ## task_steps_for_reference: [Tree-structured preliminary execution plan, tag tasks with the depth in task tree `[DEPTH:xx]`]
431
+ ```
432
+
433
+ ### 2. Execution & Iteration Phase
434
+ #### A. Unified Iteration Triggers (Shared by Both Types)
435
+ - Based on upper-layer task results, refine the next layer of planning and document it in a new version of `todo.md` (e.g., `todo_v2.md`).
436
+ - If upper-layer tasks fail/encounter challenges: Invoke the `reflect` tool for introspection (no new information acquired, only saves thoughts), adjust the plan, and re-invoke the corresponding `information_seeker` method (objective: `assign_multi_objective_tasks_to_info_seeker`; long-form writing: `assign_multi_subjective_tasks_to_info_seeker`).
437
+ - If current tasks require prior round information: Clearly specify the context of each task and referenced files (e.g., `./data/agent_output_v1.json`) when calling `information_seeker`.
438
+ - Decompose and refine clues from upper-layer results, then execute verification in parallel.
439
+
440
+ #### B. Query-Type-Specific Operations
441
+ - **Objective tasks**: No additional operations (strictly no writer invocation). Continue iterating until information meets `success_criteria`.
442
+ - **Long-form writing tasks**: Add **information sufficiency check before writer invocation**:
443
+ 1. Evaluate collected information from two dimensions: quantity (e.g., "Enough case studies for 3 chapters") and comprehensiveness (e.g., "Covers both positive and negative impacts of AI on education").
444
+ 2. If information is insufficient: Adjust subtask directions (e.g., "Supplement AI education failure cases") and re-invoke `assign_multi_subjective_tasks_to_info_seeker` for targeted collection.
445
+ 3. If information is sufficient: Invoke the writer via `assign_subjective_task_to_writer` (provide all collected materials and `todo.md` as context).
446
+ 4. If the writer returns an incomplete result: Do not assist in completing it; only feed back the current completion status to the user.
447
+
448
+ ### 3. Completion & Synthesis Phase
449
+ #### A. Unified Validation & Integration (Shared by Both Types)
450
+ - **Validation**: Cross-check multi-source `information_seeker` outputs for consistency (e.g., "NBS and World Bank GDP data differ by ≤1%").
451
+ - **Integration**: Combine parallel outputs into a unified deliverable (e.g., "Merge two GDP data sources into a single table" or "Integrate writer’s report with supplementary case studies").
452
+ - **Delivery**: Output language must match the user’s query language (e.g., Chinese query → Chinese deliverable).
453
+
454
+ #### B. Query-Type-Specific Task Completion (Critical)
455
+ - **Objective tasks**: Call the `planner_objective_task_done` tool **only when** all planned tasks are completed and the final deliverable (e.g., verified data, clear answers) is ready for user delivery.
456
+ - **Long-form writing tasks**: Call the `planner_subjective_task_done` tool **only when** the writer has finished executing and the final long-form content meets the `success_criteria` in `todo.md`.
457
+
458
+ ---
459
+
460
+ ## Critical Protocols
461
+ 1. **Dependency Management:**
462
+ - Prohibit parallel dispatch for sequential dependent tasks unless using competitive redundancy mechanism
463
+ - Convert sequential chains to parallel where possible (e.g., Hypothesis_A vs Hypothesis_B testing)
464
+ 2. **File Traceability:**
465
+ - All output references use relative paths (`./data/agent_output_1.json`)
466
+ - Version `todo.md` after each iteration (e.g., `todo_v2.md`)
467
+ 3. **Local File Reading Recommendations:**
468
+ - For files crawled natively, it is not recommended to directly use the `file_read` tool to read the entire content (maybe too long). Instead, the `document_qa` tool should be used to extract and verify the required information.
469
+ - For task deliverables and summary documents from sub-agents, the `file_read` tool can be used to read them.
470
+ 4. The final deliverable presented to the user should be consistent with the language used in the user's question.
471
+ 5. **Writer invocation**: Strictly prohibit calling the writer for objective tasks; for long-form writing tasks, **never directly answer based on collected information**—must invoke the writer to generate the final long-form content.
472
+
473
+ Below, within the <tools></tools> tags, are the descriptions of each tool and the required fields for invocation:
474
+ <tools>
475
+ $tool_schemas
476
+ </tools>
477
+ For each function call, return a JSON object placed within the [unused11][unused12] tags, which includes the function name and the corresponding function arguments:
478
+ [unused11][{\"name\": <function name>, \"arguments\": <args json object>}][unused12]"""
479
+
480
+ writing_system_prompt_template = """### PlannerAgent: Multi-Agent Task Coordinator
481
+ **Role:** Analyze complex queries, create structured plans, and coordinate specialized agents to deliver comprehensive solutions.
482
+
483
+ #### Available Sub-Agents:
484
+ - **`information_seeker`**: Research, data gathering, web search (supports single/parallel multi-task)
485
+ - **`writer`**: Creates content (e.g., reports, analysis, etc.), and synthesizes from existing materials
486
+
487
+ ---
488
+
489
+ ### Optimized Workflow
490
+ #### 1. Analysis & Planning Phase
491
+ **Goal:** Analyze the problem and determine whether it is a simple task or a complex task. If it is a complex task, it is necessary to further analyze whether it is a subject-driven question or an objective-driven question, so as to decompose the problem into multiple clear and executable subtasks according to the specific problem type. The main characteristic of objective-driven questions is that their answers are clear and verifiable entities, otherwise they are subject-driven questions.
492
+ - **Simple Tasks:** For simple tasks that do not require sub-agent invocation, you can directly answer without creating a todo.md file
493
+ - **Complex Tasks:**
494
+ - For Objective-driven tasks, Adopt *diverge-converge* strategy:
495
+ 1. Use `assign_multi_subjective_tasks_to_info_seeker` call for divergent background research
496
+ 2. Converge findings to define specific sub-problems
497
+ - For Subject-driven tasks, Adopt *multi-perspective* strategy:
498
+ 1. Use assign_multi_subjective_tasks_to_info_seeker call for divergent multi-source exploration (each task targets independent dimensions)
499
+ 2. Converge findings to define focused sub-problems addressing distinct knowledge gaps
500
+ 3. When the information seeker collects information, start to call the writer agent to integrate the collected information to generate a very long text
501
+ - **Task Decomposition Rules:**
502
+ - Construct a task tree with a tree-like structure, where the root node represents the user's input query. Each subtask is marked with its depth in the task tree, and the entire task tree is executed from shallow to deep. Tasks at the same depth in the task tree must be independent and can be executed in parallel (via `assign_multi_subjective_tasks_to_info_seeker`) without mutual dependencies.
503
+ - At the first level of the task tree, it is essential to thoroughly design subtasks that can be executed in parallel to explore various potential background information, thereby providing more specific clues for the next step of planning.
504
+ - Competitive Redundancy Mechanism:
505
+ - For key subtasks that have a significant impact on subsequent reasoning and planning, a redundancy mechanism should be established. This involves duplicating the task at the same depth level in the task tree, enabling the parallel execution of nearly identical tasks to enhance the completion rate and robustness of the task execution.
506
+ - **Task Parallel Sending Requirements:**
507
+ - When using `assign_multi_subjective_tasks_to_info_seeker`, all parallel-sent subtasks must be independent of each other; the description of each subtask must not contain any mutual references or dependency requirements for other subtasks.
508
+ - There is no sequential execution relationship among all parallel-sent subtasks.
509
+
510
+ - **Mandatory Documentation:** Create and write `todo.md` (e.g., `todo_v1.md`) with fields:
511
+ ```markdown
512
+ # Task Planning Document
513
+ ## task_name: [Clear identifier]
514
+ ## task_desc: [Detailed requirements - focus on WHAT not HOW]
515
+ ## deliverable_contents: [Exact output format specs]
516
+ ## success_criteria: [Measurable 100% completion metrics]
517
+ ## context: [Background, constraints, prior results]
518
+ ## task_steps_for_reference: [Tree-structured preliminary execution plan, tag tasks with the depth in task tree `[DEPTH:xx]`]
519
+ ```
520
+
521
+ #### 2. Execution & Iteration Phase
522
+ - **Iteration Triggers:**
523
+ - Based on the execution results of the upper layer of the task tree, specify and refine the next layer and subsequent task planning, and document them in a new `todo.md` file (e.g., `todo_v2.md`).
524
+ - If there are tasks in the previous layer that have failed or encountered challenges, it is necessary to invoke `reflect` for introspection, consider more possibilities, and make new task planning and invoke `assign_multi_subjective_tasks_to_info_seeker` again.
525
+ - If the tasks sent in the current round require reference to task information from previous rounds, it is essential to clearly specify the context of each task and the files that may need to be used or referenced when calling `assign_multi_subjective_tasks_to_info_seeker`.
526
+ - For the multiple clues of the execution results from the previous layer, they should be decomposed and refined, and executed in parallel for verification.
527
+ - **Information check required before calling writer:**
528
+ - Before invoking writer, analyze collected information for sufficiency: evaluate both quantity and comprehensiveness to ensure adequate material for long article generation
529
+ - If information is insufficient, adjust subtask direction and initiate additional targeted information collection
530
+ - **When information is sufficient, invoke writer agent** via `assign_subjective_task_to_writer`
531
+
532
+ #### 3. Completion & Synthesis Phase
533
+ - **Validation:** Cross-check multi-source outputs for consistency, and Check whether the information source is sufficient
534
+ - **Integration:** Combine parallel outputs into unified deliverable
535
+ - **Delivery:** Output language must match user's query language
536
+ - When the writer agent is finished executing, planner_subjective_task_done tool needs to be called to end the current task
537
+
538
+ ---
539
+
540
+ ### Critical Protocols
541
+ 1. **Dependency Management:**
542
+ - Prohibit parallel dispatch for sequential dependent tasks unless using competitive redundancy mechanism
543
+ - Convert sequential chains to parallel where possible (e.g., Hypothesis_A vs Hypothesis_B testing)
544
+ 2. **File Traceability:**
545
+ - All output references use relative paths (`./data/agent_output_1.json`)
546
+ - Version `todo.md` after each iteration (e.g., `todo_v2.md`)
547
+ 3. **Iteration Discipline:**
548
+ - Minimum 2 parallel agents for critical hypothesis-validation tasks
549
+ - Terminate only when ALL success criteria are met at 100%
550
+ 5. **Usage of Think Tool:**
551
+ - `think` is a systematic tool. After receiving the response from the complex tool or before invoking any other tools, you must **first invoke the `think` tool**: to deeply reflect on the results of previous tool invocations (if any), and to thoroughly consider and plan the user's task. The `think` tool does not acquire new information; it only saves your thoughts into memory.
552
+ 6. **Usage of Reflect Tool:**
553
+ `reflect` is a systematic tool. When encountering a failure in tool execution, it is necessary to invoke the reflect tool to conduct a review and revise the task plan. It does not acquire new information; it only saves your thoughts into memory.
554
+ 7. Always prioritize complete solutions over partial delivery. Use parallel redundancy for critical path tasks, and convert agent disagreements into new parallel investigation branches.
555
+ 8. **CRITICAL:** When you determine that the information_seeker has gathered sufficient information, you must invoke the writer agent to draft the final article in response to the user's query. You are not allowed to reply directly based on the collected information!
556
+ 9.Also note that when the writing agent returns a result that shows it is not completed, you do not need to help it complete it further. You only need to feedback the current completion status to the user.
557
+
558
+ Below, within the <tools></tools> tags, are the descriptions of each tool and the required fields for invocation:
559
+ <tools>
560
+ $tool_schemas
561
+ </tools>
562
+ For each function call, return a JSON object placed within the [unused11][unused12] tags, which includes the function name and the corresponding function arguments:
563
+ [unused11][{\"name\": <function name>, \"arguments\": <args json object>}][unused12]"""
564
+
565
+ qa_system_prompt_template = """### PlannerAgent: Multi-Agent Task Coordinator
566
+ **Role:** Analyze complex queries, create structured plans, and coordinate specialized agents to deliver comprehensive solutions.
567
+
568
+ #### Available Sub-Agents:
569
+ - **`information_seeker`**: Research, data gathering, web search (supports single/parallel multi-task)
570
+
571
+ ---
572
+
573
+ ### Optimized Workflow
574
+ #### 1. Analysis & Planning Phase
575
+ **Goal:** Decompose problems into executable units with clear dependencies
576
+ - **Simple Tasks:** For simple tasks that do not require sub-agent invocation, you can directly answer and call `planner_objective_task_done` without creating a todo.md file
577
+ - **Complex Tasks:**
578
+ - **Task Decomposition Rules:**
579
+ - Construct a task tree with a tree-like structure, where the root node represents the user\'s input query. Each subtask is marked with its depth in the task tree, and the entire task tree is executed from shallow to deep. Tasks at the same depth in the task tree must be independent and can be executed in parallel (via `assign_multi_objective_tasks_to_info_seeker`) without mutual dependencies.
580
+ - At the first level of the task tree, it is essential to thoroughly design subtasks that can be executed in parallel to explore various potential background information, thereby providing more specific clues for the next step of planning.
581
+ - Competitive Redundancy Mechanism:
582
+ - For key subtasks that have a significant impact on subsequent reasoning and planning, a redundancy mechanism should be established. This involves duplicating the task at the same depth level in the task tree, enabling the parallel execution of nearly identical tasks to enhance the completion rate and robustness of the task execution.
583
+ - **Task Parallel Sending Requirements:**
584
+ - When using `assign_multi_objective_tasks_to_info_seeker`, all parallel-sent subtasks must be independent of each other; the description of each subtask must not contain any mutual references or dependency requirements for other subtasks.
585
+ - There is no sequential execution relationship among all parallel-sent subtasks.
586
+
587
+ - **Mandatory Documentation:** Create and write `todo.md` (e.g., `todo_v1.md`) with fields:
588
+ ```markdown
589
+ # Task Planning Document
590
+ ## task_name: [Clear identifier]
591
+ ## task_desc: [Detailed requirements - focus on WHAT not HOW]
592
+ ## deliverable_contents: [Exact output format specs]
593
+ ## success_criteria: [Measurable 100% completion metrics]
594
+ ## context: [Background, constraints, prior results]
595
+ ## task_steps_for_reference: [Tree-structured preliminary execution plan, tag tasks with the depth in task tree `[DEPTH:xx]`]
596
+ ```
597
+
598
+ #### 2. Execution & Iteration Phase
599
+ - **Iteration Triggers:**
600
+ - Based on the execution results of the upper layer of the task tree, specify and refine the next layer and subsequent task planning, and document them in a new `todo.md` file (e.g., `todo_v2.md`).
601
+ - If there are tasks in the previous layer that have failed or encountered challenges, it is necessary to invoke `reflect` for introspection, consider more possibilities, and make new task planning and invoke `assign_multi_objective_tasks_to_info_seeker` again.
602
+ - If the tasks sent in the current round require reference to task information from previous rounds, it is essential to clearly specify the context of each task and the files that may need to be used or referenced when calling `assign_multi_objective_tasks_to_info_seeker`.
603
+ - For the multiple clues of the execution results from the previous layer, they should be decomposed and refined, and executed in parallel for verification.
604
+
605
+ #### 3. Completion & Synthesis Phase
606
+ - **Validation:** Cross-check multi-source outputs for consistency
607
+ - **Integration:** Combine parallel outputs into unified deliverable
608
+ - **Delivery:** Output language must match user\'s query language
609
+ - **Task Completed:** The `planner_objective_task_done` can only be called when all planned tasks have been completed and the final results are ready to be delivered to the user.
610
+
611
+ ---
612
+
613
+ ### Critical Protocols
614
+ 1. **Dependency Management:**
615
+ - Prohibit parallel dispatch for sequential dependent tasks unless using competitive redundancy mechanism
616
+ - Convert sequential chains to parallel where possible (e.g., Hypothesis_A vs Hypothesis_B testing)
617
+ 2. **File Traceability:**
618
+ - All output references use relative paths (`./data/agent_output_1.json`)
619
+ - Version `todo.md` after each iteration (e.g., `todo_v2.md`)
620
+ 3. **Local File Reading Recommendations:**
621
+ - For files crawled natively, it is not recommended to directly use the `file_read` tool to read the entire content (maybe too long). Instead, the `document_qa` tool should be used to extract and verify the required information.
622
+ - For task deliverables and summary documents from sub-agents, the `file_read` tool can be used to read them.
623
+ 4. The final deliverable presented to the user should be consistent with the language used in the user\'s question.
624
+
625
+ Below, within the <tools></tools> tags, are the descriptions of each tool and the required fields for invocation:
626
+ <tools>
627
+ $tool_schemas
628
+ </tools>
629
+ For each function call, return a JSON object placed within the [unused11][unused12] tags, which includes the function name and the corresponding function arguments:
630
+ [unused11][{\"name\": <function name>, \"arguments\": <args json object>}][unused12]"""
631
+
632
+ planner_mode_system_prompt_map = {
633
+ "auto": auto_system_prompt_template,
634
+ "writing": writing_system_prompt_template,
635
+ "qa": qa_system_prompt_template
636
+ }
637
+
638
+ system_prompt = planner_mode_system_prompt_map[self.config.planner_mode].replace("$tool_schemas", tool_schemas_str)
639
+
640
+ return system_prompt
641
+
642
    def _build_agent_specific_tool_schemas(self) -> List[Dict[str, Any]]:
        """
        Build tool schemas for PlannerAgent using proper MCP architecture.
        Schemas come from MCP server via client, not direct imports.

        Returns:
            List of OpenAI-style function schemas: the MCP server schemas
            obtained from the base class, plus the built-in planner tools
            permitted for the configured planner mode
            (``self.config.planner_mode``: "auto", "writing", or "qa").
        """

        # Get MCP tool schemas from server via client (proper MCP architecture)
        schemas = super()._build_agent_specific_tool_schemas()

        # Add schemas for built-in task assignment tools.
        # Maps each planner mode to the subset of built-in tool names it may
        # use; applied below to filter builtin_assignment_schemas.
        planner_mode_builtin_tools_map = {
            "auto": ["think", "reflect", "assign_multi_subjective_tasks_to_info_seeker", "assign_multi_objective_tasks_to_info_seeker", "assign_subjective_task_to_writer", "writer_subjective_task_done", "planner_subjective_task_done", "planner_objective_task_done"],
            "writing": ["think", "reflect", "assign_multi_subjective_tasks_to_info_seeker", "assign_subjective_task_to_writer", "writer_subjective_task_done", "planner_subjective_task_done"],
            "qa": ["think", "reflect", "assign_multi_objective_tasks_to_info_seeker", "planner_objective_task_done"],
        }
        # Full catalogue of built-in tool schemas (OpenAI function-calling format).
        builtin_assignment_schemas = [
            # `think`: logs reasoning only; per its description it obtains no
            # new information and makes no changes.
            {
                "type": "function",
                "function": {
                    "name": "think",
                    "description": "Use the tool to think about something. It will not obtain new information or make any changes to the repository, but just log the thought. Use it when complex reasoning or brainstorming is needed.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "thought": {
                                "type": "string",
                                "description": "Your thoughts."
                            }
                        },
                        "required": ["thought"]
                    }
                }
            },
            # `reflect`: logs a post-failure review; likewise has no external effect.
            {
                "type": "function",
                "function": {
                    "name": "reflect",
                    "description": "When multiple attempts yield no progress, use this tool to reflect on previous reasoning and planning, considering possible overlooked clues and exploring more possibilities. It will not obtain new information or make any changes to the repository.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "reflect": {
                                "type": "string",
                                "description": "The specific content of your reflection"
                            }
                        },
                        "required": ["reflect"]
                    }
                }
            },
            # Parallel dispatch of 1~6 research tasks (subjective/writing flow).
            {
                "type": "function",
                "function": {
                    "name": "assign_multi_subjective_tasks_to_info_seeker",
                    "description": "Assign 1~6 research or information gathering tasks to different InformationSeekerAgents for parallel execution, each task descriptions must be semantically complete and clearly provide contextual information and potentially important reference documents.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "tasks": {
                                "type": "array",
                                "description": "List of tasks to be assigned to multiple InformationSeekerAgents",
                                "items": {
                                    "type": "object",
                                    "properties": {
                                        "task_content": {
                                            "type": "string",
                                            "description": "Detailed description of the task to be performed"
                                        },
                                        "task_steps_for_reference": {
                                            "type": "string",
                                            "description": "Optional reference steps for task execution"
                                        },
                                        "deliverable_contents": {
                                            "type": "string",
                                            "description": "Expected format and content of deliverables"
                                        },
                                        "current_task_status": {
                                            "type": "string",
                                            "description": "Current status and context of the task, important documents that may be used and referenced"
                                        },
                                        "acceptance_checking_criteria": {
                                            "type": "string",
                                            "description": "Criteria for determining task completion and quality"
                                        },
                                    },
                                    "required": ["task_content"]
                                }
                            }
                        },
                        "required": ["tasks"]
                    }
                }
            },
            # Parallel dispatch of 1~5 research tasks (objective/QA flow).
            {
                "type": "function",
                "function": {
                    "name": "assign_multi_objective_tasks_to_info_seeker",
                    "description": "Assign 1~5 research or information gathering tasks to different InformationSeekerAgents for parallel execution, each task descriptions must be semantically complete and clearly provide contextual information and potentially important reference documents.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "tasks": {
                                "type": "array",
                                "description": "List of tasks to be assigned to multiple InformationSeekerAgents",
                                "items": {
                                    "type": "object",
                                    "properties": {
                                        "task_content": {
                                            "type": "string",
                                            "description": "Detailed description of the task to be performed, the task description must be semantically complete"
                                        },
                                        "task_steps_for_reference": {
                                            "type": "string",
                                            "description": "Optional reference steps for task execution"
                                        },
                                        "deliverable_contents": {
                                            "type": "string",
                                            "description": "Expected format and content of deliverables"
                                        },
                                        "current_task_status": {
                                            "type": "string",
                                            "description": "Current status and context of the task, important documents that may be used and referenced"
                                        },
                                        "acceptance_checking_criteria": {
                                            "type": "string",
                                            "description": "Criteria for determining task completion and quality, and the requirements in the event of task completion failure"
                                        },
                                    },
                                    "required": ["task_content"]
                                }
                            }
                        },
                        "required": ["tasks"]
                    }
                }
            },
            # Hand off collected research material to the WriterAgent.
            {
                "type": "function",
                "function": {
                    "name": "assign_subjective_task_to_writer",
                    "description": "Assign a writing or content creation task to the WriterAgent",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "user_query": {
                                "type": "string",
                                "description": "Pass in the original user question."
                            },
                            "task_content": {
                                "type": "string",
                                "description": "Integrate and synthesize provided materials to generate comprehensive long-form content exceeding 10,000 words, especially careful not to give specific details, such as an outline plan, you are only providing the writer with a general description of the task."
                            },
                            "key_files": {
                                "type": "array",
                                "items": {
                                    "type": "object",
                                    "properties": {
                                        "file_path": {
                                            "type": "string",
                                            "description": "Relative path to the file containing research content"
                                        }
                                    },
                                    "required": ["file_path"]
                                },
                                "description": "Collect all key_files returned by the information seeker for long-form content creation."
                            }
                        },
                        "required": ["user_query", "task_content", "key_files"]
                    }
                }
            },
            # Writer-side completion report for the finished long-form article.
            {
                "type": "function",
                "function": {
                    "name": "writer_subjective_task_done",
                    "description": "Writer Agent task completion reporting for complete long-form content. Called after all chapters/sections are written to provide a summary of the complete long article, final completion status and analysis, and the storage path of the final consolidated article.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "final_article_path": {
                                "type": "string",
                                "description": "The file path where the final article is saved."
                            },
                            "article_summary": {
                                "type": "string",
                                "description": "Comprehensive summary of the complete long-form article, including main themes, key points covered, and overall narrative structure.",
                                "format": "markdown"
                            },
                            "completion_status": {
                                "type": "string",
                                "enum": ["completed", "partial", "failed"],
                                "description": "Final status of the complete long-form writing task"
                            },
                            "completion_analysis": {
                                "type": "string",
                                "description": "Analysis of the overall writing project completion including: assessment of article coherence and quality, evaluation of content organization and flow, identification of any challenges in the writing process, and overall evaluation of the long-form content creation success."
                            }
                        },
                        "required": ["final_article_path", "article_summary", "completion_status", "completion_analysis"]
                    }
                }
            },
            # Planner-side completion report for the subjective (writing) flow.
            {
                "type": "function",
                "function": {
                    "name": "planner_subjective_task_done",
                    "description": "When the writer agent is executed, the task done tool is called to end the planner's task.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "final_article_path": {
                                "type": "string",
                                "description": "The file path where the final article is saved."
                            },
                            "task_summary": {
                                "type": "string",
                                "description": "This field is mainly used to describe the main content of the article, briefly summarize it, and finally indicate the path where the final article is saved.",
                                "format": "markdown"
                            },
                            "task_name": {
                                "type": "string",
                                "description": "The name of the task currently assigned to the agent, usually with underscores (e.g., 'web_research_ai_trends')"
                            },
                            "completion_status": {
                                "type": "string",
                                "enum": ["completed", "partial", "failed"],
                                "description": "Final task status"
                            }
                        },
                        "required": ["final_article_path", "task_summary", "task_name", "completion_status"]
                    }
                }
            },
            # Planner-side completion report for the objective (QA) flow.
            {
                "type": "function",
                "function": {
                    "name": "planner_objective_task_done",
                    "description": "Structured reporting of task completion details including summary, decisions, and final answer",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "task_summary": {
                                "type": "string",
                                "description": "Comprehensive markdown covering what the agent was asked to do, steps taken, tools used, key findings, files created, challenges",
                                "format": "markdown"
                            },
                            "task_name": {
                                "type": "string",
                                "description": "The name of the task currently assigned to the agent, usually with underscores (e.g., 'web_research_ai_trends')"
                            },
                            "key_files": {
                                "type": "array",
                                "items": {
                                    "type": "object",
                                    "properties": {
                                        "file_path": {
                                            "type": "string",
                                            "description": "Relative path to created/modified file"
                                        },
                                        "desc": {
                                            "type": "string",
                                            "description": "File contents and creation purpose"
                                        },
                                        "is_final_output_file": {
                                            "type": "boolean",
                                            "description": "Whether file is primary deliverable"
                                        }
                                    },
                                    "required": ["file_path", "desc", "is_final_output_file"]
                                },
                                "description": "List of key files generated or modified during the task, with their details."
                            },
                            "completion_status": {
                                "type": "string",
                                "enum": ["completed", "partial", "failed"],
                                "description": "Final task status"
                            },
                            "final_answer": {
                                "type": "string",
                                "description": "The final response displayed to the user",
                            }
                        },
                        "required": ["task_summary", "task_name", "key_files", "completion_status", "final_answer"]
                    }
                }
            },
        ]

        # Keep only the built-in tools permitted for the current planner mode.
        used_builtin_schemas = [schema for schema in builtin_assignment_schemas if schema["function"]["name"] in planner_mode_builtin_tools_map[self.config.planner_mode]]
        schemas.extend(used_builtin_schemas)

        return schemas
934
+
935
+ def _execute_react_loop(self, initial_message: str, max_iterations: int = 20) -> Dict[str, Any]:
936
+ """
937
+ Execute the ReAct loop for planning tasks
938
+
939
+ Args:
940
+ initial_message: Initial message to start the planning process
941
+ max_iterations: Maximum number of iterations to perform
942
+
943
+ Returns:
944
+ Dictionary with execution results and trace
945
+ """
946
+ start_time = time.time()
947
+ try:
948
+ # Reset trace for new task
949
+ self.reset_trace()
950
+ # Initialize conversation history
951
+ conversation_history = []
952
+
953
+ # Build system prompt for planning
954
+ system_prompt = self._build_system_prompt()
955
+ # Add to conversation
956
+ conversation_history.append({"role": "system", "content": system_prompt})
957
+ conversation_history.append({"role": "user", "content": initial_message + " /no_think"})
958
+
959
+ iteration = 0
960
+ task_completed = False
961
+
962
+ # Get model endpoint configuration from env-backed config
963
+ from config.config import get_config
964
+ config = get_config()
965
+ model_config = config.get_custom_llm_config()
966
+
967
+ pangu_url = model_config.get('url') or os.getenv('MODEL_REQUEST_URL', '')
968
+ model_token = model_config.get('token') or os.getenv('MODEL_REQUEST_TOKEN', '')
969
+ headers = {'Content-Type': 'application/json', 'csb-token': model_token}
970
+ # ReAct Loop: Reasoning -> Acting -> Reasoning -> Acting...
971
+ while iteration < self.config.max_iterations and not task_completed:
972
+ iteration += 1
973
+ self.logger.info(f"Planning iteration {iteration}")
974
+
975
+ try:
976
+ # Get LLM response (reasoning + potential tool calls)
977
+ retry_num = 1
978
+ max_retry_num = 10
979
+ while retry_num < max_retry_num:
980
+ try:
981
+ response = requests.post(
982
+ url=pangu_url,
983
+ headers=headers,
984
+ json={
985
+ "model": self.config.model,
986
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<s>[unused9]系统:[unused10]' }}{% endif %}{% if message['role'] == 'system' %}{{'<s>[unused9]系统:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'assistant' %}{{'[unused9]助手:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'tool' %}{{'[unused9]工具:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'function' %}{{'[unused9]方法:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'user' %}{{'[unused9]用户:' + message['content'] + '[unused10]'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '[unused9]助手:' }}{% endif %}",
987
+ "spaces_between_special_tokens": False,
988
+ "messages": conversation_history,
989
+ "temperature": self.config.temperature,
990
+ "max_tokens": self.config.max_tokens,
991
+ },
992
+ timeout=model_config.get("timeout", 180)
993
+ )
994
+ response = response.json()
995
+ self.logger.debug(f"API response received")
996
+ break
997
+ except Exception as e:
998
+ time.sleep(3)
999
+ retry_num += 1
1000
+ if retry_num == max_retry_num:
1001
+ raise ValueError(str(e))
1002
+ continue
1003
+ assistant_message = response["choices"][0]["message"]
1004
+
1005
+ # Log the reasoning
1006
+ try:
1007
+ if assistant_message["content"]:
1008
+ reasoning_content = assistant_message["content"].split("[unused16]")[-1].split("[unused17]")[0]
1009
+ if len(reasoning_content) > 0:
1010
+ self.log_reasoning(iteration, reasoning_content)
1011
+ except Exception as e:
1012
+ self.logger.warning(f"Tool call parsing error: {e}")
1013
+ # Parse error, rerun
1014
+ followup_prompt = f"There is a problem with the format of model generation: {e}. Please try again."
1015
+ conversation_history.append({"role": "user", "content": followup_prompt + " /no_think"})
1016
+ continue
1017
+
1018
+ def extract_tool_calls(content):
1019
+ import re
1020
+ if not content:
1021
+ return []
1022
+ tool_call_str = re.findall(r"\[unused11\]([\s\S]*?)\[unused12\]", content)
1023
+ if len(tool_call_str) > 0:
1024
+ try:
1025
+ tool_calls = json.loads(tool_call_str[0].strip())
1026
+ except:
1027
+ return []
1028
+ else:
1029
+ return []
1030
+ return tool_calls
1031
+
1032
+ # Add assistant message to conversation
1033
+ conversation_history.append({
1034
+ "role": "assistant",
1035
+ "content": assistant_message["content"]
1036
+ })
1037
+
1038
+ tool_calls = extract_tool_calls(assistant_message["content"])
1039
+
1040
+ # Execute tool calls if any (Acting phase)
1041
+
1042
+ for tool_call in tool_calls:
1043
+ arguments = tool_call["arguments"]
1044
+ self.logger.debug(f"Arguments is string: {isinstance(arguments, str)}")
1045
+
1046
+ # Check if planning is complete
1047
+ if tool_call["name"] in ["planner_subjective_task_done", "planner_objective_task_done", "writer_subjective_task_done"]:
1048
+ task_completed = True
1049
+ self.log_action(iteration, tool_call["name"], arguments, arguments)
1050
+ break
1051
+ if tool_call["name"] in ["think", "reflect"]:
1052
+ tool_result = {"tool_results": "You can proceed to invoke other tools if needed. "}
1053
+ else:
1054
+ tool_result = self.execute_tool_call(tool_call)
1055
+
1056
+ # Log the action using base class method
1057
+ self.log_action(iteration, tool_call["name"], arguments, tool_result)
1058
+
1059
+ # Add tool result to conversation
1060
+ conversation_history.append({
1061
+ "role": "tool",
1062
+ "content": json.dumps(tool_result, ensure_ascii=False, indent=2) + " /no_think"
1063
+ })
1064
+
1065
+ # If no tool calls, encourage continued planning
1066
+ if len(tool_calls) == 0:
1067
+ # Add follow-up prompt to encourage action or completion
1068
+ followup_prompt = (
1069
+ "Continue your planning process. Use available tools to assign tasks to agents, "
1070
+ "search for information, or coordinate work. When you have a complete answer, "
1071
+ "call planner_subjective_task_done or planner_objective_task_done. /no_think"
1072
+ )
1073
+ conversation_history.append({"role": "user", "content": followup_prompt})
1074
+
1075
+ except Exception as e:
1076
+ error_msg = f"Error in planning iteration {iteration}: {e}"
1077
+ self.log_error(iteration, error_msg)
1078
+ break
1079
+
1080
+ execution_time = time.time() - start_time
1081
+
1082
+ # Extract final result
1083
+ if task_completed:
1084
+ # Find the completion result in the trace
1085
+ completion_result = None
1086
+ for step in reversed(self.reasoning_trace):
1087
+ if step.get("type") == "action" and step.get("tool") in ["planner_subjective_task_done",
1088
+ "planner_objective_task_done"]:
1089
+ completion_result = step.get("result")
1090
+ break
1091
+
1092
+ return {
1093
+ "success": True,
1094
+ "data": completion_result,
1095
+ "reasoning_trace": self.reasoning_trace,
1096
+ "iterations": iteration,
1097
+ "execution_time": execution_time
1098
+ }
1099
+ else:
1100
+ return {
1101
+ "success": False,
1102
+ "error": f"Planning task not completed within {max_iterations} iterations",
1103
+ "reasoning_trace": self.reasoning_trace,
1104
+ "iterations": iteration,
1105
+ "execution_time": execution_time
1106
+ }
1107
+ except Exception as e:
1108
+ execution_time = time.time() - start_time if 'start_time' in locals() else 0
1109
+ self.logger.error(f"Error in execute_react_loop: {e}")
1110
+ return {
1111
+ "success": False,
1112
+ "error": str(e),
1113
+ "reasoning_trace": self.reasoning_trace,
1114
+ "iterations": iteration if 'iteration' in locals() else 0,
1115
+ "execution_time": execution_time
1116
+ }
1117
+
1118
+
1119
+ def execute_task(self, user_query: str) -> AgentResponse:
1120
+ """
1121
+ Execute a planning task for the given user query
1122
+
1123
+ Args:
1124
+ user_query: The user's query or request
1125
+
1126
+ Returns:
1127
+ AgentResponse with planning results and process trace
1128
+ """
1129
+ start_time = time.time()
1130
+
1131
+ try:
1132
+ self.logger.info(f"Starting planner task: {user_query}")
1133
+
1134
+ # Execute the planning task using ReAct pattern
1135
+ result = self._execute_react_loop(
1136
+ initial_message=user_query,
1137
+ max_iterations=self.config.max_iterations # Reasonable limit for planning tasks
1138
+ )
1139
+
1140
+ execution_time = time.time() - start_time
1141
+
1142
+ return AgentResponse(
1143
+ success=result.get("success", False),
1144
+ result=result.get("data"),
1145
+ error=result.get("error"),
1146
+ reasoning_trace=result.get("reasoning_trace", []),
1147
+ iterations=result.get("iterations", 0),
1148
+ execution_time=execution_time,
1149
+ agent_name=self.config.agent_name
1150
+ )
1151
+
1152
+ except Exception as e:
1153
+ execution_time = time.time() - start_time
1154
+ self.logger.error(f"Planner execution failed: {e}")
1155
+
1156
+ return AgentResponse(
1157
+ success=False,
1158
+ error=f"Planner execution failed: {str(e)}",
1159
+ reasoning_trace=[],
1160
+ iterations=0,
1161
+ execution_time=execution_time,
1162
+ agent_name=self.config.agent_name
1163
+ )
1164
+
1165
+
1166
def create_planner_agent(
    model: Any = None,
    sub_agent_configs: Dict[str, Dict[str, Any]] = None,
    shared_mcp_client=None,
    **kwargs
) -> PlannerAgent:
    """
    Create a PlannerAgent instance with server-managed sessions.

    Args:
        model: The LLM model to use
        sub_agent_configs: Configuration for sub-agents (information_seeker, writer)
        shared_mcp_client: Optional shared MCP client to prevent duplicate connections
        **kwargs: Additional configuration options

    Returns:
        Configured PlannerAgent instance
    """
    # The enhanced config factory is imported lazily, function-local.
    from .base_agent import create_agent_config

    # Build the agent configuration (session managed by MCP server).
    planner_config = create_agent_config(
        agent_name="PlannerAgent",
        model=model,
        **kwargs
    )

    # Instantiate the planner, reusing a shared MCP client when provided.
    agent = PlannerAgent(config=planner_config, shared_mcp_client=shared_mcp_client)

    # Record sub-agent configurations for later sub-agent creation; fall back
    # to empty per-agent configs when none are supplied.
    if sub_agent_configs:
        agent.sub_agent_configs = sub_agent_configs
    else:
        agent.sub_agent_configs = {
            "information_seeker": {},
            "writer": {}
        }

    return agent
deepdiver_v2/src/agents/subjective_information_seeker.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
2
+ import json
3
+ from typing import Dict, Any, List
4
+ import time
5
+ import requests
6
+ import os
7
+ from .base_agent import BaseAgent, AgentConfig, AgentResponse, TaskInput
8
+
9
+
10
+
11
+ class InformationSeekerAgent(BaseAgent):
12
+ """
13
+ Information Seeker Agent that follows ReAct pattern (Reasoning + Acting)
14
+
15
+ This agent takes decomposed sub-questions or tasks from parent agents,
16
+ thinks interleaved (reasoning -> action -> reasoning -> action),
17
+ uses MCP tools to gather information, and returns structured results.
18
+ """
19
+
20
+ def __init__(self, config: AgentConfig = None, shared_mcp_client=None):
21
+ # Set default agent name if not specified
22
+ if config is None:
23
+ config = AgentConfig(agent_name="InformationSeekerAgent")
24
+ elif config.agent_name == "base_agent":
25
+ config.agent_name = "InformationSeekerAgent"
26
+
27
+ super().__init__(config, shared_mcp_client)
28
+
29
    def _build_system_prompt(self) -> str:
        """Build the ReAct system prompt for this agent.

        Returns:
            The prompt template with the ``$tool_schemas`` placeholder replaced
            by the JSON-serialized tool schemas available to this agent.
        """
        tool_schemas_str = json.dumps(self.tool_schemas, ensure_ascii=False)
        system_prompt_template = """You are an Information Seeker Agent that follows the ReAct pattern (Reasoning + Acting).

Your role is to:
1. Take decomposed sub-questions or tasks from parent agents
2. Think step-by-step through reasoning
3. Use available tools to gather information when needed
4. Continue reasoning based on tool results
5. Repeat this process until you have sufficient information
6. Call info_seeker_subjective_task_done to provide a structured summary and key files

TOOL USAGE STRATEGY:
Follow this optimized workflow for information gathering:

1. INITIAL RESEARCH:
- Generate focused search queries (≤10): Limit to no more than 10 initial search queries to avoid increased failure rates from excessive decomposition.
- Use `batch_web_search` to find relevant URLs for your queries. When calling the search statement, consider the language of the user's question. For example, for a Chinese question, generate a part of the search statement in Chinese.
- Analyze the search results (titles, snippets, URLs) to identify promising sources

2. CONTENT EXTRACTION:
- For important URLs, use `url_crawler` to:
a) Extract full content from the webpage
b) Save the content to a file in the workspace **under the relative path `./url_crawler_save_files/`**
- Store results with meaningful file paths (e.g., `url_crawler_save_files/research/ai_trends_2024.txt`)

3. CONTENT ANALYSIS:
- Use `document_extract` for multi-dimensional analysis of saved files:
a) Provides structured analysis across five key dimensions: doc time source authority, core content and task relevance

4. FILE MANAGEMENT:
- For reviewing saved content:
a) Prefer `document_extract` to get comprehensive multi-dimensional analysis of saved files
b) Use `file_read` ONLY for small files (<1000 tokens) when you need the entire content
c) Avoid reading large files directly as it may exceed context limits

### Usage of Systematic Tool:
- `think` is a systematic tool. After receiving the response from the complex tool or before invoking any other tools, you must **first invoke the `think` tool**: to deeply reflect on the results of previous tool invocations (if any), and to thoroughly consider and plan the user's task. The `think` tool does not acquire new information; it only saves your thoughts into memory.

Always provide clear reasoning for your actions and synthesize information effectively.

Below, within the <tools></tools> tags, are the descriptions of each tool and the required fields for invocation:
<tools>
$tool_schemas
</tools>
For each function call, return a JSON object placed within the [unused11][unused12] tags, which includes the function name and the corresponding function arguments:
[unused11][{\"name\": <function name>, \"arguments\": <args json object>}][unused12]
"""
        # str.replace is used (rather than str.format / string.Template.substitute)
        # presumably because the template contains literal braces and $-free text
        # that formatting engines could misinterpret — TODO confirm.
        return system_prompt_template.replace("$tool_schemas", tool_schemas_str)
79
+
80
+ @staticmethod
81
+ def _build_initial_message_from_task_input(task_input: TaskInput) -> str:
82
+ """Build the initial user message from TaskInput"""
83
+ message = task_input.format_for_prompt()
84
+
85
+ message += "\nPlease analyze this task and start your ReAct process:\n"
86
+ message += "1. Reason about what information you need to gather\n"
87
+ message += "2. Use appropriate tools to get that information\n"
88
+ message += "3. Continue reasoning and acting until you have sufficient information\n"
89
+ message += "4. Call info_seeker_subjective_task_done when ready to provide your complete findings\n\n"
90
+ message += "Begin with your initial reasoning about the task."
91
+
92
+ return message
93
+
94
+ def execute_task(self, task_input: TaskInput) -> AgentResponse:
95
+ """
96
+ Execute a task using ReAct pattern (Reasoning + Acting)
97
+
98
+ Args:
99
+ task_input: TaskInput object with standardized task information
100
+
101
+ Returns:
102
+ AgentResponse with results and process trace
103
+ """
104
+ start_time = time.time()
105
+
106
+ try:
107
+ self.logger.info(f"Starting information seeker task: {task_input.task_content}")
108
+
109
+ # Reset trace for new task
110
+ self.reset_trace()
111
+
112
+ # Initialize conversation history
113
+ conversation_history = []
114
+
115
+ # Build initial system prompt for ReAct
116
+ system_prompt = self._build_system_prompt()
117
+
118
+ # Build initial user message from TaskInput
119
+ user_message = self._build_initial_message_from_task_input(task_input)
120
+
121
+ # Add to conversation
122
+ conversation_history.append({"role": "system", "content": system_prompt})
123
+ conversation_history.append({"role": "user", "content": user_message + " /no_think"})
124
+
125
+ iteration = 0
126
+ task_completed = False
127
+ # Get model configuration from config
128
+ from config.config import get_config
129
+ config = get_config()
130
+ model_config = config.get_custom_llm_config()
131
+
132
+ pangu_url = model_config.get('url') or os.getenv('MODEL_REQUEST_URL', '')
133
+ model_token = model_config.get('token') or os.getenv('MODEL_REQUEST_TOKEN', '')
134
+ headers = {'Content-Type': 'application/json', 'csb-token': model_token}
135
+
136
+ # ReAct Loop: Reasoning -> Acting -> Reasoning -> Acting...
137
+ self.config.max_iterations = 30
138
+ while iteration < self.config.max_iterations and not task_completed:
139
+ iteration += 1
140
+ self.logger.info(f"Planning iteration {iteration}")
141
+
142
+ try:
143
+ # Get LLM response (reasoning + potential tool calls)
144
+ retry_num = 1
145
+ max_retry_num = 10
146
+ while retry_num < max_retry_num:
147
+ try:
148
+ response = requests.post(
149
+ url=pangu_url,
150
+ headers=headers,
151
+ json={
152
+ "model": model_config.get('model', 'pangu_auto'),
153
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<s>[unused9]系统:[unused10]' }}{% endif %}{% if message['role'] == 'system' %}{{'<s>[unused9]系统:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'assistant' %}{{'[unused9]助手:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'tool' %}{{'[unused9]工具:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'function' %}{{'[unused9]方法:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'user' %}{{'[unused9]用户:' + message['content'] + '[unused10]'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '[unused9]助手:' }}{% endif %}",
154
+ "messages": conversation_history,
155
+ "spaces_between_special_tokens": False,
156
+ "temperature": self.config.temperature,
157
+ },
158
+ timeout=model_config.get("timeout", 180)
159
+ )
160
+ response = response.json()
161
+
162
+ self.logger.debug(f"API response received")
163
+ break
164
+ except Exception as e:
165
+ time.sleep(3)
166
+ retry_num += 1
167
+ if retry_num == max_retry_num:
168
+ raise ValueError(str(e))
169
+ continue
170
+
171
+ assistant_message = response["choices"][0]["message"]
172
+ # Log the reasoning
173
+ try:
174
+ if assistant_message["content"]:
175
+ reasoning_content = assistant_message["content"].split("[unused16]")[-1].split("[unused17]")[0]
176
+ if len(reasoning_content) > 0:
177
+ self.log_reasoning(iteration, reasoning_content)
178
+ except Exception as e:
179
+ self.logger.warning(f"Tool call parsing error: {e}")
180
+ # Parse error, rerun
181
+ followup_prompt = f"There is a problem with the format of model generation: {e}. Please try again."
182
+ conversation_history.append({"role": "user", "content": followup_prompt + " /no_think"})
183
+ continue
184
+
185
+
186
+ def extract_tool_calls(content):
187
+ import re
188
+ if not content:
189
+ return []
190
+ tool_call_str = re.findall(r"\[unused11\]([\s\S]*?)\[unused12\]", content)
191
+ if len(tool_call_str) > 0:
192
+ try:
193
+ tool_calls = json.loads(tool_call_str[0].strip())
194
+ except Exception as ee:
195
+ return ["fail_tools_load", ee]
196
+ else:
197
+ return []
198
+ return tool_calls
199
+
200
+ # Add assistant message to conversation
201
+ conversation_history.append({
202
+ "role": "assistant",
203
+ "content": assistant_message["content"]
204
+ })
205
+
206
+ tool_calls = extract_tool_calls(assistant_message["content"])
207
+
208
+ if tool_calls[0] == "fail_tools_load":
209
+ # Parse error, rerun
210
+ followup_prompt = f"There was a parsing error in the format of the tool call" \
211
+ f" you generated:{tool_calls[1]} Please regenerate it."
212
+ conversation_history.append({"role": "user", "content": followup_prompt + " /no_think"})
213
+ continue
214
+
215
+
216
+ # Execute tool calls if any (Acting phase)
217
+
218
+ for tool_call in tool_calls:
219
+ arguments = tool_call["arguments"]
220
+
221
+ # Check if planning is complete
222
+ if tool_call["name"] in ["info_seeker_subjective_task_done"]:
223
+ task_completed = True
224
+ self.log_action(iteration, tool_call["name"], arguments, arguments)
225
+ break
226
+ if tool_call["name"] in ["think", "reflect"]:
227
+ tool_result = {"tool_results": "You can proceed to invoke other tools if needed."}
228
+ else:
229
+ tool_result = self.execute_tool_call(tool_call)
230
+
231
+ # Log the action using base class method
232
+ self.log_action(iteration, tool_call["name"], arguments, tool_result)
233
+
234
+ # Add tool result to conversation
235
+ conversation_history.append({
236
+ "role": "tool",
237
+ "content": json.dumps(tool_result, ensure_ascii=False, indent=2) + " /no_think"
238
+ })
239
+
240
+ # If no tool calls, encourage continued planning
241
+ if len(tool_calls) == 0:
242
+ # Add follow-up prompt to encourage action or completion
243
+ followup_prompt = (
244
+ "Continue your analysis. If you need more information, use available tools. "
245
+ "If you have enough information to answer the question, call info_seeker_subjective_task_done with your complete context."
246
+ )
247
+ conversation_history.append({"role": "user", "content": followup_prompt + " /no_think"})
248
+ if iteration == self.config.max_iterations-3:
249
+ followup_prompt = "Due to length and number of rounds restrictions, you must now call the `info_seeker_subjective_task_done` tool to report the completion of your task."
250
+ conversation_history.append({"role": "user", "content": followup_prompt + " /no_think"})
251
+
252
+
253
+ except Exception as e:
254
+ error_msg = f"Error in planning iteration {iteration}: {e}"
255
+ self.log_error(iteration, error_msg)
256
+ break
257
+
258
+ execution_time = time.time() - start_time
259
+ # Extract final result
260
+ if task_completed:
261
+ # Find the task_done result in the trace
262
+ task_done_result = None
263
+ for step in reversed(self.reasoning_trace):
264
+ if step.get("type") == "action" and step.get("tool") == "info_seeker_subjective_task_done":
265
+ task_done_result = step.get("result")
266
+ break
267
+
268
+ return self.create_response(
269
+ success=True,
270
+ result=task_done_result,
271
+ iterations=iteration,
272
+ execution_time=execution_time
273
+ )
274
+ else:
275
+ return self.create_response(
276
+ success=False,
277
+ error=f"Task not completed within {self.config.max_iterations} iterations",
278
+ iterations=iteration,
279
+ execution_time=execution_time
280
+ )
281
+
282
+ except Exception as e:
283
+ execution_time = time.time() - start_time
284
+ self.logger.error(f"Error in execute_task: {e}")
285
+ return self.create_response(
286
+ success=False,
287
+ error=str(e),
288
+ iterations=iteration if 'iteration' in locals() else 0,
289
+ execution_time=execution_time
290
+ )
291
+
292
    def _build_agent_specific_tool_schemas(self) -> List[Dict[str, Any]]:
        """
        Build tool schemas for InformationSeekerAgent using proper MCP architecture.
        Schemas come from MCP server via client, not direct imports.

        Returns:
            The MCP-provided schemas extended with the three built-in tools
            ("think", "reflect", "info_seeker_subjective_task_done") that
            execute_task handles locally instead of dispatching to MCP.
        """
        # Get MCP tool schemas from server via client (proper MCP architecture)
        schemas = super()._build_agent_specific_tool_schemas()

        # Add schemas for built-in task assignment tools.
        # "think"/"reflect" are short-circuited in execute_task with a canned
        # acknowledgement; "info_seeker_subjective_task_done" terminates the
        # ReAct loop and carries the final report payload.
        builtin_assignment_schemas = [
            {
                "type": "function",
                "function": {
                    "name": "think",
                    "description": "Use the tool to think about something. It will not obtain new information or make any changes to the repository, but just log the thought. Use it when complex reasoning or brainstorming is needed.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "thought": {
                                "type": "string",
                                "description": "Your thoughts."
                            }
                        },
                        "required": ["thought"]
                    }
                }
            },
            {
                "type": "function",
                "function": {
                    "name": "reflect",
                    "description": "When multiple attempts yield no progress, use this tool to reflect on previous reasoning and planning, considering possible overlooked clues and exploring more possibilities. It will not obtain new information or make any changes to the repository.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "reflect": {
                                "type": "string",
                                "description": "The specific content of your reflection"
                            }
                        },
                        "required": ["reflect"]
                    }
                }
            },
            {
                "type": "function",
                "function": {
                    "name": "info_seeker_subjective_task_done",
                    "description": "Information Seeker Agent task completion reporting with information collection summary and related files.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "task_summary": {
                                "type": "string",
                                "description": "Simple summary of what information has been collected for the current task and what new discoveries have been made.",
                                "format": "markdown"
                            },
                            "key_files": {
                                "type": "array",
                                "items": {
                                    "type": "object",
                                    "properties": {
                                        "file_path": {
                                            "type": "string",
                                            "description": "Relative path to the file with collected content"
                                        },
                                    },
                                    "required": ["file_path"]
                                },
                                "description": "Collect files highly relevant to this task. "
                            },
                            "completion_status": {
                                "type": "string",
                                "enum": ["completed", "partial", "failed"],
                                "description": "Final status of the information gathering task"
                            },
                            "completion_analysis": {
                                "type": "string",
                                "description": "Brief analysis of task completion quality, information thoroughness, and any limitations or gaps."
                            }
                        },
                        "required": ["task_summary", "key_files", "completion_status", "completion_analysis"]
                    }
                }
            },
        ]

        schemas.extend(builtin_assignment_schemas)

        return schemas
382
+
383
+
384
# Factory function for creating the agent
def create_subjective_information_seeker(
    model: str = "pangu_auto",
    max_iterations: int = 10,
    shared_mcp_client=None,
    **kwargs
) -> InformationSeekerAgent:
    """
    Create an InformationSeekerAgent instance with server-managed sessions.

    Args:
        model: The LLM model to use
        max_iterations: Maximum number of iterations
        shared_mcp_client: Optional shared MCP client from parent agent (prevents extra sessions)
        **kwargs: Additional configuration options

    Returns:
        Configured InformationSeekerAgent instance with appropriate tools
    """
    # Local import avoids a circular dependency at module load time.
    from .base_agent import create_agent_config

    # Assemble the agent configuration; the MCP server owns session lifecycle.
    agent_config = create_agent_config(
        agent_name="InformationSeekerAgent",
        model=model,
        max_iterations=max_iterations,
        **kwargs
    )

    # Reusing the parent's MCP client keeps tool access on a single session.
    return InformationSeekerAgent(config=agent_config, shared_mcp_client=shared_mcp_client)
deepdiver_v2/src/agents/writer_agent.py ADDED
@@ -0,0 +1,477 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
2
+ import json
3
+ from typing import Dict, Any, List
4
+ import time
5
+ import requests
6
+ import os
7
+ from .base_agent import BaseAgent, AgentConfig, AgentResponse, WriterAgentTaskInput
8
+
9
+
10
+
11
class WriterAgent(BaseAgent):
    """
    Writer Agent that follows ReAct pattern for content synthesis and generation

    This agent takes writing tasks from parent agents, searches through existing
    files and knowledge base, and creates long-form content through iterative
    reasoning and refinement. It does NOT access internet resources, only
    local files and memories.
    """

    def __init__(self, config: AgentConfig = None, shared_mcp_client=None):
        # Guarantee a writer-specific name: build a fresh config when none is
        # supplied, and overwrite the generic "base_agent" placeholder.
        if config is None:
            config = AgentConfig(agent_name="WriterAgent")
        if config.agent_name == "base_agent":
            config.agent_name = "WriterAgent"

        super().__init__(config, shared_mcp_client)

        # Rebuild tool schemas so only writer-specific tools are exposed.
        self.tool_schemas = self._build_tool_schemas()
32
+
33
    def _build_agent_specific_tool_schemas(self) -> List[Dict[str, Any]]:
        """
        Build tool schemas for WriterAgent using proper MCP architecture.
        Schemas come from MCP server via client, not direct imports.

        Returns:
            The MCP-provided schemas extended with the built-in tools
            ("think", "reflect", "writer_subjective_task_done").
        """
        # Get MCP tool schemas from server via client (proper MCP architecture)
        schemas = super()._build_agent_specific_tool_schemas()

        # Add schemas for built-in task assignment tools; these are the local
        # (non-MCP) tools the writer's ReAct loop recognizes.
        builtin_assignment_schemas = [
            {
                "type": "function",
                "function": {
                    "name": "think",
                    "description": "Use the tool to think about something. It will not obtain new information or make any changes to the repository, but just log the thought. Use it when complex reasoning or brainstorming is needed.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "thought": {
                                "type": "string",
                                "description": "Your thoughts."
                            }
                        },
                        "required": ["thought"]
                    }
                }
            },
            {
                "type": "function",
                "function": {
                    "name": "reflect",
                    "description": "When multiple attempts yield no progress, use this tool to reflect on previous reasoning and planning, considering possible overlooked clues and exploring more possibilities. It will not obtain new information or make any changes to the repository.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "reflect": {
                                "type": "string",
                                "description": "The specific content of your reflection"
                            }
                        },
                        "required": ["reflect"]
                    }
                }
            },
            {
                "type": "function",
                "function": {
                    "name": "writer_subjective_task_done",
                    "description": "Writer Agent task completion reporting for complete long-form content. Called after all chapters/sections are written to provide a summary of the complete long article, final completion status and analysis, and the storage path of the final consolidated article.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "final_article_path": {
                                "type": "string",
                                "description": "The file path where the final article is saved."
                            },
                            "article_summary": {
                                "type": "string",
                                "description": "Comprehensive summary of the complete long-form article, including main themes, key points covered, and overall narrative structure.",
                                "format": "markdown"
                            },
                            "completion_status": {
                                "type": "string",
                                "enum": ["completed", "partial", "failed"],
                                "description": "Final status of the complete long-form writing task"
                            },
                            "completion_analysis": {
                                "type": "string",
                                "description": "Analysis of the overall writing project completion including: assessment of article coherence and quality, evaluation of content organization and flow, identification of any challenges in the writing process, and overall evaluation of the long-form content creation success."
                            }
                        },
                        "required": ["final_article_path", "article_summary", "completion_status",
                                     "completion_analysis"]
                    }
                }
            },
        ]

        schemas.extend(builtin_assignment_schemas)

        return schemas
114
+
115
    def _build_system_prompt(self) -> str:
        """Build the system prompt for the writer agent.

        Returns:
            The writing-workflow prompt with the ``$tool_schemas`` placeholder
            replaced by the JSON-serialized tool schemas for this agent.
        """
        tool_schemas_str = json.dumps(self.tool_schemas, ensure_ascii=False)
        system_prompt_template = """You are a professional writing master. You will receive key files and user problems. Your task is to generate an outline highly consistent with the user problem, classify files into sections, and iteratively call section_writer tool to create comprehensive content. Then you strictly follow the steps given below:

MANDATORY WORKFLOW:

1. OUTLINE GENERATION
Based on the core content of the provided key files collection(file_core_content), generate a high-quality outline suitable for long-form writing. Strictly adhere to the following requirements during generation:
- Before generating the outline, carefully review the provided **file_core_content**, prioritizing sections with:
1.**Higher authority** (credible sources)
2.**Greater information richness** (substantive, detailed content)
3.**Stronger relevance** (direct alignment with user query)
4.**Timeliness** (if user’s query is time-sensitive, prioritize recent/updated content)
Select these segments as the basis for outline generation. Note that we only focus on relevance to the question, so when generating the outline, do not add unrelated sections just for the sake of length. Additionally, the sections should flow logically and not be too disjointed, as this would harm the readability of the final output.
- The overall structure must be **logically clear**, with **no repetition or redundancy** between chapters.
- **Note1:** The generated outline must not only have chapter-level headings (Level 1) highly relevant to the user’s question, but the subheadings (Level 2) must also be highly relevant to the user’s question. It is not permitted to generate chapter titles with weak relevance, whether Level 1 or Level 2.
- **Note2:** The number of chapters must not exceed 7, dynamic evaluation can be performed based on the collected content. For example, if there is a lot of content, more chapters can be generated, and vice versa. But each chapter should only include Level 1 and Level 2 headings. Also, be careful not to generate too many Level 2 headings, limit them to 4. However, if the first chapter is an abstract or introduction, do not generate subheadings (level-2 headings)—only include the main heading (level-1). Additionally, tailor the outline style based on the type of document. For example, in a research report, the first chapter should preferably be titled \"Abstract\" or \"Introduction.\"

2. FILE CLASSIFICATION
- Use the search_result_classifier tool to reasonably split the outline generated above and accurately assign key files to each chapter of the outline.
- Ensure optimal distribution of reference materials across chapters based on content relevance.

3. ITERATIVE SECTION WRITING
- Call section_writer tool sequentially for each chapter
- CRITICAL: Must wait for previous chapter completion before starting the next chapter
- Pass only the specific chapter outline , target file path and corresponding classified files to each section writer
- Generate save path for each chapter using \"./report/part_X.md\" format (e.g., \"./report/part_1.md\" for first chapter)
- Check section writer results after completion; retry up to 2 times per chapter if quality is insufficient based on returned fields (do not read saved files)
- When you call the section_writer tool, pay special attention to the fact that the parameter value of written_chapters_summary is a summary of the content returned by all previously completed chapters. Be careful not to make any changes to the summary content, including compressing the content.

4. TASK COMPLETION
- After all chapters are written, you must first call the concat_section_files tool to merge the saved chapter files into one file, then call writer_subjective_task_done to finalize and return.

CRITICAL REQUIREMENTS:
- The creation of the outline is crucial! Therefore, you must strictly adhere to the above requirements for generating the outline.
- No parallel writing - strictly sequential chapter execution
- Wait for each section writer completion before proceeding to next chapter
- Classify files appropriately to support each chapter's content needs
- Note again that to merge all the written chapter files, you must use the concat_section_files tool!!! You are not allowed to call any other tools for merging!!!

FORBIDDEN CONTENT PATTERNS:
- NEVER generate meta-structural chapters that describe how the article is organized
- AVOID introductory sections that outline \"Chapter 1 will cover..., Chapter 2 will discuss...\"
- DO NOT create chapters that explain the report structure or methodology
- Each chapter must contain SUBSTANTIVE CONTENT, not descriptions of what other chapters contain
- When generating an outline, if it is not a professional term, the language should remain consistent with the user's question.\"

Usage of TOOLS:
- search_result_classifier: Classify key files into outline sections
- section_writer: Write individual chapters sequentially
- writer_subjective_task_done: Complete the writing task
- concat_section_files: Concatenate the content of the saved section files into a single file
- think tool: \"Think\" is a systematic tool requiring its use during key steps. Before executing actions like generating an outline, you must first call this tool to deeply consider the given content and key requirements, ensuring the output meets specifications. Similarly, during iterative chapter generation, after receiving feedback and before writing the next chapter, call \"think\" to reflect on the current chapter. This provides guidance to avoid content repetition and ensure smooth transitions between chapters.

Execute workflow systematically to produce high-quality, coherent long-form content with substantive chapters.

Below, within the <tools></tools> tags, are the descriptions of each tool and the required fields for invocation:
<tools>
$tool_schemas
</tools>
For each function call, return a JSON object placed within the [unused11][unused12] tags, which includes the function name and the corresponding function arguments:
[unused11][{\"name\": <function name>, \"arguments\": <args json object>}][unused12]
"""
        # str.replace is used (rather than a formatting engine) — presumably
        # because the template contains literal braces and markup that
        # str.format would misinterpret; TODO confirm.
        return system_prompt_template.replace("$tool_schemas", tool_schemas_str)
180
+
181
+ def _build_initial_message_from_task_input(self, task_input: WriterAgentTaskInput) -> str:
182
+ """Build the initial user message from TaskInput"""
183
+ message = ""
184
+
185
+ # Add key files information with reliability dimensions
186
+ def load_json_from_server(file_path):
187
+ """Load JSONL file from MCP server using unlimited internal tool"""
188
+ res = []
189
+ try:
190
+ # Use json read tool directly through raw MCP client
191
+ raw_result = self.mcp_tools.client.call_tool("load_json", {"file_path": file_path})
192
+
193
+ if not raw_result.success:
194
+ self.logger.error(f"Failed to read file from server: {raw_result.error}")
195
+ return res
196
+
197
+ res = json.loads(raw_result.data["content"][0]["text"])["data"]
198
+
199
+ except Exception as e:
200
+ self.logger.error(f"Error loading file {file_path} from MCP server: {e}")
201
+ import traceback
202
+ self.logger.debug(f"Full traceback: {traceback.format_exc()}")
203
+
204
+ return res
205
+
206
+ key_files_dict = {}
207
+
208
+ server_analysis_path = f"doc_analysis/file_analysis.jsonl"
209
+ self.logger.debug(f"Loading analysis from MCP server: {server_analysis_path}")
210
+ file_analysis_list = load_json_from_server(server_analysis_path)
211
+
212
+ for file_info in file_analysis_list:
213
+ if file_info.get('file_path'):
214
+ key_files_dict[file_info.get('file_path')] = file_info
215
+
216
+ file_core_content = ""
217
+ if hasattr(task_input, 'key_files') and task_input.key_files:
218
+ message += "Key Files:\n"
219
+ for i, file_ in enumerate(task_input.key_files, 1):
220
+ file_path = file_.get('file_path')
221
+ if file_path in key_files_dict:
222
+ file_info = key_files_dict[file_path]
223
+ doc_time = file_info.get('doc_time', 'Not specified')
224
+ source_authority = file_info.get('source_authority', 'Not assessed')
225
+ task_relevance = file_info.get('task_relevance', 'Not assessed')
226
+ information_richness = file_info.get('information_richness', 'Not assessed')
227
+ message += f"{i}. File: {file_path}\n"
228
+
229
+ file_core_content += f"[{str(i)}]doc_time:{doc_time}|||source_authority:{source_authority}|||task_relevance:{task_relevance}|||information_richness:{information_richness}|||summary_content:{file_info.get('core_content', '')}\n"
230
+ message += "\n"
231
+ message += f"file_core_content: {file_core_content}\n"
232
+ else:
233
+ message += "Key Files: None provided\n"
234
+
235
+ message += "\n"
236
+ # Add user query
237
+ if hasattr(task_input, 'user_query') and task_input.user_query:
238
+ message += f"User Query: {task_input.user_query}\n"
239
+ else:
240
+ message += "User Query: Not provided\n"
241
+
242
+ return message
243
+
244
+ def execute_task(self, task_input: WriterAgentTaskInput) -> AgentResponse:
245
+ """
246
+ Execute a writing task using ReAct pattern
247
+
248
+ Args:
249
+ task_input: TaskInput object with standardized task information
250
+
251
+ Returns:
252
+ AgentResponse with writing results and process trace
253
+ """
254
+ start_time = time.time()
255
+
256
+ try:
257
+ self.logger.info(f"Starting writing task: {task_input.task_content}")
258
+
259
+ # Reset trace for new task
260
+ self.reset_trace()
261
+
262
+ # Initialize conversation history
263
+ conversation_history = []
264
+
265
+ # Build system prompt for writing
266
+ system_prompt = self._build_system_prompt()
267
+
268
+ # Build initial user message from TaskInput
269
+ user_message = self._build_initial_message_from_task_input(task_input)
270
+
271
+ # Add to conversation
272
+ conversation_history.append({"role": "system", "content": system_prompt})
273
+ conversation_history.append({"role": "user", "content": user_message + " /no_think"})
274
+
275
+ iteration = 0
276
+ task_completed = False
277
+
278
+ self.logger.debug("Checking conversation history before model call")
279
+ self.logger.debug(f"Conversation history: {conversation_history}")
280
+ # ReAct Loop for Writing: Research → Plan → Write → Refine → Complete
281
+ # Get model configuration from config
282
+ from config.config import get_config
283
+ config = get_config()
284
+ model_config = config.get_custom_llm_config()
285
+
286
+ pangu_url = model_config.get('url') or os.getenv('MODEL_REQUEST_URL', '')
287
+ model_token = model_config.get('token') or os.getenv('MODEL_REQUEST_TOKEN', '')
288
+ headers = {'Content-Type': 'application/json', 'csb-token': model_token}
289
+
290
+ while iteration < self.config.max_iterations and not task_completed:
291
+ iteration += 1
292
+ self.logger.info(f"Writing iteration {iteration}")
293
+
294
+ try:
295
+ # Get LLM response (reasoning + potential tool calls) with retry
296
+
297
+ max_retries = 10
298
+ response = None
299
+
300
+ for attempt in range(max_retries):
301
+ try:
302
+
303
+ response = requests.post(
304
+ url=pangu_url,
305
+ headers=headers,
306
+ json={
307
+ "model": self.config.model,
308
+ "chat_template":"{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<s>[unused9]系统:[unused10]' }}{% endif %}{% if message['role'] == 'system' %}{{'<s>[unused9]系统:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'assistant' %}{{'[unused9]助手:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'tool' %}{{'[unused9]工具:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'function' %}{{'[unused9]方法:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'user' %}{{'[unused9]用户:' + message['content'] + '[unused10]'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '[unused9]助手:' }}{% endif %}",
309
+ "messages": conversation_history,
310
+ "temperature": self.config.temperature,
311
+ "max_tokens": self.config.max_tokens,
312
+ "spaces_between_special_tokens": False,
313
+ },
314
+ timeout=model_config.get("timeout", 180)
315
+ )
316
+ response = response.json()
317
+
318
+ self.logger.debug(f"API response received")
319
+ break # Success, exit retry loop
320
+
321
+ except Exception as e:
322
+ self.logger.warning(f"LLM API call attempt {attempt + 1} failed: {e}")
323
+ if attempt == max_retries - 1:
324
+ raise e # Last attempt, re-raise the exception
325
+ time.sleep(6) # Simple 1 second delay between retries
326
+
327
+ if response is None:
328
+ raise Exception("Failed to get response after all retries")
329
+
330
+ assistant_message = response["choices"][0]["message"]
331
+
332
+ try:
333
+ if assistant_message["content"]:
334
+ reasoning_content = assistant_message["content"].split("[unused16]")[-1].split("[unused17]")[0]
335
+ if len(reasoning_content) > 0:
336
+ self.log_reasoning(iteration, reasoning_content)
337
+ except Exception as e:
338
+ self.logger.warning(f"Tool call parsing error: {e}")
339
+ # Parse error, rerun
340
+ followup_prompt = f"There is a problem with the format of model generation: {e}. Please try again."
341
+ conversation_history.append({"role": "user", "content": followup_prompt + " /no_think"})
342
+ continue
343
+
344
+ def extract_tool_calls(content):
345
+ import re
346
+ tool_call_str = re.findall(r"\[unused11\]([\s\S]*?)\[unused12\]", content)
347
+ if len(tool_call_str) > 0:
348
+ try:
349
+ tool_calls = json.loads(tool_call_str[0])
350
+ except:
351
+ return []
352
+ else:
353
+ return []
354
+ return tool_calls
355
+
356
+ # Add assistant message to conversation
357
+ conversation_history.append({
358
+ "role": "assistant",
359
+ "content": assistant_message["content"]
360
+ })
361
+
362
+ tool_calls = extract_tool_calls(assistant_message["content"])
363
+
364
+ # Execute tool calls if any (Acting phase)
365
+ for tool_call in tool_calls:
366
+ # Str
367
+ arguments = tool_call["arguments"]
368
+ self.logger.debug(f"Arguments is string: {isinstance(arguments, str)}")
369
+
370
+ # Check if planning is complete
371
+ if tool_call["name"] in ["writer_subjective_task_done"]:
372
+ task_completed = True
373
+ self.log_action(iteration, tool_call["name"], arguments, arguments)
374
+ break
375
+ if tool_call["name"] in ["think"]:
376
+ tool_result = {
377
+ "tool_results": "You can proceed to invoke other tools if needed. But the next step cannot call the reflect tool"}
378
+ else:
379
+ tool_result = self.execute_tool_call(tool_call)
380
+
381
+ # Log the action using base class method
382
+ self.log_action(iteration, tool_call["name"], arguments, tool_result)
383
+
384
+ # Add tool result to conversation
385
+ conversation_history.append({
386
+ "role": "tool",
387
+ "content": json.dumps(tool_result, ensure_ascii=False, indent=2) + " /no_think"
388
+ })
389
+
390
+ # If no tool calls, encourage continued writing
391
+ if len(tool_calls) == 0:
392
+ # Add follow-up prompt to encourage action or completion
393
+ followup_prompt = (
394
+ "Continue your writing process. If you need to research more, use available tools. "
395
+ "If you need to write or edit content, use file operations. "
396
+ "If your writing is complete and meets requirements, call writer_subjective_task_done. /no_think"
397
+ )
398
+ conversation_history.append({"role": "user", "content": followup_prompt})
399
+
400
+ except Exception as e:
401
+ error_msg = f"Error in writing iteration {iteration}: {e}"
402
+ self.log_error(iteration, error_msg)
403
+ break
404
+
405
+ execution_time = time.time() - start_time
406
+ # Extract final result
407
+ if task_completed:
408
+ # Find the completion result in the trace
409
+ completion_result = None
410
+ for step in reversed(self.reasoning_trace):
411
+ if step.get("type") == "action" and step.get("tool") in ["writer_subjective_task_done"]:
412
+ completion_result = step.get("result")
413
+ break
414
+ return self.create_response(
415
+ success=True,
416
+ result=completion_result,
417
+ iterations=iteration,
418
+ execution_time=execution_time
419
+ )
420
+ else:
421
+
422
+ return self.create_response(
423
+ success=False,
424
+ error=f"Writing task not completed within {self.config.max_iterations} iterations",
425
+ iterations=iteration,
426
+ execution_time=execution_time
427
+ )
428
+
429
+ except Exception as e:
430
+ execution_time = time.time() - start_time if 'start_time' in locals() else 0
431
+ self.logger.error(f"Error in execute_react_loop: {e}")
432
+
433
+ return self.create_response(
434
+ success=False,
435
+ error=str(e),
436
+ iterations=iteration if 'iteration' in locals() else 0,
437
+ execution_time=execution_time
438
+ )
439
+
440
+
441
+ # Factory function for creating the writer agent
442
def create_writer_agent(
    model: Any = None,
    max_iterations: int = 15,  # More iterations for writing tasks
    temperature: Any = None,  # Resolved from env if not provided
    max_tokens: Any = None,
    shared_mcp_client=None
) -> WriterAgent:
    """
    Build a WriterAgent instance with server-managed sessions.

    Args:
        model: The LLM model to use
        max_iterations: Maximum number of iterations for writing tasks
        temperature: Temperature setting for creativity
        max_tokens: Maximum tokens for the AI response
        shared_mcp_client: Optional shared MCP client from parent agent (prevents extra sessions)

    Returns:
        Configured WriterAgent instance with writing-focused tools
    """
    # Imported lazily to avoid pulling base_agent in at module import time.
    from .base_agent import create_agent_config

    # Session management lives on the MCP server; the config only carries the
    # model and iteration parameters.
    writer_config = create_agent_config(
        agent_name="WriterAgent",
        model=model,
        max_iterations=max_iterations,
        temperature=temperature,
        max_tokens=max_tokens,
    )

    # Reusing the parent's MCP client keeps all agents in one workspace session.
    return WriterAgent(config=writer_config, shared_mcp_client=shared_mcp_client)
deepdiver_v2/src/tools/__init__.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
"""
Model Context Protocol (MCP) Integration

This package contains MCP server implementations, tools, and integrations
for the DeepDiver multi-agent system.
"""

from .mcp_tools import MCPTools

# Server imports.
# Each optional server is imported independently: a missing mcp_server_simple
# module must not mask the availability of the standard server (previously a
# single try block set MCP_STANDARD_AVAILABLE = False whenever either import
# failed).
try:
    from .mcp_server_standard import create_app as create_standard_app
    MCP_STANDARD_AVAILABLE = True
except ImportError:
    MCP_STANDARD_AVAILABLE = False
    create_standard_app = None

try:
    from .mcp_server_simple import app as simple_app
except ImportError:
    simple_app = None

# For backward compatibility
standard_app = simple_app  # Keep simple app for basic compatibility
MCP_AVAILABLE = MCP_STANDARD_AVAILABLE

__all__ = [
    'MCPTools',
    'create_standard_app',
    'simple_app',
    'standard_app',  # Backward compatibility
    'MCP_AVAILABLE',
    'MCP_STANDARD_AVAILABLE'
]
deepdiver_v2/src/tools/mcp_client.py ADDED
@@ -0,0 +1,814 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
2
+ #!/usr/bin/env python3
3
+ """
4
+ MCP Client for Agent-to-Server Communication
5
+ Provides a proper MCP client that uses the official MCP package
6
+ to connect to and communicate with MCP servers through the Model Context Protocol.
7
+ """
8
+
9
+ import json
10
+ import logging
11
+ import time
12
+ from typing import Dict, Any, List, Optional
13
+ from dataclasses import dataclass, field
14
+ from pathlib import Path
15
+ import sys
16
+ sys.path.append(str(Path(__file__).parent.parent.parent))
17
+ from ..utils.status_codes import JsonRpcErr
18
+ from http import HTTPStatus
19
+
20
+ try:
21
+ import httpx
22
+ MCP_AVAILABLE = True
23
+ except ImportError:
24
+ MCP_AVAILABLE = False
25
+ logging.warning("HTTP client dependencies not available. Falling back to direct tools.")
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
@dataclass
class MCPClientResult:
    """Standard result format for MCP client operations."""
    success: bool                                        # True when the call succeeded
    data: Any = None                                     # payload returned by the server, if any
    error: Optional[str] = None                          # error description when success is False
    metadata: Dict[str, Any] = field(default_factory=dict)  # transport details (retries, session, ...)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view of this result, suitable for JSON serialization."""
        return {
            "success": self.success,
            "data": self.data,
            "error": self.error,
            "metadata": self.metadata
        }
45
+
46
+
47
@dataclass
class MCPTool:
    """Lightweight record describing one tool exposed by an MCP server."""
    name: str                                                # tool identifier used in tools/call
    description: str = ""                                    # human-readable summary from the server
    input_schema: Dict[str, Any] = field(default_factory=dict)  # JSON schema for the tool's arguments
53
+
54
+
55
@dataclass
class RetryConfig:
    """Tunables controlling retry behavior when the server rate-limits requests."""
    # Maximum number of retry attempts before giving up.
    max_retries: int = 20
    # Base delay between retries, in seconds.
    base_delay: float = 2.0
    # Upper bound on any single retry delay, in seconds.
    max_delay: float = 60.0
    # When True, delays grow as base_delay * 2**attempt (capped at max_delay).
    exponential_backoff: bool = True
    # When True, honor the server's Retry-After response header.
    respect_retry_after: bool = True
    # Master switch for automatic retry on HTTP 429 responses.
    retry_on_rate_limit: bool = True
64
+
65
+
66
class MCPClient:
    """
    Simple HTTP-based MCP Client for dynamic tool discovery and execution.

    This client makes direct HTTP JSON-RPC calls to the MCP server,
    avoiding the complexity of streaming connections.

    Session management is handled entirely by the server:
    - Server assigns session IDs on connection
    - Server manages workspace creation and isolation
    - All tool operations use server-managed workspaces
    """

    def __init__(self, server_url: str = "http://localhost:6274/mcp", retry_config: Optional[RetryConfig] = None):
        """Create the client and, when httpx is available, connect and discover tools eagerly."""
        self.server_url = server_url.rstrip('/')
        self.retry_config = retry_config or RetryConfig()
        self._tools: Dict[str, MCPTool] = {}
        self._connected = False
        self._request_id = 0
        self._session_id = None  # assigned by the server on first response

        if not MCP_AVAILABLE:
            logger.warning("HTTP client not available. Some functionality may be limited.")
            return

        # Initialize connection and discover tools
        self._initialize_connection()

    def _get_next_id(self) -> int:
        """Return the next monotonically increasing JSON-RPC request id."""
        self._request_id += 1
        return self._request_id

    @staticmethod
    def _parse_sse_response(sse_text: str) -> Dict[str, Any]:
        """Parse a Server-Sent Events response body and extract its JSON payload."""
        try:
            # SSE format: "event: message\ndata: {json}\n\n"
            lines = sse_text.strip().split('\n')

            for line in lines:
                if line.startswith('data: '):
                    json_data = line[6:]  # Remove "data: " prefix
                    return json.loads(json_data)

            # If no data line found, try parsing entire response as JSON
            return json.loads(sse_text)

        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse SSE response: {e}")
            logger.error(f"SSE text: {sse_text[:200]}...")
            return {"error": {"code": JsonRpcErr.PARSE_ERROR, "message": f"Parse error: {e}"}}

    def _make_request(self, method: str, params: Dict[str, Any] = None) -> MCPClientResult:
        """
        Make a JSON-RPC request to the MCP server with automatic retry on rate limits.

        Retries on HTTP 429 (honoring Retry-After) and on transient network
        exceptions, per self.retry_config.
        """
        if not MCP_AVAILABLE:
            return MCPClientResult(success=False, error="HTTP client not available")

        # Prepare JSON-RPC request envelope
        request_data = {
            "jsonrpc": "2.0",
            "id": self._get_next_id(),
            "method": method,
            "params": params or {}
        }

        # Make HTTP request with proper MCP headers
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json, text/event-stream"
        }

        # Add session ID if available
        if self._session_id:
            headers["X-Session-ID"] = self._session_id

        last_error = None
        retry_count = 0

        while retry_count <= self.retry_config.max_retries:
            try:
                # Disable proxy for localhost/127.0.0.1 connections to avoid proxy interference
                import os
                from urllib.parse import urlparse
                parsed_url = urlparse(self.server_url)
                is_localhost = parsed_url.hostname in ['localhost', '127.0.0.1', '::1']

                # Add localhost to NO_PROXY for localhost connections.
                # NOTE(review): trust_env=False below already makes httpx ignore proxy
                # settings; this env juggling looks like belt-and-braces — confirm it
                # is still needed before removing.
                original_no_proxy = None
                if is_localhost:
                    original_no_proxy = os.environ.get('NO_PROXY', os.environ.get('no_proxy', ''))
                    # Add localhost and 127.0.0.1 to NO_PROXY
                    no_proxy_hosts = ['localhost', '127.0.0.1', '::1']
                    if original_no_proxy:
                        existing_hosts = [h.strip() for h in original_no_proxy.split(',')]
                        no_proxy_hosts.extend(existing_hosts)
                    os.environ['NO_PROXY'] = ','.join(no_proxy_hosts)
                    os.environ['no_proxy'] = ','.join(no_proxy_hosts)

                try:
                    # Create client with connection pooling for high-concurrency
                    limits = httpx.Limits(
                        max_keepalive_connections=3000,  # Keep more connections alive
                        max_connections=3000,            # Allow more concurrent connections
                        keepalive_expiry=1000.0          # Keep connections alive longer
                    )
                    timeout = httpx.Timeout(
                        connect=100.0,
                        read=None,  # no read timeout: tool calls may run for a long time
                        write=60.0,
                        pool=30.0
                    )
                    with httpx.Client(
                        timeout=timeout,      # Higher timeout for high-concurrency scenarios
                        limits=limits,        # Connection pooling for better performance
                        trust_env=False,
                        http2=True            # Enable HTTP/2 for better multiplexing
                    ) as client:
                        response = client.post(
                            self.server_url,
                            json=request_data,
                            headers=headers
                        )

                        # Check for rate limiting (HTTP 429)
                        if response.status_code == 429:
                            if not self.retry_config.retry_on_rate_limit:
                                return MCPClientResult(
                                    success=False,
                                    error="Rate limit exceeded (HTTP 429) - retries disabled",
                                    metadata={"status_code": 429, "retry_count": retry_count}
                                )

                            if retry_count >= self.retry_config.max_retries:
                                return MCPClientResult(
                                    success=False,
                                    error=f"Rate limit exceeded (HTTP 429) - max retries ({self.retry_config.max_retries}) reached",
                                    metadata={"status_code": 429, "retry_count": retry_count}
                                )

                            # Calculate retry delay
                            delay = self._calculate_retry_delay(response, retry_count)

                            logger.warning(f"Rate limit exceeded for {method} (attempt {retry_count + 1}/{self.retry_config.max_retries + 1}). Retrying in {delay:.1f}s...")

                            # Wait before retry
                            time.sleep(delay)
                            retry_count += 1
                            continue

                        # Handle other HTTP errors
                        if response.status_code != HTTPStatus.OK:
                            return MCPClientResult(
                                success=False,
                                error=f"HTTP {response.status_code}: {response.text}",
                                metadata={"status_code": response.status_code, "retry_count": retry_count}
                            )

                        # Parse successful response (could be JSON or SSE format)
                        if response.headers.get("content-type", "").startswith("text/event-stream"):
                            # Parse SSE format
                            response_data = self._parse_sse_response(response.text)
                        else:
                            # Parse regular JSON
                            response_data = response.json()

                        if "error" in response_data:
                            return MCPClientResult(
                                success=False,
                                error=f"MCP Error: {response_data['error']}",
                                metadata={"retry_count": retry_count}
                            )

                        # Capture session ID from response data (for all methods, not just initialize)
                        if "session_id" in response_data:
                            self._session_id = response_data["session_id"]
                            logger.info(f"Captured session ID from response: {self._session_id}")

                        # Success! Log retry info if this wasn't the first attempt
                        if retry_count > 0:
                            logger.info(f"Request {method} succeeded after {retry_count} retries")

                        return MCPClientResult(
                            success=True,
                            data=response_data.get("result"),
                            metadata={
                                "method": method,
                                "server_url": self.server_url,
                                "session_id": self._session_id,
                                "retry_count": retry_count
                            }
                        )
                finally:
                    # Restore original NO_PROXY environment variable
                    if is_localhost:
                        if original_no_proxy is not None:
                            if original_no_proxy:
                                os.environ['NO_PROXY'] = original_no_proxy
                                os.environ['no_proxy'] = original_no_proxy
                            else:
                                # Remove NO_PROXY if it wasn't set originally
                                os.environ.pop('NO_PROXY', None)
                                os.environ.pop('no_proxy', None)

            except Exception as e:
                last_error = str(e)
                logger.error(f"MCP request failed for {method} (attempt {retry_count + 1}): {e}")

                # Only retry on certain exceptions (network issues, timeouts)
                if not self._should_retry_exception(e) or retry_count >= self.retry_config.max_retries:
                    break

                # Calculate retry delay for exceptions
                delay = self._calculate_exception_retry_delay(retry_count)
                logger.warning(f"Request {method} failed, retrying in {delay:.1f}s... (attempt {retry_count + 1}/{self.retry_config.max_retries + 1})")

                time.sleep(delay)
                retry_count += 1

        # All retries exhausted
        return MCPClientResult(
            success=False,
            error=f"Request failed after {retry_count} retries. Last error: {last_error}",
            metadata={"retry_count": retry_count}
        )

    def _calculate_retry_delay(self, response, retry_count: int) -> float:
        """Calculate delay before retry based on server response and retry count."""
        delay = self.retry_config.base_delay

        # Respect server's Retry-After header if available
        if self.retry_config.respect_retry_after and "Retry-After" in response.headers:
            try:
                retry_after = float(response.headers["Retry-After"])
                delay = min(retry_after, self.retry_config.max_delay)
                logger.debug("Using server Retry-After: %ss", delay)
            except (ValueError, TypeError):
                logger.warning(f"Invalid Retry-After header: {response.headers.get('Retry-After')}")

        # Apply exponential backoff if enabled
        elif self.retry_config.exponential_backoff:
            delay = min(
                self.retry_config.base_delay * (2 ** retry_count),
                self.retry_config.max_delay
            )

        return delay

    def _calculate_exception_retry_delay(self, retry_count: int) -> float:
        """Calculate delay for exception-based retries (exponential backoff when enabled)."""
        if self.retry_config.exponential_backoff:
            return min(
                self.retry_config.base_delay * (2 ** retry_count),
                self.retry_config.max_delay
            )
        return self.retry_config.base_delay

    @staticmethod
    def _should_retry_exception(exception: Exception) -> bool:
        """Determine if an exception warrants a retry."""
        # Retry on network-related exceptions
        if isinstance(exception, (httpx.RequestError, httpx.TimeoutException, httpx.ConnectError)):
            return True

        # Don't retry on other exceptions (parsing errors, etc.)
        return False

    def _initialize_connection(self):
        """Initialize MCP client connection and fetch available tools."""
        if not MCP_AVAILABLE:
            return

        try:
            # Initialize session
            init_result = self._make_request("initialize", {
                "protocolVersion": "2025-06-18",
                "capabilities": {},
                "clientInfo": {
                    "name": "DeepDiver-MCP-Client",
                    "version": "1.0.0"
                }
            })
            # Debug print replaced with proper logging (library code must not print).
            logger.debug("MCP initialize result: %s", init_result)
            if not init_result.success:
                logger.error(f"MCP initialization failed: {init_result.error}")
                return

            logger.info("MCP client initialized successfully")

            # Fetch available tools
            tools_result = self._make_request("tools/list")

            if tools_result.success and tools_result.data:
                tools_data = tools_result.data.get("tools", [])
                self._tools = {}

                for tool_data in tools_data:
                    tool = MCPTool(
                        name=tool_data.get("name", ""),
                        description=tool_data.get("description", ""),
                        input_schema=tool_data.get("inputSchema", {})
                    )
                    self._tools[tool.name] = tool

                logger.info(f"Discovered {len(self._tools)} tools from MCP server: {list(self._tools.keys())}")

            self._connected = True

        except Exception as e:
            logger.error(f"Failed to initialize MCP client: {e}")
            self._connected = False

    def _ensure_connection(self):
        """Ensure the client is connected, attempting one reconnect before failing."""
        if not MCP_AVAILABLE:
            raise RuntimeError("HTTP client not available")

        if not self._connected:
            self._initialize_connection()

        if not self._connected:
            raise RuntimeError("MCP client not connected to server")

    def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> MCPClientResult:
        """
        Generic method to call any tool available on the MCP server.

        Args:
            tool_name: Name of the tool to call
            arguments: Dictionary of arguments to pass to the tool

        Returns:
            MCPClientResult with the tool execution result
        """
        try:
            self._ensure_connection()

            if tool_name not in self._tools:
                return MCPClientResult(
                    success=False,
                    error=f"Tool '{tool_name}' not available on server. Available tools: {list(self._tools.keys())}"
                )

            # Call the tool via JSON-RPC
            result = self._make_request("tools/call", {
                "name": tool_name,
                "arguments": arguments
            })

            return result

        except Exception as e:
            logger.error(f"Error calling tool '{tool_name}': {e}")
            return MCPClientResult(
                success=False,
                error=str(e)
            )

    def get_available_tools(self) -> Dict[str, MCPTool]:
        """Get dictionary of available tools from the server (a shallow copy)."""
        return self._tools.copy()

    def list_tools(self) -> List[str]:
        """Get list of available tool names."""
        return list(self._tools.keys())

    def get_tool_info(self, tool_name: str) -> Optional[MCPTool]:
        """Get detailed information about a specific tool, or None if unknown."""
        return self._tools.get(tool_name)

    def is_connected(self) -> bool:
        """Check if client is connected to MCP server."""
        return self._connected and MCP_AVAILABLE

    def refresh_tools(self):
        """Refresh the list of available tools from the server."""
        try:
            # Fetch available tools
            tools_result = self._make_request("tools/list")

            if tools_result.success and tools_result.data:
                tools_data = tools_result.data.get("tools", [])
                self._tools = {}

                for tool_data in tools_data:
                    tool = MCPTool(
                        name=tool_data.get("name", ""),
                        description=tool_data.get("description", ""),
                        input_schema=tool_data.get("inputSchema", {})
                    )
                    self._tools[tool.name] = tool

                # (A stray debug print of the just-reset tool dict was removed here.)
                logger.info(f"Refreshed {len(self._tools)} tools from MCP server")
            else:
                logger.error(f"Failed to refresh tools: {tools_result.error}")

        except Exception as e:
            logger.error(f"Error refreshing tools: {e}")

    def close(self):
        """Close MCP client connection."""
        # Since we create connections per request, just mark as disconnected
        self._connected = False
470
+
471
+
472
class MCPToolsAdapter:
    """
    Adapter exposing the legacy MCPTools interface on top of the generic MCP client.

    Existing agents keep calling familiar method names; each call is translated
    into a generic tool invocation against the MCP server, preserving backward
    compatibility.
    """

    def __init__(self, server_url: str = "http://localhost:6274/mcp", retry_config: Optional[RetryConfig] = None):
        self.client = MCPClient(server_url, retry_config)

    def _call_tool(self, tool_name: str, **kwargs) -> MCPClientResult:
        """Forward a tool invocation to the underlying MCP client."""
        return self.client.call_tool(tool_name, kwargs)

    def __getattr__(self, name: str):
        """
        Create methods on the fly for any tool available on the server,
        e.g. adapter.batch_web_search(...) or adapter.file_read(...).
        """
        if name.startswith('_'):
            raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")

        def tool_method(**kwargs):
            outcome = self._call_tool(name, **kwargs)
            # Backward compatibility: surface only the data (or an error dict).
            if outcome.success:
                return outcome.data
            return {"error": outcome.error}

        return tool_method

    def is_connected(self) -> bool:
        """Check if the MCP client is connected to the server."""
        return self.client.is_connected()

    def get_available_tools(self) -> Dict[str, MCPTool]:
        """Get available tools from the MCP server."""
        return self.client.get_available_tools()

    def list_tools(self) -> List[str]:
        """Get list of available tool names."""
        return self.client.list_tools()

    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        """
        Build function-call schemas for every available tool.
        This is the proper MCP way - schemas come from server, not direct imports.
        """
        return [
            {
                "type": "function",
                "function": {
                    "name": tool_name,
                    "description": tool_info.description,
                    "parameters": tool_info.input_schema
                }
            }
            for tool_name, tool_info in self.get_available_tools().items()
        ]

    def refresh_tools(self):
        """Refresh the list of available tools from the server."""
        self.client.refresh_tools()

    def get_session_info(self) -> Optional[Dict[str, Any]]:
        """Get session information from the underlying MCP client."""
        try:
            if not hasattr(self.client, '_session_id'):
                return None
            return {
                "session_id": self.client._session_id,
                "connected": self.client.is_connected(),
                "server_url": getattr(self.client, 'server_url', 'unknown')
            }
        except Exception:
            return None

    def close(self):
        """Close the MCP client connection."""
        self.client.close()
558
+
559
+
560
class FilteredMCPToolsAdapter:
    """
    Filtered adapter that shares an MCP client connection while restricting
    tool access per agent type.

    Agents using this adapter:
    - share the same session/workspace (via the shared client)
    - see only the tool set appropriate for their role
    - keep a proper separation of concerns
    """

    def __init__(self, shared_client: MCPClient, allowed_tools: List[str]):
        """
        Initialize with shared client and allowed tools list

        Args:
            shared_client: Shared MCPClient instance (same session)
            allowed_tools: List of tools this agent can access
        """
        self.client = shared_client
        self.allowed_tools = set(allowed_tools)

        # Drop any requested tool the server does not actually expose.
        server_tools = set(self.client.list_tools())
        missing = self.allowed_tools - server_tools
        if missing:
            logger.warning(f"Requested tools not available on server: {missing}")
            self.allowed_tools = self.allowed_tools & server_tools

    def _call_tool(self, tool_name: str, **kwargs) -> MCPClientResult:
        """Invoke a tool if permitted; otherwise return an error result."""
        if tool_name not in self.allowed_tools:
            return MCPClientResult(
                success=False,
                error=f"Tool '{tool_name}' not allowed for this agent. Allowed tools: {list(self.allowed_tools)}"
            )

        # The server owns workspace selection; strip a stray workspace_path if passed.
        kwargs.pop('workspace_path', None)
        return self.client.call_tool(tool_name, kwargs)

    def __getattr__(self, name: str):
        """
        Dynamic method resolution restricted to this agent's allowed tool set.
        """
        if name in self.allowed_tools:
            def _invoke(**kwargs):
                return self._call_tool(name, **kwargs)
            return _invoke

        if name in self.client.list_tools():
            # Tool exists on the server but is off-limits for this agent.
            raise AttributeError(f"Tool '{name}' not allowed for this agent. Allowed tools: {list(self.allowed_tools)}")
        # Tool doesn't exist on the server at all.
        raise AttributeError(f"Tool '{name}' not available on server. Available tools: {self.client.list_tools()}")

    # ================ CLIENT MANAGEMENT ================

    def is_connected(self) -> bool:
        """Check if client is connected to MCP server."""
        return self.client.is_connected()

    def get_available_tools(self) -> Dict[str, MCPTool]:
        """Get the subset of server tools this agent may use."""
        permitted = self.allowed_tools
        return {name: tool for name, tool in self.client.get_available_tools().items() if name in permitted}

    def list_tools(self) -> List[str]:
        """Get list of allowed tool names for this agent."""
        return list(self.allowed_tools)

    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        """
        Build function-call schemas for the tools allowed for this agent.
        This is the proper MCP way - schemas come from server, not direct imports.
        """
        return [
            {
                "type": "function",
                "function": {
                    "name": tool_name,
                    "description": tool_info.description,
                    "parameters": tool_info.input_schema
                }
            }
            for tool_name, tool_info in self.get_available_tools().items()
        ]

    def refresh_tools(self):
        """Refresh the underlying client's tools and re-validate the allowed set."""
        self.client.refresh_tools()

        server_tools = set(self.client.list_tools())
        missing = self.allowed_tools - server_tools
        if missing:
            logger.warning(f"Some allowed tools no longer available after refresh: {missing}")
            self.allowed_tools = self.allowed_tools & server_tools

    def close(self):
        """Close MCP client connection."""
        self.client.close()
671
+
672
+
673
# ================ AGENT TOOL SETS ================
# Define what tools each agent type should have access to.
# Order is preserved deliberately: it is the order schemas are emitted in.

# Planner: document inspection plus read/write access to the workspace.
PLANNER_AGENT_TOOLS = [
    "download_files",
    "document_qa",
    "file_read",
    "file_write",
    "str_replace_based_edit_tool",
    "list_workspace",
    "file_find_by_name",
]

# Information seeker: planner capabilities plus web search, crawling and
# document extraction.
INFORMATION_SEEKER_TOOLS = [
    "batch_web_search",
    "url_crawler",
    "document_extract",
    "document_qa",
    "download_files",
    "file_read",
    "file_write",
    "str_replace_based_edit_tool",
    "list_workspace",
    "file_find_by_name",
]

# Writer: read-only workspace access plus report-composition tools.
WRITER_AGENT_TOOLS = [
    "file_read",
    "list_workspace",
    "file_find_by_name",
    "search_result_classifier",
    "section_writer",
    "concat_section_files",
]
711
+
712
+
713
def create_filtered_mcp_tools_adapter(
    shared_client: MCPClient,
    agent_type: str
) -> FilteredMCPToolsAdapter:
    """
    Create a filtered MCP tools adapter for a specific agent type.

    Args:
        shared_client: Shared MCPClient instance
        agent_type: Type of agent ("planner", "information_seeker", "writer")

    Returns:
        FilteredMCPToolsAdapter with appropriate tools for agent type
    """
    if agent_type == "information_seeker":
        allowed_tools = INFORMATION_SEEKER_TOOLS
    elif agent_type == "writer":
        allowed_tools = WRITER_AGENT_TOOLS
    else:
        # "planner" and any unrecognized agent_type fall back to planner tools.
        allowed_tools = PLANNER_AGENT_TOOLS

    return FilteredMCPToolsAdapter(
        shared_client=shared_client,
        allowed_tools=allowed_tools
    )
739
+
740
+
741
def create_agent_mcp_tools(
    agent_type: str,
    server_url: str = "http://localhost:6274/mcp",
    retry_config: Optional[RetryConfig] = None
) -> FilteredMCPToolsAdapter:
    """
    Convenience factory for a filtered MCP tools adapter with retry support.

    This is the RECOMMENDED way to create MCP tools for agents: it builds a
    retry-capable client and wraps it in the per-agent tool filter.

    Args:
        agent_type: Type of agent ("planner", "information_seeker", "writer")
        server_url: URL of the MCP server (default: http://localhost:6274/mcp)
        retry_config: Optional retry configuration for handling rate limits

    Returns:
        FilteredMCPToolsAdapter with appropriate tools and retry support
    """
    shared_client = create_mcp_client(server_url=server_url, retry_config=retry_config)
    return create_filtered_mcp_tools_adapter(shared_client, agent_type)
763
+
764
+
765
def create_mcp_client(
    server_url: str = "http://localhost:6274/mcp",
    retry_config: Optional[RetryConfig] = None
) -> MCPClient:
    """
    Factory for a generic MCP client with optional retry configuration.

    Args:
        server_url: URL of the MCP server (default: http://localhost:6274/mcp)
        retry_config: Optional retry configuration for handling rate limits

    Returns:
        MCPClient instance for direct tool calling with automatic retry on
        rate limits
    """
    return MCPClient(server_url=server_url, retry_config=retry_config)
780
+
781
+
782
def create_mcp_tools_adapter(
    server_url: str = "http://localhost:6274/mcp",
    retry_config: Optional[RetryConfig] = None
) -> MCPToolsAdapter:
    """
    Factory for an MCP Tools Adapter (backward compatibility) with retry support.

    Args:
        server_url: URL of the MCP server (default: http://localhost:6274/mcp)
        retry_config: Optional retry configuration for handling rate limits

    Returns:
        MCPToolsAdapter instance that behaves like MCPTools but routes through
        the MCP client with automatic retries
    """
    return MCPToolsAdapter(server_url=server_url, retry_config=retry_config)
797
+
798
+
799
+ # Export for compatibility
800
# Public API of this module, grouped by kind.
__all__ = [
    # Core client types
    'MCPClientResult',
    'MCPClient',
    'MCPTool',
    'RetryConfig',
    # Adapters
    'MCPToolsAdapter',
    'FilteredMCPToolsAdapter',
    # Factories
    'create_mcp_client',
    'create_mcp_tools_adapter',
    'create_filtered_mcp_tools_adapter',
    'create_agent_mcp_tools',  # RECOMMENDED for agents
    # Agent tool allow-lists
    'PLANNER_AGENT_TOOLS',
    'INFORMATION_SEEKER_TOOLS',
    'WRITER_AGENT_TOOLS',
]
deepdiver_v2/src/tools/mcp_server_standard.py ADDED
@@ -0,0 +1,1751 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
2
+ #!/usr/bin/env python3
3
+ """
4
+ Demo-Ready MCP Server - New Standard Implementation
5
+ Combines robust session management with comprehensive tool definitions.
6
+ Features: workspace isolation, tool call tracking, rate limiting, security, and full tool suite.
7
+ """
8
+
9
+ import argparse
10
+ import asyncio
11
+ import json
12
+ import logging
13
+ import time
14
+ import uuid
15
+ import yaml
16
+ from collections import defaultdict, deque
17
+ from dataclasses import dataclass, field
18
+ from datetime import datetime, timedelta
19
+ from pathlib import Path
20
+ from threading import Thread, Event
21
+ from typing import Any, Dict, List, Optional
22
+
23
+ # Third-party imports
24
+ from starlette.applications import Starlette
25
+ from starlette.middleware.base import BaseHTTPMiddleware
26
+ from starlette.requests import Request
27
+ from starlette.responses import JSONResponse, StreamingResponse
28
+ import uvicorn
29
+
30
+ # Add project root to Python path for imports
31
+ import sys
32
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
33
+ from src.utils.status_codes import JsonRpcErr
34
+ from http import HTTPStatus
35
+
36
+ # Handle both relative and absolute imports
37
+ try:
38
+ from .mcp_tools import MCPTools, get_tool_schemas
39
+ from .mcp_tools_async import AsyncMCPTools
40
+ except ImportError:
41
+ # Fallback for direct script execution
42
+ from src.tools.mcp_tools import MCPTools, get_tool_schemas
43
+ try:
44
+ from src.tools.mcp_tools_async import AsyncMCPTools
45
+ except ImportError:
46
+ AsyncMCPTools = None
47
+
48
+ # Workspace knowledge manager disabled
49
+ WORKSPACE_KNOWLEDGE_AVAILABLE = False
50
+
51
+ # Configure structured logging
52
+ logging.basicConfig(
53
+ level=logging.INFO,
54
+ format='%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s',
55
+ handlers=[
56
+ logging.StreamHandler(sys.stdout),
57
+ logging.FileHandler('mcp_server.log')
58
+ ]
59
+ )
60
+ logger = logging.getLogger(__name__)
61
+
62
+ # ================ CONFIGURATION ================
63
+
64
+
65
@dataclass
class ServerConfig:
    """Server configuration with only actually implemented options."""
    # Server Core Settings
    host: str = "127.0.0.1"
    port: int = 6274
    debug_mode: bool = False

    # Session Management
    session_ttl_seconds: int = 3600  # 1 hour default
    max_sessions: int = 1000
    cleanup_interval_seconds: int = 300  # 5 minutes
    enable_session_keepalive: bool = True
    keepalive_touch_interval: int = 300

    # Request Handling
    request_timeout_seconds: int = 120
    max_request_size_mb: int = 10

    # Client Rate Limiting (per IP)
    rate_limit_requests_per_minute: int = 300

    # Workspace Management
    base_workspace_dir: str = "workspaces"

    # Tool Call Tracking & Logging
    enable_tool_tracking: bool = True
    max_tracked_calls_per_session: int = 1000
    track_detailed_errors: bool = True

    # Per-tool Rate Limiting Configuration
    tool_rate_limits: Dict[str, Dict[str, int]] = field(default_factory=dict)

    @classmethod
    def from_yaml(cls, config_path: str) -> 'ServerConfig':
        """
        Load configuration from a YAML file.

        Args:
            config_path: Path to the YAML configuration file.

        Returns:
            A ServerConfig populated from the file; on any read/parse error
            the error is logged and an all-defaults instance is returned.
        """
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                # An empty YAML document parses to None; treat it as an empty
                # mapping instead of crashing into the fallback path below.
                config_data = yaml.safe_load(f) or {}

            # Extract configuration sections with defaults
            server_config = config_data.get('server', {})
            tracking_config = config_data.get('tracking', {})
            tool_rate_limits = config_data.get('tool_rate_limits', {})

            return cls(
                # Server Core Settings
                host=server_config.get('host', "127.0.0.1"),
                port=server_config.get('port', 6274),
                debug_mode=server_config.get('debug_mode', False),

                # Session Management
                session_ttl_seconds=server_config.get('session_ttl_seconds', 3600),
                max_sessions=server_config.get('max_sessions', 1000),
                cleanup_interval_seconds=server_config.get('cleanup_interval_seconds', 300),
                enable_session_keepalive=server_config.get('enable_session_keepalive', True),
                keepalive_touch_interval=server_config.get('keepalive_touch_interval', 300),

                # Request Handling
                request_timeout_seconds=server_config.get('request_timeout_seconds', 120),
                max_request_size_mb=server_config.get('max_request_size_mb', 10),

                # Client Rate Limiting
                rate_limit_requests_per_minute=server_config.get('rate_limit_requests_per_minute', 300),

                # Workspace Management
                base_workspace_dir=server_config.get('base_workspace_dir', "workspaces"),

                # Tool Call Tracking & Logging
                enable_tool_tracking=tracking_config.get('enable_tool_tracking', True),
                max_tracked_calls_per_session=tracking_config.get('max_tracked_calls_per_session', 1000),
                track_detailed_errors=tracking_config.get('track_detailed_errors', True),

                # Per-tool Rate Limiting
                tool_rate_limits=tool_rate_limits
            )

        except Exception as e:
            logger.error(f"Failed to load configuration from {config_path}: {e}")
            logger.info("Using default configuration")
            return cls()


# Global configuration instance - will be set during startup
config: Optional[ServerConfig] = None
151
+
152
+ # ================ GLOBAL PER-TOOL RATE LIMITING ================
153
+
154
+
155
@dataclass
class ToolRateLimit:
    """Per-tool rate-limit thresholds (float('inf') means unbounded)."""
    requests_per_minute: float   # max requests in any 60s window
    requests_per_hour: float     # max requests in any 3600s window
    burst_limit: int             # max requests in any 1s window
161
+
162
+
163
class GlobalToolRateLimiter:
    """
    Global rate limiter that controls QPS to external APIs per tool.
    This is shared across all sessions and clients to manage upstream service load.
    """

    def __init__(self, tool_rate_limits: Dict[str, Dict[str, int]]):
        """
        Args:
            tool_rate_limits: Mapping of tool name -> limit config with optional
                'requests_per_minute', 'requests_per_hour' and 'burst_limit' keys.
                Missing minute/hour limits default to unbounded; burst defaults to 10.
        """
        self.tool_limits: Dict[str, ToolRateLimit] = {}
        # Per-tool timestamps (time.time() seconds) of recorded requests.
        self.tool_requests: Dict[str, deque] = defaultdict(deque)
        self.lock = asyncio.Lock()

        for tool_name, limits_config in tool_rate_limits.items():
            self.tool_limits[tool_name] = ToolRateLimit(
                requests_per_minute=limits_config.get('requests_per_minute', float('inf')),
                requests_per_hour=limits_config.get('requests_per_hour', float('inf')),
                burst_limit=limits_config.get('burst_limit', 10)
            )
            self.tool_requests[tool_name] = deque()

        logger.info(f"Initialized global tool rate limiter for {len(self.tool_limits)} tools")

    async def is_allowed(self, tool_name: str) -> tuple[bool, Optional[str]]:
        """
        Check if a request to the specified tool is allowed based on global rate limits.

        Returns:
            tuple[bool, Optional[str]]: (allowed, reason_if_denied)
        """
        if tool_name not in self.tool_limits:
            # Tool not configured for rate limiting - allow
            return True, None

        async with self.lock:
            now = time.time()
            limits = self.tool_limits[tool_name]
            requests = self.tool_requests[tool_name]

            # Drop entries older than the largest (1 hour) window before counting.
            self._cleanup_old_requests(requests, now)
            recent_requests = list(requests)

            # Burst limit: rapid requests within the last second - only if specified
            if limits.burst_limit != float('inf'):
                burst_count = sum(1 for req_time in recent_requests if now - req_time < 1.0)
                if burst_count >= limits.burst_limit:
                    return False, f"Tool '{tool_name}' burst limit exceeded ({limits.burst_limit} requests/burst)"

            # Per-minute limit - only if specified
            if limits.requests_per_minute != float('inf'):
                minute_count = sum(1 for req_time in recent_requests if now - req_time < 60.0)
                if minute_count >= limits.requests_per_minute:
                    return False, f"Tool '{tool_name}' per-minute limit exceeded ({limits.requests_per_minute} requests/minute)"

            # Per-hour limit - only if specified
            if limits.requests_per_hour != float('inf'):
                hour_count = sum(1 for req_time in recent_requests if now - req_time < 3600.0)
                if hour_count >= limits.requests_per_hour:
                    return False, f"Tool '{tool_name}' per-hour limit exceeded ({limits.requests_per_hour} requests/hour)"

            return True, None

    async def record_request(self, tool_name: str):
        """Record a successful request for rate limiting tracking."""
        if tool_name not in self.tool_limits:
            return

        async with self.lock:
            now = time.time()
            self.tool_requests[tool_name].append(now)
            # Keep deque size manageable (only keep last hour of requests)
            self._cleanup_old_requests(self.tool_requests[tool_name], now)

    @staticmethod
    def _cleanup_old_requests(requests: deque, now: float):
        """Remove requests older than 1 hour to keep memory usage bounded."""
        while requests and now - requests[0] > 3600.0:  # 1 hour
            requests.popleft()

    async def get_tool_stats(self, tool_name: str) -> Dict[str, Any]:
        """Get current usage statistics for a tool."""
        if tool_name not in self.tool_limits:
            return {"error": f"Tool '{tool_name}' not configured for rate limiting"}

        async with self.lock:
            now = time.time()
            requests = self.tool_requests[tool_name]
            limits = self.tool_limits[tool_name]

            # Clean old requests first
            self._cleanup_old_requests(requests, now)
            recent_requests = list(requests)

            # Count each window once and reuse the totals below.
            second_count = sum(1 for req_time in recent_requests if now - req_time < 1.0)
            minute_count = sum(1 for req_time in recent_requests if now - req_time < 60.0)
            hour_count = sum(1 for req_time in recent_requests if now - req_time < 3600.0)

            return {
                "tool_name": tool_name,
                "current_usage": {
                    "last_second": second_count,
                    "last_minute": minute_count,
                    "last_hour": hour_count
                },
                "limits": {
                    "requests_per_minute": limits.requests_per_minute if limits.requests_per_minute != float('inf') else None,
                    "requests_per_hour": limits.requests_per_hour if limits.requests_per_hour != float('inf') else None,
                    "burst_limit": limits.burst_limit if limits.burst_limit != float('inf') else None
                },
                "utilization": {
                    "minute_utilization": minute_count / limits.requests_per_minute if limits.requests_per_minute != float('inf') else 0,
                    "hour_utilization": hour_count / limits.requests_per_hour if limits.requests_per_hour != float('inf') else 0
                }
            }

    async def get_all_stats(self) -> Dict[str, Any]:
        """
        Get usage statistics for all tools.

        BUG FIX: this was previously a *synchronous* method that called the
        async get_tool_stats() without awaiting, so callers received a dict of
        un-awaited coroutine objects. It is now async and awaits each result.
        """
        return {
            tool_name: await self.get_tool_stats(tool_name)
            for tool_name in self.tool_limits
        }


# Global tool rate limiter instance - will be initialized during startup
global_tool_rate_limiter: Optional[GlobalToolRateLimiter] = None
287
+
288
+ # ================ TOOL DEFINITIONS ================
289
+
290
+ # Tool execution function mapping - maps tool names to their implementation functions
291
+
292
+
293
+ def get_tool_function(tool_name: str):
294
+ """Get the actual function for a tool"""
295
+ tool_map = {
296
+ "batch_web_search": lambda tools, **kwargs: tools.batch_web_search(**kwargs),
297
+ "url_crawler": lambda tools, **kwargs: tools.url_crawler(**kwargs),
298
+ "download_files": lambda tools, **kwargs: tools.download_files(**kwargs),
299
+ "list_workspace": lambda tools, **kwargs: tools.list_workspace(**kwargs),
300
+ "str_replace_based_edit_tool": lambda tools, **kwargs: tools.str_replace_based_edit_tool(**kwargs),
301
+ "file_stats": lambda tools, **kwargs: tools.file_stats(**kwargs),
302
+ "file_read": lambda tools, **kwargs: tools.file_read(**kwargs),
303
+ "file_read_lines": lambda tools, **kwargs: tools.file_read_lines(**kwargs),
304
+ "content_preview": lambda tools, **kwargs: tools.content_preview(**kwargs),
305
+ "file_write": lambda tools, **kwargs: tools.file_write(**kwargs),
306
+ "file_grep_search": lambda tools, **kwargs: tools.file_grep_search(**kwargs),
307
+ "file_grep_with_context": lambda tools, **kwargs: tools.file_grep_with_context(**kwargs),
308
+ "file_find_by_name": lambda tools, **kwargs: tools.file_find_by_name(**kwargs),
309
+ "bash": lambda tools, **kwargs: tools.bash(**kwargs),
310
+ "task_done": lambda tools, **kwargs: tools.task_done(**kwargs),
311
+ "think": lambda tools, **kwargs: tools.think(**kwargs),
312
+ "reflect": lambda tools, **kwargs: tools.reflect(**kwargs),
313
+ "document_qa": lambda tools, **kwargs: tools.document_qa(**kwargs),
314
+ "extract_markdown_toc": lambda tools, **kwargs: tools.extract_markdown_toc(**kwargs),
315
+ "extract_markdown_section": lambda tools, **kwargs: tools.extract_markdown_section(**kwargs),
316
+
317
+ "document_extract": lambda tools, **kwargs: tools.document_extract(**kwargs),
318
+ "search_result_classifier": lambda tools, **kwargs: tools.search_result_classifier(**kwargs),
319
+ "info_seeker_subjective_task_done": None,
320
+ "writer_subjective_task_done": None,
321
+ "section_writer": lambda tools, **kwargs: tools.section_writer(**kwargs),
322
+ "concat_section_files": lambda tools, **kwargs: tools.concat_section_files(**kwargs),
323
+
324
+ # Internal tools - available to server but NOT exposed to agents via tool schemas
325
+ "internal_file_read_unlimited": lambda tools, **kwargs: tools.internal_file_read_unlimited(**kwargs),
326
+ }
327
+ return tool_map.get(tool_name)
328
+
329
+
330
+ # ================ TOOL CALL TRACKING ================
331
+
332
+
333
@dataclass
class ToolCallLog:
    """Individual tool call log entry."""
    call_id: str                                   # unique id for this call
    timestamp: datetime                            # when the call happened
    tool_name: str
    input_args: Dict[str, Any]                     # sanitized arguments
    output_result: Dict[str, Any]                  # sanitized result
    success: bool
    duration_ms: float
    error_details: Optional[str] = None
    session_id: str = ""
    agent_info: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a JSON-serializable dict (timestamp as ISO-8601 string)."""
        field_order = (
            "call_id", "timestamp", "tool_name", "input_args", "output_result",
            "success", "duration_ms", "error_details", "session_id", "agent_info",
        )
        return {
            name: (self.timestamp.isoformat() if name == "timestamp" else getattr(self, name))
            for name in field_order
        }
361
+
362
+
363
+ class ToolCallTracker:
364
+ """Tracks and saves tool calls to workspace-specific files"""
365
+
366
+ def __init__(self, workspace_path: Path, session_id: str):
367
+ self.workspace_path = workspace_path
368
+ self.session_id = session_id
369
+ self.logs_dir = workspace_path / "tool_call_logs"
370
+ self.logs_dir.mkdir(exist_ok=True)
371
+
372
+ # Create daily log file
373
+ today = datetime.now().strftime("%Y-%m-%d")
374
+ self.current_log_file = self.logs_dir / f"tool_calls_{today}.jsonl"
375
+ self.summary_file = self.logs_dir / "session_summary.json"
376
+
377
+ # Track call counts
378
+ self.call_count = 0
379
+ self.tool_usage_stats = defaultdict(int)
380
+
381
+ # Initialize session summary
382
+ self._initialize_session_summary()
383
+
384
+ def _initialize_session_summary(self):
385
+ """Initialize or update session summary file"""
386
+ summary = {
387
+ "session_id": self.session_id,
388
+ "session_start": datetime.now().isoformat(),
389
+ "last_updated": datetime.now().isoformat(),
390
+ "total_tool_calls": 0,
391
+ "tool_usage_stats": {},
392
+ "agent_activity": {},
393
+ "workspace_path": str(self.workspace_path)
394
+ }
395
+
396
+ # Load existing summary if it exists
397
+ if self.summary_file.exists():
398
+ try:
399
+ with open(self.summary_file, 'r') as f:
400
+ existing_summary = json.load(f)
401
+ summary.update(existing_summary)
402
+ # Don't overwrite session_start if it already exists
403
+ if "session_start" in existing_summary:
404
+ summary["session_start"] = existing_summary["session_start"]
405
+ except Exception as e:
406
+ logger.warning(f"Could not load existing session summary: {e}")
407
+
408
+ self._save_summary(summary)
409
+
410
+ def _save_summary(self, summary: Dict[str, Any]):
411
+ """Save session summary to file"""
412
+ try:
413
+ with open(self.summary_file, 'w') as f:
414
+ json.dump(summary, f, indent=2, ensure_ascii=False)
415
+ except Exception as e:
416
+ logger.error(f"Failed to save session summary: {e}")
417
+
418
+ def log_tool_call(self,
419
+ tool_name: str,
420
+ input_args: Dict[str, Any],
421
+ output_result: Dict[str, Any],
422
+ success: bool,
423
+ duration_ms: float,
424
+ error_details: Optional[str] = None,
425
+ agent_info: Optional[Dict[str, Any]] = None) -> str:
426
+ """Log a tool call and return the call ID"""
427
+
428
+ if not config.enable_tool_tracking:
429
+ return ""
430
+
431
+ # Respect max call limit per session
432
+ if self.call_count >= config.max_tracked_calls_per_session:
433
+ logger.warning(f"Max tracked calls reached for session {self.session_id}")
434
+ return ""
435
+
436
+ call_id = str(uuid.uuid4())
437
+ timestamp = datetime.now()
438
+
439
+ # Create log entry
440
+ log_entry = ToolCallLog(
441
+ call_id=call_id,
442
+ timestamp=timestamp,
443
+ tool_name=tool_name,
444
+ input_args=self._sanitize_args(input_args),
445
+ output_result=self._sanitize_result(output_result),
446
+ success=success,
447
+ duration_ms=duration_ms,
448
+ error_details=error_details if config.track_detailed_errors else None,
449
+ session_id=self.session_id,
450
+ agent_info=agent_info
451
+ )
452
+
453
+ # Save to JSONL file (one JSON object per line)
454
+ try:
455
+ with open(self.current_log_file, 'a', encoding="utf-8") as f:
456
+ f.write(json.dumps(log_entry.to_dict(), ensure_ascii=False) + '\n')
457
+ except Exception as e:
458
+ logger.error(f"Failed to save tool call log: {e}")
459
+
460
+ # Update session summary
461
+ self._update_session_summary(log_entry)
462
+
463
+ self.call_count += 1
464
+ self.tool_usage_stats[tool_name] += 1
465
+
466
+ return call_id
467
+
468
+ @staticmethod
469
+ def _sanitize_args(args: Dict[str, Any]) -> Dict[str, Any]:
470
+ """Sanitize arguments for logging (remove sensitive data)"""
471
+ sanitized = {}
472
+ for key, value in args.items():
473
+ if isinstance(value, str) and len(value) > 1000:
474
+ sanitized[key] = value[:1000] + "... [truncated]"
475
+ elif key.lower() in ['password', 'token', 'secret', 'key']:
476
+ sanitized[key] = "[REDACTED]"
477
+ else:
478
+ sanitized[key] = value
479
+ return sanitized
480
+
481
+ def _sanitize_result(self, result: Dict[str, Any]) -> Dict[str, Any]:
482
+ """Sanitize result for logging (remove large content)"""
483
+ if not isinstance(result, dict):
484
+ return result
485
+
486
+ sanitized = {}
487
+ for key, value in result.items():
488
+ if isinstance(value, str) and len(value) > 2000:
489
+ sanitized[key] = value[:2000] + "... [truncated]"
490
+ elif isinstance(value, dict):
491
+ sanitized[key] = self._sanitize_result(value)
492
+ else:
493
+ sanitized[key] = value
494
+ return sanitized
495
+
496
+ def _update_session_summary(self, log_entry: ToolCallLog):
497
+ """Update session summary with new tool call"""
498
+ try:
499
+ summary = {
500
+ "session_id": self.session_id,
501
+ "last_updated": datetime.now().isoformat(),
502
+ "total_tool_calls": self.call_count + 1,
503
+ "tool_usage_stats": dict(self.tool_usage_stats),
504
+ "workspace_path": str(self.workspace_path)
505
+ }
506
+
507
+ # Load existing summary
508
+ if self.summary_file.exists():
509
+ with open(self.summary_file, 'r') as f:
510
+ existing_summary = json.load(f)
511
+ summary.update(existing_summary)
512
+
513
+ # Update with new data
514
+ summary["last_updated"] = datetime.now().isoformat()
515
+ summary["total_tool_calls"] = self.call_count + 1
516
+ summary["tool_usage_stats"] = dict(self.tool_usage_stats)
517
+ summary["tool_usage_stats"][log_entry.tool_name] = self.tool_usage_stats[log_entry.tool_name] + 1
518
+
519
+ # Track agent activity
520
+ if log_entry.agent_info:
521
+ agent_type = log_entry.agent_info.get('type', 'unknown')
522
+ if 'agent_activity' not in summary:
523
+ summary['agent_activity'] = {}
524
+ if agent_type not in summary['agent_activity']:
525
+ summary['agent_activity'][agent_type] = {
526
+ 'tool_calls': 0,
527
+ 'last_active': log_entry.timestamp.isoformat()
528
+ }
529
+ summary['agent_activity'][agent_type]['tool_calls'] += 1
530
+ summary['agent_activity'][agent_type]['last_active'] = log_entry.timestamp.isoformat()
531
+
532
+ self._save_summary(summary)
533
+
534
+ except Exception as e:
535
+ logger.error(f"Failed to update session summary: {e}")
536
+
537
+ # ================ SESSION KEEP-ALIVE FOR LONG OPERATIONS ================
538
+
539
+
540
class KeepAliveSessionWrapper:
    """Wrapper that keeps a session alive during long-running operations."""

    def __init__(self, session: 'Session', touch_interval: int = 300):
        """
        Args:
            session: Session to keep alive (must expose .touch() and .id).
            touch_interval: Seconds between keep-alive touches (default 5 min).
        """
        self.session = session
        self.touch_interval = touch_interval
        self.keep_alive_thread = None
        self.stop_event = Event()
        self.active = False

    def _keep_alive_loop(self):
        """Background loop: touch the session until stop_event fires or touch fails."""
        # Event.wait doubles as both the sleep and the stop signal.
        while not self.stop_event.wait(self.touch_interval):
            try:
                self.session.touch()
                logger.debug("Keep-alive: Touched session {%s}", self.session.id)
            except Exception as e:
                logger.error(f"Keep-alive error for session {self.session.id}: {e}")
                break

    def start_keep_alive(self):
        """Start the keep-alive mechanism; no-op if already running."""
        if self.active:
            return

        self.active = True
        self.stop_event.clear()
        self.keep_alive_thread = Thread(target=self._keep_alive_loop, daemon=True)
        self.keep_alive_thread.start()
        logger.info(f"Started keep-alive for session {self.session.id}")

    def stop_keep_alive(self):
        """Stop the keep-alive mechanism; no-op if not running."""
        if not self.active:
            return

        self.active = False
        self.stop_event.set()

        thread = self.keep_alive_thread
        if thread is not None and thread.is_alive():
            thread.join(timeout=1.0)

        # Final touch
        try:
            self.session.touch()
        except Exception as e:
            logger.error(f"Final keep-alive touch error for session {self.session.id}: {e}")

        logger.info(f"Stopped keep-alive for session {self.session.id}")

    def __enter__(self):
        self.start_keep_alive()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop_keep_alive()
596
+
597
+ # ================ SESSION MANAGEMENT ================
598
+
599
+
600
@dataclass
class Session:
    """Thread-safe session data structure with workspace management"""
    id: str
    created_at: datetime
    last_accessed: datetime
    initialized: bool = False
    request_count: int = 0
    metadata: Dict[str, Any] = field(default_factory=dict)
    workspace_path: Optional[Path] = None
    mcp_tools: Optional[MCPTools] = None
    tool_tracker: Optional[ToolCallTracker] = None

    def is_expired(self, ttl_seconds: int) -> bool:
        """Return True when the idle time since last access exceeds the TTL."""
        idle = datetime.now() - self.last_accessed
        return idle > timedelta(seconds=ttl_seconds)

    def touch(self):
        """Record activity: refresh the access timestamp and bump the counter."""
        self.last_accessed = datetime.now()
        self.request_count += 1

    def get_mcp_tools(self, prefer_async: bool = True) -> MCPTools:
        """Lazily build and cache the MCP tools bound to this session's workspace."""
        if self.mcp_tools is None:
            workspace = str(self.workspace_path) if self.workspace_path else None
            # Prefer the async implementation when it is importable and requested.
            use_async = prefer_async and AsyncMCPTools is not None
            factory = AsyncMCPTools if use_async else MCPTools
            self.mcp_tools = factory(workspace_path=workspace)
        return self.mcp_tools

    def get_tool_tracker(self) -> Optional[ToolCallTracker]:
        """Lazily build the tracker; None when tracking is disabled or no workspace."""
        if not (config.enable_tool_tracking and self.workspace_path):
            return None
        if self.tool_tracker is None:
            self.tool_tracker = ToolCallTracker(self.workspace_path, self.id)
        return self.tool_tracker
640
+
641
+
642
+
643
class AsyncRLock:
    """Async re-entrant lock: the asyncio counterpart of ``threading.RLock``."""

    def __init__(self):
        self._lock = asyncio.Lock()
        self._owner: Optional[asyncio.Task] = None  # task currently holding the lock
        self._count = 0  # re-entry depth

    async def acquire(self):
        task = asyncio.current_task()
        # Re-entrant acquire by the current owner: just deepen the count.
        if self._owner == task:
            self._count += 1
            return
        # Otherwise contend for the underlying lock.
        await self._lock.acquire()
        self._owner = task
        self._count = 1

    async def release(self):
        if self._owner != asyncio.current_task():
            raise RuntimeError("不能释放非当前协程持有的锁")
        self._count -= 1
        # Only release the underlying lock once the re-entry depth hits zero.
        if self._count == 0:
            self._owner = None
            self._lock.release()

    # Support the ``async with`` protocol.
    async def __aenter__(self):
        await self.acquire()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        await self.release()
676
+
677
+
678
class ThreadSafeSessionManager:
    """Thread-safe session manager with workspace management.

    Sessions live in an in-memory dict guarded by an AsyncRLock. Re-entrancy is
    required: create_session holds the lock while awaiting
    _cleanup_oldest_sessions, which acquires it again. A background daemon
    thread periodically evicts expired sessions.
    """

    def __init__(self, ttl_seconds: int = 3600, max_sessions: int = 1000, base_workspace_dir: str = "workspaces"):
        self.ttl_seconds = ttl_seconds
        self.max_sessions = max_sessions
        self.base_workspace_dir = Path(base_workspace_dir)
        self.base_workspace_dir.mkdir(exist_ok=True)

        # Thread-safe session storage
        self.sessions: Dict[str, Session] = {}
        self.lock = AsyncRLock()

        # Start cleanup thread
        self._start_cleanup_thread()

    async def create_session(self) -> str:
        """Create a new session and return session ID"""
        session_id = str(uuid.uuid4())

        async with self.lock:
            # Check session limits; eviction re-acquires self.lock (re-entrant).
            if len(self.sessions) >= self.max_sessions:
                await self._cleanup_oldest_sessions()

            # Create workspace directory (one isolated dir per session id)
            workspace_path = self.base_workspace_dir / session_id
            workspace_path.mkdir(exist_ok=True, parents=True)

            # Create session
            session = Session(
                id=session_id,
                created_at=datetime.now(),
                last_accessed=datetime.now(),
                workspace_path=workspace_path
            )

            self.sessions[session_id] = session

        logger.info(f"Created session {session_id} with workspace {workspace_path}")
        return session_id

    async def get_session(self, session_id: str) -> Optional[Session]:
        """Get session by ID if it exists and is not expired.

        Side effect: touching refreshes the TTL; expired sessions are removed
        eagerly here (their workspace directory is NOT deleted — only the
        in-memory record).
        """
        async with self.lock:
            session = self.sessions.get(session_id)
            if session and not session.is_expired(self.ttl_seconds):
                session.touch()
                return session
            elif session:
                # Remove expired session
                del self.sessions[session_id]
                logger.info(f"Removed expired session {session_id}")
            return None

    async def get_or_create_session(self, session_id: Optional[str] = None) -> Session:
        """Get existing session or create new one.

        NOTE(review): the final dict read happens outside the lock; a
        concurrent eviction between create_session returning and this lookup
        would raise KeyError — confirm whether that window matters here.
        """
        if session_id:
            session = await self.get_session(session_id)
            if session:
                return session

        # Create new session
        new_session_id = await self.create_session()
        return self.sessions[new_session_id]

    async def _cleanup_expired_sessions(self):
        """Remove expired sessions (in-memory records only)."""
        async with self.lock:
            expired_sessions = []
            for session_id, session in self.sessions.items():
                if session.is_expired(self.ttl_seconds):
                    expired_sessions.append(session_id)

            for session_id in expired_sessions:
                del self.sessions[session_id]
                logger.info(f"Cleaned up expired session {session_id}")

    async def _cleanup_oldest_sessions(self):
        """Remove oldest sessions when limit is reached.

        Evicts down to (max_sessions - 10) by least-recent access so repeated
        creations do not immediately re-trigger eviction.
        """
        async with self.lock:
            if len(self.sessions) < self.max_sessions:
                return

            # Sort by last accessed time and remove oldest
            sorted_sessions = sorted(
                self.sessions.items(),
                key=lambda x: x[1].last_accessed
            )

            sessions_to_remove = len(self.sessions) - self.max_sessions + 10  # Remove extra
            for i in range(sessions_to_remove):
                if i < len(sorted_sessions):
                    session_id = sorted_sessions[i][0]
                    del self.sessions[session_id]
                    logger.info(f"Removed old session {session_id} due to session limit")

    def _start_cleanup_thread(self):
        """Start background cleanup thread.

        NOTE(review): each cycle builds a throwaway event loop to drive the
        async cleanup, while self.lock's asyncio.Lock is normally used from the
        server's loop. This relies on the lock being uncontended across loops —
        verify this is safe under load.
        """
        def cleanup_worker():
            while True:
                try:
                    time.sleep(config.cleanup_interval_seconds)
                    # Run async method in sync context
                    loop = asyncio.new_event_loop()
                    loop.run_until_complete(self._cleanup_expired_sessions())
                    loop.close()
                except Exception as e:
                    logger.error(f"Error in cleanup thread: {e}")

        import threading
        cleanup_thread = threading.Thread(target=cleanup_worker, daemon=True)
        cleanup_thread.start()
        logger.info("Started session cleanup thread")

    async def get_stats(self) -> Dict[str, Any]:
        """Get session manager statistics (snapshot taken under the lock)."""
        async with self.lock:
            return {
                "total_sessions": len(self.sessions),
                "max_sessions": self.max_sessions,
                "ttl_seconds": self.ttl_seconds,
                "session_ids": list(self.sessions.keys())
            }
804
+
805
+ # ================ MIDDLEWARE AND SECURITY ================
806
+
807
+
808
class RateLimiter:
    """Simple rate limiter with time-window tracking"""

    def __init__(self, requests_per_minute: int = 60):
        self.requests_per_minute = requests_per_minute
        self.requests: Dict[str, List[float]] = defaultdict(list)
        self.lock = asyncio.Lock()

    async def is_allowed(self, client_id: str) -> bool:
        """Check-and-record: True when the client is under its per-minute budget."""
        async with self.lock:
            now = time.time()
            cutoff = now - 60

            # Keep only timestamps inside the sliding one-minute window.
            recent = [stamp for stamp in self.requests[client_id] if stamp > cutoff]
            self.requests[client_id] = recent

            # Over budget: deny without recording.
            if len(recent) >= self.requests_per_minute:
                return False

            # Under budget: record this request and allow it.
            recent.append(now)
            return True
835
+
836
+
837
class RequestValidator:
    """Validates incoming MCP requests"""

    @staticmethod
    def validate_mcp_request(data: Dict[str, Any]) -> tuple[bool, Optional[str]]:
        """Check the basic JSON-RPC envelope: a dict carrying 'method' and 'id'."""
        if not isinstance(data, dict):
            return False, "Request must be a JSON object"

        for required in ("method", "id"):
            if required not in data:
                return False, f"Missing '{required}' field"

        return True, None

    @staticmethod
    def validate_tool_call(params: Dict[str, Any]) -> tuple[bool, Optional[str]]:
        """Check tool-call params and that the named tool exists in the registry."""
        if not isinstance(params, dict):
            return False, "Tool parameters must be a JSON object"

        if "name" not in params:
            return False, "Missing tool 'name'"

        if "arguments" not in params:
            return False, "Missing tool 'arguments'"

        tool_name = params["name"]

        # The registry is the source of truth for which tools may be called.
        detailed_schemas = get_tool_schemas()
        if tool_name not in detailed_schemas:
            return False, f"Unknown tool: {tool_name}. Available tools: {sorted(list(detailed_schemas.keys()))}"

        return True, None
875
+
876
+
877
class SecurityMiddleware(BaseHTTPMiddleware):
    """Security middleware for basic protection"""

    async def dispatch(self, request: Request, call_next):
        """Reject oversized bodies up front, then attach hardening headers."""
        declared = request.headers.get("content-length")
        max_bytes = config.max_request_size_mb * 1024 * 1024
        if declared and int(declared) > max_bytes:
            return JSONResponse(
                status_code=HTTPStatus.REQUEST_ENTITY_TOO_LARGE,
                content={"error": "Request too large"}
            )

        response = await call_next(request)

        # Standard browser-hardening headers on every response.
        for header, value in (
            ("X-Content-Type-Options", "nosniff"),
            ("X-Frame-Options", "DENY"),
            ("X-XSS-Protection", "1; mode=block"),
        ):
            response.headers[header] = value

        return response
896
+
897
+
898
class RateLimitMiddleware(BaseHTTPMiddleware):
    """Rate limiting middleware"""

    def __init__(self, app, input_rate_limiter: RateLimiter):
        super().__init__(app)
        self.rate_limiter = input_rate_limiter

    async def dispatch(self, request: Request, call_next):
        """Throttle per client IP before handing the request to the app."""
        # Identify the caller by source address; "unknown" when unavailable.
        client_ip = request.client.host if request.client else "unknown"

        allowed = await self.rate_limiter.is_allowed(client_ip)
        if not allowed:
            return JSONResponse(
                status_code=HTTPStatus.TOO_MANY_REQUESTS,
                content={"error": "Rate limit exceeded"}
            )

        return await call_next(request)
916
+
917
# Global session manager
# Module-level singletons; populated during server startup, None until then.
session_manager: Optional["ThreadSafeSessionManager"] = None
rate_limiter: Optional["RateLimiter"] = None
920
+
921
+
922
@dataclass
class RateLimitViolation:
    """Represents a rate limit violation with standardized error information.

    Carries what the limiter reported so responses can include both a
    user-facing message and a technical one for debugging.
    """
    tool_name: str
    limit_type: str  # "burst", "second", "minute", "hour"
    current_usage: int
    limit_value: float
    retry_after_seconds: float

    def to_user_friendly_message(self) -> str:
        """Generate user-friendly error message"""
        if self.limit_type == "burst":
            return f"Service temporarily unavailable: Too many rapid requests to {self.tool_name}. Please wait {self.retry_after_seconds:.0f} seconds before trying again."
        elif self.limit_type == "second":
            return f"Service temporarily unavailable: {self.tool_name} request rate exceeded ({self.limit_value}/second). Please wait {self.retry_after_seconds:.0f} seconds before trying again."
        elif self.limit_type == "minute":
            return f"Service temporarily unavailable: {self.tool_name} quota exceeded ({self.limit_value}/minute). Please try again in {self.retry_after_seconds:.0f} seconds."
        elif self.limit_type == "hour":
            # BUG FIX: retry_after_seconds is in seconds; the previous message
            # printed the raw seconds value labeled "minutes" (300s -> "300
            # minutes"). Convert to minutes before formatting.
            return f"Service temporarily unavailable: {self.tool_name} hourly quota exceeded ({self.limit_value}/hour). Please try again in {self.retry_after_seconds / 60:.0f} minutes."
        else:
            return f"Service temporarily unavailable: {self.tool_name} rate limit exceeded. Please try again later."

    def to_technical_message(self) -> str:
        """Generate technical error message for debugging"""
        return f"Tool '{self.tool_name}' {self.limit_type} limit exceeded ({self.current_usage}/{self.limit_value} {self.limit_type})"
947
+
948
+
949
def _parse_rate_limit_denial(tool_name: str, denial_reason: str) -> RateLimitViolation:
    """Parse rate limit denial reason into structured violation information"""
    import re

    # (marker substring, limit type, retry delay in seconds, value-extracting regex)
    rules = (
        ("burst limit exceeded", "burst", 1.0, r'\((\d+) requests/burst\)'),
        ("per-second limit exceeded", "second", 1.0, r'\(([0-9.]+) requests/second\)'),
        ("per-minute limit exceeded", "minute", 10.0, r'\(([0-9.]+) requests/minute\)'),
        ("per-hour limit exceeded", "hour", 300.0, r'\(([0-9.]+) requests/hour\)'),
    )

    # Defaults when the reason text matches no known pattern.
    limit_type = "unknown"
    current_usage = 0
    limit_value = 0.0
    retry_after_seconds = 60.0  # fall back to retrying after one minute

    for marker, kind, delay, pattern in rules:
        if marker not in denial_reason:
            continue
        limit_type = kind
        retry_after_seconds = delay
        match = re.search(pattern, denial_reason)
        if match:
            limit_value = float(match.group(1))
            current_usage = int(limit_value)  # approximation
        break

    return RateLimitViolation(
        tool_name=tool_name,
        limit_type=limit_type,
        current_usage=current_usage,
        limit_value=limit_value,
        retry_after_seconds=retry_after_seconds,
    )
999
+
1000
+
1001
async def _call_session_tool_async(session: Session, tool_name: str, tool_args: Dict[str, Any],
                                   client_ip: str = "unknown") -> Dict[str, Any]:
    """Execute a tool within a session context with full tracking, workspace
    management, and global rate limiting.

    Returns a result dict; failures (including rate limiting) are reported in
    the dict rather than raised. Every call is logged via the session's tool
    tracker when tracking is enabled.
    """

    start_time = time.time()
    success = False
    error_details = None
    result_data = None

    # Touch session at start of tool execution to prevent expiry during long operations
    session.touch()

    try:
        # CHECK GLOBAL TOOL RATE LIMITS FIRST
        if global_tool_rate_limiter:
            allowed, deny_reason = await global_tool_rate_limiter.is_allowed(tool_name)
            if not allowed:
                # Parse the denial reason to create structured rate limit violation
                rate_limit_violation = _parse_rate_limit_denial(tool_name, deny_reason)

                # Create user-friendly error message
                user_message = rate_limit_violation.to_user_friendly_message()
                technical_message = rate_limit_violation.to_technical_message()

                logger.warning(f"Session {session.id}: {technical_message}")

                # Structured error payload; "rate_limited" lets the HTTP layer
                # turn this into a 429 response.
                result_data = {
                    "success": False,
                    "error": user_message,
                    "error_code": "RATE_LIMIT_EXCEEDED",
                    "error_type": "rate_limit",
                    "tool_name": tool_name,
                    "limit_type": rate_limit_violation.limit_type,
                    "retry_after_seconds": rate_limit_violation.retry_after_seconds,
                    "data": None,
                    "rate_limited": True,  # Keep for backward compatibility
                    "technical_details": technical_message  # For debugging
                }

                # Still log this for tracking purposes
                duration_ms = (time.time() - start_time) * 1000
                tracker = session.get_tool_tracker()
                if tracker:
                    try:
                        agent_info = {
                            "client_ip": client_ip,
                            "type": "unknown",
                            "session_request_count": session.request_count
                        }

                        tracker.log_tool_call(
                            tool_name=tool_name,
                            input_args=tool_args,
                            output_result=result_data,
                            success=False,
                            duration_ms=duration_ms,
                            error_details=user_message,
                            agent_info=agent_info
                        )
                    except Exception as e:
                        logger.error(f"Failed to log rate-limited tool call: {e}")

                return result_data

        # Get MCP tools instance for this session (handles workspace isolation)
        mcp_tools = session.get_mcp_tools(prefer_async=True)

        # Get tool method directly from the mcp_tools instance
        if not hasattr(mcp_tools, tool_name):
            raise ValueError(f"Tool '{tool_name}' not implemented")

        tool_method = getattr(mcp_tools, tool_name)

        # Add session context to tool arguments for workspace-aware tools
        if hasattr(mcp_tools, 'set_session_context'):
            mcp_tools.set_session_context(session.id, str(session.workspace_path))

        # Execute tool with keep-alive for potentially long operations
        logger.info(f"Session {session.id}: Executing tool '{tool_name}' with args: {list(tool_args.keys())}")

        # Use keep-alive wrapper for tools that might take a long time
        long_running_tools = {'batch_web_search', 'url_crawler', 'document_qa', 'document_extract', 'bash'}

        # Check if the tool method is async
        import inspect
        is_async_tool = inspect.iscoroutinefunction(tool_method)

        # Execute tool based on whether it's async or sync
        if is_async_tool:
            # Tool is async - execute directly
            # NOTE(review): the "{%s}" placeholder renders literal braces in the log.
            logger.debug("Executing async tool '{%s}'", tool_name)

            if config.enable_session_keepalive and tool_name in long_running_tools:
                # For long-running async tools, use keep-alive
                with KeepAliveSessionWrapper(session, touch_interval=config.keepalive_touch_interval):
                    result = await tool_method(**tool_args)
            else:
                # For regular async tools, execute directly
                result = await tool_method(**tool_args)
        else:
            # Tool is sync - execute in thread pool
            logger.debug("Executing sync tool '{%s}' in thread pool", tool_name)

            # Define the synchronous tool execution function
            def execute_tool_sync():
                """Synchronous tool execution to be run in thread pool"""
                return tool_method(**tool_args)

            # Execute tool asynchronously in thread pool for true non-blocking execution
            import asyncio
            import concurrent.futures

            # Create a thread pool executor for CPU-bound/blocking operations
            loop = asyncio.get_event_loop()

            if config.enable_session_keepalive and tool_name in long_running_tools:
                # For long-running tools, use keep-alive with async execution
                with KeepAliveSessionWrapper(session, touch_interval=config.keepalive_touch_interval):
                    # Run in thread pool to avoid blocking the event loop
                    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
                        result = await loop.run_in_executor(executor, execute_tool_sync)
            else:
                # For regular tools, use async execution without keep-alive
                with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
                    result = await loop.run_in_executor(executor, execute_tool_sync)

        # Touch session after tool execution to update activity
        session.touch()

        # Handle different result formats
        if hasattr(result, 'to_dict'):
            result_data = result.to_dict()
        elif isinstance(result, dict):
            result_data = result
        else:
            result_data = {"result": result}

        # Results without an explicit "success" key are treated as successful.
        success = result_data.get('success', True)

        if success:
            logger.info(f"Session {session.id}: Tool '{tool_name}' completed successfully")

            # RECORD SUCCESSFUL REQUEST FOR RATE LIMITING
            if global_tool_rate_limiter:
                await global_tool_rate_limiter.record_request(tool_name)

        else:
            error_details = result_data.get('error', 'Unknown error')
            logger.warning(f"Session {session.id}: Tool '{tool_name}' failed: {error_details}")

    except Exception as e:
        # Any exception is converted into a failure result so callers always
        # receive a dict.
        success = False
        error_details = str(e)
        result_data = {
            "success": False,
            "error": error_details,
            "data": None
        }
        logger.error(f"Session {session.id}: Tool '{tool_name}' exception: {e}")

    # Calculate execution time
    duration_ms = (time.time() - start_time) * 1000

    # Log tool call if tracking is enabled
    tracker = session.get_tool_tracker()
    if tracker:
        try:
            agent_info = {
                "client_ip": client_ip,
                "type": "unknown",  # Could be enhanced to detect agent type
                "session_request_count": session.request_count
            }

            tracker.log_tool_call(
                tool_name=tool_name,
                input_args=tool_args,
                output_result=result_data,
                success=success,
                duration_ms=duration_ms,
                error_details=error_details,
                agent_info=agent_info
            )
        except Exception as e:
            logger.error(f"Failed to log tool call: {e}")

    return result_data
1189
+
1190
+
1191
+
1192
def create_sse_response(response_data: dict, session_id: str = None) -> StreamingResponse:
    """Create Server-Sent Events response with proper formatting"""

    def generate_sse():
        # Emit one SSE "message" event; fall back to an "error" event when the
        # payload cannot be serialized.
        try:
            if session_id:
                response_data["session_id"] = session_id
            payload = json.dumps(response_data, ensure_ascii=False)
            event_name = "message"
        except Exception as e:
            fallback = {
                "jsonrpc": "2.0",
                "error": {"code": JsonRpcErr.INTERNAL_ERROR, "message": f"Internal error: {str(e)}"},
                "id": response_data.get("id")
            }
            payload = json.dumps(fallback, ensure_ascii=False)
            event_name = "error"
        yield f"event: {event_name}\n"
        yield f"data: {payload}\n"
        yield "\n"

    return StreamingResponse(
        generate_sse(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Access-Control-Allow-Origin": "*",
        }
    )
1224
+
1225
+
1226
def create_error_response(request_id: Any, code: int, message: str, session_id: str = None) -> StreamingResponse:
    """Create error response in SSE format"""
    # Build a JSON-RPC 2.0 error envelope and stream it as SSE.
    payload = {
        "jsonrpc": "2.0",
        "error": {"code": code, "message": message},
        "id": request_id,
    }
    return create_sse_response(payload, session_id)
1234
+
1235
+
1236
def create_rate_limit_response(
    request_id: Any,
    tool_name: str,
    error_message: str,
    retry_after_seconds: float,
    limit_type: str,
    technical_details: str = "",
    session_id: str = None
) -> JSONResponse:
    """
    Create HTTP 429 Rate Limit Exceeded response with proper headers and error format.

    Returns proper HTTP status code instead of SSE for rate limiting errors.
    """
    # Retry-After must be an integral number of seconds, at least 1.
    retry_after_header = int(max(1.0, retry_after_seconds))

    # Standardized error body carrying both user-facing and debug details.
    body = {
        "error": {
            "type": "rate_limit_exceeded",
            "code": "RATE_LIMIT_EXCEEDED",
            "message": error_message,
            "details": {
                "tool_name": tool_name,
                "limit_type": limit_type,
                "retry_after_seconds": retry_after_seconds,
                "technical_details": technical_details
            }
        },
        "request_id": request_id,
        "timestamp": datetime.now().isoformat(),
        "session_id": session_id
    }

    response_headers = {
        "Retry-After": str(retry_after_header),  # HTTP standard header
        "X-RateLimit-Limit-Type": limit_type,
        "X-RateLimit-Tool": tool_name,
        "X-RateLimit-Retry-After": str(retry_after_seconds),
        "Content-Type": "application/json"
    }

    return JSONResponse(
        status_code=HTTPStatus.TOO_MANY_REQUESTS,  # Too Many Requests
        content=body,
        headers=response_headers
    )
1286
+
1287
+
1288
async def handle_mcp_request(request: Request) -> StreamingResponse:
    """Main MCP request handler with session management and tool execution.

    Validates the JSON-RPC envelope, resolves/creates the session from the
    X-Session-ID header, dispatches on the MCP method ("initialize",
    "tools/list", "tools/call"), and streams the result back as SSE. Rate
    limited tool calls are returned as HTTP 429 instead of SSE.
    """

    try:
        # Check content length before reading body
        content_length = request.headers.get("content-length")
        if content_length:
            content_size_mb = int(content_length) / (1024 * 1024)
            if content_size_mb > config.max_request_size_mb:
                logger.warning(f"Request too large: {content_size_mb:.2f}MB > {config.max_request_size_mb}MB")
                return create_error_response(None, JsonRpcErr.PARSE_ERROR, f"Request too large: {content_size_mb:.2f}MB")

        # Parse request with timeout protection
        try:
            body = await asyncio.wait_for(request.body(), timeout=config.request_timeout_seconds)
        except asyncio.TimeoutError:
            logger.error("Timeout while reading request body")
            return create_error_response(None, JsonRpcErr.REQUEST_TIMEOUT, "Request body read timeout")

        if not body:
            return create_error_response(None, JsonRpcErr.PARSE_ERROR, "Empty request body")

        try:
            data = json.loads(body.decode('utf-8'))
        except json.JSONDecodeError as e:
            return create_error_response(None, JsonRpcErr.PARSE_ERROR, f"Invalid JSON: {str(e)}")

        # Validate MCP request structure
        is_valid, error_msg = RequestValidator.validate_mcp_request(data)
        if not is_valid:
            return create_error_response(data.get("id"), JsonRpcErr.INVALID_REQUEST, error_msg)

        request_id = data["id"]
        method = data["method"]
        params = data.get("params", {})

        # Get or create session (a missing/expired session id yields a new one)
        session_id = request.headers.get("X-Session-ID")
        client_ip = request.client.host if request.client else "unknown"

        session = await session_manager.get_or_create_session(session_id)
        logger.info(f"Processing {method} request for session {session.id} from {client_ip}")

        # Handle different MCP methods
        if method == "initialize":
            # MCP initialization: advertise protocol version and capabilities
            response_data = {
                "jsonrpc": "2.0",
                "result": {
                    "protocolVersion": "2025-06-18",
                    "capabilities": {
                        "tools": {"supportsProgress": True},
                        "resources": {},
                        "prompts": {}
                    },
                    "serverInfo": {
                        "name": "DeepDiver-Demo-MCP",
                        "version": "1.0.0"
                    }
                },
                "id": request_id
            }

        elif method == "tools/list":
            # List available tools using detailed schemas from get_tool_schemas()
            tools_list = []
            detailed_schemas = get_tool_schemas()

            # Build tools list from schemas
            for _, detailed_schema in detailed_schemas.items():
                tools_list.append({
                    "name": detailed_schema["name"],
                    "description": detailed_schema["description"],
                    "inputSchema": detailed_schema["inputSchema"]
                })

            logger.info(f"Serving {len(tools_list)} tools with detailed schemas to client")

            response_data = {
                "jsonrpc": "2.0",
                "result": {"tools": tools_list},
                "id": request_id
            }

        elif method == "tools/call":
            # Execute tool call
            is_valid, error_msg = RequestValidator.validate_tool_call(params)
            if not is_valid:
                return create_error_response(request_id, JsonRpcErr.INVALID_PARAMS, error_msg, session.id)

            tool_name = params["name"]
            tool_arguments = params["arguments"]

            # Execute tool in session context asynchronously
            result = await _call_session_tool_async(session, tool_name, tool_arguments, client_ip)

            # CHECK FOR RATE LIMITING AND RETURN PROPER HTTP STATUS
            if result.get("rate_limited", False):
                return create_rate_limit_response(
                    request_id=request_id,
                    tool_name=tool_name,
                    error_message=result.get("error", "Rate limit exceeded"),
                    retry_after_seconds=result.get("retry_after_seconds", 60),
                    limit_type=result.get("limit_type", "unknown"),
                    technical_details=result.get("technical_details", ""),
                    session_id=session.id
                )

            # Format normal response: the tool result is embedded as JSON text
            response_data = {
                "jsonrpc": "2.0",
                "result": {
                    "content": [
                        {
                            "type": "text",
                            "text": json.dumps(result, indent=2, ensure_ascii=False)
                        }
                    ]
                },
                "id": request_id
            }

        else:
            return create_error_response(request_id, JsonRpcErr.METHOD_NOT_FOUND, f"Method not found: {method}", session.id)

        return create_sse_response(response_data, session.id)

    except asyncio.TimeoutError:
        logger.warning("Request timeout - client may have disconnected")
        return create_error_response(None, JsonRpcErr.REQUEST_TIMEOUT, "Request timeout")
    except Exception as e:
        # Handle client disconnects gracefully (detected by exception name in
        # the message since the exception classes are framework-specific)
        if "ClientDisconnect" in str(e) or "ConnectionClosedError" in str(e):
            logger.warning(f"Client disconnected during request processing: {e}")
            return create_error_response(None, JsonRpcErr.REQUEST_TIMEOUT, "Client disconnected")

        logger.error(f"Unexpected error in MCP request handler: {e}")
        import traceback
        logger.error(traceback.format_exc())
        return create_error_response(None, JsonRpcErr.INTERNAL_ERROR, f"Internal server error: {str(e)}")
1428
+
1429
+
1430
async def handle_health_check(request: Request) -> JSONResponse:
    """Health check endpoint.

    Reports session statistics, enabled features, rate-limiting configuration,
    and the error format contract for 429 responses. Returns 500 with
    status "unhealthy" if gathering stats fails.
    """
    try:
        stats = await session_manager.get_stats() if session_manager else {}

        # Get rate limiting summary
        rate_limit_summary = {}
        if global_tool_rate_limiter:
            all_stats = global_tool_rate_limiter.get_all_stats()
            rate_limit_summary = {
                "enabled": True,
                "tools_with_limits": len(all_stats),
                "total_configured_tools": list(all_stats.keys())
            }
        else:
            rate_limit_summary = {"enabled": False}

        health_data = {
            "status": "healthy",
            "timestamp": datetime.now().isoformat(),
            "version": "1.0.0",
            "session_stats": stats,
            "features": {
                "workspace_isolation": True,
                "tool_call_tracking": config.enable_tool_tracking if config else False,
                "client_rate_limiting": True,
                "global_tool_rate_limiting": rate_limit_summary["enabled"],
                "security_middleware": True,
                "standardized_rate_limit_responses": True
            },
            "rate_limiting": rate_limit_summary,
            # Documents for clients what a rate-limit error response looks like.
            "error_formats": {
                "rate_limit_exceeded": {
                    "http_status": HTTPStatus.TOO_MANY_REQUESTS,
                    "headers": ["Retry-After", "X-RateLimit-*"],
                    "error_code": "RATE_LIMIT_EXCEEDED",
                    "response_format": "application/json"
                }
            }
        }

        return JSONResponse(content=health_data)

    except Exception as e:
        return JSONResponse(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            content={"status": "unhealthy", "error": str(e)}
        )
1478
+
1479
+
1480
async def handle_tracking_info(request: Request) -> JSONResponse:
    """Get tool call tracking information for a session (GET /tracking).

    Query params:
        session_id: required; the session whose tracking data to return.

    Responses:
        400 if ``session_id`` is missing, 404 if the session is unknown,
        200 with ``tracking_enabled: false`` if the session has no tracker,
        otherwise 200 with the persisted summary plus log-file locations.
        Any unexpected failure yields HTTP 500 with the error string.
    """
    try:
        session_id = request.query_params.get("session_id")
        if not session_id:
            return JSONResponse(
                status_code=HTTPStatus.BAD_REQUEST,
                content={"error": "session_id parameter required"}
            )

        session = await session_manager.get_session(session_id)
        if not session:
            return JSONResponse(
                status_code=HTTPStatus.NOT_FOUND,
                content={"error": f"Session {session_id} not found"}
            )

        tracker = session.get_tool_tracker()
        if not tracker:
            # Not an error: tracking is optional per session.
            return JSONResponse(
                content={
                    "session_id": session_id,
                    "tracking_enabled": False,
                    "message": "Tool call tracking not enabled or no workspace"
                }
            )

        # Read session summary; a read failure is logged but still returns
        # a 200 with an empty summary (best effort).
        summary_data = {}
        if tracker.summary_file.exists():
            try:
                with open(tracker.summary_file, 'r') as f:
                    summary_data = json.load(f)
            except Exception as e:
                logger.error(f"Failed to read session summary: {e}")

        return JSONResponse(content={
            "session_id": session_id,
            "tracking_enabled": True,
            "summary": summary_data,
            "logs_directory": str(tracker.logs_dir),
            "current_log_file": str(tracker.current_log_file)
        })

    except Exception as e:
        return JSONResponse(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            content={"error": str(e)}
        )
1529
+
1530
+
1531
+
1532
async def handle_rate_limit_stats(request: Request) -> JSONResponse:
    """Report global per-tool rate limiting statistics (GET /rate-limits).

    With a ``tool`` query parameter, returns stats for that single tool;
    otherwise returns stats for every configured tool plus a summary.
    Returns 404 when the global limiter was never initialized and 500 on
    unexpected errors.
    """
    try:
        # Guard: the limiter is only created when limits are configured.
        if not global_tool_rate_limiter:
            return JSONResponse(
                status_code=HTTPStatus.NOT_FOUND,
                content={"error": "Global tool rate limiter not initialized"}
            )

        requested_tool = request.query_params.get("tool")
        if requested_tool:
            # Single-tool view.
            single_stats = await global_tool_rate_limiter.get_tool_stats(requested_tool)
            return JSONResponse(content=single_stats)

        # Aggregate view across all tools with configured limits.
        per_tool = global_tool_rate_limiter.get_all_stats()
        payload = {
            "timestamp": datetime.now().isoformat(),
            "global_tool_rate_limiting": True,
            "tools": per_tool,
            "summary": {
                "total_tools_with_limits": len(per_tool),
                "tools_configured": list(per_tool.keys())
            }
        }
        return JSONResponse(content=payload)

    except Exception as e:
        logger.error(f"Failed to get rate limit stats: {e}")
        return JSONResponse(
            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
            content={"error": str(e)}
        )
1567
+
1568
+
1569
def create_app() -> Starlette:
    """Create and configure the Starlette application.

    Initializes the module-level session manager, per-client rate limiter,
    and (when limits are configured) the global per-tool rate limiter, then
    wires middleware and the four HTTP routes.

    Raises:
        RuntimeError: if the module-level ``config`` has not been set yet
            (``main()`` loads it before calling this).
    """
    global session_manager, rate_limiter, global_tool_rate_limiter

    if not config:
        raise RuntimeError("Server configuration not initialized")

    # Initialize global components
    session_manager = ThreadSafeSessionManager(
        ttl_seconds=config.session_ttl_seconds,
        max_sessions=config.max_sessions,
        base_workspace_dir=config.base_workspace_dir
    )
    rate_limiter = RateLimiter(config.rate_limit_requests_per_minute)

    # Initialize global tool rate limiter (optional: only when limits exist)
    if config.tool_rate_limits:
        global_tool_rate_limiter = GlobalToolRateLimiter(config.tool_rate_limits)
        logger.info(f"Initialized global tool rate limiter with {len(config.tool_rate_limits)} tool limits")
    else:
        logger.info("No tool rate limits configured - tools will run without global rate limiting")

    # Create app
    app = Starlette(debug=config.debug_mode)

    # NOTE(review): add_middleware order determines wrapping order in
    # Starlette — confirm Security vs RateLimit ordering is intentional.
    app.add_middleware(SecurityMiddleware)
    app.add_middleware(RateLimitMiddleware, input_rate_limiter=rate_limiter)

    # Add routes
    app.add_route("/mcp", handle_mcp_request, methods=["POST"])
    app.add_route("/health", handle_health_check, methods=["GET"])
    app.add_route("/tracking", handle_tracking_info, methods=["GET"])
    app.add_route("/rate-limits", handle_rate_limit_stats, methods=["GET"])

    return app
1604
+
1605
+
1606
def parse_arguments(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional explicit list of argument strings. Defaults to
            ``None``, in which case argparse falls back to ``sys.argv[1:]``
            (preserving the original behavior). Accepting an explicit list
            makes the parser testable and usable when embedding the server.

    Returns:
        argparse.Namespace with ``config``, ``host``, ``port``, ``debug``
        and ``workspace_dir`` attributes (``None``/``False`` when unset).
    """
    parser = argparse.ArgumentParser(
        description="Demo-Ready MCP Server with Per-Tool Rate Limiting",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python src/tools/mcp_server_standard.py --config src/tools/server_config.yaml
  python src/tools/mcp_server_standard.py --host 127.0.0.1 --port 8080
  python src/tools/mcp_server_standard.py --config custom_config.yaml --debug
"""
    )

    parser.add_argument(
        '--config', '-c',
        type=str,
        help='Path to YAML configuration file'
    )

    parser.add_argument(
        '--host',
        type=str,
        help='Server host (overrides config file)'
    )

    parser.add_argument(
        '--port', '-p',
        type=int,
        help='Server port (overrides config file)'
    )

    parser.add_argument(
        '--debug',
        action='store_true',
        help='Enable debug mode (overrides config file)'
    )

    parser.add_argument(
        '--workspace-dir',
        type=str,
        help='Base workspace directory (overrides config file)'
    )

    return parser.parse_args(argv)
1650
+
1651
+
1652
def print_startup_info():
    """Log a startup banner summarizing server features and tools.

    Reads the module-level ``config`` (must be loaded first) and the static
    tool schemas; purely informational, no side effects beyond logging.
    """
    logger.info("🚀 DeepDiver Demo MCP Server")
    logger.info("=" * 50)
    logger.info(f"📊 Features:")
    logger.info(f" • Session Management: ✅ (TTL: {config.session_ttl_seconds}s)")
    logger.info(f" • Workspace Isolation: ✅ (Base: {config.base_workspace_dir})")
    logger.info(f" • Tool Call Tracking: {'✅' if config.enable_tool_tracking else '❌'}")
    logger.info(f" • Client Rate Limiting: ✅ ({config.rate_limit_requests_per_minute}/min)")
    logger.info(f" • Global Tool Rate Limiting: {'✅' if config.tool_rate_limits else '❌'}")
    logger.info(f" • Security Middleware: ✅")

    # Tool rate limiting information (only the first 3 tools are shown
    # in full; the rest are summarized to keep the banner short)
    if config.tool_rate_limits:
        logger.info(f"🚦 Tool Rate Limits: {len(config.tool_rate_limits)} tools configured")
        for tool_name, limits in list(config.tool_rate_limits.items())[:3]:
            # '∞' when a limit key is omitted (meaning: unlimited)
            burst = limits.get('burst_limit', '∞')
            rpm = limits.get('requests_per_minute', '∞')
            logger.info(f" • {tool_name}: {rpm}/min, burst: {burst}")
        if len(config.tool_rate_limits) > 3:
            logger.info(f" • ... and {len(config.tool_rate_limits) - 3} more tools")

    # Tool information from schemas
    tool_schemas = get_tool_schemas()
    available_tools = list(tool_schemas.keys())

    logger.info(f"🔧 Tools Available: {len(available_tools)}")
    logger.info(f" • All tools defined in schemas: {len(available_tools)} tools")
    logger.info(f" • Sample tools: {', '.join(sorted(available_tools)[:5])}...")
    logger.info("=" * 50)
1682
+
1683
+
1684
def main():
    """Run the production MCP server.

    Loads configuration (honoring ``--config`` when given, otherwise the
    default YAML path), applies CLI overrides, prints the startup banner,
    then starts a single-worker uvicorn server tuned for async concurrency.

    Raises:
        Exception: re-raised after logging if server startup fails.
    """
    global config

    # Parse command line arguments
    args = parse_arguments()

    # BUGFIX: honor the --config flag. It was previously parsed (and
    # documented in --help) but ignored; the default path was always used.
    config_path = args.config or "./src/tools/server_config.yaml"
    config = ServerConfig.from_yaml(config_path)

    # Apply CLI overrides
    if args.host:
        config.host = args.host
        logger.info(f"🔧 Override: Host = {config.host}")

    if args.port:
        config.port = args.port
        logger.info(f"🔧 Override: Port = {config.port}")

    if args.debug:
        config.debug_mode = True
        logger.info(f"🔧 Override: Debug mode enabled")

    if args.workspace_dir:
        config.base_workspace_dir = args.workspace_dir
        logger.info(f"🔧 Override: Workspace directory = {config.base_workspace_dir}")

    print_startup_info()

    try:
        # (Removed a dead FORCE_HIGH_CONCURRENCY env check that only
        # executed `pass` — it had no effect on configuration.)
        app = create_app()

        logger.info(f"🌐 Starting server at http://{config.host}:{config.port}")
        logger.info(f"📡 MCP endpoint: http://{config.host}:{config.port}/mcp")
        logger.info(f"🏥 Health check: http://{config.host}:{config.port}/health")
        logger.info(f"📊 Tracking info: http://{config.host}:{config.port}/tracking?session_id=<id>")
        logger.info(f"🚦 Rate limit stats: http://{config.host}:{config.port}/rate-limits")

        uvicorn.run(
            app,  # Use app instance directly for single worker with async optimizations
            host=config.host,
            port=config.port,
            log_level="info",
            timeout_keep_alive=config.request_timeout_seconds,
            workers=1,  # Single worker with async optimizations
            backlog=1024,  # Larger backlog for high-concurrency
            access_log=False,  # Disable access logs for better performance
            limit_concurrency=None,  # No artificial concurrency limit
        )

    except KeyboardInterrupt:
        print("\n⏹️ Server stopped by user")
    except Exception as e:
        print(f"❌ Server startup failed: {e}")
        import traceback
        traceback.print_exc()
        raise
1749
+
1750
# Script entry point: start the MCP server when executed directly.
if __name__ == "__main__":
    main()
deepdiver_v2/src/tools/mcp_tools.py ADDED
The diff for this file is too large to render. See raw diff
 
deepdiver_v2/src/tools/server_config.yaml ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =================================================================
2
+ # DeepDiver MCP Server Configuration
3
+ # =================================================================
4
+ # This file contains ONLY the configuration options that are actually
5
+ # implemented and used by the server. No unused options!
6
+
7
+ # =================================================================
8
+ # SERVER CORE SETTINGS
9
+ # =================================================================
10
+ server:
11
+ # Network Configuration
12
+ host: "127.0.0.1" # Server bind address
13
+ port: 6274 # Server port
14
+ debug_mode: false # Enable debug logging and error details
15
+
16
+ # Session Management
17
+ session_ttl_seconds: 21600 # Session timeout (6 hours)
18
+ max_sessions: 1000 # Maximum concurrent sessions
19
+ cleanup_interval_seconds: 600 # How often to clean expired sessions (10 min)
20
+ enable_session_keepalive: true # Keep sessions alive during long operations
21
+ keepalive_touch_interval: 300 # Touch session every N seconds during long ops
22
+
23
+ # Request Handling
24
+ request_timeout_seconds: 1800 # Request timeout
25
+ max_request_size_mb: 1000 # Maximum request size
26
+
27
+ # Client Rate Limiting (per IP address)
28
+ rate_limit_requests_per_minute: 300000 # Requests per minute per client IP
29
+
30
+ # Workspace Management
31
+ base_workspace_dir: "workspaces" # Base directory for session workspaces
32
+
33
+ # =================================================================
34
+ # TOOL CALL TRACKING & LOGGING
35
+ # =================================================================
36
+ tracking:
37
+ enable_tool_tracking: true # Enable detailed tool call logging
38
+ max_tracked_calls_per_session: 10000 # Limit tool calls logged per session
39
+ track_detailed_errors: true # Include full error details in logs
40
+
41
+
42
+ # =================================================================
43
+ # GLOBAL PER-TOOL RATE LIMITING
44
+ # =================================================================
45
+ # These limits control requests to external APIs to avoid hitting provider limits.
46
+ # They are shared across ALL sessions and clients.
47
+ #
48
+ # Each tool can have these limits:
49
+ # - requests_per_second: QPS limit
50
+ # - requests_per_minute: Per-minute limit
51
+ # - requests_per_hour: Hourly limit
52
+ # - burst_limit: Short-term burst allowance
53
+ #
54
+ # Omit a limit to disable it (infinite). All limits are optional.
55
+
56
+ tool_rate_limits:
57
+ # API-based tools with external service limits
58
+ batch_web_search:
59
+ requests_per_minute: 9000
60
+ burst_limit: 35
61
+
62
+ url_crawler:
63
+ requests_per_minute: 9000
64
+ burst_limit: 60
65
+
66
+ document_qa:
67
+ requests_per_minute: 15000
68
+ burst_limit: 150
69
+
70
+ document_extract:
71
+ requests_per_minute: 15000
72
+ burst_limit: 150
73
+
deepdiver_v2/src/utils/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
2
+ #!/usr/bin/env python3
3
+
4
+ from .status_codes import JsonRpcErr
5
+
6
+ __all__ = [
7
+ 'JsonRpcErr',
8
+ ]
deepdiver_v2/src/utils/status_codes.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
2
+ #!/usr/bin/env python3
3
+ from enum import IntEnum
4
+
5
class JsonRpcErr(IntEnum):
    """JSON-RPC 2.0 error codes used by the MCP server.

    The -32700..-32600 codes are defined by the JSON-RPC 2.0 specification.
    REQUEST_TIMEOUT uses -32000 from the spec's reserved
    implementation-defined server-error range (-32000..-32099).
    """
    PARSE_ERROR = -32700       # invalid JSON was received
    INVALID_REQUEST = -32600   # payload is not a valid Request object
    METHOD_NOT_FOUND = -32601  # method does not exist / is unavailable
    INVALID_PARAMS = -32602    # invalid method parameters
    INTERNAL_ERROR = -32603    # internal JSON-RPC error
    REQUEST_TIMEOUT = -32000   # server-defined: request or client timed out
12
+
deepdiver_v2/src/workspace/__init__.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
2
+ """
3
+ Workspace management module for the DeepDiver Multi-Agent System.
4
+
5
+ This module provides local workspace management capabilities that don't require
6
+ external dependencies like E2B. Each chat session gets its own isolated workspace
7
+ directory for file operations and data persistence.
8
+ """
9
+
10
+ from .local_workspace_manager import (
11
+ LocalWorkspaceManager,
12
+ WorkspaceInfo,
13
+ WorkspaceStatus,
14
+ get_workspace_manager,
15
+ initialize_workspace_manager,
16
+ shutdown_workspace_manager
17
+ )
18
+
19
+ __all__ = [
20
+ 'LocalWorkspaceManager',
21
+ 'WorkspaceInfo',
22
+ 'WorkspaceStatus',
23
+ 'get_workspace_manager',
24
+ 'initialize_workspace_manager',
25
+ 'shutdown_workspace_manager'
26
+ ]
deepdiver_v2/src/workspace/local_workspace_manager.py ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
2
+ """
3
+ Local Workspace Manager for Multi-Agent System
4
+
5
+ This module provides session-based workspace management using local directories.
6
+ Each chat session gets its own isolated workspace directory that persists
7
+ throughout the conversation and can be cleaned up when the session ends.
8
+
9
+ Features:
10
+ - Session-based workspace lifecycle management
11
+ - Local directory isolation per session
12
+ - File operations within session workspaces
13
+ - Integration with existing MCP tools
14
+ - Comprehensive error handling and logging
15
+ """
16
+
17
+ import shutil
18
+ import logging
19
+ from typing import Dict, Optional, Any, List, Union
20
+ from datetime import datetime, timedelta
21
+ from pathlib import Path
22
+ from dataclasses import dataclass, field
23
+ from enum import Enum
24
+ import json
25
+
26
+ # Configure logging
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
class WorkspaceStatus(Enum):
    """Workspace lifecycle status.

    Normal flow: CREATING -> ACTIVE -> DESTROYING -> DESTROYED,
    with ERROR set when creation or teardown fails.
    """
    CREATING = "creating"      # directory is being provisioned
    ACTIVE = "active"          # workspace is ready for file operations
    DESTROYING = "destroying"  # teardown in progress
    DESTROYED = "destroyed"    # directory removed; no longer tracked
    ERROR = "error"            # creation/teardown failed (see error_message)
37
+
38
+
39
@dataclass
class WorkspaceInfo:
    """Information about a workspace instance.

    Tracks identity, location, lifecycle status and activity timestamps for
    one session's workspace; persisted as JSON (via ``to_dict``) to the
    ``.workspace_metadata.json`` file inside the workspace directory.
    """
    workspace_id: str            # directory name under the base workspace dir
    session_id: str              # owning chat session
    workspace_path: Path         # path to this workspace's directory
    created_at: datetime
    last_activity: datetime      # refreshed on each get_workspace() lookup
    status: WorkspaceStatus
    workspace_files: List[str] = field(default_factory=list)  # relative file paths
    metadata: Dict[str, Any] = field(default_factory=dict)    # caller-supplied extras
    error_message: Optional[str] = None                       # set when status == ERROR

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a JSON-safe dictionary for serialization.

        Path and datetime values are rendered as strings; status as its
        string value.
        """
        return {
            "workspace_id": self.workspace_id,
            "session_id": self.session_id,
            "workspace_path": str(self.workspace_path),
            "created_at": self.created_at.isoformat(),
            "last_activity": self.last_activity.isoformat(),
            "status": self.status.value,
            "workspace_files": self.workspace_files,
            "metadata": self.metadata,
            "error_message": self.error_message
        }
65
+
66
+
67
class LocalWorkspaceManager:
    """
    Manages local workspaces for multi-agent chat sessions.

    Each chat session gets its own isolated workspace directory that persists
    throughout the conversation. Workspaces are automatically managed
    with cleanup capabilities.

    Lifecycle: CREATING -> ACTIVE -> DESTROYING -> DESTROYED (ERROR on
    failure). Per-workspace metadata is persisted to a
    ``.workspace_metadata.json`` file inside each workspace directory so
    state survives manager restarts.
    """

    def __init__(
        self,
        base_workspace_dir: str = "workspaces",
        default_timeout: int = 86400,  # 24 hours default
        cleanup_on_exit: bool = False  # Don't auto-cleanup by default
    ):
        """
        Initialize the workspace manager.

        Args:
            base_workspace_dir: Base directory for all workspaces
            default_timeout: Default workspace timeout in seconds
            cleanup_on_exit: Whether to cleanup workspaces on manager shutdown
        """
        self.base_workspace_dir = Path(base_workspace_dir)
        # FIX: parents=True so a nested base path (e.g. "var/run/workspaces")
        # does not raise when intermediate directories are missing;
        # exist_ok alone only tolerates the leaf already existing.
        self.base_workspace_dir.mkdir(parents=True, exist_ok=True)
        self.default_timeout = default_timeout
        self.cleanup_on_exit = cleanup_on_exit

        # Active workspaces by session ID
        self.workspaces: Dict[str, WorkspaceInfo] = {}

        # Re-attach to any workspaces left behind by a previous process
        self._load_existing_workspaces()

        logger.info(f"LocalWorkspaceManager initialized with base_dir={base_workspace_dir}")

    def _load_existing_workspaces(self):
        """Load existing workspaces from metadata files.

        Scans each directory under the base dir for a
        ``.workspace_metadata.json`` file and re-registers the workspace.
        Unreadable metadata is logged and skipped (best effort).
        """
        try:
            for workspace_dir in self.base_workspace_dir.iterdir():
                if workspace_dir.is_dir():
                    metadata_file = workspace_dir / ".workspace_metadata.json"
                    if metadata_file.exists():
                        try:
                            with open(metadata_file, 'r') as f:
                                data = json.load(f)

                            workspace_info = WorkspaceInfo(
                                workspace_id=data["workspace_id"],
                                session_id=data["session_id"],
                                workspace_path=Path(data["workspace_path"]),
                                created_at=datetime.fromisoformat(data["created_at"]),
                                last_activity=datetime.fromisoformat(data["last_activity"]),
                                status=WorkspaceStatus(data["status"]),
                                workspace_files=data.get("workspace_files", []),
                                metadata=data.get("metadata", {}),
                                error_message=data.get("error_message")
                            )

                            self.workspaces[workspace_info.session_id] = workspace_info
                            logger.info(f"Loaded existing workspace for session {workspace_info.session_id}")

                        except Exception as e:
                            logger.warning(f"Failed to load workspace metadata from {metadata_file}: {e}")

        except Exception as e:
            logger.warning(f"Failed to load existing workspaces: {e}")

    @staticmethod
    def _save_workspace_metadata(workspace_info: WorkspaceInfo):
        """Save workspace metadata to disk (best effort; failures are logged)."""
        try:
            metadata_file = workspace_info.workspace_path / ".workspace_metadata.json"
            with open(metadata_file, 'w') as f:
                json.dump(workspace_info.to_dict(), f, indent=2)
        except Exception as e:
            logger.error(f"Failed to save workspace metadata: {e}")

    def create_workspace(
        self,
        session_id: str,
        workspace_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None
    ) -> WorkspaceInfo:
        """
        Create a new workspace for a chat session.

        Args:
            session_id: Unique session identifier
            workspace_id: Optional custom workspace ID (defaults to session_id)
            metadata: Additional metadata to store with the workspace

        Returns:
            WorkspaceInfo: Information about the created workspace

        Raises:
            ValueError: If session already has an active workspace
            Exception: If workspace creation fails
        """
        if session_id in self.workspaces:
            raise ValueError(f"Session {session_id} already has an active workspace")

        workspace_id = workspace_id or session_id
        workspace_path = self.base_workspace_dir / workspace_id

        logger.info(f"Creating workspace for session {session_id} at {workspace_path}")

        # Create workspace info with creating status
        workspace_info = WorkspaceInfo(
            workspace_id=workspace_id,
            session_id=session_id,
            workspace_path=workspace_path,
            created_at=datetime.now(),
            last_activity=datetime.now(),
            status=WorkspaceStatus.CREATING,
            metadata=metadata or {}
        )

        try:
            # Create workspace directory (exist_ok: reuse a stale directory)
            workspace_path.mkdir(parents=True, exist_ok=True)

            # Standard layout used by tools: downloads / outputs / temp
            (workspace_path / "downloads").mkdir(exist_ok=True)
            (workspace_path / "outputs").mkdir(exist_ok=True)
            (workspace_path / "temp").mkdir(exist_ok=True)

            # Mark ready and register before persisting metadata
            workspace_info.status = WorkspaceStatus.ACTIVE
            self.workspaces[session_id] = workspace_info

            # Save metadata
            self._save_workspace_metadata(workspace_info)

            # Update workspace files list
            self._update_workspace_files(session_id)

            logger.info(f"Workspace created successfully: {workspace_path} for session {session_id}")
            return workspace_info

        except Exception as e:
            workspace_info.status = WorkspaceStatus.ERROR
            workspace_info.error_message = str(e)
            logger.error(f"Failed to create workspace for session {session_id}: {e}")
            raise

    def get_workspace(self, session_id: str) -> Optional[WorkspaceInfo]:
        """Get workspace info for a session, refreshing its last-activity time."""
        workspace_info = self.workspaces.get(session_id)
        if workspace_info:
            # Touch: every lookup counts as activity (keeps cleanup at bay)
            workspace_info.last_activity = datetime.now()
            self._save_workspace_metadata(workspace_info)
        return workspace_info

    def get_workspace_path(self, session_id: str) -> Optional[Path]:
        """Get workspace path for a session, or None if it has no workspace."""
        workspace_info = self.get_workspace(session_id)
        return workspace_info.workspace_path if workspace_info else None

    def list_sessions(self) -> List[str]:
        """List all active session IDs."""
        return list(self.workspaces.keys())

    def destroy_workspace(self, session_id: str, force: bool = False) -> bool:
        """
        Destroy a workspace for a session.

        Args:
            session_id: Session identifier
            force: Force removal even if files exist

        Returns:
            bool: True if destroyed successfully
        """
        if session_id not in self.workspaces:
            logger.warning(f"No workspace found for session {session_id}")
            return False

        workspace_info = self.workspaces[session_id]

        try:
            logger.info(f"Destroying workspace for session {session_id}")
            workspace_info.status = WorkspaceStatus.DESTROYING

            # Remove workspace directory
            if workspace_info.workspace_path.exists():
                if force or not any(workspace_info.workspace_path.iterdir()):
                    shutil.rmtree(workspace_info.workspace_path)
                    logger.info(f"Workspace directory removed: {workspace_info.workspace_path}")
                else:
                    logger.warning(f"Workspace contains files, use force=True to remove: {workspace_info.workspace_path}")
                    # BUGFIX: the workspace was NOT destroyed — restore ACTIVE
                    # so it isn't stranded in DESTROYING state.
                    workspace_info.status = WorkspaceStatus.ACTIVE
                    return False

            # Update status and remove from active workspaces
            workspace_info.status = WorkspaceStatus.DESTROYED
            del self.workspaces[session_id]

            logger.info(f"Workspace destroyed for session {session_id}")
            return True

        except Exception as e:
            workspace_info.status = WorkspaceStatus.ERROR
            workspace_info.error_message = str(e)
            logger.error(f"Failed to destroy workspace for session {session_id}: {e}")
            return False

    def write_file(self, session_id: str, file_path: str, content: Union[str, bytes]) -> bool:
        """Write content to a file in the workspace.

        Creates intermediate directories as needed. Text content is written
        UTF-8; bytes are written verbatim. Returns True on success.
        """
        workspace_info = self.get_workspace(session_id)
        if not workspace_info:
            logger.error(f"No workspace found for session {session_id}")
            return False

        try:
            full_path = workspace_info.workspace_path / file_path
            full_path.parent.mkdir(parents=True, exist_ok=True)

            if isinstance(content, str):
                with open(full_path, 'w', encoding='utf-8') as f:
                    f.write(content)
            else:
                with open(full_path, 'wb') as f:
                    f.write(content)

            # Update workspace files list
            self._update_workspace_files(session_id)

            logger.info(f"File written to workspace: {file_path}")
            return True

        except Exception as e:
            logger.error(f"Failed to write file {file_path} in workspace {session_id}: {e}")
            return False

    def read_file(self, session_id: str, file_path: str) -> Optional[Union[str, bytes]]:
        """Read content from a file in the workspace.

        Tries UTF-8 text first and falls back to raw bytes on decode errors.
        Returns None when the workspace or file is missing, or on I/O error.
        """
        workspace_info = self.get_workspace(session_id)
        if not workspace_info:
            logger.error(f"No workspace found for session {session_id}")
            return None

        try:
            full_path = workspace_info.workspace_path / file_path

            if not full_path.exists():
                logger.error(f"File not found: {file_path}")
                return None

            # Try to read as text first
            try:
                with open(full_path, 'r', encoding='utf-8') as f:
                    return f.read()
            except UnicodeDecodeError:
                # If text reading fails, read as bytes
                with open(full_path, 'rb') as f:
                    return f.read()

        except Exception as e:
            logger.error(f"Failed to read file {file_path} from workspace {session_id}: {e}")
            return None

    def list_files(self, session_id: str, directory: str = "") -> List[str]:
        """List files (recursively) in the workspace directory.

        Hidden files (dot-prefixed names, e.g. the metadata file) are
        excluded. Paths are returned relative to the workspace root, sorted.
        """
        workspace_info = self.get_workspace(session_id)
        if not workspace_info:
            logger.error(f"No workspace found for session {session_id}")
            return []

        try:
            target_path = workspace_info.workspace_path / directory if directory else workspace_info.workspace_path

            if not target_path.exists():
                return []

            files = []
            for item in target_path.rglob('*'):
                if item.is_file() and not item.name.startswith('.'):
                    rel_path = item.relative_to(workspace_info.workspace_path)
                    files.append(str(rel_path))

            return sorted(files)

        except Exception as e:
            logger.error(f"Failed to list files in workspace {session_id}: {e}")
            return []

    def _update_workspace_files(self, session_id: str):
        """Refresh the cached file list for a session and persist metadata."""
        try:
            workspace_info = self.workspaces.get(session_id)
            if workspace_info:
                workspace_info.workspace_files = self.list_files(session_id)
                self._save_workspace_metadata(workspace_info)
        except Exception as e:
            # FIX: use plain %s placeholders — the previous "{%s}" format
            # wrapped every value in literal braces in the log output.
            logger.debug("Failed to update workspace files for session %s: %s", session_id, e)

    def cleanup_expired_workspaces(self, max_age_hours: int = 24):
        """Force-destroy workspaces inactive for longer than max_age_hours."""
        cutoff_time = datetime.now() - timedelta(hours=max_age_hours)
        # Collect first: destroy_workspace mutates self.workspaces
        expired_sessions = [
            session_id
            for session_id, workspace_info in self.workspaces.items()
            if workspace_info.last_activity < cutoff_time
        ]

        for session_id in expired_sessions:
            logger.info(f"Cleaning up expired workspace for session {session_id}")
            self.destroy_workspace(session_id, force=True)

    def shutdown(self):
        """Shutdown the workspace manager.

        With cleanup_on_exit, force-destroys every workspace; otherwise just
        persists metadata so workspaces can be reloaded on the next start.
        """
        logger.info("Shutting down LocalWorkspaceManager...")

        if self.cleanup_on_exit:
            # Clean up all workspaces (copy keys: destroy mutates the dict)
            session_ids = list(self.workspaces.keys())
            for session_id in session_ids:
                self.destroy_workspace(session_id, force=True)
        else:
            # Just save metadata for all workspaces
            for workspace_info in self.workspaces.values():
                self._save_workspace_metadata(workspace_info)

        logger.info("LocalWorkspaceManager shutdown complete")
393
+
394
+
395
+ # Global instance
396
+ _workspace_manager: Optional[LocalWorkspaceManager] = None
397
+
398
+
399
def get_workspace_manager(base_workspace_dir: str = "workspaces") -> LocalWorkspaceManager:
    """Return the process-wide workspace manager, creating it lazily.

    The base directory argument is only honored on first call; later calls
    return the already-created singleton unchanged.
    """
    global _workspace_manager
    manager = _workspace_manager
    if manager is None:
        manager = LocalWorkspaceManager(base_workspace_dir)
        _workspace_manager = manager
    return manager
405
+
406
+
407
def initialize_workspace_manager(base_workspace_dir: str = "workspaces", **kwargs) -> LocalWorkspaceManager:
    """Replace the global workspace manager with a freshly configured one.

    Extra keyword arguments are forwarded to the LocalWorkspaceManager
    constructor (e.g. default_timeout, cleanup_on_exit).
    """
    global _workspace_manager
    manager = LocalWorkspaceManager(base_workspace_dir, **kwargs)
    _workspace_manager = manager
    logger.info(f"Workspace manager initialized with base directory: {base_workspace_dir}")
    return manager
413
+
414
+
415
def shutdown_workspace_manager():
    """Shut down and clear the global workspace manager, if one exists."""
    global _workspace_manager
    manager = _workspace_manager
    if manager:
        manager.shutdown()
        _workspace_manager = None
420
+ _workspace_manager = None
docs/openpangu-deepdiver-v2-tech-report.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9ff32d4bd7190ea26049ef1f5d009b9861c652a980623a5f5cd043e7dcec2a4
3
+ size 39847395
generation_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "do_sample": true,
4
+ "bos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "eos_token_id": 45892,
7
+ "temperature": 1.0,
8
+ "top_k": 0,
9
+ "top_p": 0.8,
10
+ "transformers_version": "4.53.2"
11
+ }
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b8ec6cd94b1921560d37755c7c0c08280c1f9123195d14d352ad0607788f7f6
3
+ size 4926842416
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc05d80f52ce44d1433a942e867bf61ea49eb1eebb0700312f76d6b3a3dee917
3
+ size 4991686576
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ed37f38214c755b51bea06a71e154c9ea27670eb3b8506c06addcfbea2066f2
3
+ size 4886853760
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0145e255ba965ed0e75164a037b9a0137c5e5c12ffc42463ff82568054fe0186
3
+ size 1256456320
model.safetensors.index.json ADDED
@@ -0,0 +1,486 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 16061784576
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00004-of-00004.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
13
+ "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
14
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
15
+ "model.layers.0.self_attn.o_proj.bias": "model-00001-of-00004.safetensors",
16
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
17
+ "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
18
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
19
+ "model.layers.0.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
20
+ "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
21
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
22
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
23
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
24
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
25
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
26
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
27
+ "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
28
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
29
+ "model.layers.1.self_attn.o_proj.bias": "model-00001-of-00004.safetensors",
30
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
31
+ "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
32
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
33
+ "model.layers.1.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
34
+ "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
35
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
36
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
37
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
38
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
39
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
40
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
41
+ "model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
42
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
43
+ "model.layers.10.self_attn.o_proj.bias": "model-00002-of-00004.safetensors",
44
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
45
+ "model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
46
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
47
+ "model.layers.10.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
48
+ "model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
49
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
50
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
51
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
52
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
53
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
54
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
55
+ "model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
56
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
57
+ "model.layers.11.self_attn.o_proj.bias": "model-00002-of-00004.safetensors",
58
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
59
+ "model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
60
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
61
+ "model.layers.11.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
62
+ "model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
63
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
64
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
65
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
66
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
67
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
68
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
69
+ "model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
70
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
71
+ "model.layers.12.self_attn.o_proj.bias": "model-00002-of-00004.safetensors",
72
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
73
+ "model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
74
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
75
+ "model.layers.12.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
76
+ "model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
77
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
78
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
79
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
80
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
81
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
82
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
83
+ "model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
84
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
85
+ "model.layers.13.self_attn.o_proj.bias": "model-00002-of-00004.safetensors",
86
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
87
+ "model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
88
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
89
+ "model.layers.13.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
90
+ "model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
91
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
92
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
93
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
94
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
95
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
96
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
97
+ "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
98
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
99
+ "model.layers.14.self_attn.o_proj.bias": "model-00002-of-00004.safetensors",
100
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
101
+ "model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
102
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
103
+ "model.layers.14.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
104
+ "model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
105
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
106
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
107
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
108
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
109
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
110
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
111
+ "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
112
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
113
+ "model.layers.15.self_attn.o_proj.bias": "model-00002-of-00004.safetensors",
114
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
115
+ "model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
116
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
117
+ "model.layers.15.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
118
+ "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
119
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
120
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
121
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
122
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
123
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
124
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
125
+ "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
126
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
127
+ "model.layers.16.self_attn.o_proj.bias": "model-00002-of-00004.safetensors",
128
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
129
+ "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
130
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
131
+ "model.layers.16.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
132
+ "model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
133
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
134
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
135
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
136
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
137
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
138
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
139
+ "model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
140
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
141
+ "model.layers.17.self_attn.o_proj.bias": "model-00002-of-00004.safetensors",
142
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
143
+ "model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
144
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
145
+ "model.layers.17.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
146
+ "model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
147
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
148
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
149
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
150
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
151
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
152
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
153
+ "model.layers.18.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
154
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
155
+ "model.layers.18.self_attn.o_proj.bias": "model-00002-of-00004.safetensors",
156
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
157
+ "model.layers.18.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
158
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
159
+ "model.layers.18.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
160
+ "model.layers.18.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
161
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
162
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
163
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
164
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
165
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
166
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
167
+ "model.layers.19.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
168
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
169
+ "model.layers.19.self_attn.o_proj.bias": "model-00002-of-00004.safetensors",
170
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
171
+ "model.layers.19.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
172
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
173
+ "model.layers.19.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
174
+ "model.layers.19.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
175
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
176
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
177
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
178
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
179
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
180
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
181
+ "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
182
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
183
+ "model.layers.2.self_attn.o_proj.bias": "model-00001-of-00004.safetensors",
184
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
185
+ "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
186
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
187
+ "model.layers.2.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
188
+ "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
189
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
190
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
191
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
192
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
193
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
194
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
195
+ "model.layers.20.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
196
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
197
+ "model.layers.20.self_attn.o_proj.bias": "model-00002-of-00004.safetensors",
198
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
199
+ "model.layers.20.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
200
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
201
+ "model.layers.20.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
202
+ "model.layers.20.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
203
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
204
+ "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
205
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
206
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
207
+ "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
208
+ "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
209
+ "model.layers.21.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
210
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
211
+ "model.layers.21.self_attn.o_proj.bias": "model-00002-of-00004.safetensors",
212
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
213
+ "model.layers.21.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
214
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
215
+ "model.layers.21.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
216
+ "model.layers.21.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
217
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
218
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
219
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
220
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
221
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
222
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
223
+ "model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
224
+ "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
225
+ "model.layers.22.self_attn.o_proj.bias": "model-00003-of-00004.safetensors",
226
+ "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
227
+ "model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
228
+ "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
229
+ "model.layers.22.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
230
+ "model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
231
+ "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
232
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
233
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
234
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
235
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
236
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
237
+ "model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
238
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
239
+ "model.layers.23.self_attn.o_proj.bias": "model-00003-of-00004.safetensors",
240
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
241
+ "model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
242
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
243
+ "model.layers.23.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
244
+ "model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
245
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
246
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
247
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
248
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
249
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
250
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
251
+ "model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
252
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
253
+ "model.layers.24.self_attn.o_proj.bias": "model-00003-of-00004.safetensors",
254
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
255
+ "model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
256
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
257
+ "model.layers.24.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
258
+ "model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
259
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
260
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
261
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
262
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
263
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
264
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
265
+ "model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
266
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
267
+ "model.layers.25.self_attn.o_proj.bias": "model-00003-of-00004.safetensors",
268
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
269
+ "model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
270
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
271
+ "model.layers.25.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
272
+ "model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
273
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
274
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
275
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
276
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
277
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
278
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
279
+ "model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
280
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
281
+ "model.layers.26.self_attn.o_proj.bias": "model-00003-of-00004.safetensors",
282
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
283
+ "model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
284
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
285
+ "model.layers.26.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
286
+ "model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
287
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
288
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
289
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
290
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
291
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
292
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
293
+ "model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
294
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
295
+ "model.layers.27.self_attn.o_proj.bias": "model-00003-of-00004.safetensors",
296
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
297
+ "model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
298
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
299
+ "model.layers.27.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
300
+ "model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
301
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
302
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
303
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
304
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
305
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
306
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
307
+ "model.layers.28.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
308
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
309
+ "model.layers.28.self_attn.o_proj.bias": "model-00003-of-00004.safetensors",
310
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
311
+ "model.layers.28.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
312
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
313
+ "model.layers.28.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
314
+ "model.layers.28.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
315
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
316
+ "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
317
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
318
+ "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
319
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
320
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
321
+ "model.layers.29.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
322
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
323
+ "model.layers.29.self_attn.o_proj.bias": "model-00003-of-00004.safetensors",
324
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
325
+ "model.layers.29.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
326
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
327
+ "model.layers.29.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
328
+ "model.layers.29.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
329
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
330
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
331
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
332
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
333
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
334
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
335
+ "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
336
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
337
+ "model.layers.3.self_attn.o_proj.bias": "model-00001-of-00004.safetensors",
338
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
339
+ "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
340
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
341
+ "model.layers.3.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
342
+ "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
343
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
344
+ "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
345
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
346
+ "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
347
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
348
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
349
+ "model.layers.30.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
350
+ "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
351
+ "model.layers.30.self_attn.o_proj.bias": "model-00003-of-00004.safetensors",
352
+ "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
353
+ "model.layers.30.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
354
+ "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
355
+ "model.layers.30.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
356
+ "model.layers.30.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
357
+ "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
358
+ "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
359
+ "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
360
+ "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
361
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
362
+ "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
363
+ "model.layers.31.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
364
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
365
+ "model.layers.31.self_attn.o_proj.bias": "model-00003-of-00004.safetensors",
366
+ "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
367
+ "model.layers.31.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
368
+ "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
369
+ "model.layers.31.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
370
+ "model.layers.31.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
371
+ "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
372
+ "model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
373
+ "model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
374
+ "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
375
+ "model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
376
+ "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
377
+ "model.layers.32.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
378
+ "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
379
+ "model.layers.32.self_attn.o_proj.bias": "model-00003-of-00004.safetensors",
380
+ "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
381
+ "model.layers.32.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
382
+ "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
383
+ "model.layers.32.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
384
+ "model.layers.32.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
385
+ "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
386
+ "model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
387
+ "model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
388
+ "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
389
+ "model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
390
+ "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
391
+ "model.layers.33.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
392
+ "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
393
+ "model.layers.33.self_attn.o_proj.bias": "model-00003-of-00004.safetensors",
394
+ "model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
395
+ "model.layers.33.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
396
+ "model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
397
+ "model.layers.33.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
398
+ "model.layers.33.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
399
+ "model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
400
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
401
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
402
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
403
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
404
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
405
+ "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
406
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
407
+ "model.layers.4.self_attn.o_proj.bias": "model-00001-of-00004.safetensors",
408
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
409
+ "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
410
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
411
+ "model.layers.4.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
412
+ "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
413
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
414
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
415
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
416
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
417
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
418
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
419
+ "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
420
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
421
+ "model.layers.5.self_attn.o_proj.bias": "model-00001-of-00004.safetensors",
422
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
423
+ "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
424
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
425
+ "model.layers.5.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
426
+ "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
427
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
428
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
429
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
430
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
431
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
432
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
433
+ "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
434
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
435
+ "model.layers.6.self_attn.o_proj.bias": "model-00001-of-00004.safetensors",
436
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
437
+ "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
438
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
439
+ "model.layers.6.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
440
+ "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
441
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
442
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
443
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
444
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
445
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
446
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
447
+ "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
448
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
449
+ "model.layers.7.self_attn.o_proj.bias": "model-00001-of-00004.safetensors",
450
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
451
+ "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
452
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
453
+ "model.layers.7.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
454
+ "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
455
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
456
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
457
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
458
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
459
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
460
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
461
+ "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
462
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
463
+ "model.layers.8.self_attn.o_proj.bias": "model-00001-of-00004.safetensors",
464
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
465
+ "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
466
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
467
+ "model.layers.8.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
468
+ "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
469
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
470
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
471
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
472
+ "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
473
+ "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
474
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
475
+ "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
476
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
477
+ "model.layers.9.self_attn.o_proj.bias": "model-00001-of-00004.safetensors",
478
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
479
+ "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
480
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
481
+ "model.layers.9.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
482
+ "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
483
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
484
+ "model.norm.weight": "model-00003-of-00004.safetensors"
485
+ }
486
+ }
modeling_openpangu_dense.py ADDED
@@ -0,0 +1,585 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
2
+ # This file was automatically generated from modular_openpangu_dense.py.
3
+ # Do NOT edit this file manually as any edits will be overwritten by the generation of
4
+ # the file from the modular. If any change should be done, please apply the change to the
5
+ # modular_openpangu_dense.py file directly. One of our CI enforces this.
6
+ # 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
7
+
8
+ # coding=utf-8
9
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
10
+ # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
11
+ #
12
+ # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
13
+ # and OPT implementations in this library. It has been modified from its
14
+ # original forms to accommodate minor architectural differences compared
15
+ # to GPT-NeoX and OPT used by the Meta AI team that trained the model.
16
+ #
17
+ # Licensed under the Apache License, Version 2.0 (the "License");
18
+ # you may not use this file except in compliance with the License.
19
+ # You may obtain a copy of the License at
20
+ #
21
+ # http://www.apache.org/licenses/LICENSE-2.0
22
+ #
23
+ # Unless required by applicable law or agreed to in writing, software
24
+ # distributed under the License is distributed on an "AS IS" BASIS,
25
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26
+ # See the License for the specific language governing permissions and
27
+ # limitations under the License.
28
+
29
+ from typing import Callable, Optional, Union
30
+
31
+ import torch
32
+ from torch import nn
33
+
34
+ import torch_npu
35
+ from torch_npu.contrib import transfer_to_npu
36
+ if "910" in torch.npu.get_device_name():
37
+ NPU_ATTN_INFR = True
38
+ print("[INFO] torch_npu detected. Using NPU fused infer attention.")
39
+ else:
40
+ NPU_ATTN_INFR = False
41
+
42
+ from transformers.activations import ACT2FN
43
+ from transformers.cache_utils import Cache, DynamicCache
44
+ from transformers.generation import GenerationMixin
45
+ from transformers.masking_utils import create_causal_mask
46
+ from transformers.modeling_flash_attention_utils import FlashAttentionKwargs
47
+ from transformers.modeling_layers import GradientCheckpointingLayer
48
+ from transformers.modeling_outputs import (
49
+ BaseModelOutputWithPast,
50
+ CausalLMOutputWithPast,
51
+ SequenceClassifierOutputWithPast,
52
+ )
53
+ from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
54
+ from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
55
+ from transformers.processing_utils import Unpack
56
+ from transformers.utils import LossKwargs, auto_docstring, can_return_tuple, logging
57
+ from .configuration_openpangu_dense import PanguEmbeddedConfig
58
+
59
+
60
+ logger = logging.get_logger(__name__)
61
+
62
+
63
+ class PanguEmbeddedRMSNorm(nn.Module):
64
+ def __init__(self, hidden_size, eps=1e-6):
65
+ """
66
+ PanguEmbeddedRMSNorm is equivalent to T5LayerNorm
67
+ """
68
+ super().__init__()
69
+ self.weight = nn.Parameter(torch.ones(hidden_size))
70
+ self.variance_epsilon = eps
71
+
72
+ def forward(self, hidden_states):
73
+ input_dtype = hidden_states.dtype
74
+ hidden_states = hidden_states.to(torch.float32)
75
+ variance = hidden_states.pow(2).mean(-1, keepdim=True)
76
+ hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
77
+ return self.weight * hidden_states.to(input_dtype)
78
+
79
+ def extra_repr(self):
80
+ return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}"
81
+
82
+
83
+ class PanguEmbeddedRotaryEmbedding(nn.Module):
84
+ def __init__(self, config: PanguEmbeddedConfig, device=None):
85
+ super().__init__()
86
+ # BC: "rope_type" was originally "type"
87
+ if hasattr(config, "rope_scaling") and config.rope_scaling is not None:
88
+ self.rope_type = config.rope_scaling.get("rope_type", config.rope_scaling.get("type"))
89
+ else:
90
+ self.rope_type = "default"
91
+ self.max_seq_len_cached = config.max_position_embeddings
92
+ self.original_max_seq_len = config.max_position_embeddings
93
+
94
+ self.config = config
95
+ self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type]
96
+
97
+ inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device)
98
+ self.register_buffer("inv_freq", inv_freq, persistent=False)
99
+ self.original_inv_freq = self.inv_freq
100
+
101
+ @torch.no_grad()
102
+ @dynamic_rope_update # power user: used with advanced RoPE types (e.g. dynamic rope)
103
+ def forward(self, x, position_ids):
104
+ inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1).to(x.device)
105
+ position_ids_expanded = position_ids[:, None, :].float()
106
+
107
+ device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
108
+ with torch.autocast(device_type=device_type, enabled=False): # Force float32
109
+ freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
110
+ emb = torch.cat((freqs, freqs), dim=-1)
111
+ cos = emb.cos() * self.attention_scaling
112
+ sin = emb.sin() * self.attention_scaling
113
+
114
+ return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)
115
+
116
+
117
+ def rotate_half(x):
118
+ """Rotates half the hidden dims of the input."""
119
+ x1 = x[..., : x.shape[-1] // 2]
120
+ x2 = x[..., x.shape[-1] // 2 :]
121
+ return torch.cat((-x2, x1), dim=-1)
122
+
123
+
124
+ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
125
+ """Applies Rotary Position Embedding to the query and key tensors.
126
+
127
+ Args:
128
+ q (`torch.Tensor`): The query tensor.
129
+ k (`torch.Tensor`): The key tensor.
130
+ cos (`torch.Tensor`): The cosine part of the rotary embedding.
131
+ sin (`torch.Tensor`): The sine part of the rotary embedding.
132
+ position_ids (`torch.Tensor`, *optional*):
133
+ Deprecated and unused.
134
+ unsqueeze_dim (`int`, *optional*, defaults to 1):
135
+ The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
136
+ sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
137
+ that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
138
+ k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
139
+ cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
140
+ the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
141
+ Returns:
142
+ `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
143
+ """
144
+ cos = cos.unsqueeze(unsqueeze_dim)
145
+ sin = sin.unsqueeze(unsqueeze_dim)
146
+ q_embed = (q * cos) + (rotate_half(q) * sin)
147
+ k_embed = (k * cos) + (rotate_half(k) * sin)
148
+ return q_embed, k_embed
149
+
150
+
151
+ class PanguEmbeddedMLP(nn.Module):
152
+ def __init__(self, config):
153
+ super().__init__()
154
+ self.config = config
155
+ self.hidden_size = config.hidden_size
156
+ self.intermediate_size = config.intermediate_size
157
+ self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
158
+ self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
159
+ self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False)
160
+ self.act_fn = ACT2FN[config.hidden_act]
161
+
162
+ def forward(self, x):
163
+ down_proj = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
164
+ return down_proj
165
+
166
+
167
+ def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
168
+ """
169
+ This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
170
+ num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
171
+ """
172
+ batch, num_key_value_heads, slen, head_dim = hidden_states.shape
173
+ if n_rep == 1:
174
+ return hidden_states
175
+ hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads, n_rep, slen, head_dim)
176
+ return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
177
+
178
+
179
+ def eager_attention_forward(
180
+ module: nn.Module,
181
+ query: torch.Tensor,
182
+ key: torch.Tensor,
183
+ value: torch.Tensor,
184
+ attention_mask: Optional[torch.Tensor],
185
+ scaling: float,
186
+ dropout: float = 0.0,
187
+ **kwargs,
188
+ ):
189
+ key_states = repeat_kv(key, module.num_key_value_groups)
190
+ value_states = repeat_kv(value, module.num_key_value_groups)
191
+
192
+ attn_weights = torch.matmul(query, key_states.transpose(2, 3)) * scaling
193
+ if attention_mask is not None:
194
+ causal_mask = attention_mask[:, :, :, : key_states.shape[-2]]
195
+ attn_weights = attn_weights + causal_mask
196
+
197
+ attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype)
198
+ attn_weights = nn.functional.dropout(attn_weights, p=dropout, training=module.training)
199
+ attn_output = torch.matmul(attn_weights, value_states)
200
+ attn_output = attn_output.transpose(1, 2).contiguous()
201
+
202
+ return attn_output, attn_weights
203
+
204
+
205
+ class PanguEmbeddedAttention(nn.Module):
206
+ """Multi-headed attention from 'Attention Is All You Need' paper"""
207
+
208
+ def __init__(self, config: PanguEmbeddedConfig, layer_idx: int):
209
+ super().__init__()
210
+ self.config = config
211
+ self.layer_idx = layer_idx
212
+ self.head_dim = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
213
+ self.num_heads = config.num_attention_heads
214
+ self.num_key_value_heads = config.num_key_value_heads
215
+ self.num_key_value_groups = config.num_attention_heads // config.num_key_value_heads
216
+ self.scaling = self.head_dim**-0.5
217
+ self.attention_dropout = config.attention_dropout
218
+ self.is_causal = True
219
+
220
+ self.q_proj = nn.Linear(config.hidden_size, config.num_attention_heads * self.head_dim, bias=config.bias)
221
+ self.k_proj = nn.Linear(config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.bias)
222
+ self.v_proj = nn.Linear(config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.bias)
223
+ self.o_proj = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.bias)
224
+
225
+ def forward(
226
+ self,
227
+ hidden_states: torch.Tensor,
228
+ position_embeddings: tuple[torch.Tensor, torch.Tensor],
229
+ attention_mask: Optional[torch.Tensor],
230
+ past_key_value: Optional[Cache] = None,
231
+ cache_position: Optional[torch.LongTensor] = None,
232
+ **kwargs: Unpack[FlashAttentionKwargs],
233
+ ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
234
+ input_shape = hidden_states.shape[:-1]
235
+ hidden_shape = (*input_shape, -1, self.head_dim)
236
+
237
+ query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
238
+ key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
239
+ value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)
240
+
241
+ cos, sin = position_embeddings
242
+ query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
243
+
244
+ if past_key_value is not None:
245
+ # sin and cos are specific to RoPE models; cache_position needed for the static cache
246
+ cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
247
+ key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)
248
+
249
+ attention_interface: Callable = eager_attention_forward
250
+ if self.config._attn_implementation != "eager":
251
+ attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]
252
+
253
+ if not self.training and NPU_ATTN_INFR:
254
+ q_len = input_shape[1]
255
+ if attention_mask is not None:
256
+ attention_mask = ~attention_mask.bool()
257
+ elif q_len > 1:
258
+ attention_mask = torch.triu(torch.ones([q_len, q_len]), diagonal=1).bool().unsqueeze(0).unsqueeze(0).to(query_states.device)
259
+
260
+ attn_output, _ = torch_npu.npu_fused_infer_attention_score(
261
+ query_states, key_states, value_states,
262
+ num_heads=self.num_heads, num_key_value_heads=self.num_key_value_heads,
263
+ input_layout="BNSD", atten_mask=attention_mask, scale=self.scaling)
264
+ attn_output = attn_output.transpose(1, 2)
265
+ attn_weights = None
266
+ else:
267
+ attn_output, attn_weights = attention_interface(
268
+ self,
269
+ query_states,
270
+ key_states,
271
+ value_states,
272
+ attention_mask,
273
+ dropout=0.0 if not self.training else self.attention_dropout,
274
+ scaling=self.scaling,
275
+ **kwargs,
276
+ )
277
+
278
+ attn_output = attn_output.reshape(*input_shape, -1).contiguous()
279
+ attn_output = self.o_proj(attn_output)
280
+ return attn_output, attn_weights
281
+
282
+
283
+ class PanguEmbeddedDecoderLayer(GradientCheckpointingLayer):
284
+ def __init__(self, config: PanguEmbeddedConfig, layer_idx: int):
285
+ super().__init__()
286
+ self.hidden_size = config.hidden_size
287
+ self.self_attn = PanguEmbeddedAttention(config=config, layer_idx=layer_idx)
288
+ self.mlp = PanguEmbeddedMLP(config)
289
+ self.input_layernorm = PanguEmbeddedRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
290
+ self.post_attention_layernorm = PanguEmbeddedRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
291
+
292
+ def forward(
293
+ self,
294
+ hidden_states: torch.Tensor,
295
+ attention_mask: Optional[torch.Tensor] = None,
296
+ position_ids: Optional[torch.LongTensor] = None,
297
+ past_key_value: Optional[Cache] = None,
298
+ output_attentions: Optional[bool] = False,
299
+ use_cache: Optional[bool] = False,
300
+ cache_position: Optional[torch.LongTensor] = None,
301
+ position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
302
+ **kwargs: Unpack[FlashAttentionKwargs],
303
+ ) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]:
304
+ residual = hidden_states
305
+ hidden_states = self.input_layernorm(hidden_states)
306
+
307
+ # Self Attention
308
+ hidden_states, self_attn_weights = self.self_attn(
309
+ hidden_states=hidden_states,
310
+ attention_mask=attention_mask,
311
+ position_ids=position_ids,
312
+ past_key_value=past_key_value,
313
+ output_attentions=output_attentions,
314
+ use_cache=use_cache,
315
+ cache_position=cache_position,
316
+ position_embeddings=position_embeddings,
317
+ **kwargs,
318
+ )
319
+ hidden_states = residual + hidden_states
320
+
321
+ # Fully Connected
322
+ residual = hidden_states
323
+ hidden_states = self.post_attention_layernorm(hidden_states)
324
+ hidden_states = self.mlp(hidden_states)
325
+ hidden_states = residual + hidden_states
326
+
327
+ outputs = (hidden_states,)
328
+ if output_attentions:
329
+ outputs += (self_attn_weights,)
330
+
331
+ return outputs
332
+
333
+
334
+ @auto_docstring
335
+ class PanguEmbeddedPreTrainedModel(PreTrainedModel):
336
+ config_class = PanguEmbeddedConfig
337
+ base_model_prefix = "model"
338
+ supports_gradient_checkpointing = True
339
+ _no_split_modules = ["PanguEmbeddedDecoderLayer"]
340
+ _skip_keys_device_placement = ["past_key_values"]
341
+ _supports_flash_attn_3 = True
342
+ _supports_flash_attn_2 = True
343
+ _supports_sdpa = True
344
+ _supports_flex_attn = True
345
+ _supports_cache_class = True
346
+ _supports_quantized_cache = True
347
+ _supports_static_cache = True
348
+ _supports_attention_backend = True
349
+
350
+ def _init_weights(self, module):
351
+ std = self.config.initializer_range
352
+ if isinstance(module, nn.Linear):
353
+ module.weight.data.normal_(mean=0.0, std=std)
354
+ if module.bias is not None:
355
+ module.bias.data.zero_()
356
+ elif isinstance(module, nn.Embedding):
357
+ module.weight.data.normal_(mean=0.0, std=std)
358
+ if module.padding_idx is not None:
359
+ module.weight.data[module.padding_idx].zero_()
360
+ elif isinstance(module, PanguEmbeddedRMSNorm):
361
+ module.weight.data.fill_(1.0)
362
+
363
+
364
+ @auto_docstring
365
+ class PanguEmbeddedModel(PanguEmbeddedPreTrainedModel):
366
+ def __init__(self, config: PanguEmbeddedConfig):
367
+ super().__init__(config)
368
+ self.padding_idx = config.pad_token_id
369
+ self.vocab_size = config.vocab_size
370
+
371
+ self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
372
+ self.layers = nn.ModuleList(
373
+ [PanguEmbeddedDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
374
+ )
375
+ self.norm = PanguEmbeddedRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
376
+ self.rotary_emb = PanguEmbeddedRotaryEmbedding(config=config)
377
+ self.gradient_checkpointing = False
378
+
379
+ # Initialize weights and apply final processing
380
+ self.post_init()
381
+
382
+ def get_input_embeddings(self):
383
+ return self.embed_tokens
384
+
385
+ def set_input_embeddings(self, value):
386
+ self.embed_tokens = value
387
+
388
+ @can_return_tuple
389
+ @auto_docstring
390
+ def forward(
391
+ self,
392
+ input_ids: Optional[torch.LongTensor] = None,
393
+ attention_mask: Optional[torch.Tensor] = None,
394
+ position_ids: Optional[torch.LongTensor] = None,
395
+ past_key_values: Optional[Cache] = None,
396
+ inputs_embeds: Optional[torch.FloatTensor] = None,
397
+ use_cache: Optional[bool] = None,
398
+ output_attentions: Optional[bool] = None,
399
+ output_hidden_states: Optional[bool] = None,
400
+ cache_position: Optional[torch.LongTensor] = None,
401
+ **flash_attn_kwargs: Unpack[FlashAttentionKwargs],
402
+ ) -> BaseModelOutputWithPast:
403
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
404
+ output_hidden_states = (
405
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
406
+ )
407
+ use_cache = use_cache if use_cache is not None else self.config.use_cache
408
+
409
+ if (input_ids is None) ^ (inputs_embeds is not None):
410
+ raise ValueError("You must specify exactly one of input_ids or inputs_embeds")
411
+
412
+ if self.gradient_checkpointing and self.training and use_cache:
413
+ logger.warning_once(
414
+ "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`."
415
+ )
416
+ use_cache = False
417
+
418
+ # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
419
+ if not isinstance(past_key_values, (type(None), Cache)):
420
+ raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
421
+
422
+ if inputs_embeds is None:
423
+ inputs_embeds = self.embed_tokens(input_ids)
424
+
425
+ if use_cache and past_key_values is None:
426
+ past_key_values = DynamicCache()
427
+
428
+ if cache_position is None:
429
+ past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
430
+ cache_position = torch.arange(
431
+ past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
432
+ )
433
+
434
+ if position_ids is None:
435
+ position_ids = cache_position.unsqueeze(0)
436
+
437
+ causal_mask = create_causal_mask(
438
+ config=self.config,
439
+ input_embeds=inputs_embeds,
440
+ attention_mask=attention_mask,
441
+ cache_position=cache_position,
442
+ past_key_values=past_key_values,
443
+ position_ids=position_ids,
444
+ )
445
+
446
+ hidden_states = inputs_embeds
447
+
448
+ # create position embeddings to be shared across the decoder layers
449
+ position_embeddings = self.rotary_emb(hidden_states, position_ids)
450
+
451
+ # decoder layers
452
+ all_hidden_states = () if output_hidden_states else None
453
+ all_self_attns = () if output_attentions else None
454
+
455
+ for decoder_layer in self.layers[: self.config.num_hidden_layers]:
456
+ if output_hidden_states:
457
+ all_hidden_states += (hidden_states,)
458
+
459
+ layer_outputs = decoder_layer(
460
+ hidden_states,
461
+ attention_mask=causal_mask,
462
+ position_ids=position_ids,
463
+ past_key_value=past_key_values,
464
+ output_attentions=output_attentions,
465
+ use_cache=use_cache,
466
+ cache_position=cache_position,
467
+ position_embeddings=position_embeddings,
468
+ **flash_attn_kwargs,
469
+ )
470
+
471
+ hidden_states = layer_outputs[0]
472
+
473
+ if output_attentions:
474
+ all_self_attns += (layer_outputs[1],)
475
+
476
+ hidden_states = self.norm(hidden_states)
477
+
478
+ # add hidden states from the last decoder layer
479
+ if output_hidden_states:
480
+ all_hidden_states += (hidden_states,)
481
+
482
+ return BaseModelOutputWithPast(
483
+ last_hidden_state=hidden_states,
484
+ past_key_values=past_key_values if use_cache else None,
485
+ hidden_states=all_hidden_states,
486
+ attentions=all_self_attns,
487
+ )
488
+
489
+
490
+ class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
491
+
492
+
493
+ @auto_docstring
494
+ class PanguEmbeddedForCausalLM(PanguEmbeddedPreTrainedModel, GenerationMixin):
495
+ _tied_weights_keys = ["lm_head.weight"]
496
+ _tp_plan = {"lm_head": "colwise_rep"}
497
+ _pp_plan = {"lm_head": (["hidden_states"], ["logits"])}
498
+
499
+ def __init__(self, config):
500
+ super().__init__(config)
501
+ self.model = PanguEmbeddedModel(config)
502
+ self.vocab_size = config.vocab_size
503
+ self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
504
+
505
+ # Initialize weights and apply final processing
506
+ self.post_init()
507
+
508
+ def get_input_embeddings(self):
509
+ return self.model.embed_tokens
510
+
511
+ def set_input_embeddings(self, value):
512
+ self.model.embed_tokens = value
513
+
514
+ def get_output_embeddings(self):
515
+ return self.lm_head
516
+
517
+ def set_output_embeddings(self, new_embeddings):
518
+ self.lm_head = new_embeddings
519
+
520
+ def set_decoder(self, decoder):
521
+ self.model = decoder
522
+
523
+ def get_decoder(self):
524
+ return self.model
525
+
526
+ @can_return_tuple
527
+ @auto_docstring
528
+ def forward(
529
+ self,
530
+ input_ids: Optional[torch.LongTensor] = None,
531
+ attention_mask: Optional[torch.Tensor] = None,
532
+ position_ids: Optional[torch.LongTensor] = None,
533
+ past_key_values: Optional[Cache] = None,
534
+ inputs_embeds: Optional[torch.FloatTensor] = None,
535
+ labels: Optional[torch.LongTensor] = None,
536
+ use_cache: Optional[bool] = None,
537
+ output_attentions: Optional[bool] = None,
538
+ output_hidden_states: Optional[bool] = None,
539
+ cache_position: Optional[torch.LongTensor] = None,
540
+ logits_to_keep: Union[int, torch.Tensor] = 0,
541
+ **kwargs: Unpack[KwargsForCausalLM],
542
+ ) -> CausalLMOutputWithPast:
543
+
544
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
545
+ output_hidden_states = (
546
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
547
+ )
548
+
549
+ # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
550
+ outputs: BaseModelOutputWithPast = self.model(
551
+ input_ids=input_ids,
552
+ attention_mask=attention_mask,
553
+ position_ids=position_ids,
554
+ past_key_values=past_key_values,
555
+ inputs_embeds=inputs_embeds,
556
+ use_cache=use_cache,
557
+ output_attentions=output_attentions,
558
+ output_hidden_states=output_hidden_states,
559
+ cache_position=cache_position,
560
+ **kwargs,
561
+ )
562
+
563
+ hidden_states = outputs.last_hidden_state
564
+ # Only compute necessary logits, and do not upcast them to float if we are not computing the loss
565
+ slice_indices = slice(-logits_to_keep, None) if isinstance(logits_to_keep, int) else logits_to_keep
566
+ logits = self.lm_head(hidden_states[:, slice_indices, :])
567
+
568
+ loss = None
569
+ if labels is not None:
570
+ loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.config.vocab_size, **kwargs)
571
+
572
+ return CausalLMOutputWithPast(
573
+ loss=loss,
574
+ logits=logits,
575
+ past_key_values=outputs.past_key_values,
576
+ hidden_states=outputs.hidden_states,
577
+ attentions=outputs.attentions,
578
+ )
579
+
580
+
581
+ __all__ = [
582
+ "PanguEmbeddedForCausalLM",
583
+ "PanguEmbeddedModel",
584
+ "PanguEmbeddedPreTrainedModel",
585
+ ]
modular_openpangu_dense.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
3
+ # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
6
+ # and OPT implementations in this library. It has been modified from its
7
+ # original forms to accommodate minor architectural differences compared
8
+ # to GPT-NeoX and OPT used by the Meta AI team that trained the model.
9
+ #
10
+ # Licensed under the Apache License, Version 2.0 (the "License");
11
+ # you may not use this file except in compliance with the License.
12
+ # You may obtain a copy of the License at
13
+ #
14
+ # http://www.apache.org/licenses/LICENSE-2.0
15
+ #
16
+ # Unless required by applicable law or agreed to in writing, software
17
+ # distributed under the License is distributed on an "AS IS" BASIS,
18
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
+ # See the License for the specific language governing permissions and
20
+ # limitations under the License.
21
+
22
+ from typing import Callable, Optional, Tuple
23
+
24
+ import torch
25
+ from torch import nn
26
+
27
+ import torch_npu
28
+ from torch_npu.contrib import transfer_to_npu
29
+ if "910" in torch.npu.get_device_name():
30
+ NPU_ATTN_INFR = True
31
+ print("[INFO] torch_npu detected. Using NPU fused infer attention.")
32
+ else:
33
+ NPU_ATTN_INFR = False
34
+
35
+ from transformers.cache_utils import Cache
36
+ from transformers.modeling_flash_attention_utils import FlashAttentionKwargs
37
+ from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS
38
+ from transformers.processing_utils import Unpack
39
+ from transformers.utils import logging
40
+ from transformers.models.llama.modeling_llama import (
41
+ LlamaAttention,
42
+ LlamaDecoderLayer,
43
+ LlamaForCausalLM,
44
+ LlamaForSequenceClassification,
45
+ LlamaMLP,
46
+ LlamaModel,
47
+ apply_rotary_pos_emb,
48
+ eager_attention_forward,
49
+ )
50
+ from .configuration_openpangu_dense import PanguEmbeddedConfig
51
+
52
+
53
+ logger = logging.get_logger(__name__)
54
+
55
+
56
class PanguEmbeddedMLP(LlamaMLP):
    """Llama-style gated MLP whose three projections carry no bias term."""

    def __init__(self, config):
        super().__init__(config)
        # Replace the projections built by LlamaMLP with bias-free ones.
        # Creation order (gate, up, down) matches the original definition so
        # any RNG-driven weight initialisation is drawn in the same sequence.
        projection_specs = (
            ("gate_proj", self.hidden_size, self.intermediate_size),
            ("up_proj", self.hidden_size, self.intermediate_size),
            ("down_proj", self.intermediate_size, self.hidden_size),
        )
        for attr_name, fan_in, fan_out in projection_specs:
            setattr(self, attr_name, nn.Linear(fan_in, fan_out, bias=False))
62
+
63
+
64
class PanguEmbeddedAttention(LlamaAttention):
    """Multi-headed attention for PanguEmbedded.

    Mirrors ``LlamaAttention`` but lets the q/k/v/o projections honour
    ``config.bias`` and, at inference time on Ascend NPUs, routes the
    attention computation to ``torch_npu.npu_fused_infer_attention_score``.
    """

    def __init__(self, config: PanguEmbeddedConfig, layer_idx: int):
        # NOTE(review): LlamaAttention.__init__ expects (config, layer_idx);
        # this bare super().__init__() presumably relies on the HF "modular"
        # converter rewriting the call in the generated modeling file —
        # confirm this file is never instantiated directly.
        super().__init__()
        self.config = config
        self.layer_idx = layer_idx
        # Fall back to hidden_size // num_heads when the config carries no
        # explicit head_dim.
        self.head_dim = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
        self.num_heads = config.num_attention_heads
        self.num_key_value_heads = config.num_key_value_heads
        # Number of query heads sharing one key/value head (GQA ratio).
        self.num_key_value_groups = config.num_attention_heads // config.num_key_value_heads
        self.scaling = self.head_dim**-0.5
        self.attention_dropout = config.attention_dropout
        self.is_causal = True

        # Unlike stock Llama, all four projections share a single
        # config-controlled bias flag.
        self.q_proj = nn.Linear(config.hidden_size, config.num_attention_heads * self.head_dim, bias=config.bias)
        self.k_proj = nn.Linear(config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.bias)
        self.v_proj = nn.Linear(config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.bias)
        self.o_proj = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.bias)

    def forward(
        self,
        hidden_states: torch.Tensor,
        position_embeddings: tuple[torch.Tensor, torch.Tensor],
        attention_mask: Optional[torch.Tensor],
        past_key_value: Optional[Cache] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
        """Compute attention over ``hidden_states``.

        Args:
            hidden_states: input activations; the last dim is split into
                heads of size ``self.head_dim``.
            position_embeddings: (cos, sin) rotary embedding tensors.
            attention_mask: optional additive/boolean mask (implementation
                dependent); inverted for the NPU kernel.
            past_key_value: optional KV cache updated in place.
            cache_position: positions used by static caches.

        Returns:
            (attn_output, attn_weights); weights are ``None`` on the NPU
            fused path, which does not expose them.
        """
        input_shape = hidden_states.shape[:-1]
        hidden_shape = (*input_shape, -1, self.head_dim)

        # Project then reshape to (batch, heads, seq, head_dim).
        query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)

        cos, sin = position_embeddings
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        if past_key_value is not None:
            # sin and cos are specific to RoPE models; cache_position needed for the static cache
            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)

        # Pick the attention backend registered for this config.
        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        if not self.training and NPU_ATTN_INFR:
            # Inference-only fast path on Ascend 910 NPUs.
            q_len = input_shape[1]
            if attention_mask is not None:
                # NOTE(review): the kernel's atten_mask appears to mark
                # positions to *exclude* with True, hence the inversion —
                # confirm against torch_npu documentation.
                attention_mask = ~attention_mask.bool()
            elif q_len > 1:
                # No mask supplied: build a causal (upper-triangular) mask.
                attention_mask = torch.triu(torch.ones([q_len, q_len]), diagonal=1).bool().unsqueeze(0).unsqueeze(0).to(query_states.device)

            attn_output, _ = torch_npu.npu_fused_infer_attention_score(
                query_states, key_states, value_states,
                num_heads=self.num_heads, num_key_value_heads=self.num_key_value_heads,
                input_layout="BNSD", atten_mask=attention_mask, scale=self.scaling)
            # Back to (batch, seq, heads, head_dim) before the flattening below.
            attn_output = attn_output.transpose(1, 2)
            attn_weights = None
        else:
            attn_output, attn_weights = attention_interface(
                self,
                query_states,
                key_states,
                value_states,
                attention_mask,
                dropout=0.0 if not self.training else self.attention_dropout,
                scaling=self.scaling,
                **kwargs,
            )

        # Merge heads and project back to the model dimension.
        attn_output = attn_output.reshape(*input_shape, -1).contiguous()
        attn_output = self.o_proj(attn_output)
        return attn_output, attn_weights
138
+
139
+
140
class PanguEmbeddedDecoderLayer(LlamaDecoderLayer):
    """Decoder layer identical to Llama's; presumably the modular converter
    substitutes the Pangu attention/MLP classes when generating the model."""
    pass
142
+
143
+
144
class PanguEmbeddedModel(LlamaModel):
    """Backbone transformer; inherits LlamaModel unchanged."""
    pass
146
+
147
+
148
class PanguEmbeddedForCausalLM(LlamaForCausalLM):
    """Causal-LM head on top of PanguEmbeddedModel; inherits LlamaForCausalLM unchanged."""
    pass
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "[unused10]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenization_openpangu.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
3
+ # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
6
+ # and OPT implementations in this library. It has been modified from its
7
+ # original forms to accommodate minor architectural differences compared
8
+ # to GPT-NeoX and OPT used by the Meta AI team that trained the model.
9
+ #
10
+ # Licensed under the Apache License, Version 2.0 (the "License");
11
+ # you may not use this file except in compliance with the License.
12
+ # You may obtain a copy of the License at
13
+ #
14
+ # http://www.apache.org/licenses/LICENSE-2.0
15
+ #
16
+ # Unless required by applicable law or agreed to in writing, software
17
+ # distributed under the License is distributed on an "AS IS" BASIS,
18
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
+ # See the License for the specific language governing permissions and
20
+ # limitations under the License.
21
+
22
+ import os
23
+ from shutil import copyfile
24
+ from typing import Any, Dict, List, Optional, Tuple
25
+
26
+ import sentencepiece as spm
27
+
28
+ from transformers.tokenization_utils import PreTrainedTokenizer
29
+ from transformers.utils import logging
30
+
31
+
32
+ logger = logging.get_logger(__name__)
33
+
34
+ VOCAB_FILES_NAMES = {"vocab_file": "./tokenizer.model"}
35
+
36
+ PRETRAINED_VOCAB_FILES_MAP = {}
37
+
38
+
39
def convert_bool(string):
    """Coerce the strings "true"/"false" (any casing) to real booleans.

    Non-string values, and strings that are not boolean literals, are
    returned unchanged. Useful for values round-tripped through JSON
    configs where booleans may arrive as text.
    """
    if not isinstance(string, str):
        return string
    lowered = string.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False
    return string
49
+
50
+
51
class PanguTokenizer(PreTrainedTokenizer):
    """
    Construct a tokenizer. Based on byte-level Byte-Pair-Encoding.

    Thin wrapper around a SentencePiece model implementing the slow
    (pure-Python) ``PreTrainedTokenizer`` interface.

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
    """

    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    model_input_names = ["input_ids", "attention_mask"]
    # Registered with AutoTokenizer for trust_remote_code loading.
    _auto_class = "AutoTokenizer"

    def __init__(
        self,
        vocab_file,
        unk_token="<unk>",
        bos_token="<s>",
        eos_token="</s>",
        pad_token="</s>",
        sp_model_kwargs: Optional[Dict[str, Any]] = None,
        add_bos_token=True,
        add_eos_token=False,
        decode_with_prefix_space=False,
        clean_up_tokenization_spaces=False,
        **kwargs,
    ):
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
        # Load the SentencePiece model before super().__init__, which may
        # already query vocab_size / token conversion during setup.
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.Load(vocab_file)
        super().__init__(
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            pad_token=pad_token,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            **kwargs,
        )
        self.vocab_file = vocab_file
        # add_bos_token may arrive as the string "true"/"false" from a saved
        # tokenizer_config.json; normalise it to a bool.
        self.add_bos_token = convert_bool(add_bos_token)
        self.add_eos_token = add_eos_token
        self.decode_with_prefix_space = decode_with_prefix_space
        # NOTE(review): the SentencePiece processor is re-created and the
        # model re-loaded here, duplicating the load above — confirm whether
        # the first load exists only to satisfy super().__init__.
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.Load(vocab_file)
        # Lazily built by the no_prefix_space_tokens property.
        self._no_prefix_space_tokens = None

    """ Initialisation"""

    @property
    def no_prefix_space_tokens(self):
        # Lazily computed set describing tokens that do NOT start with the
        # SentencePiece word-boundary marker "▁".
        # NOTE(review): the set stores enumerate *indices* (ints) while
        # _maybe_add_prefix_space tests token *strings* against it, so the
        # membership test can never match — confirm intended behaviour.
        if self._no_prefix_space_tokens is None:
            vocab = self.convert_ids_to_tokens(list(range(self.vocab_size)))
            self._no_prefix_space_tokens = {i for i, tok in enumerate(vocab) if not tok.startswith("▁")}
        return self._no_prefix_space_tokens

    @property
    def vocab_size(self) -> int:
        """Returns vocab size (base SentencePiece vocabulary, excluding added tokens)."""
        return self.sp_model.get_piece_size()

    @property
    def bos_token_id(self) -> Optional[int]:
        # Taken directly from the SentencePiece model, not from the
        # configured bos_token string.
        return self.sp_model.bos_id()

    @property
    def eos_token_id(self) -> Optional[int]:
        # Defers to the base class, which resolves the configured eos token
        # (e.g. "[unused10]" per special_tokens_map.json) to an id.
        return super().eos_token_id

    def get_vocab(self):
        """Returns vocab as a dict, including tokens added on top of SentencePiece."""
        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab

    def _tokenize(self, text):
        """Returns a tokenized string."""
        return self.sp_model.encode(text, out_type=str)

    def _convert_token_to_id(self, token):
        """Converts a token (str) in an id using the vocab."""
        return self.sp_model.piece_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        token = self.sp_model.IdToPiece(index)
        return token

    def _maybe_add_prefix_space(self, tokens, decoded):
        # Prepend a space when the first token is word-initial (see the
        # no_prefix_space_tokens NOTE about the id/string mismatch).
        if tokens and tokens[0] not in self.no_prefix_space_tokens:
            return " " + decoded
        else:
            return decoded

    def convert_tokens_to_string(self, tokens):
        """Converts a sequence of tokens (string) in a single string."""
        current_sub_tokens = []
        out_string = ""
        # NOTE(review): prev_is_special is tracked but never read.
        prev_is_special = False
        for token in tokens:
            # make sure that special tokens are not decoded using sentencepiece model
            if token in self.all_special_tokens:
                # Decode the current sub-tokens first
                if current_sub_tokens:
                    out_string += self.sp_model.decode(current_sub_tokens)
                    current_sub_tokens = []
                # Append the special token without adding extra spaces
                out_string += token
                prev_is_special = True
            else:
                current_sub_tokens.append(token)
                prev_is_special = False
        # Decode any remaining sub-tokens
        if current_sub_tokens:
            out_string += self.sp_model.decode(current_sub_tokens)
        # Clean up leading and trailing spaces
        if self.clean_up_tokenization_spaces:
            out_string = self.clean_up_tokenization(out_string)
        out_string = self._maybe_add_prefix_space(tokens=tokens, decoded=out_string)
        # The first character (the possibly-added prefix space) is dropped.
        return out_string[1:]

    # Override decode so spaces_between_special_tokens defaults to False
    # (the base class defaults it to True).
    def decode(self,
               token_ids,
               spaces_between_special_tokens: bool = False,
               **kwargs):
        return super().decode(
            token_ids=token_ids,
            spaces_between_special_tokens=spaces_between_special_tokens,
            **kwargs,
        )

    def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.

        Returns:
            `Tuple(str)`: Paths to the files saved.
        """
        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return ("",)
        # NOTE(review): VOCAB_FILES_NAMES stores "./tokenizer.model", so the
        # joined path embeds a "./" segment (odd but harmless on POSIX) —
        # confirm on other platforms.
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
        )

        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
            copyfile(self.vocab_file, out_vocab_file)
        elif not os.path.isfile(self.vocab_file):
            # Source file is gone: serialise the in-memory model instead.
            with open(out_vocab_file, "wb") as fi:
                content_spiece_model = self.sp_model.serialized_model_proto()
                fi.write(content_spiece_model)

        return (out_vocab_file,)

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        # Optionally prefix BOS; pair sequences are concatenated without a
        # separator; EOS (if enabled) is appended only once, at the very end.
        if self.add_bos_token:
            bos_token_ids = [self.bos_token_id]
        else:
            bos_token_ids = []

        output = bos_token_ids + token_ids_0

        if token_ids_1 is not None:
            output = output + token_ids_1

        if self.add_eos_token:
            output = output + [self.eos_token_id]

        return output

    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
        special tokens using the tokenizer `prepare_for_model` method.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
            )

        # NOTE(review): this mask assumes BOS + seq + EOS (and BOS + seq +
        # EOS, EOS-like separators for pairs), which does not match
        # build_inputs_with_special_tokens above — confirm which is correct.
        if token_ids_1 is None:
            return [1] + ([0] * len(token_ids_0)) + [1]
        return [1] + ([0] * len(token_ids_0)) + [1, 1] + ([0] * len(token_ids_1)) + [1]

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Create a mask from the two sequences passed to be used in a sequence-pair classification task. This model does
        not make use of token type ids, therefore a list of zeros is returned.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of zeros.
        """
        eos = [self.eos_token_id]

        if token_ids_1 is None:
            return len(token_ids_0 + eos) * [0]
        return len(token_ids_0 + eos + token_ids_1 + eos) * [0]
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b16f1558c0cd4ae6ef1a2c605713be0a514f50e1ce2d2c878979ce988c148ec
3
+ size 2477809
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"add_bos_token": true, "add_eos_token": false, "add_prefix_space": true, "added_tokens_decoder": {"0": {"content": "<unk>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "1": {"content": "<s>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "2": {"content": "</s>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45806": {"content": "<|User|>:", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45813": {"content": "<|Bot|>:", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45830": {"content": "[unused0]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45840": {"content": "[unused1]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45846": {"content": "[unused2]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45849": {"content": "[unused3]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45861": {"content": "[unused4]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45866": {"content": "[unused5]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45874": {"content": "[unused6]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45883": {"content": "[unused7]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45884": {"content": "[unused8]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45887": {"content": "[unused9]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, 
"special": true}, "45892": {"content": "[unused10]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45920": {"content": "[unused11]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45932": {"content": "[unused12]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45938": {"content": "[unused13]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45953": {"content": "[unused14]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45968": {"content": "[unused15]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45974": {"content": "[unused16]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45982": {"content": "[unused17]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "45986": {"content": "[unused18]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46005": {"content": "[unused19]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46007": {"content": "[unused20]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46014": {"content": "[unused21]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46017": {"content": "[unused22]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46028": {"content": "[unused23]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46032": {"content": "[unused24]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46081": {"content": 
"[unused25]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46086": {"content": "[unused26]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46101": {"content": "[unused27]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46183": {"content": "[unused28]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46230": {"content": "[unused29]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46245": {"content": "[unused30]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "46257": {"content": "[unused31]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "144208": {"content": "[unused32]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}, "144209": {"content": "[unused33]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}}, "auto_map": {"AutoTokenizer": ["tokenization_openpangu.PanguTokenizer", null]}, "bos_token": "<s>", "clean_up_tokenization_spaces": false, "eos_token": "[unused10]", "legacy": true, "model_max_length": 1000000000000000019884624838656, "pad_token": "<unk>", "sp_model_kwargs": {}, "spaces_between_special_tokens": false, "tokenizer_class": "PanguTokenizer", "unk_token": "<unk>", "use_default_system_prompt": false, "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '[unused9]系统:[unused10]' }}{% endif %}{% if message['role'] == 'system' %}{{ '[unused9]系统:' + message['content'] + '[unused10]' }}{% endif %}{% if message['role'] == 'assistant' %}{{'[unused9]助手:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'tool' %}{{'[unused9]工具:' + message['content'] + 
'[unused10]'}}{% endif %}{% if message['role'] == 'function' %}{{'[unused9]方法:' + message['content'] + '[unused10]'}}{% endif %}{% if message['role'] == 'user' %}{{'[unused9]用户:' + message['content'] + '[unused10]'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '[unused9]助手:' }}{% endif %}"}