# NOTE(review): removed stray UI status residue ("Spaces:" / "Sleeping") that
# was captured along with the code and is not part of the program.
import re
def parse_agent_response(response):
    """Parse an agent API response into a structured result.

    Args:
        response: Iterable of message dicts; each message may carry
            'role', 'content', and 'metadata' keys.

    Returns:
        dict with keys:
            "final_answer": str — text after the last "Final answer:" marker
                (empty string if no message contains the marker).
            "execution_steps": list of {"title": str, "details": list[str]}.
            "search_links": list of URLs found in execution-log messages.
    """
    result = {
        "final_answer": "",
        "execution_steps": [],
        "search_links": []
    }
    current_step = {}
    link_pattern = re.compile(r'https?://\S+')
    # Matches the "extremely detailed" outcome section, up to "### 3" or EOF.
    detail_pattern = re.compile(
        r'### 2\. Task outcome \(extremely detailed version\):(.+?)(?=### 3|\Z)',
        re.DOTALL)
    for msg in response:
        content = str(msg.get('content', ''))
        # BUG FIX: msg.get('metadata', {}) returns None when the key is
        # present with a None value, which would crash metadata.get() below.
        metadata = msg.get('metadata') or {}
        # Extract the final answer.
        if "Final answer:" in content:
            result["final_answer"] = _extract_final_answer(content)
        # Parse execution steps (assistant messages only).
        if msg.get('role') == 'assistant' and content.startswith("**Step"):
            current_step = {
                "title": content.strip('* '),
                "details": []
            }
            result["execution_steps"].append(current_step)
        elif current_step and msg.get('role') == 'assistant':
            # Precisely extract the detailed analysis content.
            if (detail_match := detail_pattern.search(content)):
                # Strip bold markers (**...) and code fences (```...).
                cleaned_content = re.sub(r'\*{2,}|`{3,}', '',
                                         detail_match.group(1)).strip()
                current_step["details"].append(cleaned_content)
        # Collect links from the execution-log messages.
        if metadata.get('title') == '📝 Execution Logs':
            result["search_links"].extend(
                link_pattern.findall(content)
            )
    return result
| def _extract_final_answer(content): | |
| """提取并清理最终答案""" | |
| answer = content.split("Final answer:")[-1] | |
| return re.sub(r'\*{2,}', '', answer).strip() | |
def save_as_markdown(result, filename):
    """Write the parsed agent result as a Markdown report.

    Args:
        result: dict produced by parse_agent_response, with keys
            "final_answer", "execution_steps", and "search_links".
        filename: Path of the Markdown file to (over)write, UTF-8 encoded.
    """
    with open(filename, 'w', encoding='utf-8') as f:
        # Final-answer section.
        f.write("## Novelty Research Report\n")
        f.write(result["final_answer"] + "\n")
        # Execution-steps section, with duplicate steps removed.
        if result["execution_steps"]:
            f.write("\n### Execution Steps\n")
            seen_steps = set()
            unique_steps = []
            for step in result["execution_steps"]:
                # Title plus details tuple identifies a step uniquely.
                step_content = (step['title'], tuple(step['details']))
                if step_content not in seen_steps:
                    seen_steps.add(step_content)
                    unique_steps.append(step)
            # Emit the deduplicated steps.
            for step in unique_steps:
                f.write(f"\n#### {step['title']}\n")
                f.write('\n'.join(step["details"]) + "\n")
        # References section: cleaned, deduplicated links.
        if result["search_links"]:
            f.write("\n### Relevant References\n")
            seen = set()
            for raw_link in result["search_links"]:
                link = raw_link.split('?')[0]      # drop URL query string
                link = re.sub(r'\).*', '', link)   # drop first ')' and all after it
                link = link.strip('\\n. ')         # trim stray escapes/punctuation
                if link and link.startswith('http') and link not in seen:
                    seen.add(link)
                    f.write(f"- {link}\n")
            # BUG FIX: removed a trailing write that re-dumped every raw,
            # uncleaned link after the deduplicated list above.
if __name__ == "__main__":
    import json

    # Source JSON: a previously captured agent response on local disk.
    input_path = r"D:\007.Projects\008.deep_research\smolagents\examples\open_deep_research\last_result.json"
    # Destination for the rendered Markdown report.
    report_path = r"D:\007.Projects\008.deep_research\smolagents\examples\open_deep_research\analysis_report.md"

    with open(input_path, encoding="utf-8") as f:
        test_data = json.load(f)

    # Parse the response and render it straight to the report file.
    save_as_markdown(parse_agent_response(test_data), report_path)
    print(f"完整分析报告已保存至:{report_path}")