planning-agent-pro / agent /agentic_engine.py
catninja123's picture
Add feeder_school_info + summer_program_recommendation tools (v1.1)
7f1fe1b verified
"""
Agentic 规划引擎 v1.0
真正的 LLM 驱动工具调用循环——LLM 自己决定调用哪些工具、调用几次。
架构:
- Grok 4.20 作为主推理模型(原生支持 tool_use)
- 工具集:12 个数据检索 + 分析工具,覆盖 MAZE / 活动向量 / 知识库 / feeder school / 夏校竞赛
- 循环上限:8 步(防止无限循环)
- 输出:SSE 流式,每次工具调用和结果实时推送
集成方式:
- 被 magi_system.py 在 planning 模式下调用
- 也可作为独立 API 端点 /api/agentic_plan 使用
"""
import json
import math
import os
import re
import sys
from pathlib import Path
from typing import Any, Dict, Generator, List, Optional
import requests
import numpy as np
sys.path.insert(0, str(Path(__file__).parent.parent))
# ─── 常量 ─────────────────────────────────────────────────────────────────────
# Chat-completions endpoint used for every LLM request in this module.
GROK_URL = "https://api.x.ai/v1/chat/completions"
# NOTE: the bearer token is read from XAI_API_KEY once, when this module is
# imported; if the variable is unset the header carries an empty token.
GROK_HEADERS = {
    "Authorization": f"Bearer {os.environ.get('XAI_API_KEY', '')}",
    "Content-Type": "application/json",
}
# Directory holding the JSON knowledge bases. Overridable through the DATA_DIR
# environment variable; defaults to "<parent of this file's directory>/data".
DATA_DIR = Path(os.environ.get("DATA_DIR", str(Path(__file__).parent.parent / "data")))
# Upper bound on the number of reasoning rounds in run_agentic_plan.
MAX_STEPS = 8
# ─── 工具定义(Grok tool_use 格式)────────────────────────────────────────────
TOOLS = [
{
"type": "function",
"function": {
"name": "find_similar_cases",
"description": (
"从 MAZE 939 个真实录取案例中,找到与目标学生最相似的历史案例。"
"返回相似案例列表,包含录取/拒绝结果、相似度分解、活动摘要。"
"适用场景:了解类似学生的录取规律、活动组合参考、选校参考。"
),
"parameters": {
"type": "object",
"properties": {
"sat": {"type": "number", "description": "学生 SAT 分数(1000-1600)"},
"major": {"type": "string", "description": "申请专业方向,如 Computer Science、Economics、Biology"},
"hs_type": {"type": "string", "description": "高中类型:国际高中/国际部/公立/私立/美高"},
"target_school": {"type": "string", "description": "目标学校名称(可选),如 Harvard、MIT、Stanford"},
"n": {"type": "integer", "description": "返回案例数量,默认 8", "default": 8},
"result_filter": {"type": "string", "description": "筛选结果:Accept(只看录取)、Reject(只看拒绝)、null(全部)"},
},
"required": ["sat", "major"],
},
},
},
{
"type": "function",
"function": {
"name": "find_similar_activity_profiles",
"description": (
"用 42 维混合向量(活动类型分布 + 语义相似度)检索活动组合相似的历史案例。"
"比 find_similar_cases 更专注于活动维度的相似性。"
"适用场景:分析某类活动组合的录取效果、发现活动差距。"
),
"parameters": {
"type": "object",
"properties": {
"activity_types": {
"type": "object",
"description": "活动类型分布,键为类型名,值为 0-1 的比例。类型包括:Research/Leadership/Art/Sports/Community/Work/Tutoring/Club/Competition/Other",
},
"major": {"type": "string", "description": "专业方向"},
"sat_band": {"type": "string", "description": "SAT 分段,如 1400-1449、1500-1549"},
"n": {"type": "integer", "description": "返回案例数量,默认 8", "default": 8},
},
"required": ["activity_types"],
},
},
},
{
"type": "function",
"function": {
"name": "get_school_stats",
"description": (
"获取某所学校的录取统计数据:SAT 均值/标准差、录取率、大陆高中申请人数/录取人数。"
"数据来源:MAZE 939 案例 + meiben 大陆高中录取数据库。"
),
"parameters": {
"type": "object",
"properties": {
"school_name": {"type": "string", "description": "学校名称,如 Harvard、MIT、Columbia"},
"hs_category": {"type": "string", "description": "高中类别筛选(可选):国际高中/国际部/公立/私立"},
},
"required": ["school_name"],
},
},
},
{
"type": "function",
"function": {
"name": "get_school_taste",
"description": (
"获取某所学校的招生偏好(口味):偏好的活动类型、文书风格、学生特质、常见拒绝原因。"
"数据来源:school_taste_updates.json + consultant_knowledge_kb.json。"
),
"parameters": {
"type": "object",
"properties": {
"school_name": {"type": "string", "description": "学校名称"},
},
"required": ["school_name"],
},
},
},
{
"type": "function",
"function": {
"name": "analyze_activity_gap",
"description": (
"对比学生的活动组合与相似录取案例,识别活动差距和改进方向。"
"输入:学生活动描述 + find_similar_cases 的返回结果。"
"输出:差距分析、具体建议、优先级排序。"
),
"parameters": {
"type": "object",
"properties": {
"student_activities": {"type": "string", "description": "学生当前活动描述"},
"similar_cases_summary": {"type": "string", "description": "相似录取案例的活动摘要(来自 find_similar_cases 结果)"},
"major": {"type": "string", "description": "专业方向"},
"target_school": {"type": "string", "description": "目标学校(可选)"},
},
"required": ["student_activities", "similar_cases_summary"],
},
},
},
{
"type": "function",
"function": {
"name": "get_ps_distillation",
"description": (
"从 ps_distillation_kb.json(MAZE 158 个 Top15/20 录取案例蒸馏)中检索文书写作规律。"
"返回:高频主题、核心原则、红旗警告、低 SAT 成功模式(如适用)。"
),
"parameters": {
"type": "object",
"properties": {
"major": {"type": "string", "description": "专业方向"},
"sat": {"type": "number", "description": "学生 SAT 分数(用于判断是否需要低 SAT 成功模式)"},
},
"required": ["major"],
},
},
},
{
"type": "function",
"function": {
"name": "check_pause_factors",
"description": (
"检查学生档案是否触发 Harvard 招生委员会的 pause factors(暂停因素)。"
"这些因素可能导致招生官在审阅时产生顾虑,即使其他指标很强。"
),
"parameters": {
"type": "object",
"properties": {
"student_profile": {
"type": "object",
"description": "学生档案,包含 sat/gpa/activities/essays 等字段",
},
"target_schools": {
"type": "array",
"items": {"type": "string"},
"description": "目标学校列表(可选)",
},
},
"required": ["student_profile"],
},
},
},
{
"type": "function",
"function": {
"name": "search_blog_knowledge",
"description": (
"从 blog_cards_all.json(4376 张招生官博客知识卡片)中检索相关内容。"
"适用场景:查找特定主题的招生官观点、文书建议、活动评价标准。"
),
"parameters": {
"type": "object",
"properties": {
"query": {"type": "string", "description": "检索查询,如「文书开头」「科研活动价值」「中国学生常见问题」"},
"top_k": {"type": "integer", "description": "返回卡片数量,默认 5", "default": 5},
},
"required": ["query"],
},
},
},
{
"type": "function",
"function": {
"name": "calibrate_school_list",
"description": (
"根据学生档案校准选校清单:评估每所学校的录取概率、建议轮次(ED/EA/RD)、风险等级。"
"返回结构化的选校建议表格。"
),
"parameters": {
"type": "object",
"properties": {
"student": {
"type": "object",
"description": "学生档案(sat/gpa/major/hs_type/activities)",
},
"school_list": {
"type": "array",
"items": {"type": "string"},
"description": "待评估的学校列表",
},
},
"required": ["student", "school_list"],
},
},
},
{
"type": "function",
"function": {
"name": "get_special_case_patterns",
"description": (
"检索特殊案例模式:低分高录、逆袭案例、特定背景学生的成功规律。"
"数据来源:special_case_patterns.json。"
),
"parameters": {
"type": "object",
"properties": {
"pattern_type": {
"type": "string",
"description": "模式类型:low_sat_admit(低SAT录取)/ underrepresented(弱势群体)/ late_bloomer(后期爆发)/ niche_talent(特殊才能)",
},
"major": {"type": "string", "description": "专业方向(可选)"},
},
"required": ["pattern_type"],
},
},
},
{
"type": "function",
"function": {
"name": "get_feeder_school_info",
"description": (
"查询某所大学的大陆高中 feeder school 分布(哪些高中送了多少人),"
"或查询某所高中的录取档案(该高中历史上送往哪些大学、T10/T15 率)。"
"数据来源:meiben 27000+ 条大陆录取记录。"
),
"parameters": {
"type": "object",
"properties": {
"school_name": {
"type": "string",
"description": "大学英文名(如 'Stanford University')",
},
"hs_name": {
"type": "string",
"description": "高中中文名(可选),若提供则返回该高中的录取档案",
},
},
"required": ["school_name"],
},
},
},
{
"type": "function",
"function": {
"name": "get_summer_program_recommendation",
"description": (
"根据学生的专业方向和目标档次,推荐适合的夏校和竞赛项目。"
"数据来源:meiben 录取数据中提取的夏校/竞赛给力指数(1-10分)。"
),
"parameters": {
"type": "object",
"properties": {
"major": {"type": "string", "description": "专业方向"},
"sat": {"type": "number", "description": "SAT 分数(可选)"},
"target_tier": {
"type": "string",
"description": "目标档次:T10 / T15 / T20",
},
},
"required": ["major"],
},
},
},
]
# ─── 工具执行层 ────────────────────────────────────────────────────────────────
def _execute_tool(tool_name: str, tool_input: Dict) -> str:
"""执行工具调用,返回 JSON 字符串结果"""
try:
if tool_name == "find_similar_cases":
return _tool_find_similar_cases(**tool_input)
elif tool_name == "find_similar_activity_profiles":
return _tool_find_similar_activity_profiles(**tool_input)
elif tool_name == "get_school_stats":
return _tool_get_school_stats(**tool_input)
elif tool_name == "get_school_taste":
return _tool_get_school_taste(**tool_input)
elif tool_name == "analyze_activity_gap":
return _tool_analyze_activity_gap(**tool_input)
elif tool_name == "get_ps_distillation":
return _tool_get_ps_distillation(**tool_input)
elif tool_name == "check_pause_factors":
return _tool_check_pause_factors(**tool_input)
elif tool_name == "search_blog_knowledge":
return _tool_search_blog_knowledge(**tool_input)
elif tool_name == "calibrate_school_list":
return _tool_calibrate_school_list(**tool_input)
elif tool_name == "get_special_case_patterns":
return _tool_get_special_case_patterns(**tool_input)
elif tool_name == "get_feeder_school_info":
return _tool_get_feeder_school_info(**tool_input)
elif tool_name == "get_summer_program_recommendation":
return _tool_get_summer_program_recommendation(**tool_input)
else:
return json.dumps({"error": f"Unknown tool: {tool_name}"})
except Exception as e:
return json.dumps({"error": str(e), "tool": tool_name})
# ─── 工具实现 ──────────────────────────────────────────────────────────────────
def _tool_find_similar_cases(sat: float, major: str, hs_type: str = "国际高中",
                             target_school: str = None, n: int = 8,
                             result_filter: str = None) -> str:
    """Retrieve similar historical cases and return them as a compact JSON string.

    Delegates retrieval to ``track2.maze_retriever.find_similar_cases`` and
    keeps only a handful of fields per case (activity summary capped at 300
    characters) so the payload handed back to the LLM stays small. The JSON
    object holds ``total_found``, ``cases`` and a one-line ``insight``.
    """
    from track2.maze_retriever import find_similar_cases

    def _compact(hit: Dict) -> Dict:
        # Trim one retriever hit down to the fields worth showing the LLM.
        record = hit["case"]
        return {
            "similarity": round(hit["similarity"], 3),
            "result_at_school": hit.get("result_at_school"),
            "school": record.get("school", ""),
            "year": record.get("year", ""),
            "sat": record.get("sat", ""),
            "major_area": record.get("major_area", ""),
            "hs_type": record.get("hs_type", ""),
            "activity_summary": str(record.get("activity_summary", ""))[:300],
            "all_results_summary": _summarize_results(hit.get("all_results", {})),
            "similarity_breakdown": hit.get("similarity_breakdown", {}),
        }

    profile = {"sat": sat, "major": major, "hs_type": hs_type}
    hits = find_similar_cases(profile, target_school=target_school, n=n,
                              result_filter=result_filter)
    compact_cases = [_compact(hit) for hit in hits]
    payload = {
        "total_found": len(compact_cases),
        "cases": compact_cases,
        "insight": _infer_pattern(compact_cases, target_school),
    }
    return json.dumps(payload, ensure_ascii=False)
def _summarize_results(results: Dict) -> str:
"""把录取结果字典压缩为一行"""
admits = [s for s, r in results.items() if r in ("Accept", "Enrolled", "WL→Accept")]
rejects = [s for s, r in results.items() if r in ("Reject", "Deny")]
parts = []
if admits:
parts.append(f"录取:{', '.join(admits[:4])}")
if rejects:
parts.append(f"拒绝:{', '.join(rejects[:4])}")
return ";".join(parts) if parts else "无结果"
def _infer_pattern(cases: List[Dict], target_school: Optional[str]) -> str:
"""从案例列表中推断规律"""
if not cases:
return "未找到足够相似的案例"
admits = [c for c in cases if c.get("result_at_school") in ("Accept", "Enrolled")]
rejects = [c for c in cases if c.get("result_at_school") in ("Reject", "Deny")]
total_with_result = len(admits) + len(rejects)
if target_school and total_with_result > 0:
rate = len(admits) / total_with_result
return f"在 {len(cases)} 个相似案例中,{total_with_result} 个申请了 {target_school},录取率约 {rate:.0%}{len(admits)} 录取 / {len(rejects)} 拒绝)"
return f"找到 {len(cases)} 个相似案例,SAT 范围 {min(c.get('sat', 0) or 0 for c in cases)}-{max(c.get('sat', 0) or 0 for c in cases)}"
def _tool_find_similar_activity_profiles(activity_types: Dict, major: str = "",
                                         sat_band: str = "", n: int = 8) -> str:
    """Rank stored cases by similarity of their activity-type distribution.

    The query distribution is turned into a unit vector over the type buckets
    and compared (cosine similarity) with the leading type-bucket components
    of each case's ``hybrid_vector``. A matching major family multiplies the
    score by 1.2 and an identical SAT band by 1.1.

    Args:
        activity_types: Mapping of bucket name to weight; unknown buckets are ignored.
        major: Optional major used for the 1.2 boost.
        sat_band: Optional SAT band label used for the 1.1 boost.
        n: Number of top cases to return.

    Returns:
        JSON string with the top cases, admit/reject counts and activity
        summaries of each group, or a JSON error object if the vector file is missing.
    """
    av_file = DATA_DIR / "activity_hybrid_vectors.json"
    if not av_file.exists():
        return json.dumps({"error": "activity_hybrid_vectors.json not found"})
    # Explicit UTF-8: the data contains Chinese text and the platform default
    # encoding is not UTF-8 everywhere.
    with open(av_file, encoding="utf-8") as f:
        av_data = json.load(f)

    type_buckets = av_data.get("meta", {}).get("type_buckets", [
        "Research", "Leadership", "Art", "Sports", "Community",
        "Work", "Tutoring", "Club", "Competition", "Other"
    ])
    # The case vectors are sliced to the same width as the query vector so the
    # dot product stays well-formed even if the bucket list is not 10 long.
    # NOTE(review): assumes the type components lead each hybrid_vector — confirm.
    n_types = len(type_buckets)

    query_vec = np.zeros(n_types)
    for bucket, weight in (activity_types or {}).items():
        if bucket in type_buckets:
            query_vec[type_buckets.index(bucket)] = float(weight)
    query_norm = np.linalg.norm(query_vec)
    if query_norm > 0:
        query_vec = query_vec / query_norm

    scored = []
    for case in av_data.get("cases", []):
        vec = np.array(case.get("hybrid_vector", []))
        if len(vec) < n_types:
            continue
        case_vec = vec[:n_types]
        case_norm = np.linalg.norm(case_vec)
        if case_norm == 0:
            continue
        sim = float(np.dot(query_vec, case_vec) / case_norm)
        # Boost cases from the same major family.
        if major and case.get("major_area", "") and _major_match(major, case["major_area"]):
            sim *= 1.2
        # Boost cases in the same SAT band.
        if sat_band and case.get("sat_band", "") == sat_band:
            sim *= 1.1
        scored.append({
            "similarity": round(sim, 3),
            "school": case.get("school", ""),
            "result": case.get("result", ""),
            "year": case.get("year", ""),
            "major_area": case.get("major_area", ""),
            "sat_band": case.get("sat_band", ""),
            "activity_summary": str(case.get("activity_summary", ""))[:250],
        })

    scored.sort(key=lambda item: -item["similarity"])
    top = scored[:n]
    admits = [c for c in top if c["result"] == "admit"]
    rejects = [c for c in top if c["result"] == "reject"]
    return json.dumps({
        "total_found": len(top),
        "admit_count": len(admits),
        "reject_count": len(rejects),
        "cases": top,
        "admit_activity_patterns": _extract_activity_patterns(admits),
        "reject_activity_patterns": _extract_activity_patterns(rejects),
    }, ensure_ascii=False)
def _major_match(major1: str, major2: str) -> bool:
STEM = {"cs", "computer", "engineering", "math", "physics", "biology", "chemistry", "data", "stem"}
Hum = {"economics", "econ", "history", "philosophy", "politics", "sociology", "psychology", "humanities"}
m1 = major1.lower()
m2 = major2.lower()
for group in [STEM, Hum]:
if any(k in m1 for k in group) and any(k in m2 for k in group):
return True
return False
def _extract_activity_patterns(cases: List[Dict]) -> str:
if not cases:
return "无案例"
summaries = [c["activity_summary"] for c in cases if c["activity_summary"]]
return ";".join(summaries[:3])
def _tool_get_school_stats(school_name: str, hs_category: str = None) -> str:
    """Return the admission statistics of ``school_name`` as a JSON string.

    The lookup is delegated to ``track2.maze_retriever.get_school_admission_stats``;
    ``hs_category`` is forwarded to it as ``hs_cat``.
    """
    from track2.maze_retriever import get_school_admission_stats

    return json.dumps(
        get_school_admission_stats(school_name, hs_cat=hs_category),
        ensure_ascii=False,
    )
def _tool_get_school_taste(school_name: str) -> str:
    """Collect what the knowledge bases say about a school's admission taste.

    Reads ``school_taste_updates.json`` (first key that contains, or is
    contained in, the school name, case-insensitively) and scans
    ``consultant_knowledge_kb.json`` for strings longer than 30 characters
    that mention the school.

    Args:
        school_name: School to look up. A blank name matches nothing.

    Returns:
        JSON string with ``school``, ``taste``, ``red_flags``, ``preferences``
        and, when the consultant file exists, up to five ``consultant_notes``
        (each capped at 200 characters).
    """
    taste_file = DATA_DIR / "school_taste_updates.json"
    consultant_file = DATA_DIR / "consultant_knowledge_kb.json"
    result = {"school": school_name, "taste": {}, "red_flags": [], "preferences": []}
    # A blank needle would be "contained" in every key and every string, so it
    # must short-circuit matching instead of returning arbitrary data.
    needle = (school_name or "").strip().lower()

    if needle and taste_file.exists():
        # Explicit UTF-8: the JSON holds Chinese text.
        with open(taste_file, encoding="utf-8") as f:
            taste_data = json.load(f)
        for key, val in taste_data.items():
            key_lower = key.lower()
            if needle in key_lower or key_lower in needle:
                result["taste"] = val
                break

    if consultant_file.exists():
        notes: List[str] = []
        if needle:
            with open(consultant_file, encoding="utf-8") as f:
                kb = json.load(f)

            def _collect(node) -> None:
                # Depth-first walk; stops as soon as five notes are gathered.
                if len(notes) >= 5:
                    return
                if isinstance(node, str):
                    if needle in node.lower() and len(node) > 30:
                        notes.append(node[:200])
                elif isinstance(node, dict):
                    for child in node.values():
                        _collect(child)
                elif isinstance(node, list):
                    for child in node:
                        _collect(child)

            _collect(kb)
        result["consultant_notes"] = notes[:5]

    return json.dumps(result, ensure_ascii=False)
def _tool_analyze_activity_gap(student_activities: str, similar_cases_summary: str,
                               major: str = "", target_school: str = "") -> str:
    """Ask the LLM to compare the student's activities with admitted cases.

    The activities text is capped at 800 characters and the case summary at
    1000 before being placed in the prompt. Returns a JSON string of the form
    ``{"analysis": <model reply>}``; HTTP errors propagate to the caller.
    """
    activities_excerpt = student_activities[:800]
    cases_excerpt = similar_cases_summary[:1000]
    major_label = major or '未知'
    school_label = target_school or '未指定'
    prompt = f"""你是一位资深美本申请顾问。
学生当前活动:
{activities_excerpt}
相似录取案例的活动模式:
{cases_excerpt}
专业方向:{major_label}
目标学校:{school_label}
请分析:
1. **活动差距**:学生缺少哪类活动?(对比录取案例)
2. **优势**:学生已有的活动中,哪些是加分项?
3. **优先级建议**:最需要补充的 2-3 个活动方向(具体可执行)
4. **时间线**:如果距离申请还有 12 个月,怎么安排?
输出要简洁直接,每点不超过 2 句话。"""
    payload = {
        "model": "grok-4.20-0309",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 1000,
    }
    response = requests.post(GROK_URL, headers=GROK_HEADERS, json=payload, timeout=60)
    response.raise_for_status()
    reply = response.json()["choices"][0]["message"]["content"]
    return json.dumps({"analysis": reply}, ensure_ascii=False)
def _tool_get_ps_distillation(major: str, sat: float = None) -> str:
    """Return a trimmed view of the personal-statement distillation knowledge base.

    Args:
        major: Major direction (accepted for interface compatibility; the
            selection below does not depend on it).
        sat: Optional SAT score; a score below 1500 adds the low-SAT patterns.

    Returns:
        JSON string with version, top themes, core principles, red flags and
        distilled wisdom (plus optional low-SAT and Chinese-student sections),
        or a JSON error object when the knowledge base file is missing.
    """
    ps_kb_file = DATA_DIR / "ps_distillation_kb.json"
    if not ps_kb_file.exists():
        return json.dumps({"error": "ps_distillation_kb.json not found"})
    # Explicit UTF-8: the knowledge base holds Chinese text and the platform
    # default encoding is not UTF-8 everywhere.
    with open(ps_kb_file, encoding="utf-8") as f:
        ps_kb = json.load(f)

    result = {
        "version": ps_kb.get("meta", {}).get("version", "unknown"),
        "high_success_themes": ps_kb.get("theme_taxonomy", {}).get("high_success_themes", [])[:5],
        "core_principles": ps_kb.get("core_principles", [])[:5],
        "red_flags": ps_kb.get("red_flags", [])[:8],
        "distilled_wisdom": ps_kb.get("distilled_wisdom", "")[:500],
    }
    # Low-SAT success patterns are only relevant below 1500.
    if sat and sat < 1500:
        result["low_sat_patterns"] = ps_kb.get("low_sat_success_patterns", [])[:5]
    # Section specific to Chinese applicants, when the KB has one.
    chinese = ps_kb.get("chinese_student_specific", {})
    if chinese:
        result["chinese_specific"] = {
            "common_mistakes": chinese.get("common_mistakes", [])[:4],
            "success_strategies": chinese.get("success_strategies", [])[:3],
        }
    return json.dumps(result, ensure_ascii=False)
def _tool_check_pause_factors(student_profile: Dict, target_schools: List[str] = None) -> str:
    """Run the pause-factor checker and return its findings as a JSON string.

    Warnings are formatted per school (only "Harvard" when no target schools
    are given) and capped at 300 characters each; at most five pause factors
    are included.
    """
    from agent.pause_factor_checker import check_student_profile, format_pause_factor_warning

    report = check_student_profile(student_profile, target_schools)
    schools = target_schools or ["Harvard"]
    formatted = ((name, format_pause_factor_warning(report, name)) for name in schools)
    warnings = [
        {"school": name, "warning": text[:300]}
        for name, text in formatted
        if text
    ]
    payload = {
        "has_pause_factors": report.get("has_pause_factors", False),
        "pause_factors": report.get("pause_factors", [])[:5],
        "warnings": warnings,
    }
    return json.dumps(payload, ensure_ascii=False)
def _tool_search_blog_knowledge(query: str, top_k: int = 5) -> str:
    """Keyword-search the blog knowledge cards.

    The query is lower-cased and split on whitespace; a card scores one point
    for every query word found in its ``claim`` or ``primary_tag``. Cards with
    a zero score are dropped and the rest are returned best-first.

    Args:
        query: Free-text query.
        top_k: Maximum number of cards to return.

    Returns:
        JSON string with ``results`` (claim, evidence quote capped at 150
        characters, school, tag) and ``total_matched``, or a JSON error object
        when the card file is missing.
    """
    blog_file = DATA_DIR / "blog_cards_all.json"
    if not blog_file.exists():
        return json.dumps({"error": "blog_cards_all.json not found"})
    # Explicit UTF-8: the cards contain non-ASCII text.
    with open(blog_file, encoding="utf-8") as f:
        cards = json.load(f)

    # Plain keyword overlap keeps the lookup cheap (no ranking index needed).
    query_words = set(query.lower().split())
    scored = []
    for card in cards:
        if not isinstance(card, dict):
            continue
        # ``or ""`` guards against cards that store an explicit null claim.
        claim = (card.get("claim") or "").lower()
        tag = str(card.get("primary_tag", "")).lower()
        score = sum(1 for word in query_words if word in claim or word in tag)
        if score > 0:
            scored.append((score, card))
    # Stable sort: cards with equal scores keep their file order.
    scored.sort(key=lambda pair: -pair[0])

    results = []
    for _, card in scored[:top_k]:
        evidence = card.get("evidence")
        quote = (evidence.get("quote") or "")[:150] if isinstance(evidence, dict) else ""
        results.append({
            "claim": card.get("claim", ""),
            "evidence": quote,
            "school": card.get("source_school", ""),
            "tag": card.get("primary_tag", ""),
        })
    return json.dumps({"results": results, "total_matched": len(scored)}, ensure_ascii=False)
def _tool_calibrate_school_list(student: Dict, school_list: List[str]) -> str:
    """Calibrate a school list and return a compact JSON summary.

    Delegates to ``agent.school_calibrator.calibrate_school_list``. When the
    calibrator returns a dict with a ``results`` entry, only the first ten
    rows (with notes capped at 100 characters) and the first 500 characters
    of the summary table are kept; any other return value is serialised as is.
    """
    from agent.school_calibrator import calibrate_school_list

    outcome = calibrate_school_list(student, school_list)
    if not (isinstance(outcome, dict) and "results" in outcome):
        return json.dumps(outcome, ensure_ascii=False)

    rows = []
    for row in outcome["results"][:10]:
        rows.append({
            "school": row.get("school", ""),
            "probability": row.get("probability_pct", ""),
            "risk_level": row.get("risk_level", ""),
            "recommended_round": row.get("recommended_round", ""),
            "notes": row.get("notes", "")[:100],
        })
    summary = outcome.get("summary_table", "")[:500]
    return json.dumps({"school_list": rows, "summary": summary}, ensure_ascii=False)
def _tool_get_special_case_patterns(pattern_type: str, major: str = "") -> str:
    """Look up special-case admission patterns of one type.

    Args:
        pattern_type: Key into ``special_case_patterns.json``.
        major: Optional major; when given, patterns tied to a different major
            family are dropped, unless that would leave nothing.

    Returns:
        JSON string with ``pattern_type`` and up to six ``patterns``. If the
        pattern file is missing, ``low_sat_admit`` falls back to the low-SAT
        section of ``ps_distillation_kb.json`` (up to five entries); anything
        else yields a JSON error object.
    """
    scp_file = DATA_DIR / "special_case_patterns.json"
    if not scp_file.exists():
        # Fallback: the PS distillation KB carries a low-SAT section.
        ps_kb_file = DATA_DIR / "ps_distillation_kb.json"
        if ps_kb_file.exists() and pattern_type == "low_sat_admit":
            # Explicit UTF-8: the KB holds Chinese text.
            with open(ps_kb_file, encoding="utf-8") as f:
                ps_kb = json.load(f)
            return json.dumps({
                "pattern_type": pattern_type,
                "patterns": ps_kb.get("low_sat_success_patterns", [])[:5],
            }, ensure_ascii=False)
        return json.dumps({"error": "special_case_patterns.json not found"})

    with open(scp_file, encoding="utf-8") as f:
        scp = json.load(f)
    patterns = scp.get(pattern_type, [])
    if major:
        # Keep patterns without a major, or whose major is in the same family.
        related = [
            p for p in patterns
            if not p.get("major") or _major_match(major, p.get("major", ""))
        ]
        # An empty filter result falls back to the unfiltered list.
        patterns = related if related else patterns
    return json.dumps({
        "pattern_type": pattern_type,
        "patterns": patterns[:6],
    }, ensure_ascii=False)
def _match_high_school(hs_profiles: Dict, hs_name: str):
    """Return the first ``(name, profile)`` whose name fuzzily matches ``hs_name``, else ``None``."""
    # Pass 1: either name contains the other, compared verbatim.
    for name, profile in hs_profiles.items():
        if hs_name in name or name in hs_name:
            return name, profile
    # Pass 2: looser comparison ignoring case and spaces.
    squeezed = hs_name.lower().replace(" ", "")
    for name, profile in hs_profiles.items():
        if squeezed in name.lower().replace(" ", ""):
            return name, profile
    return None


def _match_university(feeder_kb: Dict, school_name: str):
    """Return the first ``(name, data)`` matching ``school_name`` by name or known abbreviation, else ``None``."""
    query = (school_name or "").strip().lower()
    if not query:
        # An empty string is a substring of every name and would otherwise
        # silently select the first university in the file.
        return None
    for uni, data in feeder_kb.items():
        uni_lower = uni.lower()
        if query in uni_lower or uni_lower in query:
            return uni, data
    # Fall back to common short names.
    abbrev_map = {
        "stanford": "Stanford University",
        "harvard": "Harvard University",
        "mit": "Massachusetts Institute of Technology",
        "yale": "Yale University",
        "princeton": "Princeton University",
        "columbia": "Columbia University",
        "upenn": "University of Pennsylvania",
        "penn": "University of Pennsylvania",
        "duke": "Duke University",
        "dartmouth": "Dartmouth College",
        "brown": "Brown University",
        "cornell": "Cornell University",
        "washu": "Washington University in St.Louis",
        "wustl": "Washington University in St.Louis",
    }
    for abbrev, full_name in abbrev_map.items():
        if abbrev not in query:
            continue
        target = full_name.lower()
        for uni, data in feeder_kb.items():
            if target in uni.lower():
                return uni, data
    return None


def _tool_get_feeder_school_info(school_name: str, hs_name: str = None) -> str:
    """Query feeder-school data for a university, or the admission profile of a high school.

    Args:
        school_name: University name in English (e.g. 'Stanford University').
        hs_name: Optional high-school name. When given, the high-school
            profile is returned and ``school_name`` is not used.

    Returns:
        JSON string with the high-school profile or the university's feeder
        distribution, or a JSON error object when the data file or the
        requested entity cannot be found.
    """
    feeder_file = DATA_DIR / "meiben_feeder_kb.json"
    if not feeder_file.exists():
        return json.dumps({"error": "meiben_feeder_kb.json not found"})
    # Explicit UTF-8: high-school names in the KB are Chinese.
    with open(feeder_file, encoding="utf-8") as f:
        kb = json.load(f)

    # High-school profile lookup.
    if hs_name:
        matched = _match_high_school(kb.get("high_school_profiles", {}), hs_name)
        if not matched:
            # ensure_ascii=False keeps the Chinese name readable, like the
            # success payloads.
            return json.dumps(
                {"error": f"High school '{hs_name}' not found in database"},
                ensure_ascii=False,
            )
        name, profile = matched
        return json.dumps({
            "hs_name": name,
            "province": profile.get("province"),
            "total_offers": profile.get("total_offers"),
            "t10_ivy_offers": profile.get("t10_ivy_offers"),
            "t15_offers": profile.get("t15_offers"),
            "t10_rate": profile.get("t10_rate"),
            "t15_rate": profile.get("t15_rate"),
            "top_universities": profile.get("top_universities", [])[:12],
            "years_active": profile.get("years_active"),
        }, ensure_ascii=False)

    # University feeder-school distribution.
    feeder_kb = kb.get("feeder_school_kb", {})
    matched_uni = _match_university(feeder_kb, school_name)
    if not matched_uni:
        # Offer a sample of valid names so the caller can retry.
        available = list(feeder_kb.keys())[:20]
        return json.dumps(
            {"error": f"University '{school_name}' not found", "available_universities": available},
            ensure_ascii=False,
        )

    uni_name, data = matched_uni
    province_pref = kb.get("province_preference", {}).get(uni_name, {})
    top_provinces = ', '.join(list(data.get('province_distribution', {}).keys())[:3])
    return json.dumps({
        "university": uni_name,
        "total_mainland_offers": data.get("total_offers"),
        "top_feeder_schools": data.get("top_feeder_schools", [])[:10],
        "province_distribution": data.get("province_distribution", {}),
        "year_trend": data.get("year_trend", {}),
        "province_preference": province_pref,
        "insight": f"{uni_name} 在大陆的主要来源高中集中在 {top_provinces} 等省市。"
    }, ensure_ascii=False)
def _tool_get_summer_program_recommendation(major: str, sat: float = None,
                                            hs_type: str = "国际高中",
                                            target_tier: str = "T15") -> str:
    """Recommend summer programs and competitions for a target tier.

    Programs come from the ``summer_program_db`` list of
    ``meiben_feeder_kb.json`` and are kept when their ``avg_geili_score``
    reaches the tier threshold (T10: 7.0, T15: 6.0, anything else: 5.0),
    best score first.

    Args:
        major: Major direction; echoed back in the response.
        sat: SAT score (accepted for interface compatibility, not used).
        hs_type: High-school type (accepted for interface compatibility, not used).
        target_tier: "T10", "T15" or "T20".

    Returns:
        JSON string whose ``recommendations`` object always has the keys
        ``summer_programs`` (up to 8), ``competitions`` (up to 6),
        ``total_filtered`` and ``filter_criteria`` — empty lists when the
        data file is missing.
    """
    # TODO(review): selection does not depend on `major`; the program records
    # show no subject field to filter on — confirm the schema before adding one.
    feeder_file = DATA_DIR / "meiben_feeder_kb.json"
    all_programs = []
    if feeder_file.exists():
        # Explicit UTF-8: the KB holds Chinese text.
        with open(feeder_file, encoding="utf-8") as f:
            all_programs = json.load(f).get("summer_program_db", [])

    if target_tier == "T10":
        min_geili = 7.0
    elif target_tier == "T15":
        min_geili = 6.0
    else:
        min_geili = 5.0

    def _score(program: Dict) -> float:
        # A missing or null score counts as 0 instead of raising on comparison.
        return program.get("avg_geili_score") or 0

    filtered = sorted(
        (p for p in all_programs if _score(p) >= min_geili),
        key=_score,
        reverse=True,
    )
    recommendations = {
        "summer_programs": [p for p in filtered if p.get("type") == "summer_program"][:8],
        "competitions": [p for p in filtered if p.get("type") == "competition"][:6],
        "total_filtered": len(filtered),
        # The opening parenthesis keeps the threshold and the tier apart
        # ("6.0(T15 目标)" rather than "6.0T15 目标)").
        "filter_criteria": f"给力指数 >= {min_geili}({target_tier} 目标)",
    }
    return json.dumps({
        "major": major,
        "target_tier": target_tier,
        "recommendations": recommendations,
        "note": "给力指数 10=顶级(RSI/ISEF),8-9=强力,6-7=良好,5以下=一般",
    }, ensure_ascii=False)
# ─── SSE 工具 ──────────────────────────────────────────────────────────────────
def _sse(event: str, data: Any) -> str:
return f"event: {event}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n"
# ─── 主 Agentic Loop ───────────────────────────────────────────────────────────
# System prompt for the planning agent: advisor persona, working method,
# tool-usage principles and the expected Markdown output format.
SYSTEM_PROMPT = """你是一位资深美本申请顾问,拥有访问真实录取数据库的工具集。
你的工作方式:
1. 仔细分析用户的问题和学生信息
2. 主动调用工具获取真实数据(不要凭空推断)
3. 基于工具返回的真实案例数据给出具体建议
4. 当数据足够时,输出完整的规划建议
重要原则:
- 优先调用 find_similar_cases 获取真实案例基准
- 对于活动问题,同时调用 find_similar_activity_profiles
- 对于选校问题,调用 get_school_stats + get_school_taste
- 数据说话:所有建议必须有真实案例或统计数据支撑
- 不要在没有数据的情况下给出录取概率估算
输出格式:
- 使用 Markdown 格式
- 数据来源要标注(如「来自 MAZE 8 个相似案例」)
- 建议要具体可执行,不要泛泛而谈"""
def _preview_tool_result(result_str: str):
    """Build a short preview of a tool result for the ``tool_result`` SSE event."""
    try:
        parsed = json.loads(result_str)
        if isinstance(parsed, dict):
            # First three keys, each value stringified and capped at 100 chars.
            return {key: str(parsed[key])[:100] for key in list(parsed.keys())[:3]}
        return str(parsed)[:200]
    except Exception:
        return result_str[:200]


def run_agentic_plan(
    query: str,
    student: Dict,
    stream: bool = True,
) -> Generator[str, None, None]:
    """Run the tool-calling planning loop and stream its progress as SSE frames.

    Each round sends the conversation (system prompt, student context and
    question, previous tool calls and results) to the LLM. If the model
    requests tools they are executed and their results appended; otherwise
    the model's text is emitted as the final answer. After ``MAX_STEPS``
    rounds without a final answer, one extra request without tools forces a
    conclusion.

    Args:
        query: The user's natural-language question.
        student: Student profile {sat, toefl, gpa, major, hs_type, activities, target_schools}.
        stream: Accepted for interface compatibility; output is always yielded
            as SSE frames.

    Yields:
        SSE-formatted strings for the events ``agentic_start``, ``thinking``,
        ``tool_call``, ``tool_result``, ``final_answer``, ``max_steps_reached``,
        ``error`` and ``agentic_done``. A failed LLM call inside the loop
        yields ``error`` and ends the stream without ``agentic_done``.
    """
    yield _sse("agentic_start", {
        "message": "规划智能体启动,正在分析问题...",
        "tools_available": [t["function"]["name"] for t in TOOLS],
    })

    student_context = _format_student_context(student)
    messages = [
        # The system prompt carries the tool-usage policy; without it the
        # model only ever sees the raw user message.
        {"role": "system", "content": SYSTEM_PROMPT},
        {
            "role": "user",
            "content": f"{student_context}\n\n用户问题:{query}",
        },
    ]

    step = 0
    while step < MAX_STEPS:
        step += 1
        yield _sse("thinking", {"step": step, "message": f"第 {step} 步:推理中..."})
        try:
            resp = requests.post(
                GROK_URL,
                headers=GROK_HEADERS,
                json={
                    "model": "grok-4.20-0309",
                    "messages": messages,
                    "tools": TOOLS,
                    "tool_choice": "auto",
                    "max_tokens": 4000,
                },
                timeout=120,
            )
            resp.raise_for_status()
            # Parsed inside the try so a malformed body is reported as an
            # error event instead of crashing the generator.
            choice = resp.json()["choices"][0]
            message = choice["message"]
        except Exception as e:
            yield _sse("error", {"message": f"LLM 调用失败:{str(e)}"})
            return

        finish_reason = choice.get("finish_reason", "")
        # ``or []`` also covers an explicit null in the response.
        tool_calls = message.get("tool_calls") or []

        # Record the assistant turn; "tool_calls" is only attached when there
        # are calls, so no empty list is sent back on a later request.
        assistant_turn = {"role": "assistant", "content": message.get("content") or ""}
        if tool_calls:
            assistant_turn["tool_calls"] = tool_calls
        messages.append(assistant_turn)

        if not tool_calls or finish_reason == "stop":
            # The model chose to answer directly.
            yield _sse("final_answer", {
                "content": message.get("content") or "",
                "steps_taken": step,
            })
            break

        tool_results = []
        for index, tc in enumerate(tool_calls):
            function_spec = tc.get("function", {})
            tool_name = function_spec.get("name", "")
            # The fallback id includes the index so it stays unique within a step.
            tool_call_id = tc.get("id", f"call_{step}_{index}")
            try:
                tool_input = json.loads(function_spec.get("arguments", "{}"))
            except json.JSONDecodeError:
                tool_input = {}
            if not isinstance(tool_input, dict):
                # Tools take keyword arguments; anything else cannot be applied.
                tool_input = {}

            yield _sse("tool_call", {
                "step": step,
                "tool": tool_name,
                "input": tool_input,
                "call_id": tool_call_id,
            })

            result_str = _execute_tool(tool_name, tool_input)

            yield _sse("tool_result", {
                "step": step,
                "tool": tool_name,
                "call_id": tool_call_id,
                "preview": _preview_tool_result(result_str),
                "result_length": len(result_str),
            })
            tool_results.append({
                "tool_call_id": tool_call_id,
                "role": "tool",
                "content": result_str,
            })

        messages.extend(tool_results)
    else:
        # Reached only when the loop ran out of steps without a ``break``.
        yield _sse("max_steps_reached", {
            "message": f"已达到最大步数 {MAX_STEPS},输出当前结论",
            "steps_taken": MAX_STEPS,
        })
        # One more request, without tools, to force a written conclusion.
        try:
            resp = requests.post(
                GROK_URL,
                headers=GROK_HEADERS,
                json={
                    "model": "grok-4.20-0309",
                    "messages": messages + [{"role": "user", "content": "请基于以上所有工具结果,输出最终的规划建议。"}],
                    "max_tokens": 3000,
                },
                timeout=120,
            )
            resp.raise_for_status()
            final_content = resp.json()["choices"][0]["message"]["content"]
            yield _sse("final_answer", {"content": final_content or "", "steps_taken": MAX_STEPS})
        except Exception as e:
            yield _sse("error", {"message": f"最终输出失败:{str(e)}"})

    yield _sse("agentic_done", {"message": "规划完成", "total_steps": step})
def _format_student_context(student: Dict) -> str:
"""将学生档案格式化为 LLM 可读的上下文"""
lines = ["【学生档案】"]
if student.get("sat"):
lines.append(f"- SAT:{student['sat']}")
if student.get("toefl"):
lines.append(f"- TOEFL:{student['toefl']}")
if student.get("gpa"):
lines.append(f"- GPA:{student['gpa']}")
if student.get("major"):
lines.append(f"- 专业方向:{student['major']}")
if student.get("hs_type") or student.get("high_school"):
lines.append(f"- 高中:{student.get('hs_type') or student.get('high_school')}")
if student.get("activities"):
lines.append(f"- 活动:{str(student['activities'])[:600]}")
if student.get("target_schools"):
schools = student["target_schools"]
if isinstance(schools, list):
schools = "、".join(schools)
lines.append(f"- 目标学校:{schools}")
if student.get("grade"):
lines.append(f"- 年级:{student['grade']}")
return "\n".join(lines)