# planning-agent-pro / agent / advisor_knowledge.py
# catninja123
# Phase 6: Add geili_index + red_flags knowledge bases
# 2b34e89 (verified)
"""
顾问战略知识库(Phase 4 升级版)
加载并查询来自顾问战略会议和内部文件的结构化知识。
数据来源:
1. data/school_taste_updates.json - 学校口味更新(顾问会议提炼)
2. data/special_case_patterns.json - 特殊案例模式(低GPA/Test Optional/IB等)
3. data/stanford_scoring_rubric.json - Stanford 打分逆向工程(两份真实 FERPA 文件)
4. data/harvard_casebook_rubric.json - Harvard 2012 Casebook(11个真实 Case + Pause Factor)
5. data/harvard_reading_process_kb.json - Harvard 内部阅读流程手册(CONFIDENTIAL)
6. data/scholarship_kb.json - 查校列表奖学金知识库(678所学校,含 merit/need-based)
7. data/school_list_strategy_kb.json - 选校策略知识库(录取率/ED优势/标化要求)
8. data/consultant_knowledge_kb.json - 三士渡顾问知识库(PPT/PDF 演讲提炼)
使用方式:
from agent.advisor_knowledge import AdvisorKnowledge
kb = AdvisorKnowledge()
taste = kb.get_school_taste("Vanderbilt University")
patterns = kb.get_special_patterns(gpa_range=(3.0, 3.5), test_optional=True)
stanford_ctx = kb.get_stanford_context("SAT 1580,海藻研究,REA")
harvard_ctx = kb.get_harvard_context("Harvard 录取标准")
"""
import json
import os
import re
from pathlib import Path
from typing import Dict, List, Optional
# Directory the knowledge-base JSON files are read from: the "data" folder
# located two levels above this file (a sibling of this module's directory).
_DATA_DIR = Path(__file__).parent.parent / "data"
class AdvisorKnowledge:
    """Advisor strategy knowledge base.

    Loads a fixed set of JSON knowledge files from ``_DATA_DIR`` once at
    construction time and renders query-specific excerpts of them as plain
    text intended to be placed into an LLM prompt.  A file that is missing,
    unreadable or has the wrong top-level JSON type leaves the corresponding
    store empty; a diagnostic is printed for the latter two cases.
    """

    # (attribute name, file name, expected top-level JSON type) for every
    # knowledge file.  Drives both loading and ``is_empty``.
    _KB_FILES = (
        ("_school_taste", "school_taste_updates.json", dict),
        ("_special_patterns", "special_case_patterns.json", list),
        # Phase 4: Stanford scoring rubric, Harvard casebook, Harvard reading process
        ("_stanford_rubric", "stanford_scoring_rubric.json", dict),
        ("_harvard_casebook", "harvard_casebook_rubric.json", dict),
        ("_harvard_reading", "harvard_reading_process_kb.json", dict),
        # Phase 5: scholarships, school-list strategy, consultant knowledge
        ("_scholarship_kb", "scholarship_kb.json", dict),
        ("_school_strategy_kb", "school_list_strategy_kb.json", dict),
        ("_consultant_kb", "consultant_knowledge_kb.json", dict),
        # Phase 6: admission-blog red flags, "geili index"
        ("_admission_red_flags", "admission_blog_red_flags.json", dict),
        ("_geili_index", "geili_index_kb.json", dict),
    )

    # Short ASCII keywords that must match as whole words.  As plain
    # substrings they fire on unrelated text ("rsi" in "university",
    # "ec" in "recommendation", "ed" in "need", "ea" in "research").
    _WHOLE_WORD_PATTERNS = {
        kw: re.compile(r"(?<![a-z])" + re.escape(kw) + r"(?![a-z])")
        for kw in ("rsi", "ssp", "simr", "tasp", "intel", "ec", "ed", "ea", "lac")
    }

    def __init__(self):
        self._school_taste: Dict = {}
        self._special_patterns: List[Dict] = []
        self._stanford_rubric: Dict = {}
        self._harvard_casebook: Dict = {}
        self._harvard_reading: Dict = {}
        self._scholarship_kb: Dict = {}
        self._school_strategy_kb: Dict = {}
        self._consultant_kb: Dict = {}
        self._admission_red_flags: Dict = {}
        self._geili_index: Dict = {}
        self._load()

    # ──────────────────────────────────────────────
    # Loading helpers
    # ──────────────────────────────────────────────
    @staticmethod
    def _load_json(filename: str, expected_type: type):
        """Read one JSON file from ``_DATA_DIR``.

        Returns the parsed content, or a fresh empty ``expected_type()`` when
        the file is absent, cannot be read/parsed, or its top-level value is
        not an ``expected_type``.
        """
        path = _DATA_DIR / filename
        if not path.exists():
            return expected_type()
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except Exception as e:  # best-effort loader: report and continue
            print(f"[AdvisorKnowledge] 加载 {filename} 失败: {e}")
            return expected_type()
        if not isinstance(data, expected_type):
            # A wrong top-level shape would otherwise crash later in the
            # query methods (``.get`` on a list, iteration over a dict, ...).
            print(
                f"[AdvisorKnowledge] 加载 {filename} 失败: "
                f"顶层结构应为 {expected_type.__name__}"
            )
            return expected_type()
        return data

    def _load(self) -> None:
        """Load every knowledge file listed in ``_KB_FILES``."""
        for attr, filename, expected_type in self._KB_FILES:
            setattr(self, attr, self._load_json(filename, expected_type))

    @classmethod
    def _matches_any(cls, text_lower: str, keywords) -> bool:
        """Return True if any keyword occurs in the already-lowercased text.

        Keywords are lowercased before comparison.  Keywords listed in
        ``_WHOLE_WORD_PATTERNS`` must not be adjacent to another ASCII
        letter; all others match as plain substrings.
        """
        for kw in keywords:
            kw_lower = kw.lower()
            pattern = cls._WHOLE_WORD_PATTERNS.get(kw_lower)
            if pattern is not None:
                if pattern.search(text_lower):
                    return True
            elif kw_lower in text_lower:
                return True
        return False

    # ──────────────────────────────────────────────
    # Original query methods
    # ──────────────────────────────────────────────
    def get_school_taste(self, school_name: str) -> Optional[Dict]:
        """Return the "taste" entry for a school, or None.

        Tries an exact key match first, then a case-insensitive containment
        match in either direction.  An empty ``school_name`` never matches
        fuzzily (it would otherwise be "contained" in every key).
        """
        if not self._school_taste:
            return None
        if school_name in self._school_taste:
            return self._school_taste[school_name]
        if not school_name:
            return None
        school_lower = school_name.lower()
        for key, val in self._school_taste.items():
            key_lower = key.lower()
            if not key_lower:
                continue
            if school_lower in key_lower or key_lower in school_lower:
                return val
        return None

    def get_special_patterns(
        self,
        gpa_range: Optional[tuple] = None,
        test_optional: Optional[bool] = None,
        ib_score_range: Optional[tuple] = None,
        school: Optional[str] = None,
    ) -> List[Dict]:
        """Return the special-case patterns that pass every given filter.

        Args:
            gpa_range: inclusive (low, high); patterns without a ``gpa``
                condition are not filtered out by it.
            test_optional: required value of the ``test_optional`` condition.
            ib_score_range: inclusive (low, high); patterns without an
                ``ib_score`` condition are not filtered out by it.
            school: case-insensitive containment match (either direction)
                against the pattern's ``school``; patterns with no school
                are kept.
        """
        if not self._special_patterns:
            return []
        school_lower = school.lower() if school is not None else None
        results = []
        for pattern in self._special_patterns:
            if not isinstance(pattern, dict):
                continue
            conditions = pattern.get("conditions") or {}
            if gpa_range is not None:
                pattern_gpa = conditions.get("gpa")
                if pattern_gpa is not None and not (gpa_range[0] <= pattern_gpa <= gpa_range[1]):
                    continue
            if test_optional is not None and conditions.get("test_optional") != test_optional:
                continue
            if ib_score_range is not None:
                pattern_ib = conditions.get("ib_score")
                if pattern_ib is not None and not (ib_score_range[0] <= pattern_ib <= ib_score_range[1]):
                    continue
            if school_lower is not None:
                # ``or ""`` guards against an explicit null school in the data.
                pattern_school = (pattern.get("school") or "").lower()
                if school_lower not in pattern_school and pattern_school not in school_lower:
                    continue
            results.append(pattern)
        return results

    # ──────────────────────────────────────────────
    # Phase 4 methods
    # ──────────────────────────────────────────────
    def get_stanford_context(self, query: str = "") -> str:
        """Render the Stanford scoring rubric as prompt text.

        ``query`` is accepted for interface symmetry and is not used.
        Returns "" when the rubric is not loaded.
        """
        if not self._stanford_rubric:
            return ""
        parts = ["【Stanford 招生评分体系(来源:两份真实 FERPA 打分文件逆向工程)】"]
        # Scoring dimensions
        dims = self._stanford_rubric.get("scoring_dimensions", {})
        if dims:
            parts.append("\n▌ 六大评分维度(1=最强):")
            for dim_name, dim_data in dims.items():
                if isinstance(dim_data, dict):
                    score_1 = dim_data.get("1", "")
                    score_2 = dim_data.get("2", "")
                    parts.append(f" • {dim_name}:1分={score_1};2分={score_2}")
        # Real cases
        cases = self._stanford_rubric.get("real_cases", [])
        if cases:
            parts.append("\n▌ 真实录取案例:")
            for c in cases:
                name = c.get("anonymized_name", "匿名")
                outcome = c.get("outcome", "")
                overall = (c.get("scores") or {}).get("Overall", "")
                highlights = c.get("key_highlights") or []
                # Opening parenthesis restored: the line previously emitted an
                # unbalanced ")" with name and outcome run together.
                parts.append(f" • {name}({outcome},Overall {overall}):{', '.join(highlights[:2])}")
        # Implications for mainland-China applicants
        implications = self._stanford_rubric.get("china_student_implications", {})
        if implications:
            parts.append("\n▌ 大陆学生申请 Stanford 关键洞察:")
            for key, val in implications.items():
                if isinstance(val, str):
                    parts.append(f" • {key}:{val}")
                elif isinstance(val, dict) and val.get("insight"):
                    parts.append(f" • {key}:{val['insight']}")
        return "\n".join(parts)

    def get_harvard_context(self, query: str = "") -> str:
        """Render Harvard reading-process and casebook knowledge as prompt text.

        ``query`` is accepted for interface symmetry and is not used.
        Returns "" when neither Harvard store has anything to show.
        """
        parts = []
        # Reading process (scoring standard)
        if self._harvard_reading:
            dims = self._harvard_reading.get("dimensions", {})
            if dims:
                parts.append("【Harvard 内部评分标准(来源:SFFA v. Harvard 诉讼证据 HARV00097936,CONFIDENTIAL)】")
                parts.append("\n▌ 六大评分维度(1=最强,支持 +/- 修饰):")
                for dim_name, dim_data in dims.items():
                    if isinstance(dim_data, dict):
                        score_1 = (dim_data.get("1") or "")[:60]
                        score_2 = (dim_data.get("2") or "")[:60]
                        key_insight = dim_data.get("key_insight", "")
                        line = f" • {dim_name}:1={score_1};2={score_2}"
                        if key_insight:
                            line += f"(⚠ {key_insight[:50]})"
                        parts.append(line)
            # Implications for mainland-China applicants
            china = self._harvard_reading.get("china_student_implications", {})
            if china:
                parts.append("\n▌ 大陆学生关键洞察:")
                for key, val in china.items():
                    if isinstance(val, dict):
                        insight = val.get("key_insight") or val.get("key_note") or ""
                        if insight:
                            parts.append(f" • {key}:{insight[:80]}")
        # Casebook real cases
        if self._harvard_casebook:
            cases = self._harvard_casebook.get("cases", [])
            pause_patterns = self._harvard_casebook.get("pause_factor_patterns", [])
            if cases:
                parts.append("\n【Harvard 2012 Casebook(11个真实 Case,SFFA 诉讼证据)】")
                parts.append("▌ 典型录取/拒绝案例(含 Pause Factor):")
                for c in cases[:5]:  # only the first five cases are shown
                    name = c.get("case_id", "")
                    outcome = c.get("outcome", "")
                    pause = c.get("pause_factor", "无")
                    overall = (c.get("scores") or {}).get("Overall", "")
                    key_factors = c.get("key_factors") or []
                    parts.append(
                        f" • {name}({outcome},Overall {overall},Pause: {pause}):{', '.join(key_factors[:2])}"
                    )
            if pause_patterns:
                parts.append("\n▌ Pause Factor 规律(招生官暂停深思的信号):")
                for p in pause_patterns[:5]:
                    parts.append(f" • {p}")
            insights = self._harvard_casebook.get("key_insights", [])
            if insights:
                parts.append("\n▌ Casebook 核心洞察:")
                for ins in insights[:4]:
                    parts.append(f" • {ins}")
        return "\n".join(parts) if parts else ""

    # ──────────────────────────────────────────────
    # Phase 5 methods
    # ──────────────────────────────────────────────
    @staticmethod
    def _award_ranking_rows(items) -> List[str]:
        """Format rows of the "average award" ranking (shared by two branches)."""
        return [
            f" {item['rank']:2d}. {item['school']:<40} "
            f"平均金额:${item['intl_avg_award_usd']:,} "
            f"获奖比例:{item['intl_pct_receiving_aid']}% "
            f"录取率:{item['intl_admission_rate_pct']}% "
            f"US#{item.get('us_news') or 'N/A'}"
            for item in items
        ]

    def get_scholarship_context(self, school_name: str = "", query: str = "") -> str:
        """Render scholarship data as prompt text.

        With ``school_name`` the first school whose name contains it
        (case-insensitive) is shown in full, centred on three figures: share
        of international students receiving aid, their average award, and
        the international admission rate.  Without it, a ranking is chosen
        from keywords in ``query``.  Returns "" when the store is not loaded.
        """
        if not self._scholarship_kb:
            return ""
        meta = self._scholarship_kb.get("metadata", {})
        rankings = self._scholarship_kb.get("rankings", {})
        all_schools = self._scholarship_kb.get("all_schools", [])
        parts = [f"【奖学金知识库(来源:查校列表2025-2026,{meta.get('total_schools', 678)}所学校)】"]
        parts.append(f"数据覆盖:{meta.get('schools_with_pct_aid', 0)}所学校有国际生获奖比例数据,{meta.get('schools_with_avg_award', 0)}所有平均金额数据,{meta.get('schools_meets_full_need', 0)}所满足全额需求")
        q_lower = query.lower()

        if school_name:
            needle = school_name.lower()
            s = next(
                (row for row in all_schools if needle in (row.get("school") or "").lower()),
                None,
            )
            if s is None:
                parts.append(f"\n⚠ 未找到 '{school_name}' 的奖学金数据")
            else:
                parts.append(f"\n▌ {s['school']} 国际生奖学金完整数据:")
                # The three core figures
                parts.append(f" 「维度1」国际生获得FA的百分比:{s.get('intl_pct_receiving_aid', '无数据')}%")
                parts.append(f" 「维度2」国际生平均获奖金额:${s.get('intl_avg_award_usd', '无数据')}")
                parts.append(f" 「维度3」国际生录取率:{s.get('intl_admission_rate_pct', '无数据')}%")
                # Supporting data
                parts.append(f" 奖学金类型:{s.get('aid_types_for_intl', '未知')}")
                parts.append(f" 获奖人数:{s.get('intl_n_awarded', '未知')}")
                parts.append(f" 获奖后平均费用:${s.get('intl_avg_cost_after_aid', '未知')}")
                parts.append(f" 满足全额需求:{s.get('meets_full_demonstrated_need', '未知')}")
                parts.append(f" 最大Merit奖金额:${s.get('largest_merit_scholarship_usd', '无')}")
                parts.append(f" 奖学金名称:{s.get('merit_scholarship_name', '无')}")
                parts.append(f" 2024-25获奖后学费:${s.get('tuition_after_merit_2425', '未知')}")
                parts.append(f" 2024-25获奖后总费用:${s.get('coa_after_merit_2425', '未知')}")
                parts.append(f" 申请方式:{s.get('how_to_apply', '未知')}")
                if s.get('notes'):
                    parts.append(f" 备注:{s['notes']}")
                parts.append(f" US News排名:{s.get('us_news_national') or s.get('us_news_lac') or 'N/A'}")
            return "\n".join(parts)

        # No school given: pick a ranking from the query wording.
        if self._matches_any(q_lower, ["性价比", "划算", "实惠", "小学校", "文理学院", "lac"]):
            top_val = rankings.get("top40_composite_value", [])[:15]
            parts.append("\n▌ 小型文理学院综合性价排行榜 Top 15(获奖比例+金额+选择性加权):")
            for item in top_val:
                parts.append(
                    f" {item['rank']:2d}. {item['school']:<40} "
                    f"获奖比例:{item['intl_pct_receiving_aid']}% "
                    f"平均金额:${item['intl_avg_award_usd']} "
                    f"录取率:{item['intl_admission_rate_pct']}% "
                    f"US#{item.get('us_news') or 'N/A'}"
                )
        # Checked before the average-award branch: "比例最高" / "最多人获奖"
        # contain that branch's "最高" / "最多" and were unreachable after it.
        elif self._matches_any(q_lower, ["比例最高", "最容易", "最多人获奖", "最容易拿"]):
            top_pct = rankings.get("top30_by_intl_pct_receiving_aid", [])[:15]
            parts.append("\n▌ 国际生获奖比例最高 Top 15:")
            for item in top_pct:
                parts.append(
                    f" {item['rank']:2d}. {item['school']:<40} "
                    f"获奖比例:{item['intl_pct_receiving_aid']}% "
                    f"平均金额:${item['intl_avg_award_usd']} "
                    f"录取率:{item['intl_admission_rate_pct']}%"
                )
        elif self._matches_any(q_lower, ["平均", "最多", "金额最高", "最大金额", "最高"]):
            top_award = rankings.get("top30_by_intl_avg_award", [])[:15]
            parts.append("\n▌ 国际生平均获奖金额最高 Top 15:")
            parts.extend(self._award_ranking_rows(top_award))
        elif self._matches_any(q_lower, ["全额", "need", "need-blind", "need-based", "需要基础"]):
            meets = rankings.get("meets_full_need_schools", [])[:15]
            parts.append(f"\n▌ 满足全额 Need-Based Aid 的学校({len(meets)}所,按US News排序):")
            for s in meets:
                parts.append(
                    f" US#{str(s.get('us_news') or 'N/A'):<5} {s['school']:<40} "
                    f"获奖比例:{s['intl_pct_receiving_aid']}% "
                    f"平均金额:${s['intl_avg_award_usd']} "
                    f"录取率:{s['intl_admission_rate_pct']}%"
                )
        elif self._matches_any(q_lower, ["merit", "奖学金最大", "最大merit"]):
            top_merit = rankings.get("top30_by_largest_merit", [])[:15]
            parts.append("\n▌ 最大 Merit Scholarship 金额 Top 15:")
            for item in top_merit:
                parts.append(
                    f" {item['rank']:2d}. {item['school']:<40} "
                    f"最大Merit:${item['largest_merit_scholarship_usd']:,} "
                    f"奖学金名称:{item['merit_scholarship_name']} "
                    f"获奖比例:{item['intl_pct_receiving_aid']}%"
                )
        else:
            # Default: average-award ranking plus top schools meeting full need.
            top_award = rankings.get("top30_by_intl_avg_award", [])[:12]
            parts.append("\n▌ 国际生平均获奖金额最高 Top 12(含录取难度参考):")
            parts.extend(self._award_ranking_rows(top_award))
            meets = rankings.get("meets_full_need_schools", [])[:8]
            if meets:
                parts.append("\n▌ 顶校中满足全额 Need-Based Aid(对国际生):")
                for s in meets:
                    parts.append(
                        f" US#{str(s.get('us_news') or 'N/A'):<5} {s['school']:<40} "
                        f"获奖比例:{s['intl_pct_receiving_aid']}% "
                        f"平均金额:${s['intl_avg_award_usd']}"
                    )
        return "\n".join(parts)

    def get_consultant_context(self, query: str = "") -> str:
        """Render the consultant knowledge base as prompt text.

        Core strategies, the unique methodology and the China-student advice
        are always included when present; the school-selection,
        anti-packaging and 2026-results sections depend on keywords in
        ``query``.  Returns "" when the store is not loaded.
        """
        if not self._consultant_kb:
            return ""
        parts = ["【三士渡顾问知识库(来源:徐谭妥、刘又铭、濮阳演讲 PPT)】"]
        q_lower = query.lower()
        # Core strategies
        core = self._consultant_kb.get("core_strategies", [])
        if core:
            parts.append("\n▌ 核心申请策略:")
            for s in core[:4]:
                parts.append(f" • {s.get('strategy', '')}:{(s.get('description') or '')[:100]}")
        # School-selection methodology
        if self._matches_any(q_lower, ["选校", "school", "选学校", "ed", "ea"]):
            school_method = self._consultant_kb.get("school_selection_methodology", [])
            if school_method:
                parts.append("\n▌ 选校方法论:")
                for m in school_method[:3]:
                    parts.append(f" • {m.get('principle', '')}:{(m.get('detail') or '')[:100]}")
        # Anti-packaging philosophy
        if self._matches_any(q_lower, ["包装", "真实", "authentic", "反包装", "材料"]):
            anti = self._consultant_kb.get("anti_packaging_philosophy", [])
            if anti:
                parts.append("\n▌ 反包装理念:")
                for a in anti[:3]:
                    parts.append(f" • {a.get('concept', '')}:{(a.get('description') or '')[:100]}")
        # Unique methodology
        unique = self._consultant_kb.get("stoooges_unique_methodology", [])
        if unique:
            parts.append("\n▌ 三士渡独特方法论:")
            for u in unique[:3]:
                parts.append(f" • {u.get('method', '')}:{(u.get('description') or '')[:80]}")
        # 2026 admission results
        results = self._consultant_kb.get("admission_results_2026", {})
        if results and self._matches_any(q_lower, ["录取", "结果", "2026", "成绩"]):
            parts.append("\n▌ 三士渡 2026 届录取成绩(截至4月1日):")
            for school, count in results.items():
                if school != "other_results" and count:
                    parts.append(f" • {school}:{count}")
        # Advice for mainland-China applicants
        china_advice = self._consultant_kb.get("china_student_advice", [])
        if china_advice:
            parts.append("\n▌ 大陆学生申请建议:")
            for a in china_advice[:3]:
                parts.append(f" • {a.get('advice', '')}:{(a.get('action') or '')[:80]}")
        return "\n".join(parts)

    def get_school_strategy(self, school_name: str) -> Optional[Dict]:
        """Return the first strategy entry whose school name contains
        ``school_name`` (case-insensitive), or None.

        An empty ``school_name`` returns None rather than the first entry.
        """
        if not self._school_strategy_kb or not school_name:
            return None
        needle = school_name.lower()
        for s in self._school_strategy_kb.get("schools", []):
            if needle in (s.get("school") or "").lower():
                return s
        return None

    # ──────────────────────────────────────────────
    # Unified entry point
    # ──────────────────────────────────────────────
    def _semantic_fallback(self, query: str, search_fn) -> Optional[str]:
        """Run ``search_fn`` over the loaded Stanford/Harvard/consultant stores
        and format up to three hits scoring above 0.1, or return None."""
        kb_configs = []
        if self._stanford_rubric:
            kb_configs.append(('stanford_scoring_rubric', self._stanford_rubric))
        if self._harvard_casebook:
            kb_configs.append(('harvard_casebook_rubric', self._harvard_casebook))
        if self._consultant_kb:
            kb_configs.append(('consultant_knowledge_kb', self._consultant_kb))
        if not kb_configs:
            return None
        semantic_results = search_fn(
            query=query,
            kb_configs=kb_configs,
            top_k_per_kb=2,
            total_top_k=4,
        )
        labels = {
            'stanford_scoring_rubric': '[Stanford FERPA]',
            'harvard_casebook_rubric': '[Harvard Casebook]',
            'consultant_knowledge_kb': '[三士渡顾问经验]',
        }
        semantic_parts = []
        for r in semantic_results or []:
            if r['score'] > 0.1:
                source_label = labels.get(r['source'], f"[{r['source']}]")
                semantic_parts.append(f"{source_label} {r['section']}: {r['text'][:300]}")
        if not semantic_parts:
            return None
        return "\n【语义检索补充知识】\n" + "\n".join(semantic_parts[:3])

    def get_context_for_query(self, query: str, school: Optional[str] = None) -> str:
        """Build the advisor-knowledge context for an LLM prompt.

        Routes by keywords in ``query`` and by ``school`` to the individual
        stores, concatenating every non-empty section.  When fewer than two
        sections were produced, an optional semantic search
        (``agent.semantic_retriever.multi_kb_search``) is tried as a
        fallback; if that module cannot be imported the fallback is skipped.
        """
        parts: List[str] = []
        q_lower = query.lower()
        school_lower = school.lower() if school else ""

        # 1. School taste
        if school:
            taste = self.get_school_taste(school)
            if taste:
                parts.append(f"【顾问战略知识(来源:{taste.get('source', '顾问会议')})】")
                if taste.get("advisor_consensus"):
                    parts.append(f"顾问共识:{taste['advisor_consensus']}")
                if taste.get("test_optional_effective") is not None:
                    parts.append(f"Test Optional 有效性:{'有效' if taste['test_optional_effective'] else '建议提交标化'}")
                if taste.get("ib_friendly") is not None:
                    parts.append(f"IB 友好度:{'友好' if taste['ib_friendly'] else '一般'}")
                if taste.get("notes"):
                    parts.append(f"补充说明:{taste['notes']}")

        # 2. Special-case patterns.  All tests use the lowercased query so
        # that "无SAT" matches too.
        if self._matches_any(q_lower, ["test optional", "test-optional", "无sat", "不提交标化"]):
            patterns = self.get_special_patterns(test_optional=True, school=school)
            if patterns:
                parts.append(f"\n【相关特殊案例模式({len(patterns)}个)】")
                for p in patterns[:3]:
                    parts.append(
                        f"- {p.get('anonymized_case', '匿名')}:{p.get('description', '')} "
                        f"→ {p.get('outcome', '')}({p.get('year', '')})"
                    )
                    if p.get("advisor_notes"):
                        parts.append(f" 顾问备注:{p['advisor_notes']}")

        # 3. Stanford scoring
        stanford_keywords = ["stanford", "斯坦福", "stanford打分", "stanford评分", "stanford录取"]
        stanford_added = False
        if "stanford" in school_lower or self._matches_any(q_lower, stanford_keywords):
            stanford_ctx = self.get_stanford_context(query)
            if stanford_ctx:
                parts.append(stanford_ctx)
                stanford_added = True

        # 4. Harvard scoring + casebook
        harvard_keywords = ["harvard", "哈佛", "harvard打分", "pause factor", "casebook", "harvard录取"]
        harvard_added = False
        if "harvard" in school_lower or self._matches_any(q_lower, harvard_keywords):
            harvard_ctx = self.get_harvard_context(query)
            if harvard_ctx:
                parts.append(harvard_ctx)
                harvard_added = True

        # 5. Generic top-school admission logic: offer both schools, skipping
        # whichever one was already added above because of ``school``.
        top_school_keywords = ["录取标准", "招生官", "评分维度", "holistic", "整体评估", "顶校录取", "t10录取"]
        if self._matches_any(q_lower, top_school_keywords):
            if not self._matches_any(q_lower, stanford_keywords + harvard_keywords):
                if not stanford_added:
                    stanford_ctx = self.get_stanford_context(query)
                    if stanford_ctx:
                        parts.append(stanford_ctx)
                if not harvard_added:
                    harvard_ctx = self.get_harvard_context(query)
                    if harvard_ctx:
                        parts.append(harvard_ctx)

        # 6. Scholarships
        scholarship_keywords = ["奖学金", "scholarship", "助学金", "financial aid", "merit", "need-based", "学费", "费用", "cost"]
        if self._matches_any(q_lower, scholarship_keywords):
            scholarship_ctx = self.get_scholarship_context(school_name=school or "", query=query)
            if scholarship_ctx:
                parts.append(scholarship_ctx)

        # 6a. Geili index.  "暑期项目" added next to the original "暂期项目",
        # which looks like a typo for it.
        geili_keywords = [
            "给力指数", "竞赛", "夏校", "活动推荐", "竞赛推荐", "展示活动",
            "科研项目", "暂期项目", "暑期项目", "RSI", "SSP", "PROMYS", "SIMR", "TASP",
            "Regeneron", "USAMO", "ISEF", "Coca-Cola", "Intel",
            "学术活动", "课外活动推荐", "应该参加什么", "活动清单"
        ]
        if self._matches_any(q_lower, geili_keywords):
            geili_ctx = self.get_geili_context(query)
            if geili_ctx:
                parts.append(geili_ctx)

        # 6b. Admission-blog red flags
        red_flag_keywords = [
            "红旗", "red flag", "申请错误", "常见错误", "不该做", "避免", "文书问题",
            "活动问题", "推荐信问题", "面试问题", "申请注意", "招生官不喜欢",
            "什么不能做", "申请禁忌", "文书禁忌", "大陆学生问题"
        ]
        if self._matches_any(q_lower, red_flag_keywords):
            red_flag_ctx = self.get_red_flags_context(query)
            if red_flag_ctx:
                parts.append(red_flag_ctx)

        # 7. Consultant knowledge
        consultant_keywords = [
            # Student type
            "普娃", "背景一般", "普通学生", "中等背景", "非顶尖",
            # Application strategy
            "策略", "逆袭", "包装", "反包装", "如何申请", "怎么申请",
            # Brand / speaker names.  The spellings used in the consultant
            # context header ("刘又铭", "濮阳") are accepted alongside the
            # original variants.
            "三士渡", "徐谭妥", "刘又鸣", "刘又铭", "濦阳", "濮阳",
            # School-selection methodology
            "选校方法", "生态位", "演化", "生态系统",
            # Admission results
            "录取结果", "2026届", "2025届", "历届学生",
            # Application philosophy
            "申请哲学", "申请理念", "超越包装",
        ]
        if self._matches_any(q_lower, consultant_keywords):
            consultant_ctx = self.get_consultant_context(query)
            if consultant_ctx:
                parts.append(consultant_ctx)

        # 8. Semantic-search fallback, only when keyword routing found little.
        if len(parts) < 2:
            try:
                from agent.semantic_retriever import multi_kb_search
            except ImportError:
                multi_kb_search = None  # optional component not installed
            if multi_kb_search is not None:
                try:
                    semantic_ctx = self._semantic_fallback(query, multi_kb_search)
                    if semantic_ctx:
                        parts.append(semantic_ctx)
                except Exception as e:
                    # Best effort: a failing retriever must not break the
                    # main flow, but the failure is reported, not hidden.
                    print(f"[AdvisorKnowledge] 语义检索失败: {e}")
        return "\n".join(filter(None, parts))

    # ──────────────────────────────────────────────
    # Phase 6 methods
    # ──────────────────────────────────────────────
    def get_geili_context(self, query: str = "", min_score: int = 5) -> str:
        """Render recommended activities from the geili index as prompt text.

        Args:
            query: selects which score bands are shown (top tier, research,
                writing, math, business; otherwise bands 9/8/7).
            min_score: score bands below this value are skipped.

        At most 5 items per band and 15 items overall are listed.  Returns
        "" when the index is not loaded.
        """
        if not self._geili_index:
            return ""
        q_lower = query.lower()
        by_score = self._geili_index.get("items_by_score", {})
        # Decide which score bands to show.
        if self._matches_any(q_lower, ["顶尖", "最高", "9分", "10分", "top tier"]):
            target_scores = ["10", "9"]
        elif self._matches_any(q_lower, ["科研", "research", "rsi", "ssp", "simr"]):
            target_scores = ["9", "8", "7", "6"]
        elif self._matches_any(q_lower, ["写作", "writing", "诗歌", "文学"]):
            target_scores = ["8", "7", "6", "5"]
        elif self._matches_any(q_lower, ["数学", "math", "olympiad", "usamo"]):
            target_scores = ["10", "9", "8", "7"]
        elif self._matches_any(q_lower, ["商业", "金融", "business", "finance"]):
            target_scores = ["8", "7", "6", "5"]
        else:
            target_scores = ["9", "8", "7"]

        max_total, max_per_score = 15, 5
        parts = [f"【给力指数推荐项目(来源:海狸学院第十二版,共{self._geili_index.get('total_items', 0)}个项目)】"]
        shown = 0
        for score_key in target_scores:
            if shown >= max_total:
                break  # overall cap reached: emit no further bands
            if int(score_key) < min_score:
                continue
            items = by_score.get(score_key, [])
            if not items:
                continue
            parts.append(f"\n▌ 给力指数 {score_key} 分:")
            for item in items[:min(max_per_score, max_total - shown)]:
                elig = f"({item['eligibility']})" if item.get('eligibility') else ""
                en = f" / {item['en_name'][:40]}" if item.get('en_name') else ""
                admit = f",录取率:{item['admit_rate']}" if item.get('admit_rate') else ""
                tags = f" [{', '.join(item['tags'][:2])}]" if item.get('tags') else ""
                parts.append(f" • {item.get('cn_name', '')}{elig}{en}{admit}{tags}")
                shown += 1
        parts.append("\n评分说明:9-10分=全球顶尖;7-8分=全球高度认可;5-6分=有一定含金量;3-4分=基础项目")
        return "\n".join(parts)

    def get_red_flags_context(self, query: str = "") -> str:
        """Render application red flags as prompt text.

        Categories are chosen from keywords in ``query``; with no match the
        essay, activities and China-specific categories are shown.  Within a
        category only "critical"/"high" severity flags are listed, at most
        four.  Returns "" when the store is not loaded.
        """
        if not self._admission_red_flags:
            return ""
        q_lower = query.lower()
        red_flags = self._admission_red_flags.get("red_flags", {})
        parts = ["【申请红旗清单(来源:T20 大学招生官博客,含 MIT/Harvard/Yale 等)】"]
        # (keywords, category key, heading), in display order.
        routing = (
            (["文书", "essay", "写作", "personal statement"], "essay_writing", "文书/写作红旗"),
            (["活动", "extracurricular", "课外", "ec"], "activities", "活动红旗"),
            (["推荐信", "recommendation", "rec letter"], "recommendations", "推荐信红旗"),
            (["行为", "截止日期", "deadline", "面试", "interview"], "behavior", "行为红旗"),
            (["大陆", "中国", "china", "chinese", "国际生"], "china_specific", "大陆学生特有红旗"),
        )
        categories_to_show = [
            (cat_key, cat_label)
            for keywords, cat_key, cat_label in routing
            if self._matches_any(q_lower, keywords)
        ]
        if not categories_to_show:
            categories_to_show = [
                ("essay_writing", "文书/写作红旗"),
                ("activities", "活动红旗"),
                ("china_specific", "大陆学生特有红旗"),
            ]
        for cat_key, cat_label in categories_to_show:
            flags = red_flags.get(cat_key, [])
            if not flags:
                continue
            parts.append(f"\n▌ {cat_label}:")
            high_flags = [f for f in flags if f.get("severity") in ("critical", "high")]
            for flag in high_flags[:4]:
                severity_label = "🚨" if flag.get("severity") == "critical" else "⚠️"
                note = f"({flag['note']})" if flag.get("note") else ""
                parts.append(
                    f" {severity_label} {flag.get('flag', '')}:{flag.get('description', '')}{note}"
                )
        return "\n".join(parts)

    def is_empty(self) -> bool:
        """Return True when none of the knowledge stores holds any data."""
        return not any(getattr(self, attr, None) for attr, _, _ in self._KB_FILES)
# Process-wide singleton, created lazily on first access.
_KNOWLEDGE_BASE: Optional[AdvisorKnowledge] = None


def get_knowledge_base() -> AdvisorKnowledge:
    """Return the shared AdvisorKnowledge instance, building it on first use."""
    global _KNOWLEDGE_BASE
    instance = _KNOWLEDGE_BASE
    if instance is None:
        instance = AdvisorKnowledge()
        _KNOWLEDGE_BASE = instance
    return instance