""" 顾问战略知识库(Phase 4 升级版) 加载并查询来自顾问战略会议和内部文件的结构化知识。 数据来源: 1. data/school_taste_updates.json - 学校口味更新(顾问会议提炼) 2. data/special_case_patterns.json - 特殊案例模式(低GPA/Test Optional/IB等) 3. data/stanford_scoring_rubric.json - Stanford 打分逆向工程(两份真实 FERPA 文件) 4. data/harvard_casebook_rubric.json - Harvard 2012 Casebook(11个真实 Case + Pause Factor) 5. data/harvard_reading_process_kb.json - Harvard 内部阅读流程手册(CONFIDENTIAL) 6. data/scholarship_kb.json - 查校列表奖学金知识库(678所学校,含 merit/need-based) 7. data/school_list_strategy_kb.json - 选校策略知识库(录取率/ED优势/标化要求) 8. data/consultant_knowledge_kb.json - 三士渡顾问知识库(PPT/PDF 演讲提炼) 使用方式: from agent.advisor_knowledge import AdvisorKnowledge kb = AdvisorKnowledge() taste = kb.get_school_taste("Vanderbilt University") patterns = kb.get_special_patterns(gpa_range=(3.0, 3.5), test_optional=True) stanford_ctx = kb.get_stanford_context("SAT 1580,海藻研究,REA") harvard_ctx = kb.get_harvard_context("Harvard 录取标准") """ import json import os from pathlib import Path from typing import Dict, List, Optional _DATA_DIR = Path(__file__).parent.parent / "data" class AdvisorKnowledge: """顾问战略知识库(Phase 4 升级版)""" def __init__(self): self._school_taste: Dict = {} self._special_patterns: List[Dict] = [] self._stanford_rubric: Dict = {} self._harvard_casebook: Dict = {} self._harvard_reading: Dict = {} self._scholarship_kb: Dict = {} self._school_strategy_kb: Dict = {} self._consultant_kb: Dict = {} self._admission_red_flags: Dict = {} self._geili_index: Dict = {} self._load() def _load(self) -> None: """加载所有知识库文件""" # 原有知识库 taste_file = _DATA_DIR / "school_taste_updates.json" if taste_file.exists(): try: with open(taste_file, "r", encoding="utf-8") as f: self._school_taste = json.load(f) except Exception as e: print(f"[AdvisorKnowledge] 加载 school_taste_updates.json 失败: {e}") patterns_file = _DATA_DIR / "special_case_patterns.json" if patterns_file.exists(): try: with open(patterns_file, "r", encoding="utf-8") as f: self._special_patterns = json.load(f) except Exception as e: print(f"[AdvisorKnowledge] 加载 special_case_patterns.json 失败: {e}") # Phase 4 新增:Stanford 打分逆向工程 stanford_file = _DATA_DIR / "stanford_scoring_rubric.json" if stanford_file.exists(): try: with open(stanford_file, "r", encoding="utf-8") as f: self._stanford_rubric = json.load(f) except Exception as e: print(f"[AdvisorKnowledge] 加载 stanford_scoring_rubric.json 失败: {e}") # Phase 4 新增:Harvard 2012 Casebook casebook_file = _DATA_DIR / "harvard_casebook_rubric.json" if casebook_file.exists(): try: with open(casebook_file, "r", encoding="utf-8") as f: self._harvard_casebook = json.load(f) except Exception as e: print(f"[AdvisorKnowledge] 加载 harvard_casebook_rubric.json 失败: {e}") # Phase 4 新增:Harvard 内部阅读流程 reading_file = _DATA_DIR / "harvard_reading_process_kb.json" if reading_file.exists(): try: with open(reading_file, "r", encoding="utf-8") as f: self._harvard_reading = json.load(f) except Exception as e: print(f"[AdvisorKnowledge] 加载 harvard_reading_process_kb.json 失败: {e}") # Phase 5 新增:奖学金知识库 scholarship_file = _DATA_DIR / "scholarship_kb.json" if scholarship_file.exists(): try: with open(scholarship_file, "r", encoding="utf-8") as f: self._scholarship_kb = json.load(f) except Exception as e: print(f"[AdvisorKnowledge] 加载 scholarship_kb.json 失败: {e}") # Phase 5 新增:选校策略知识库 strategy_file = _DATA_DIR / "school_list_strategy_kb.json" if strategy_file.exists(): try: with open(strategy_file, "r", encoding="utf-8") as f: self._school_strategy_kb = json.load(f) except Exception as e: print(f"[AdvisorKnowledge] 加载 school_list_strategy_kb.json 失败: {e}") # Phase 5 新增:三士渡顾问知识库 consultant_file = _DATA_DIR / "consultant_knowledge_kb.json" if consultant_file.exists(): try: with open(consultant_file, "r", encoding="utf-8") as f: self._consultant_kb = json.load(f) except Exception as e: print(f"[AdvisorKnowledge] 加载 consultant_knowledge_kb.json 失败: {e}") # Phase 6 新增:T20 Admission Blog 红旗知识库 red_flags_file = _DATA_DIR / "admission_blog_red_flags.json" if red_flags_file.exists(): try: with open(red_flags_file, "r", encoding="utf-8") as f: self._admission_red_flags = json.load(f) except Exception as e: print(f"[AdvisorKnowledge] 加载 admission_blog_red_flags.json 失败: {e}") # Phase 6 新增:给力指数知识库(海狸学院第十二版) geili_file = _DATA_DIR / "geili_index_kb.json" if geili_file.exists(): try: with open(geili_file, "r", encoding="utf-8") as f: self._geili_index = json.load(f) except Exception as e: print(f"[AdvisorKnowledge] 加载 geili_index_kb.json 失败: {e}") # ────────────────────────────────────────────── # 原有方法(保持不变) # ────────────────────────────────────────────── def get_school_taste(self, school_name: str) -> Optional[Dict]: """获取学校口味更新""" if not self._school_taste: return None if school_name in self._school_taste: return self._school_taste[school_name] school_lower = school_name.lower() for key, val in self._school_taste.items(): if school_lower in key.lower() or key.lower() in school_lower: return val return None def get_special_patterns( self, gpa_range: Optional[tuple] = None, test_optional: Optional[bool] = None, ib_score_range: Optional[tuple] = None, school: Optional[str] = None, ) -> List[Dict]: """查询特殊案例模式""" if not self._special_patterns: return [] results = [] for pattern in self._special_patterns: conditions = pattern.get("conditions", {}) if gpa_range is not None: pattern_gpa = conditions.get("gpa") if pattern_gpa is not None: if not (gpa_range[0] <= pattern_gpa <= gpa_range[1]): continue if test_optional is not None: if conditions.get("test_optional") != test_optional: continue if ib_score_range is not None: pattern_ib = conditions.get("ib_score") if pattern_ib is not None: if not (ib_score_range[0] <= pattern_ib <= ib_score_range[1]): continue if school is not None: pattern_school = pattern.get("school", "") if school.lower() not in pattern_school.lower() and pattern_school.lower() not in school.lower(): continue results.append(pattern) return results # ────────────────────────────────────────────── # Phase 4 新增方法 # ────────────────────────────────────────────── def get_stanford_context(self, query: str = "") -> str: """ 获取 Stanford 打分逆向工程上下文。 当 query 中涉及 Stanford 时调用。 """ if not self._stanford_rubric: return "" parts = ["【Stanford 招生评分体系(来源:两份真实 FERPA 打分文件逆向工程)】"] # 核心评分维度 dims = self._stanford_rubric.get("scoring_dimensions", {}) if dims: parts.append("\n▌ 六大评分维度(1=最强):") for dim_name, dim_data in dims.items(): if isinstance(dim_data, dict): score_1 = dim_data.get("1", "") score_2 = dim_data.get("2", "") parts.append(f" • {dim_name}:1分={score_1};2分={score_2}") # 真实案例 cases = self._stanford_rubric.get("real_cases", []) if cases: parts.append("\n▌ 真实录取案例:") for c in cases: name = c.get("anonymized_name", "匿名") outcome = c.get("outcome", "") overall = c.get("scores", {}).get("Overall", "") highlights = c.get("key_highlights", []) parts.append(f" • {name}({outcome},Overall {overall}):{', '.join(highlights[:2])}") # 大陆学生启示 implications = self._stanford_rubric.get("china_student_implications", {}) if implications: parts.append("\n▌ 大陆学生申请 Stanford 关键洞察:") for key, val in implications.items(): if isinstance(val, str): parts.append(f" • {key}:{val}") elif isinstance(val, dict) and val.get("insight"): parts.append(f" • {key}:{val['insight']}") return "\n".join(parts) def get_harvard_context(self, query: str = "") -> str: """ 获取 Harvard 录取标准上下文(Casebook + 阅读流程)。 当 query 中涉及 Harvard 时调用。 """ parts = [] # Harvard 阅读流程(评分标准) if self._harvard_reading: dims = self._harvard_reading.get("dimensions", {}) if dims: parts.append("【Harvard 内部评分标准(来源:SFFA v. Harvard 诉讼证据 HARV00097936,CONFIDENTIAL)】") parts.append("\n▌ 六大评分维度(1=最强,支持 +/- 修饰):") for dim_name, dim_data in dims.items(): if isinstance(dim_data, dict): score_1 = dim_data.get("1", "")[:60] if dim_data.get("1") else "" score_2 = dim_data.get("2", "")[:60] if dim_data.get("2") else "" key_insight = dim_data.get("key_insight", "") line = f" • {dim_name}:1={score_1};2={score_2}" if key_insight: line += f"(⚠ {key_insight[:50]})" parts.append(line) # 大陆学生启示 china = self._harvard_reading.get("china_student_implications", {}) if china: parts.append("\n▌ 大陆学生关键洞察:") for key, val in china.items(): if isinstance(val, dict): insight = val.get("key_insight") or val.get("key_note") or "" if insight: parts.append(f" • {key}:{insight[:80]}") # Harvard Casebook 真实案例 if self._harvard_casebook: cases = self._harvard_casebook.get("cases", []) pause_patterns = self._harvard_casebook.get("pause_factor_patterns", []) if cases: parts.append(f"\n【Harvard 2012 Casebook(11个真实 Case,SFFA 诉讼证据)】") parts.append("▌ 典型录取/拒绝案例(含 Pause Factor):") for c in cases[:5]: # 展示前5个最具代表性的 name = c.get("case_id", "") outcome = c.get("outcome", "") pause = c.get("pause_factor", "无") overall = c.get("scores", {}).get("Overall", "") key_factors = c.get("key_factors", []) parts.append( f" • {name}({outcome},Overall {overall},Pause: {pause}):{', '.join(key_factors[:2])}" ) if pause_patterns: parts.append("\n▌ Pause Factor 规律(招生官暂停深思的信号):") for p in pause_patterns[:5]: parts.append(f" • {p}") # 关键洞察 insights = self._harvard_casebook.get("key_insights", []) if insights: parts.append("\n▌ Casebook 核心洞察:") for ins in insights[:4]: parts.append(f" • {ins}") return "\n".join(parts) if parts else "" def get_scholarship_context(self, school_name: str = "", query: str = "") -> str: """ 获取奖学金知识库上下文。 三个核心维度: 1. 国际生获得FA的百分比(intl_pct_receiving_aid) 2. 国际生平均获奖金额(intl_avg_award_usd) 3. 国际生录取率(intl_admission_rate_pct) """ if not self._scholarship_kb: return "" meta = self._scholarship_kb.get("metadata", {}) rankings = self._scholarship_kb.get("rankings", {}) all_schools = self._scholarship_kb.get("all_schools", []) parts = [f"【奖学金知识库(来源:查校列表2025-2026,{meta.get('total_schools', 678)}所学校)】"] parts.append(f"数据覆盖:{meta.get('schools_with_pct_aid', 0)}所学校有国际生获奖比例数据,{meta.get('schools_with_avg_award', 0)}所有平均金额数据,{meta.get('schools_meets_full_need', 0)}所满足全额需求") q_lower = query.lower() # 如果指定了学校,查找该学校的完整三维数据 if school_name: matched = [s for s in all_schools if school_name.lower() in s.get("school", "").lower()] if matched: s = matched[0] parts.append(f"\n▌ {s['school']} 国际生奖学金完整数据:") # 核心三维 parts.append(f" 「维度1」国际生获得FA的百分比:{s.get('intl_pct_receiving_aid', '无数据')}%") parts.append(f" 「维度2」国际生平均获奖金额:${s.get('intl_avg_award_usd', '无数据')}") parts.append(f" 「维度3」国际生录取率:{s.get('intl_admission_rate_pct', '无数据')}%") # 辅助数据 parts.append(f" 奖学金类型:{s.get('aid_types_for_intl', '未知')}") parts.append(f" 获奖人数:{s.get('intl_n_awarded', '未知')}") parts.append(f" 获奖后平均费用:${s.get('intl_avg_cost_after_aid', '未知')}") parts.append(f" 满足全额需求:{s.get('meets_full_demonstrated_need', '未知')}") parts.append(f" 最大Merit奖金额:${s.get('largest_merit_scholarship_usd', '无')}") parts.append(f" 奖学金名称:{s.get('merit_scholarship_name', '无')}") parts.append(f" 2024-25获奖后学费:${s.get('tuition_after_merit_2425', '未知')}") parts.append(f" 2024-25获奖后总费用:${s.get('coa_after_merit_2425', '未知')}") parts.append(f" 申请方式:{s.get('how_to_apply', '未知')}") if s.get('notes'): parts.append(f" 备注:{s['notes']}") parts.append(f" US News排名:{s.get('us_news_national') or s.get('us_news_lac') or 'N/A'}") else: parts.append(f"\n⚠ 未找到 '{school_name}' 的奖学金数据") # 通用查询:根据 query 内容展示不同排行榜 if not school_name: # 默认展示综合性价排行榜 if any(kw in q_lower for kw in ["性价比", "划算", "实惠", "小学校", "文理学院", "lac"]): top_val = rankings.get("top40_composite_value", [])[:15] parts.append(f"\n▌ 小型文理学院综合性价排行榜 Top 15(获奖比例+金额+选择性加权):") for item in top_val: parts.append( f" {item['rank']:2d}. {item['school']:<40} " f"获奖比例:{item['intl_pct_receiving_aid']}% " f"平均金额:${item['intl_avg_award_usd']} " f"录取率:{item['intl_admission_rate_pct']}% " f"US#{item['us_news'] or 'N/A'}" ) elif any(kw in q_lower for kw in ["平均", "最多", "金额最高", "最大金额", "最高"]): top_award = rankings.get("top30_by_intl_avg_award", [])[:15] parts.append("\n▌ 国际生平均获奖金额最高 Top 15:") for item in top_award: parts.append( f" {item['rank']:2d}. {item['school']:<40} " f"平均金额:${item['intl_avg_award_usd']:,} " f"获奖比例:{item['intl_pct_receiving_aid']}% " f"录取率:{item['intl_admission_rate_pct']}% " f"US#{item['us_news'] or 'N/A'}" ) elif any(kw in q_lower for kw in ["比例最高", "最容易", "最多人获奖", "最容易拿"]): top_pct = rankings.get("top30_by_intl_pct_receiving_aid", [])[:15] parts.append("\n▌ 国际生获奖比例最高 Top 15:") for item in top_pct: parts.append( f" {item['rank']:2d}. {item['school']:<40} " f"获奖比例:{item['intl_pct_receiving_aid']}% " f"平均金额:${item['intl_avg_award_usd']} " f"录取率:{item['intl_admission_rate_pct']}%" ) elif any(kw in q_lower for kw in ["全额", "need", "need-blind", "need-based", "需要基础"]): meets = rankings.get("meets_full_need_schools", [])[:15] parts.append(f"\n▌ 满足全额 Need-Based Aid 的学校({len(meets)}所,按US News排序):") for s in meets: parts.append( f" US#{str(s['us_news'] or 'N/A'):<5} {s['school']:<40} " f"获奖比例:{s['intl_pct_receiving_aid']}% " f"平均金额:${s['intl_avg_award_usd']} " f"录取率:{s['intl_admission_rate_pct']}%" ) elif any(kw in q_lower for kw in ["merit", "奖学金最大", "最大merit"]): top_merit = rankings.get("top30_by_largest_merit", [])[:15] parts.append("\n▌ 最大 Merit Scholarship 金额 Top 15:") for item in top_merit: parts.append( f" {item['rank']:2d}. {item['school']:<40} " f"最大Merit:${item['largest_merit_scholarship_usd']:,} " f"奖学金名称:{item['merit_scholarship_name']} " f"获奖比例:{item['intl_pct_receiving_aid']}%" ) else: # 默认:展示平均金额排行榜 top_award = rankings.get("top30_by_intl_avg_award", [])[:12] parts.append("\n▌ 国际生平均获奖金额最高 Top 12(含录取难度参考):") for item in top_award: parts.append( f" {item['rank']:2d}. {item['school']:<40} " f"平均金额:${item['intl_avg_award_usd']:,} " f"获奖比例:{item['intl_pct_receiving_aid']}% " f"录取率:{item['intl_admission_rate_pct']}% " f"US#{item['us_news'] or 'N/A'}" ) # 同时展示满足全额需求的顶校 meets = rankings.get("meets_full_need_schools", [])[:8] if meets: parts.append(f"\n▌ 顶校中满足全额 Need-Based Aid(对国际生):") for s in meets: parts.append( f" US#{str(s['us_news'] or 'N/A'):<5} {s['school']:<40} " f"获奖比例:{s['intl_pct_receiving_aid']}% " f"平均金额:${s['intl_avg_award_usd']}" ) return "\n".join(parts) def get_consultant_context(self, query: str = "") -> str: """ 获取三士渡顾问知识库上下文。 当 query 涉及申请策略、普娃逆袭、选校方法论等时调用。 """ if not self._consultant_kb: return "" parts = ["【三士渡顾问知识库(来源:徐谭妥、刘又铭、濮阳演讲 PPT)】"] q_lower = query.lower() # 核心策略 core = self._consultant_kb.get("core_strategies", []) if core: parts.append("\n▌ 核心申请策略:") for s in core[:4]: parts.append(f" • {s.get('strategy', '')}:{s.get('description', '')[:100]}") # 选校方法论 if any(kw in q_lower for kw in ["选校", "school", "选学校", "ed", "ea"]): school_method = self._consultant_kb.get("school_selection_methodology", []) if school_method: parts.append("\n▌ 选校方法论:") for m in school_method[:3]: parts.append(f" • {m.get('principle', '')}:{m.get('detail', '')[:100]}") # 反包装理念 if any(kw in q_lower for kw in ["包装", "真实", "authentic", "反包装", "材料"]): anti = self._consultant_kb.get("anti_packaging_philosophy", []) if anti: parts.append("\n▌ 反包装理念:") for a in anti[:3]: parts.append(f" • {a.get('concept', '')}:{a.get('description', '')[:100]}") # 三士渡独特方法论 unique = self._consultant_kb.get("stoooges_unique_methodology", []) if unique: parts.append("\n▌ 三士渡独特方法论:") for u in unique[:3]: parts.append(f" • {u.get('method', '')}:{u.get('description', '')[:80]}") # 2026 录取结果 results = self._consultant_kb.get("admission_results_2026", {}) if results and any(kw in q_lower for kw in ["录取", "结果", "2026", "成绩"]): parts.append("\n▌ 三士渡 2026 届录取成绩(截至4月1日):") for school, count in results.items(): if school != "other_results" and count: parts.append(f" • {school}:{count}") # 大陆学生建议 china_advice = self._consultant_kb.get("china_student_advice", []) if china_advice: parts.append("\n▌ 大陆学生申请建议:") for a in china_advice[:3]: parts.append(f" • {a.get('advice', '')}:{a.get('action', '')[:80]}") return "\n".join(parts) def get_school_strategy(self, school_name: str) -> Optional[Dict]: """ 从选校策略知识库查询特定学校的数据。 """ if not self._school_strategy_kb: return None schools = self._school_strategy_kb.get("schools", []) for s in schools: if school_name.lower() in s.get("school", "").lower(): return s return None def get_context_for_query(self, query: str, school: Optional[str] = None) -> str: """ 为 LLM 生成顾问知识上下文(统一入口)。 根据 query 和 school 智能路由到相关知识库。 """ parts = [] # 1. 学校口味(原有) if school: taste = self.get_school_taste(school) if taste: parts.append(f"【顾问战略知识(来源:{taste.get('source', '顾问会议')})】") if taste.get("advisor_consensus"): parts.append(f"顾问共识:{taste['advisor_consensus']}") if taste.get("test_optional_effective") is not None: parts.append(f"Test Optional 有效性:{'有效' if taste['test_optional_effective'] else '建议提交标化'}") if taste.get("ib_friendly") is not None: parts.append(f"IB 友好度:{'友好' if taste['ib_friendly'] else '一般'}") if taste.get("notes"): parts.append(f"补充说明:{taste['notes']}") # 2. 特殊案例模式(原有) q_lower = query.lower() if "test optional" in q_lower or "test-optional" in q_lower or "无sat" in query or "不提交标化" in query: patterns = self.get_special_patterns(test_optional=True, school=school) if patterns: parts.append(f"\n【相关特殊案例模式({len(patterns)}个)】") for p in patterns[:3]: parts.append( f"- {p.get('anonymized_case', '匿名')}:{p.get('description', '')} " f"→ {p.get('outcome', '')}({p.get('year', '')})" ) if p.get("advisor_notes"): parts.append(f" 顾问备注:{p['advisor_notes']}") # 3. Stanford 打分(Phase 4 新增) stanford_keywords = ["stanford", "斯坦福", "stanford打分", "stanford评分", "stanford录取"] if school and "stanford" in school.lower(): parts.append(self.get_stanford_context(query)) elif any(kw in q_lower for kw in stanford_keywords): parts.append(self.get_stanford_context(query)) # 4. Harvard 打分 + Casebook(Phase 4 新增) harvard_keywords = ["harvard", "哈佛", "harvard打分", "pause factor", "casebook", "harvard录取"] if school and "harvard" in school.lower(): parts.append(self.get_harvard_context(query)) elif any(kw in q_lower for kw in harvard_keywords): parts.append(self.get_harvard_context(query)) # 5. 通用顶校录取逻辑查询(同时提供两校参考) top_school_keywords = ["录取标准", "招生官", "评分维度", "holistic", "整体评估", "顶校录取", "t10录取"] if any(kw in q_lower for kw in top_school_keywords): if not any(kw in q_lower for kw in stanford_keywords + harvard_keywords): # 同时提供 Stanford 和 Harvard 的参考 stanford_ctx = self.get_stanford_context(query) harvard_ctx = self.get_harvard_context(query) if stanford_ctx: parts.append(stanford_ctx) if harvard_ctx: parts.append(harvard_ctx) # 6. 奖学金查询(Phase 5 新增) scholarship_keywords = ["奖学金", "scholarship", "助学金", "financial aid", "merit", "need-based", "学费", "费用", "cost"] if any(kw in q_lower for kw in scholarship_keywords): scholarship_ctx = self.get_scholarship_context(school_name=school or "", query=query) if scholarship_ctx: parts.append(scholarship_ctx) # 6a. 给力指数知识库(Phase 6 新增) geili_keywords = [ "给力指数", "竞赛", "夏校", "活动推荐", "竞赛推荐", "展示活动", "科研项目", "暂期项目", "RSI", "SSP", "PROMYS", "SIMR", "TASP", "Regeneron", "USAMO", "ISEF", "Coca-Cola", "Intel", "学术活动", "课外活动推荐", "应该参加什么", "活动清单" ] if any(kw in q_lower or kw.lower() in q_lower for kw in geili_keywords): geili_ctx = self.get_geili_context(query) if geili_ctx: parts.append(geili_ctx) # 6b. Admission Blog 红旗知识库(Phase 6 新增) red_flag_keywords = [ "红旗", "red flag", "申请错误", "常见错误", "不该做", "避免", "文书问题", "活动问题", "推荐信问题", "面试问题", "申请注意", "招生官不喜欢", "什么不能做", "申请禁忌", "文书禁忌", "大陆学生问题" ] if any(kw in q_lower for kw in red_flag_keywords): red_flag_ctx = self.get_red_flags_context(query) if red_flag_ctx: parts.append(red_flag_ctx) # 7. 顾问知识强路由(Phase 5 新增,Phase 6 升级:扩充关键词,确保最独特资产被激活) consultant_keywords = [ # 学生类型 "普娃", "背景一般", "普通学生", "中等背景", "非顶尖", # 申请策略 "策略", "逆袭", "包装", "反包装", "如何申请", "怎么申请", # 三士渡品牌 "三士渡", "徐谭妥", "刘又鸣", "濦阳", # 选校方法论 "选校方法", "生态位", "演化", "生态系统", # 录取结果 "录取结果", "2026届", "2025届", "历届学生", # 申请哲学 "申请哲学", "申请理念", "超越包装", ] if any(kw in q_lower for kw in consultant_keywords): consultant_ctx = self.get_consultant_context(query) if consultant_ctx: parts.append(consultant_ctx) # 8. F1 语义检索兜底(当关键词匹配未命中时,用语义检索补充) # 只在 parts 为空或很少时触发,避免冗余 if len(parts) < 2: try: from agent.semantic_retriever import multi_kb_search # 跨知识库语义检索 kb_configs = [] if self.stanford_rubric: kb_configs.append(('stanford_scoring_rubric', self.stanford_rubric)) if self.harvard_casebook: kb_configs.append(('harvard_casebook_rubric', self.harvard_casebook)) if self.consultant_kb: kb_configs.append(('consultant_knowledge_kb', self.consultant_kb)) if kb_configs: semantic_results = multi_kb_search( query=query, kb_configs=kb_configs, top_k_per_kb=2, total_top_k=4, ) if semantic_results: semantic_parts = [] for r in semantic_results: if r['score'] > 0.1: # 只取相似度 >10% 的结果 source_label = { 'stanford_scoring_rubric': '[Stanford FERPA]', 'harvard_casebook_rubric': '[Harvard Casebook]', 'consultant_knowledge_kb': '[三士渡顾问经验]', }.get(r['source'], f"[{r['source']}]") semantic_parts.append( f"{source_label} {r['section']}: {r['text'][:300]}" ) if semantic_parts: parts.append("\n【语义检索补充知识】\n" + "\n".join(semantic_parts[:3])) except Exception as e: pass # 语义检索失败不影响主流程 return "\n".join(filter(None, parts)) def get_geili_context(self, query: str = "", min_score: int = 5) -> str: """获取给力指数知识库上下文(海狸学院第十二版)""" if not self._geili_index: return "" q_lower = query.lower() all_items = self._geili_index.get("all_items", []) by_score = self._geili_index.get("items_by_score", {}) # 确定要展示的评分范围 target_scores = [] if any(kw in q_lower for kw in ["顶尖", "最高", "9分", "10分", "top tier"]): target_scores = ["10", "9"] elif any(kw in q_lower for kw in ["科研", "research", "rsi", "ssp", "simr"]): # 科研类项目 target_scores = ["9", "8", "7", "6"] elif any(kw in q_lower for kw in ["写作", "writing", "诗歌", "文学"]): target_scores = ["8", "7", "6", "5"] elif any(kw in q_lower for kw in ["数学", "math", "olympiad", "usamo"]): target_scores = ["10", "9", "8", "7"] elif any(kw in q_lower for kw in ["商业", "金融", "business", "finance"]): target_scores = ["8", "7", "6", "5"] else: # 默认显示高分项目 target_scores = ["9", "8", "7"] parts = [f"【给力指数推荐项目(来源:海狸学院第十二版,共{self._geili_index.get('total_items', 0)}个项目)】"] shown = 0 for score_key in target_scores: items = by_score.get(score_key, []) if not items: continue parts.append(f"\n▌ 给力指数 {score_key} 分:") for item in items[:5]: # 每个分数最多显示 5 个 elig = f"({item['eligibility']})" if item.get('eligibility') else "" en = f" / {item['en_name'][:40]}" if item.get('en_name') else "" admit = f",录取率:{item['admit_rate']}" if item.get('admit_rate') else "" tags = f" [{', '.join(item['tags'][:2])}]" if item.get('tags') else "" parts.append(f" • {item['cn_name']}{elig}{en}{admit}{tags}") shown += 1 if shown >= 15: break scoring_guide = self._geili_index.get("scoring_guide", {}) parts.append(f"\n评分说明:9-10分=全球顶尖;7-8分=全球高度认可;5-6分=有一定含金量;3-4分=基础项目") return "\n".join(parts) def get_red_flags_context(self, query: str = "") -> str: """获取申请红旗知识库上下文(来自 T20 大学招生官博客)""" if not self._admission_red_flags: return "" q_lower = query.lower() red_flags = self._admission_red_flags.get("red_flags", {}) parts = ["【申请红旗清单(来源:T20 大学招生官博客,含 MIT/Harvard/Yale 等)】"] # 根据查询关键词选择相关类别 categories_to_show = [] if any(kw in q_lower for kw in ["文书", "essay", "写作", "personal statement"]): categories_to_show.append(("essay_writing", "文书/写作红旗")) if any(kw in q_lower for kw in ["活动", "extracurricular", "课外", "ec"]): categories_to_show.append(("activities", "活动红旗")) if any(kw in q_lower for kw in ["推荐信", "recommendation", "rec letter"]): categories_to_show.append(("recommendations", "推荐信红旗")) if any(kw in q_lower for kw in ["行为", "截止日期", "deadline", "面试", "interview"]): categories_to_show.append(("behavior", "行为红旗")) if any(kw in q_lower for kw in ["大陆", "中国", "china", "chinese", "国际生"]): categories_to_show.append(("china_specific", "大陆学生特有红旗")) # 如果没有匹配到特定类别,显示所有高严重度红旗 if not categories_to_show: categories_to_show = [ ("essay_writing", "文书/写作红旗"), ("activities", "活动红旗"), ("china_specific", "大陆学生特有红旗"), ] for cat_key, cat_label in categories_to_show: flags = red_flags.get(cat_key, []) if flags: parts.append(f"\n▌ {cat_label}:") # 优先显示 critical 和 high 严重度 high_flags = [f for f in flags if f.get("severity") in ("critical", "high")] for flag in high_flags[:4]: severity_label = "🚨" if flag.get("severity") == "critical" else "⚠️" note = f"({flag['note']})" if flag.get("note") else "" parts.append(f" {severity_label} {flag['flag']}:{flag['description']}{note}") return "\n".join(parts) def is_empty(self) -> bool: """知识库是否为空""" return ( not self._school_taste and not self._special_patterns and not self._stanford_rubric and not self._harvard_casebook and not self._harvard_reading and not self._scholarship_kb and not self._consultant_kb ) # 全局单例 _KNOWLEDGE_BASE: Optional[AdvisorKnowledge] = None def get_knowledge_base() -> AdvisorKnowledge: """获取全局知识库单例""" global _KNOWLEDGE_BASE if _KNOWLEDGE_BASE is None: _KNOWLEDGE_BASE = AdvisorKnowledge() return _KNOWLEDGE_BASE