Spaces:
Sleeping
Sleeping
| """ | |
| 顾问战略知识库(Phase 4 升级版) | |
| 加载并查询来自顾问战略会议和内部文件的结构化知识。 | |
| 数据来源: | |
| 1. data/school_taste_updates.json - 学校口味更新(顾问会议提炼) | |
| 2. data/special_case_patterns.json - 特殊案例模式(低GPA/Test Optional/IB等) | |
| 3. data/stanford_scoring_rubric.json - Stanford 打分逆向工程(两份真实 FERPA 文件) | |
| 4. data/harvard_casebook_rubric.json - Harvard 2012 Casebook(11个真实 Case + Pause Factor) | |
| 5. data/harvard_reading_process_kb.json - Harvard 内部阅读流程手册(CONFIDENTIAL) | |
| 6. data/scholarship_kb.json - 查校列表奖学金知识库(678所学校,含 merit/need-based) | |
| 7. data/school_list_strategy_kb.json - 选校策略知识库(录取率/ED优势/标化要求) | |
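| 8. data/consultant_knowledge_kb.json - 三士渡顾问知识库(PPT/PDF 演讲提炼) | |
| 9. data/admission_blog_red_flags.json - T20 Admission Blog 红旗知识库(Phase 6 新增) | |
| 10. data/geili_index_kb.json - 给力指数知识库(海狸学院第十二版,Phase 6 新增) | |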
| 使用方式: | |
| from agent.advisor_knowledge import AdvisorKnowledge | |
| kb = AdvisorKnowledge() | |
| taste = kb.get_school_taste("Vanderbilt University") | |
| patterns = kb.get_special_patterns(gpa_range=(3.0, 3.5), test_optional=True) | |
| stanford_ctx = kb.get_stanford_context("SAT 1580,海藻研究,REA") | |
| harvard_ctx = kb.get_harvard_context("Harvard 录取标准") | |
| """ | |
| import json | |
| import os | |
| from pathlib import Path | |
| from typing import Dict, List, Optional | |
# Directory holding the JSON knowledge files: the "data" folder located two
# levels above this module's file (next to the directory containing it).
_DATA_DIR = Path(__file__).parent.parent / "data"
class AdvisorKnowledge:
    """Advisor strategy knowledge base.

    Loads a fixed set of JSON knowledge files from ``_DATA_DIR`` when
    constructed and renders query-specific text snippets from them, intended
    to be injected into an LLM prompt as context.

    A file that is missing or cannot be parsed leaves the corresponding store
    empty (a message is printed for parse/read failures); construction never
    raises because of a bad data file.
    """

    # (attribute name, JSON file name) for every knowledge store, in load order.
    _KB_FILES = (
        ("_school_taste", "school_taste_updates.json"),
        ("_special_patterns", "special_case_patterns.json"),
        ("_stanford_rubric", "stanford_scoring_rubric.json"),
        ("_harvard_casebook", "harvard_casebook_rubric.json"),
        ("_harvard_reading", "harvard_reading_process_kb.json"),
        ("_scholarship_kb", "scholarship_kb.json"),
        ("_school_strategy_kb", "school_list_strategy_kb.json"),
        ("_consultant_kb", "consultant_knowledge_kb.json"),
        ("_admission_red_flags", "admission_blog_red_flags.json"),
        ("_geili_index", "geili_index_kb.json"),
    )

    # (line prefix, record key, placeholder when absent, suffix) for the
    # per-school scholarship detail lines, in display order.
    _SCHOLARSHIP_DETAIL_FIELDS = (
        (" 「维度1」国际生获得FA的百分比:", "intl_pct_receiving_aid", "无数据", "%"),
        (" 「维度2」国际生平均获奖金额:$", "intl_avg_award_usd", "无数据", ""),
        (" 「维度3」国际生录取率:", "intl_admission_rate_pct", "无数据", "%"),
        (" 奖学金类型:", "aid_types_for_intl", "未知", ""),
        (" 获奖人数:", "intl_n_awarded", "未知", ""),
        (" 获奖后平均费用:$", "intl_avg_cost_after_aid", "未知", ""),
        (" 满足全额需求:", "meets_full_demonstrated_need", "未知", ""),
        (" 最大Merit奖金额:$", "largest_merit_scholarship_usd", "无", ""),
        (" 奖学金名称:", "merit_scholarship_name", "无", ""),
        (" 2024-25获奖后学费:$", "tuition_after_merit_2425", "未知", ""),
        (" 2024-25获奖后总费用:$", "coa_after_merit_2425", "未知", ""),
        (" 申请方式:", "how_to_apply", "未知", ""),
    )

    def __init__(self):
        """Create empty stores and populate them from the data directory."""
        self._school_taste: Dict = {}
        self._special_patterns: List[Dict] = []
        self._stanford_rubric: Dict = {}
        self._harvard_casebook: Dict = {}
        self._harvard_reading: Dict = {}
        self._scholarship_kb: Dict = {}
        self._school_strategy_kb: Dict = {}
        self._consultant_kb: Dict = {}
        self._admission_red_flags: Dict = {}
        self._geili_index: Dict = {}
        self._load()

    def _load(self) -> None:
        """Load every knowledge file listed in ``_KB_FILES``.

        Missing files are skipped silently.  Read/parse failures are reported
        on stdout and leave the store at its empty default (best effort).
        """
        for attr, filename in self._KB_FILES:
            path = _DATA_DIR / filename
            if not path.exists():
                continue
            try:
                with open(path, "r", encoding="utf-8") as f:
                    setattr(self, attr, json.load(f))
            except Exception as e:
                print(f"[AdvisorKnowledge] 加载 {filename} 失败: {e}")

    # ──────────────────────────────────────────────
    # Shared private helpers
    # ──────────────────────────────────────────────
    @staticmethod
    def _mentions(text: str, keywords) -> bool:
        """Return True if *text* (already lower-cased) mentions any keyword.

        Keywords are lower-cased before matching.  Keywords made of at most
        three ASCII letters (e.g. "ed", "ec", "rsi") must not be adjacent to
        another ASCII letter, so that "rsi" does not fire on "university" or
        "ec" on "recommendation".  Neighbouring CJK characters, digits and
        punctuation are allowed.  All other keywords are substring-matched.
        """
        import re  # local import: the module header does not import ``re``

        for kw in keywords:
            needle = kw.lower()
            is_short_ascii_word = (
                0 < len(needle) <= 3 and all("a" <= ch <= "z" for ch in needle)
            )
            if is_short_ascii_word:
                if re.search(rf"(?<![a-z]){re.escape(needle)}(?![a-z])", text):
                    return True
            elif needle in text:
                return True
        return False

    @staticmethod
    def _kb_field(record: Dict, key: str, default="N/A"):
        """Return ``record[key]``, or *default* when it is missing or None."""
        value = record.get(key)
        return default if value is None else value

    @staticmethod
    def _fmt_amount(value) -> str:
        """Format a number with thousands separators; tolerate non-numbers."""
        if value is None:
            return "N/A"
        if isinstance(value, (int, float)):
            return f"{value:,}"
        return str(value)

    @staticmethod
    def _rank_prefix(item: Dict) -> str:
        """Build the ``" <rank>. <school padded to 40> "`` ranking prefix."""
        rank = item.get("rank")
        if isinstance(rank, int):
            rank_txt = f"{rank:2d}"
        else:
            rank_txt = str("-" if rank is None else rank).rjust(2)
        return f" {rank_txt}. {str(item.get('school', '')):<40} "

    @staticmethod
    def _bullet_lines(entries, title_key: str, body_key: str, limit: int, width: int) -> List[str]:
        """Render up to *limit* entries as bullets, body cut to *width* chars."""
        return [
            f" • {entry.get(title_key, '')}:{(entry.get(body_key) or '')[:width]}"
            for entry in entries[:limit]
        ]

    # ──────────────────────────────────────────────
    # Original lookups
    # ──────────────────────────────────────────────
    def get_school_taste(self, school_name: str) -> Optional[Dict]:
        """Return the "school taste" entry for *school_name*, or None.

        Tries an exact key match first, then a case-insensitive match where
        either name contains the other.  An empty/blank name never matches
        (a bare substring test would otherwise match every entry).
        """
        if not school_name or not self._school_taste:
            return None
        if school_name in self._school_taste:
            return self._school_taste[school_name]
        needle = school_name.strip().lower()
        if not needle:
            return None
        for key, val in self._school_taste.items():
            key_lower = key.lower()
            # An empty key would be "contained" in every name; skip it.
            if key_lower and (needle in key_lower or key_lower in needle):
                return val
        return None

    def get_special_patterns(
        self,
        gpa_range: Optional[tuple] = None,
        test_optional: Optional[bool] = None,
        ib_score_range: Optional[tuple] = None,
        school: Optional[str] = None,
    ) -> List[Dict]:
        """Return the special-case patterns that satisfy every given filter.

        Args:
            gpa_range: inclusive ``(low, high)``; patterns that record no GPA
                pass this filter.
            test_optional: when given, the pattern's ``test_optional``
                condition must equal it (patterns without the key are
                excluded).
            ib_score_range: inclusive ``(low, high)``; patterns that record no
                IB score pass this filter.
            school: case-insensitive; matches when either school name
                contains the other (a pattern without a school matches any).
        """
        if not self._special_patterns:
            return []

        def outside(value, bounds) -> bool:
            # True only when both a bound and a recorded value exist and the
            # value falls outside the inclusive range.
            return (
                bounds is not None
                and value is not None
                and not (bounds[0] <= value <= bounds[1])
            )

        school_lower = school.lower() if school is not None else None
        results = []
        for pattern in self._special_patterns:
            conditions = pattern.get("conditions") or {}
            if outside(conditions.get("gpa"), gpa_range):
                continue
            if test_optional is not None and conditions.get("test_optional") != test_optional:
                continue
            if outside(conditions.get("ib_score"), ib_score_range):
                continue
            if school_lower is not None:
                pattern_school = (pattern.get("school") or "").lower()
                if school_lower not in pattern_school and pattern_school not in school_lower:
                    continue
            results.append(pattern)
        return results

    # ──────────────────────────────────────────────
    # Phase 4 additions
    # ──────────────────────────────────────────────
    def get_stanford_context(self, query: str = "") -> str:
        """Render the Stanford scoring-rubric store as context text.

        *query* is accepted for interface symmetry and is not used.
        Returns "" when the store is empty.
        """
        rubric = self._stanford_rubric
        if not rubric:
            return ""
        parts = ["【Stanford 招生评分体系(来源:两份真实 FERPA 打分文件逆向工程)】"]

        # Scoring dimensions
        dims = rubric.get("scoring_dimensions", {})
        if dims:
            parts.append("\n▌ 六大评分维度(1=最强):")
            for dim_name, dim_data in dims.items():
                if isinstance(dim_data, dict):
                    score_1 = dim_data.get("1", "")
                    score_2 = dim_data.get("2", "")
                    parts.append(f" • {dim_name}:1分={score_1};2分={score_2}")

        # Real cases
        cases = rubric.get("real_cases", [])
        if cases:
            parts.append("\n▌ 真实录取案例:")
            for case in cases:
                name = case.get("anonymized_name", "匿名")
                outcome = case.get("outcome", "")
                overall = case.get("scores", {}).get("Overall", "")
                highlights = case.get("key_highlights", [])
                parts.append(f" • {name}({outcome},Overall {overall}):{', '.join(highlights[:2])}")

        # Implications for mainland-China applicants
        implications = rubric.get("china_student_implications", {})
        if implications:
            parts.append("\n▌ 大陆学生申请 Stanford 关键洞察:")
            for key, val in implications.items():
                if isinstance(val, str):
                    parts.append(f" • {key}:{val}")
                elif isinstance(val, dict) and val.get("insight"):
                    parts.append(f" • {key}:{val['insight']}")
        return "\n".join(parts)

    def get_harvard_context(self, query: str = "") -> str:
        """Render the Harvard reading-process and casebook stores as text.

        *query* is accepted for interface symmetry and is not used.
        Returns "" when both stores are empty.
        """
        parts: List[str] = []

        # Reading process (scoring standard)
        reading = self._harvard_reading
        if reading:
            dims = reading.get("dimensions", {})
            if dims:
                parts.append("【Harvard 内部评分标准(来源:SFFA v. Harvard 诉讼证据 HARV00097936,CONFIDENTIAL)】")
                parts.append("\n▌ 六大评分维度(1=最强,支持 +/- 修饰):")
                for dim_name, dim_data in dims.items():
                    if not isinstance(dim_data, dict):
                        continue
                    score_1 = (dim_data.get("1") or "")[:60]
                    score_2 = (dim_data.get("2") or "")[:60]
                    key_insight = dim_data.get("key_insight", "")
                    line = f" • {dim_name}:1={score_1};2={score_2}"
                    if key_insight:
                        line += f"(⚠ {key_insight[:50]})"
                    parts.append(line)
            china = reading.get("china_student_implications", {})
            if china:
                parts.append("\n▌ 大陆学生关键洞察:")
                for key, val in china.items():
                    if isinstance(val, dict):
                        insight = val.get("key_insight") or val.get("key_note") or ""
                        if insight:
                            parts.append(f" • {key}:{insight[:80]}")

        # Casebook cases
        casebook = self._harvard_casebook
        if casebook:
            cases = casebook.get("cases", [])
            if cases:
                parts.append("\n【Harvard 2012 Casebook(11个真实 Case,SFFA 诉讼证据)】")
                parts.append("▌ 典型录取/拒绝案例(含 Pause Factor):")
                for case in cases[:5]:  # only the first five cases are shown
                    name = case.get("case_id", "")
                    outcome = case.get("outcome", "")
                    pause = case.get("pause_factor", "无")
                    overall = case.get("scores", {}).get("Overall", "")
                    key_factors = case.get("key_factors", [])
                    parts.append(
                        f" • {name}({outcome},Overall {overall},Pause: {pause}):{', '.join(key_factors[:2])}"
                    )
            pause_patterns = casebook.get("pause_factor_patterns", [])
            if pause_patterns:
                parts.append("\n▌ Pause Factor 规律(招生官暂停深思的信号):")
                parts.extend(f" • {p}" for p in pause_patterns[:5])
            insights = casebook.get("key_insights", [])
            if insights:
                parts.append("\n▌ Casebook 核心洞察:")
                parts.extend(f" • {ins}" for ins in insights[:4])
        return "\n".join(parts) if parts else ""

    # ──────────────────────────────────────────────
    # Phase 5 additions
    # ──────────────────────────────────────────────
    def get_scholarship_context(self, school_name: str = "", query: str = "") -> str:
        """Render scholarship data for one school, or a ranking for a query.

        With a non-blank *school_name* the matching school's record is shown
        (an exact, case-insensitive name match is preferred over the first
        substring match).  Otherwise a ranking table is chosen from keywords
        in *query*.  Returns "" when the scholarship store is empty.
        """
        if not self._scholarship_kb:
            return ""
        meta = self._scholarship_kb.get("metadata", {})
        rankings = self._scholarship_kb.get("rankings", {})
        all_schools = self._scholarship_kb.get("all_schools", [])
        parts = [
            f"【奖学金知识库(来源:查校列表2025-2026,{meta.get('total_schools', 678)}所学校)】",
            f"数据覆盖:{meta.get('schools_with_pct_aid', 0)}所学校有国际生获奖比例数据,{meta.get('schools_with_avg_award', 0)}所有平均金额数据,{meta.get('schools_meets_full_need', 0)}所满足全额需求",
        ]
        if (school_name or "").strip():
            parts.extend(self._scholarship_school_lines(school_name, all_schools))
        else:
            parts.extend(self._scholarship_ranking_lines((query or "").lower(), rankings))
        return "\n".join(parts)

    def _scholarship_school_lines(self, school_name: str, all_schools: List[Dict]) -> List[str]:
        """Return the detail lines for the school matching *school_name*."""
        needle = school_name.strip().lower()
        candidates = [s for s in all_schools if needle in (s.get("school") or "").lower()]
        if not candidates:
            return [f"\n⚠ 未找到 '{school_name}' 的奖学金数据"]
        # Prefer an exact name match so e.g. "Penn" is not answered with the
        # first longer name that merely contains it.
        record = next(
            (s for s in candidates if (s.get("school") or "").lower() == needle),
            candidates[0],
        )
        lines = [f"\n▌ {record['school']} 国际生奖学金完整数据:"]
        for prefix, key, placeholder, suffix in self._SCHOLARSHIP_DETAIL_FIELDS:
            lines.append(f"{prefix}{self._kb_field(record, key, placeholder)}{suffix}")
        if record.get("notes"):
            lines.append(f" 备注:{record['notes']}")
        us_news = record.get("us_news_national") or record.get("us_news_lac") or "N/A"
        lines.append(f" US News排名:{us_news}")
        return lines

    def _scholarship_ranking_lines(self, q_lower: str, rankings: Dict) -> List[str]:
        """Return the ranking table selected by keywords in *q_lower*."""
        field = self._kb_field
        money = self._fmt_amount

        def pct(it):
            return f"获奖比例:{field(it, 'intl_pct_receiving_aid')}%"

        def avg_raw(it):
            return f"平均金额:${field(it, 'intl_avg_award_usd')}"

        def avg_grouped(it):
            return f"平均金额:${money(it.get('intl_avg_award_usd'))}"

        def admit(it):
            return f"录取率:{field(it, 'intl_admission_rate_pct')}%"

        def us_news(it):
            return f"US#{it.get('us_news') or 'N/A'}"

        def merit_amount(it):
            return f"最大Merit:${money(it.get('largest_merit_scholarship_usd'))}"

        def merit_name(it):
            return f"奖学金名称:{field(it, 'merit_scholarship_name')}"

        def by_us_news(it):
            return f" US#{str(it.get('us_news') or 'N/A'):<5} {str(it.get('school', '')):<40} "

        def render(items, prefix, segments):
            return [prefix(it) + " ".join(seg(it) for seg in segments) for it in items]

        def has(keywords):
            return self._mentions(q_lower, keywords)

        ranked = self._rank_prefix
        lines: List[str] = []
        if has(["性价比", "划算", "实惠", "小学校", "文理学院", "lac"]):
            lines.append("\n▌ 小型文理学院综合性价排行榜 Top 15(获奖比例+金额+选择性加权):")
            lines += render(rankings.get("top40_composite_value", [])[:15],
                            ranked, (pct, avg_raw, admit, us_news))
        elif has(["比例最高", "最容易", "最多人获奖", "最容易拿"]):
            # Checked before the average-award branch: its keywords "最高" and
            # "最多" are substrings of "比例最高" / "最多人获奖" and would
            # otherwise make this branch unreachable for those queries.
            lines.append("\n▌ 国际生获奖比例最高 Top 15:")
            lines += render(rankings.get("top30_by_intl_pct_receiving_aid", [])[:15],
                            ranked, (pct, avg_raw, admit))
        elif has(["平均", "最多", "金额最高", "最大金额", "最高"]):
            lines.append("\n▌ 国际生平均获奖金额最高 Top 15:")
            lines += render(rankings.get("top30_by_intl_avg_award", [])[:15],
                            ranked, (avg_grouped, pct, admit, us_news))
        elif has(["全额", "need", "need-blind", "need-based", "需要基础"]):
            meets = rankings.get("meets_full_need_schools", [])[:15]
            lines.append(f"\n▌ 满足全额 Need-Based Aid 的学校({len(meets)}所,按US News排序):")
            lines += render(meets, by_us_news, (pct, avg_raw, admit))
        elif has(["merit", "奖学金最大", "最大merit"]):
            lines.append("\n▌ 最大 Merit Scholarship 金额 Top 15:")
            lines += render(rankings.get("top30_by_largest_merit", [])[:15],
                            ranked, (merit_amount, merit_name, pct))
        else:
            # Default: average-award ranking plus the full-need schools.
            lines.append("\n▌ 国际生平均获奖金额最高 Top 12(含录取难度参考):")
            lines += render(rankings.get("top30_by_intl_avg_award", [])[:12],
                            ranked, (avg_grouped, pct, admit, us_news))
            meets = rankings.get("meets_full_need_schools", [])[:8]
            if meets:
                lines.append("\n▌ 顶校中满足全额 Need-Based Aid(对国际生):")
                lines += render(meets, by_us_news, (pct, avg_raw))
        return lines

    def get_consultant_context(self, query: str = "") -> str:
        """Render the consultant knowledge store as context text.

        Core strategies, the unique methodology and the China-student advice
        are always included; school-selection, anti-packaging and admission
        results sections are added when *query* mentions related keywords.
        Returns "" when the store is empty.
        """
        kb = self._consultant_kb
        if not kb:
            return ""
        parts = ["【三士渡顾问知识库(来源:徐谭妥、刘又铭、濮阳演讲 PPT)】"]
        q_lower = (query or "").lower()

        core = kb.get("core_strategies", [])
        if core:
            parts.append("\n▌ 核心申请策略:")
            parts += self._bullet_lines(core, "strategy", "description", 4, 100)

        if self._mentions(q_lower, ["选校", "school", "选学校", "ed", "ea"]):
            school_method = kb.get("school_selection_methodology", [])
            if school_method:
                parts.append("\n▌ 选校方法论:")
                parts += self._bullet_lines(school_method, "principle", "detail", 3, 100)

        if self._mentions(q_lower, ["包装", "真实", "authentic", "反包装", "材料"]):
            anti = kb.get("anti_packaging_philosophy", [])
            if anti:
                parts.append("\n▌ 反包装理念:")
                parts += self._bullet_lines(anti, "concept", "description", 3, 100)

        unique = kb.get("stoooges_unique_methodology", [])
        if unique:
            parts.append("\n▌ 三士渡独特方法论:")
            parts += self._bullet_lines(unique, "method", "description", 3, 80)

        results = kb.get("admission_results_2026", {})
        if results and self._mentions(q_lower, ["录取", "结果", "2026", "成绩"]):
            parts.append("\n▌ 三士渡 2026 届录取成绩(截至4月1日):")
            for school, count in results.items():
                if school != "other_results" and count:
                    parts.append(f" • {school}:{count}")

        china_advice = kb.get("china_student_advice", [])
        if china_advice:
            parts.append("\n▌ 大陆学生申请建议:")
            parts += self._bullet_lines(china_advice, "advice", "action", 3, 80)
        return "\n".join(parts)

    def get_school_strategy(self, school_name: str) -> Optional[Dict]:
        """Return the first school-strategy record whose name contains
        *school_name* (case-insensitive), or None.

        A blank name returns None instead of matching the first record.
        """
        needle = (school_name or "").strip().lower()
        if not needle or not self._school_strategy_kb:
            return None
        for record in self._school_strategy_kb.get("schools", []):
            if needle in (record.get("school") or "").lower():
                return record
        return None

    def get_context_for_query(self, query: str, school: Optional[str] = None) -> str:
        """Build the combined advisor context for an LLM (single entry point).

        Routes *query* (and the optional *school*) to the individual stores by
        keyword and joins every non-empty result with newlines.  When fewer
        than two parts were produced, a semantic-search fallback is attempted.
        """
        parts: List[str] = []
        q_lower = query.lower()

        def add(text: str) -> None:
            # Empty contexts are dropped so they do not count toward the
            # "fewer than two parts" fallback threshold below.
            if text:
                parts.append(text)

        # 1. School taste
        if school:
            taste = self.get_school_taste(school)
            if taste:
                parts.append(f"【顾问战略知识(来源:{taste.get('source', '顾问会议')})】")
                if taste.get("advisor_consensus"):
                    parts.append(f"顾问共识:{taste['advisor_consensus']}")
                if taste.get("test_optional_effective") is not None:
                    parts.append(f"Test Optional 有效性:{'有效' if taste['test_optional_effective'] else '建议提交标化'}")
                if taste.get("ib_friendly") is not None:
                    parts.append(f"IB 友好度:{'友好' if taste['ib_friendly'] else '一般'}")
                if taste.get("notes"):
                    parts.append(f"补充说明:{taste['notes']}")

        # 2. Special-case patterns.  All markers are tested against the
        # lower-cased query so that "无SAT" matches the "无sat" marker.
        if any(m in q_lower for m in ("test optional", "test-optional", "无sat", "不提交标化")):
            patterns = self.get_special_patterns(test_optional=True, school=school)
            if patterns:
                parts.append(f"\n【相关特殊案例模式({len(patterns)}个)】")
                for p in patterns[:3]:
                    parts.append(
                        f"- {p.get('anonymized_case', '匿名')}:{p.get('description', '')} "
                        f"→ {p.get('outcome', '')}({p.get('year', '')})"
                    )
                    if p.get("advisor_notes"):
                        parts.append(f" 顾问备注:{p['advisor_notes']}")

        # 3. Stanford scoring
        stanford_keywords = ["stanford", "斯坦福", "stanford打分", "stanford评分", "stanford录取"]
        if (school and "stanford" in school.lower()) or self._mentions(q_lower, stanford_keywords):
            add(self.get_stanford_context(query))

        # 4. Harvard scoring + casebook
        harvard_keywords = ["harvard", "哈佛", "harvard打分", "pause factor", "casebook", "harvard录取"]
        if (school and "harvard" in school.lower()) or self._mentions(q_lower, harvard_keywords):
            add(self.get_harvard_context(query))

        # 5. Generic top-school admission logic: offer both schools as
        # reference unless the query already named one of them.
        top_school_keywords = ["录取标准", "招生官", "评分维度", "holistic", "整体评估", "顶校录取", "t10录取"]
        if self._mentions(q_lower, top_school_keywords):
            if not self._mentions(q_lower, stanford_keywords + harvard_keywords):
                add(self.get_stanford_context(query))
                add(self.get_harvard_context(query))

        # 6. Scholarships
        scholarship_keywords = ["奖学金", "scholarship", "助学金", "financial aid", "merit", "need-based", "学费", "费用", "cost"]
        if self._mentions(q_lower, scholarship_keywords):
            add(self.get_scholarship_context(school_name=school or "", query=query))

        # 6a. Geili index.  "暑期项目" is added next to the original
        # "暂期项目" entry, which looks like a typo of it.
        geili_keywords = [
            "给力指数", "竞赛", "夏校", "活动推荐", "竞赛推荐", "展示活动",
            "科研项目", "暂期项目", "暑期项目", "RSI", "SSP", "PROMYS", "SIMR", "TASP",
            "Regeneron", "USAMO", "ISEF", "Coca-Cola", "Intel",
            "学术活动", "课外活动推荐", "应该参加什么", "活动清单",
        ]
        if self._mentions(q_lower, geili_keywords):
            add(self.get_geili_context(query))

        # 6b. Admission-blog red flags
        red_flag_keywords = [
            "红旗", "red flag", "申请错误", "常见错误", "不该做", "避免", "文书问题",
            "活动问题", "推荐信问题", "面试问题", "申请注意", "招生官不喜欢",
            "什么不能做", "申请禁忌", "文书禁忌", "大陆学生问题",
        ]
        if self._mentions(q_lower, red_flag_keywords):
            add(self.get_red_flags_context(query))

        # 7. Consultant knowledge
        consultant_keywords = [
            # student profile
            "普娃", "背景一般", "普通学生", "中等背景", "非顶尖",
            # application strategy
            "策略", "逆袭", "包装", "反包装", "如何申请", "怎么申请",
            # brand / speakers; both spellings are kept because the original
            # keyword list and the knowledge-base header disagree
            "三士渡", "徐谭妥", "刘又鸣", "刘又铭", "濦阳", "濮阳",
            # school-selection methodology
            "选校方法", "生态位", "演化", "生态系统",
            # admission results
            "录取结果", "2026届", "2025届", "历届学生",
            # application philosophy
            "申请哲学", "申请理念", "超越包装",
        ]
        if self._mentions(q_lower, consultant_keywords):
            add(self.get_consultant_context(query))

        # 8. Semantic-search fallback, only when keyword routing found little.
        if len(parts) < 2:
            add(self._semantic_fallback(query))
        return "\n".join(filter(None, parts))

    def _semantic_fallback(self, query: str) -> str:
        """Return supplementary context from the optional semantic retriever.

        Returns "" when the retriever module is not importable, when no
        searchable store is loaded, when nothing scores above 0.1, or when the
        search itself fails (the failure is printed, never raised).
        """
        try:
            from agent.semantic_retriever import multi_kb_search
        except ImportError:
            return ""  # optional component; keyword routing still works
        candidates = (
            ("stanford_scoring_rubric", self._stanford_rubric),
            ("harvard_casebook_rubric", self._harvard_casebook),
            ("consultant_knowledge_kb", self._consultant_kb),
        )
        kb_configs = [(name, kb) for name, kb in candidates if kb]
        if not kb_configs:
            return ""
        labels = {
            "stanford_scoring_rubric": "[Stanford FERPA]",
            "harvard_casebook_rubric": "[Harvard Casebook]",
            "consultant_knowledge_kb": "[三士渡顾问经验]",
        }
        try:
            hits = multi_kb_search(
                query=query,
                kb_configs=kb_configs,
                top_k_per_kb=2,
                total_top_k=4,
            )
            lines = [
                f"{labels.get(r['source'], '[' + str(r['source']) + ']')} {r['section']}: {r['text'][:300]}"
                for r in (hits or [])
                if r["score"] > 0.1  # keep only results above 10% similarity
            ]
        except Exception as e:
            # Best effort: a retriever failure must not break the main flow.
            print(f"[AdvisorKnowledge] 语义检索失败: {e}")
            return ""
        if not lines:
            return ""
        return "\n【语义检索补充知识】\n" + "\n".join(lines[:3])

    # ──────────────────────────────────────────────
    # Phase 6 additions
    # ──────────────────────────────────────────────
    def get_geili_context(self, query: str = "", min_score: int = 5) -> str:
        """Render recommended programmes from the Geili-index store.

        The score bands shown depend on keywords in *query*; bands below
        *min_score* are dropped.  Up to five items are listed per band, and
        no further band is started once 15 items have been listed.
        Returns "" when the store is empty.
        """
        if not self._geili_index:
            return ""
        q_lower = (query or "").lower()
        by_score = self._geili_index.get("items_by_score", {})

        if self._mentions(q_lower, ["顶尖", "最高", "9分", "10分", "top tier"]):
            target_scores = ["10", "9"]
        elif self._mentions(q_lower, ["科研", "research", "rsi", "ssp", "simr"]):
            target_scores = ["9", "8", "7", "6"]
        elif self._mentions(q_lower, ["写作", "writing", "诗歌", "文学"]):
            target_scores = ["8", "7", "6", "5"]
        elif self._mentions(q_lower, ["数学", "math", "olympiad", "usamo"]):
            target_scores = ["10", "9", "8", "7"]
        elif self._mentions(q_lower, ["商业", "金融", "business", "finance"]):
            target_scores = ["8", "7", "6", "5"]
        else:
            target_scores = ["9", "8", "7"]  # default: high-score bands
        target_scores = [s for s in target_scores if int(s) >= min_score]

        parts = [f"【给力指数推荐项目(来源:海狸学院第十二版,共{self._geili_index.get('total_items', 0)}个项目)】"]
        shown = 0
        for score_key in target_scores:
            items = by_score.get(score_key, [])
            if not items:
                continue
            parts.append(f"\n▌ 给力指数 {score_key} 分:")
            for item in items[:5]:
                elig = f"({item['eligibility']})" if item.get("eligibility") else ""
                en = f" / {item['en_name'][:40]}" if item.get("en_name") else ""
                admit = f",录取率:{item['admit_rate']}" if item.get("admit_rate") else ""
                tags = f" [{', '.join(item['tags'][:2])}]" if item.get("tags") else ""
                parts.append(f" • {item.get('cn_name', '')}{elig}{en}{admit}{tags}")
                shown += 1
            if shown >= 15:
                break
        parts.append("\n评分说明:9-10分=全球顶尖;7-8分=全球高度认可;5-6分=有一定含金量;3-4分=基础项目")
        return "\n".join(parts)

    def get_red_flags_context(self, query: str = "") -> str:
        """Render application red flags relevant to *query*.

        Categories are chosen from query keywords; with no match, the essay,
        activities and China-specific categories are used.  Within each
        category only ``critical``/``high`` severity flags are listed (at
        most four).  Returns "" when the store is empty.
        """
        if not self._admission_red_flags:
            return ""
        q_lower = (query or "").lower()
        red_flags = self._admission_red_flags.get("red_flags", {})
        parts = ["【申请红旗清单(来源:T20 大学招生官博客,含 MIT/Harvard/Yale 等)】"]

        routing = (
            (["文书", "essay", "写作", "personal statement"], ("essay_writing", "文书/写作红旗")),
            (["活动", "extracurricular", "课外", "ec"], ("activities", "活动红旗")),
            (["推荐信", "recommendation", "rec letter"], ("recommendations", "推荐信红旗")),
            (["行为", "截止日期", "deadline", "面试", "interview"], ("behavior", "行为红旗")),
            (["大陆", "中国", "china", "chinese", "国际生"], ("china_specific", "大陆学生特有红旗")),
        )
        categories_to_show = [cat for kws, cat in routing if self._mentions(q_lower, kws)]
        if not categories_to_show:
            categories_to_show = [
                ("essay_writing", "文书/写作红旗"),
                ("activities", "活动红旗"),
                ("china_specific", "大陆学生特有红旗"),
            ]

        for cat_key, cat_label in categories_to_show:
            flags = red_flags.get(cat_key, [])
            if not flags:
                continue
            parts.append(f"\n▌ {cat_label}:")
            severe = [f for f in flags if f.get("severity") in ("critical", "high")]
            for flag in severe[:4]:
                severity_label = "🚨" if flag.get("severity") == "critical" else "⚠️"
                note = f"({flag['note']})" if flag.get("note") else ""
                parts.append(
                    f" {severity_label} {flag.get('flag', '')}:{flag.get('description', '')}{note}"
                )
        return "\n".join(parts)

    def is_empty(self) -> bool:
        """Return True when none of the knowledge stores holds any data."""
        return not any(getattr(self, attr) for attr, _ in self._KB_FILES)
# Process-wide singleton; created lazily by get_knowledge_base().
_KNOWLEDGE_BASE: Optional[AdvisorKnowledge] = None


def get_knowledge_base() -> AdvisorKnowledge:
    """Return the shared AdvisorKnowledge instance, creating it on first use."""
    global _KNOWLEDGE_BASE
    if _KNOWLEDGE_BASE is not None:
        return _KNOWLEDGE_BASE
    _KNOWLEDGE_BASE = AdvisorKnowledge()
    return _KNOWLEDGE_BASE