"""
Harvard Pause Factor early-warning system.

Ten structured rules distilled from 11 real Harvard cases. The checks are
meant to run automatically during gap_analysis and school_calibrate and to
proactively flag signals in a student's application materials that may
trigger a Pause Factor.
"""

import json
import os
from typing import Any, Dict, List, Optional, Tuple

# Lazily populated cache for the JSON rule file (see _load_pause_factors).
_pause_factors: Optional[Dict] = None

# Lower-cased substrings that mark a target school as "Harvard/T5".
# NOTE(review): plain substring matching also hits names such as
# "Penn State" or "University of British Columbia" - confirm whether an
# exact school list should be used instead.
_TOP_SCHOOL_KEYWORDS = ('harvard', 'yale', 'princeton', 'columbia', 'penn')

# Majors for which rule PF-009 applies.
_STEM_MAJORS = (
    'Computer Science',
    'Engineering',
    'Physics',
    'Mathematics',
    'Statistics',
    'Data Science',
)

# High-school categories for which rule PF-007 applies.
# NOTE(review): the rule text talks about international students, yet all
# three categories are listed - confirm this is intended.
_PF007_HS_CATEGORIES = ('国际高中', '公立高中', '私立高中')

# Icon shown in front of each factor in the formatted warning.
_SEVERITY_ICONS = {'critical': '🔴', 'high': '🟠', 'medium': '🟡', 'low': '🟢'}


def _load_pause_factors() -> Dict:
    """Return the pause-factor rule data, loading and caching it on first use.

    The data is read from ``../data/harvard_pause_factors.json`` relative to
    this module. If the file cannot be read or parsed, or does not contain a
    JSON object, an empty rule set ``{"pause_factor_rules": []}`` is cached
    and returned instead (best-effort behaviour: this never raises for a
    missing or malformed file).
    """
    global _pause_factors
    if _pause_factors is None:
        path = os.path.join(
            os.path.dirname(__file__), '..', 'data', 'harvard_pause_factors.json'
        )
        try:
            # Explicit UTF-8 so the result does not depend on the OS locale.
            with open(path, encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # OSError: missing/unreadable file. ValueError: invalid JSON
            # (json.JSONDecodeError) or invalid UTF-8 (UnicodeDecodeError).
            loaded = None
        if isinstance(loaded, dict):
            _pause_factors = loaded
        else:
            _pause_factors = {"pause_factor_rules": []}
    return _pause_factors


def _numeric_field(student: Dict, key: str) -> Any:
    """Read a numeric profile field, treating missing/empty values as 0.

    Values that are already numbers are returned unchanged. Strings (e.g.
    raw user input such as ``"1400"``) are parsed as int, then float; a
    string that cannot be parsed counts as missing (0).
    """
    raw = student.get(key, 0) or 0
    if isinstance(raw, str):
        text = raw.strip()
        for cast in (int, float):
            try:
                return cast(text)
            except ValueError:
                continue
        return 0
    return raw


def _targets_top_school(target_schools: Optional[List[str]]) -> bool:
    """Return True if any target school name contains a Harvard/T5 keyword."""
    if not target_schools:
        return False
    if isinstance(target_schools, str):
        # A bare string would otherwise be iterated character by character.
        target_schools = [target_schools]
    for school in target_schools:
        if not isinstance(school, str):
            continue  # ignore None / malformed entries instead of crashing
        name = school.lower()
        if any(keyword in name for keyword in _TOP_SCHOOL_KEYWORDS):
            return True
    return False


def _aggregate_risk(severities: List[str]) -> str:
    """Collapse the individual factor severities into one overall risk level.

    Any 'critical' -> 'critical'; two or more 'high' -> 'high'; exactly one
    'high' -> 'medium'; otherwise (only 'medium' factors, or none) -> 'low'.
    NOTE(review): 'medium'-only results map to 'low' - confirm intended.
    """
    if 'critical' in severities:
        return 'critical'
    high_count = severities.count('high')
    if high_count >= 2:
        return 'high'
    if high_count == 1:
        return 'medium'
    return 'low'


def _factor(factor_id: str, category: str, description: str, severity: str,
            source: str, recommendation: str) -> Dict[str, str]:
    """Build one triggered-factor record with the keys callers expect."""
    return {
        'id': factor_id,
        'category': category,
        'description': description,
        'severity': severity,
        'source': source,
        'recommendation': recommendation,
    }


def check_student_profile(student: Dict, target_schools: Optional[List[str]] = None) -> Dict:
    """Run the Pause Factor early-warning checks against a student profile.

    Args:
        student: Student profile dict (from MAZE / user input). The keys read
            are ``sat``, ``gpa``, ``n_activities``, ``llm_act_mean``,
            ``ps2_overall``, ``academic_depth``, ``major_cat`` and ``hs_cat``.
            Missing or falsy values are treated as 0 / ''. Numeric values
            given as strings are parsed.
        target_schools: Target school names; used to decide whether the
            Harvard/T5-specific rules (PF-001, PF-010) apply.

    Returns:
        A dict with:
            triggered_factors: list of dicts with keys ``id``, ``category``,
                ``description``, ``severity``, ``source``, ``recommendation``,
                in rule-evaluation order.
            risk_level: 'low' | 'medium' | 'high' | 'critical'.
            warning_summary: one-line human-readable summary.
            recommendations: de-duplicated recommendations, in the same order
                as ``triggered_factors``.
            source_note: citation string for the rule set.
    """
    # The JSON rule file is still loaded (and cached) here as before, but the
    # checks below are hard-coded and do not read the loaded rules.
    _load_pause_factors()

    student = student or {}
    triggered: List[Dict[str, str]] = []

    sat = _numeric_field(student, 'sat')
    gpa = _numeric_field(student, 'gpa')
    n_activities = _numeric_field(student, 'n_activities')
    activity_power = _numeric_field(student, 'llm_act_mean')
    ps_overall = _numeric_field(student, 'ps2_overall')
    academic_depth = _numeric_field(student, 'academic_depth')
    major_cat = student.get('major_cat', '') or ''
    hs_cat = student.get('hs_cat', '') or ''

    # Whether the application targets Harvard / a T5 school.
    is_harvard_target = _targets_top_school(target_schools)

    # PF-001: low absolute test score (Harvard context; the most common
    # Pause Factor).
    if 0 < sat < 1450 and is_harvard_target:
        triggered.append(_factor(
            'PF-001',
            '学术标化',
            f'SAT {sat} 在 Harvard/T5 申请池中偏低(中位数 1580+),是最常见的 Pause Factor 之一',
            'high',
            '[Harvard Casebook Case #1/#3/#6]',
            '重新备考 SAT 到 1500+ ,或通过 AP/IB 高分、研究经历、竞赛奖项来补充学术能力信号',
        ))

    # PF-010: low GPA while applying to a T10 school.
    if 0 < gpa < 3.7 and is_harvard_target:
        triggered.append(_factor(
            'PF-010',
            '学术记录',
            f'GPA {gpa} 在 T10 申请池中偏低(中位数 3.9+),可能触发学术能力质疑',
            'high',
            '[Harvard Casebook Case #2/#4]',
            '确保课程难度足够高(AP/IB),并在文书中解释 GPA 下降的客观原因',
        ))

    # PF-002: unclear extracurricular positioning (many activities, low
    # average quality).
    if n_activities >= 8 and activity_power < 0.5:
        triggered.append(_factor(
            'PF-002',
            '课外活动定位',
            f'活动数量多({n_activities}个)但平均质量偏低(power={activity_power:.2f}),可能触发"宽而浅"的 Pause Factor',
            'medium',
            '[Harvard Casebook Case #1/#2]',
            '选择 1-2 个核心活动深化,建立清晰的校园贡献定位',
        ))

    # PF-003: test score does not match GPA (high GPA, low SAT).
    if 0 < sat < 1450 and gpa > 3.8:
        triggered.append(_factor(
            'PF-003',
            '学术能力匹配',
            f'GPA {gpa} 较高但 SAT {sat} 偏低,招生官可能质疑学术能力是否能支撑顶校课程',
            'medium',
            '[Harvard Casebook Case #2]',
            '重新备考 SAT,或通过 AP/IB 高分来证明学术能力',
        ))

    # PF-004: lacks distinctiveness (well-rounded but no hook).
    if (sat >= 1500 and gpa >= 3.9 and n_activities >= 8
            and activity_power < 0.65 and academic_depth < 0.6):
        triggered.append(_factor(
            'PF-004',
            '竞争力定位',
            '学生整体很强(SAT/GPA 达标)但缺乏独特的 hook,在 Harvard 申请池中可能不够突出',
            'high',
            '[Harvard Casebook Case #3]',
            '找到一个真正独特的切入点(研究/竞赛/创业),建立不可替代的申请形象',
        ))

    # PF-007: international-student language ability (low SAT).
    if 0 < sat < 1420 and hs_cat in _PF007_HS_CATEGORIES:
        triggered.append(_factor(
            'PF-007',
            '语言能力',
            f'SAT {sat} 偏低,可能引发国际生英语能力的 Pause Factor',
            'medium',
            '[Harvard Casebook Case #6]',
            '提升 SAT 成绩,或通过高质量的英语文书来展示语言能力',
        ))

    # PF-008: GPA / test-score mismatch (perfect GPA, lower SAT). Skipped
    # when PF-003 already fired, to avoid reporting the same issue twice.
    if 0 < sat < 1480 and gpa >= 4.0:
        if not any(t['id'] == 'PF-003' for t in triggered):
            triggered.append(_factor(
                'PF-008',
                '成绩一致性',
                f'GPA {gpa}(满分)但 SAT {sat} 未达到预期,招生官可能质疑学校难度或存在"大鱼小池塘"效应',
                'medium',
                '[Harvard Casebook Case #7]',
                '确保课程难度(AP/IB)足够高,让 GPA 有说服力',
            ))

    # PF-009: STEM applicant with an insufficient test score.
    if major_cat in _STEM_MAJORS and 0 < sat < 1520:
        triggered.append(_factor(
            'PF-009',
            '学术能力匹配',
            f'申请 {major_cat} 但 SAT {sat} 偏低(建议 ≥1520),可能引发学术能力匹配的 Pause Factor',
            'medium',
            '[Harvard Casebook Case #8]',
            '补充数学/CS 相关竞赛(AMC/USAMO/USACO)或研究经历来证明能力',
        ))

    # PF-006: essay mental-health signal (inferred indirectly from the
    # overall essay score; 0 means "no score").
    if 0 < ps_overall < 0.4:
        triggered.append(_factor(
            'PF-006',
            '个人品质',
            f'文书质量评分偏低({ps_overall:.2f}),可能存在语气消极或个人形象不清晰的问题',
            'high',
            '[Harvard Casebook Case #5]',
            '重新审视文书语气,确保有成长弧线,避免停留在负面情绪中',
        ))

    # Overall risk level.
    risk_level = _aggregate_risk([t['severity'] for t in triggered])

    # Warning summary.
    if not triggered:
        warning_summary = "✅ 未发现明显 Pause Factor 风险"
    else:
        warning_summary = f"⚠️ 发现 {len(triggered)} 个潜在 Pause Factor(风险等级:{risk_level})"

    # De-duplicate while keeping rule order; a set would make the order
    # depend on per-process string hashing.
    recommendations = list(dict.fromkeys(t['recommendation'] for t in triggered))

    return {
        'triggered_factors': triggered,
        'risk_level': risk_level,
        'warning_summary': warning_summary,
        'recommendations': recommendations,
        'source_note': '[Harvard Casebook 2012, 11个真实Case]',
    }


def format_pause_factor_warning(check_result: Dict, school_name: str = "Harvard") -> str:
    """Format a Pause Factor check result as a warning paragraph.

    The output is intended for the gap_analysis report.

    Args:
        check_result: Result dict as returned by ``check_student_profile``;
            ``triggered_factors`` and ``risk_level`` are read.
        school_name: School name shown in the paragraph heading.

    Returns:
        A Markdown-style string: a single "no obvious risk" line when no
        factor was triggered, otherwise a heading with the upper-cased risk
        level followed by four lines per triggered factor.
    """
    triggered = check_result.get('triggered_factors', [])
    # Fall back to 'low' for a missing or None risk level.
    risk_level = str(check_result.get('risk_level') or 'low')

    if not triggered:
        return f"\n**{school_name} Pause Factor 检查**:✅ 未发现明显风险 [Harvard Casebook]"

    lines = [
        f"\n**{school_name} Pause Factor 预警** [Harvard Casebook 2012] — 风险等级:{risk_level.upper()}"
    ]
    for t in triggered:
        severity_icon = _SEVERITY_ICONS.get(t['severity'], '⚪')
        lines.append(f"\n{severity_icon} **{t['category']}**({t['id']})")
        lines.append(f" {t['description']}")
        lines.append(f" → 建议:{t['recommendation']}")
        lines.append(f" 来源:{t['source']}")

    return '\n'.join(lines)