Spaces:
Sleeping
Sleeping
"""
Harvard Pause Factor early-warning system.

Ten structured rules distilled from 11 real Harvard cases.
Triggered automatically during gap_analysis and school_calibrate to
proactively flag signals in a student's application materials that may
trigger a Pause Factor.
"""
| import json | |
| import os | |
| from typing import Dict, List, Optional, Tuple | |
# Module-level cache for the parsed rules file; stays None until the first
# load attempt, after which it always holds a dict.
_pause_factors: Optional[Dict] = None


def _load_pause_factors() -> Dict:
    """Return the pause-factor rule data, loading it from disk on first use.

    The data is read from ``../data/harvard_pause_factors.json`` relative to
    this module and cached in the module-level ``_pause_factors``. Loading is
    best-effort: if the file cannot be read or parsed, or its top level is not
    a JSON object, an empty rule set is cached and returned instead.

    Returns:
        The parsed JSON object, or ``{"pause_factor_rules": []}`` as fallback.
    """
    global _pause_factors
    if _pause_factors is None:
        loaded = None
        try:
            path = os.path.join(os.path.dirname(__file__), '..', 'data', 'harvard_pause_factors.json')
            # JSON text is UTF-8; do not rely on the platform's default
            # locale encoding, which may differ (e.g. on Windows).
            with open(path, encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # OSError: file missing or unreadable.
            # ValueError: malformed JSON (json.JSONDecodeError) or bytes that
            # are not valid UTF-8 (UnicodeDecodeError).
            loaded = None
        # Callers use dict methods on the result, so anything that is not a
        # JSON object is treated the same as a failed load.
        _pause_factors = loaded if isinstance(loaded, dict) else {"pause_factor_rules": []}
    return _pause_factors
def check_student_profile(student: Dict, target_schools: Optional[List[str]] = None) -> Dict:
    """Run the Pause Factor early-warning checks against a student profile.

    Every rule is a hard-coded threshold check in this function. A rule that
    fires appends one entry to ``triggered_factors``, in the order the rules
    are evaluated below.

    Args:
        student: Profile dict. Keys read: ``sat``, ``gpa``, ``n_activities``,
            ``llm_act_mean``, ``ps2_overall``, ``academic_depth``,
            ``major_cat`` and ``hs_cat``. Missing or falsy values are treated
            as ``0`` (numeric keys) or ``''`` (string keys).
        target_schools: Optional list of school names. Rules PF-001 and
            PF-010 fire only when at least one name contains "harvard",
            "yale", "princeton", "columbia" or "penn" (case-insensitive
            substring match). Non-string entries are ignored.

    Returns:
        A dict with the keys:

        * ``triggered_factors``: list of dicts, each with ``id``,
          ``category``, ``description``, ``severity``, ``source`` and
          ``recommendation``.
        * ``risk_level``: ``'critical'`` if any factor is critical, ``'high'``
          with two or more high-severity factors, ``'medium'`` with exactly
          one, otherwise ``'low'``.
        * ``warning_summary``: one-line summary string.
        * ``recommendations``: de-duplicated recommendations, in the same
          order as ``triggered_factors``.
        * ``source_note``: fixed attribution string.
    """
    triggered = []

    # `or 0` / `or ''` also normalises explicit None values in the profile.
    sat = student.get('sat', 0) or 0
    gpa = student.get('gpa', 0) or 0
    n_activities = student.get('n_activities', 0) or 0
    activity_power = student.get('llm_act_mean', 0) or 0
    ps_overall = student.get('ps2_overall', 0) or 0
    academic_depth = student.get('academic_depth', 0) or 0
    major_cat = student.get('major_cat', '') or ''
    hs_cat = student.get('hs_cat', '') or ''

    # Is any target a Harvard-tier school?
    # NOTE(review): this is a plain substring match, so names such as
    # "Penn State" or "University of British Columbia" also match — confirm
    # whether that is intended.
    top_school_keywords = ('harvard', 'yale', 'princeton', 'columbia', 'penn')
    is_harvard_target = any(
        keyword in school.lower()
        for school in (target_schools or [])
        if isinstance(school, str)
        for keyword in top_school_keywords
    )

    # PF-001: absolute test score too low (Harvard context).
    if sat > 0 and sat < 1450 and is_harvard_target:
        triggered.append({
            'id': 'PF-001',
            'category': '学术标化',
            'description': f'SAT {sat} 在 Harvard/T5 申请池中偏低(中位数 1580+),是最常见的 Pause Factor 之一',
            'severity': 'high',
            'source': '[Harvard Casebook Case #1/#3/#6]',
            'recommendation': '重新备考 SAT 到 1500+ ,或通过 AP/IB 高分、研究经历、竞赛奖项来补充学术能力信号',
        })

    # PF-010: GPA too low for a T10 application.
    if gpa > 0 and gpa < 3.7 and is_harvard_target:
        triggered.append({
            'id': 'PF-010',
            'category': '学术记录',
            'description': f'GPA {gpa} 在 T10 申请池中偏低(中位数 3.9+),可能触发学术能力质疑',
            'severity': 'high',
            'source': '[Harvard Casebook Case #2/#4]',
            'recommendation': '确保课程难度足够高(AP/IB),并在文书中解释 GPA 下降的客观原因',
        })

    # PF-002: many activities but low average quality ("wide but shallow").
    if n_activities >= 8 and activity_power < 0.5:
        triggered.append({
            'id': 'PF-002',
            'category': '课外活动定位',
            'description': f'活动数量多({n_activities}个)但平均质量偏低(power={activity_power:.2f}),可能触发"宽而浅"的 Pause Factor',
            'severity': 'medium',
            'source': '[Harvard Casebook Case #1/#2]',
            'recommendation': '选择 1-2 个核心活动深化,建立清晰的校园贡献定位',
        })

    # PF-003: test score does not match a high GPA.
    if sat > 0 and gpa > 3.8 and sat < 1450:
        triggered.append({
            'id': 'PF-003',
            'category': '学术能力匹配',
            'description': f'GPA {gpa} 较高但 SAT {sat} 偏低,招生官可能质疑学术能力是否能支撑顶校课程',
            'severity': 'medium',
            'source': '[Harvard Casebook Case #2]',
            'recommendation': '重新备考 SAT,或通过 AP/IB 高分来证明学术能力',
        })

    # PF-004: strong all-rounder with no distinctive hook.
    if (sat >= 1500 and gpa >= 3.9 and n_activities >= 8
            and activity_power < 0.65 and academic_depth < 0.6):
        triggered.append({
            'id': 'PF-004',
            'category': '竞争力定位',
            'description': '学生整体很强(SAT/GPA 达标)但缺乏独特的 hook,在 Harvard 申请池中可能不够突出',
            'severity': 'high',
            'source': '[Harvard Casebook Case #3]',
            'recommendation': '找到一个真正独特的切入点(研究/竞赛/创业),建立不可替代的申请形象',
        })

    # PF-007: language-ability concern signalled by a low SAT; only applies
    # when the high-school category is one of the listed values.
    if sat > 0 and sat < 1420 and hs_cat in ('国际高中', '公立高中', '私立高中'):
        triggered.append({
            'id': 'PF-007',
            'category': '语言能力',
            'description': f'SAT {sat} 偏低,可能引发国际生英语能力的 Pause Factor',
            'severity': 'medium',
            'source': '[Harvard Casebook Case #6]',
            'recommendation': '提升 SAT 成绩,或通过高质量的英语文书来展示语言能力',
        })

    # PF-008: perfect GPA but lower-than-expected SAT. Skipped when PF-003
    # already fired, because both describe the same GPA/SAT mismatch.
    if sat > 0 and gpa >= 4.0 and sat < 1480:
        if not any(t['id'] == 'PF-003' for t in triggered):
            triggered.append({
                'id': 'PF-008',
                'category': '成绩一致性',
                'description': f'GPA {gpa}(满分)但 SAT {sat} 未达到预期,招生官可能质疑学校难度或存在"大鱼小池塘"效应',
                'severity': 'medium',
                'source': '[Harvard Casebook Case #7]',
                'recommendation': '确保课程难度(AP/IB)足够高,让 GPA 有说服力',
            })

    # PF-009: STEM major with an insufficient test score.
    stem_majors = ('Computer Science', 'Engineering', 'Physics', 'Mathematics', 'Statistics', 'Data Science')
    if major_cat in stem_majors and sat > 0 and sat < 1520:
        triggered.append({
            'id': 'PF-009',
            'category': '学术能力匹配',
            'description': f'申请 {major_cat} 但 SAT {sat} 偏低(建议 ≥1520),可能引发学术能力匹配的 Pause Factor',
            'severity': 'medium',
            'source': '[Harvard Casebook Case #8]',
            'recommendation': '补充数学/CS 相关竞赛(AMC/USAMO/USACO)或研究经历来证明能力',
        })

    # PF-006: essay signal, inferred indirectly from the overall essay score.
    # A score of 0 means "no score available" and does not trigger the rule.
    if 0 < ps_overall < 0.4:
        triggered.append({
            'id': 'PF-006',
            'category': '个人品质',
            'description': f'文书质量评分偏低({ps_overall:.2f}),可能存在语气消极或个人形象不清晰的问题',
            'severity': 'high',
            'source': '[Harvard Casebook Case #5]',
            'recommendation': '重新审视文书语气,确保有成长弧线,避免停留在负面情绪中',
        })

    # Overall risk level. Medium-only results map to 'low', and an empty
    # result naturally falls through to 'low' as well.
    severities = [t['severity'] for t in triggered]
    high_count = severities.count('high')
    if 'critical' in severities:
        risk_level = 'critical'
    elif high_count >= 2:
        risk_level = 'high'
    elif high_count == 1:
        risk_level = 'medium'
    else:
        risk_level = 'low'

    # Warning summary.
    if not triggered:
        warning_summary = "✅ 未发现明显 Pause Factor 风险"
    else:
        warning_summary = f"⚠️ 发现 {len(triggered)} 个潜在 Pause Factor(风险等级:{risk_level})"

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is stable across runs (a set would order strings arbitrarily).
    recommendations = list(dict.fromkeys(t['recommendation'] for t in triggered))

    return {
        'triggered_factors': triggered,
        'risk_level': risk_level,
        'warning_summary': warning_summary,
        'recommendations': recommendations,
        'source_note': '[Harvard Casebook 2012, 11个真实Case]',
    }
def format_pause_factor_warning(check_result: Dict, school_name: str = "Harvard") -> str:
    """Render a Pause Factor check result as a warning paragraph.

    Intended for embedding in a gap_analysis report.

    Args:
        check_result: Dict with ``triggered_factors`` (list of factor dicts
            carrying ``severity``, ``category``, ``id``, ``description``,
            ``recommendation`` and ``source``) and ``risk_level``. Missing
            keys default to an empty list and ``'low'``.
        school_name: School name shown in the paragraph heading.

    Returns:
        A single "no risk found" line when nothing was triggered; otherwise a
        heading with the upper-cased risk level followed by four lines per
        factor, all joined with newlines.
    """
    factors = check_result.get('triggered_factors', [])
    level = check_result.get('risk_level', 'low')

    # Nothing triggered: return the short all-clear line.
    if not factors:
        return f"\n**{school_name} Pause Factor 检查**:✅ 未发现明显风险 [Harvard Casebook]"

    # Severity -> marker; unknown severities fall back to the white circle.
    icons = {'critical': '🔴', 'high': '🟠', 'medium': '🟡', 'low': '🟢'}

    parts = [f"\n**{school_name} Pause Factor 预警** [Harvard Casebook 2012] — 风险等级:{level.upper()}"]
    for factor in factors:
        marker = icons.get(factor['severity'], '⚪')
        parts.extend([
            f"\n{marker} **{factor['category']}**({factor['id']})",
            f" {factor['description']}",
            f" → 建议:{factor['recommendation']}",
            f" 来源:{factor['source']}",
        ])
    return '\n'.join(parts)