# File: planning-agent-pro/agent/pause_factor_checker.py
# Uploaded by: catninja123
# Commit: "Deploy Planning Agent Pro v1.0" (d09f3b7, verified)
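"""
Harvard Pause Factor early-warning system.

Built on 10 structured rules distilled from 11 real Harvard cases.
Triggered automatically during gap_analysis and school_calibrate to
proactively flag signals in a student's application materials that
may trigger a Pause Factor.

NOTE(review): this file hard-codes nine checks (PF-001 to PF-010, with no
PF-005 check) -- confirm whether the tenth rule is intentionally omitted.
"""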
import json
import logging
import os
from typing import Dict, List, Optional, Tuple
# Module-wide cache of the parsed rule file; stays None until the first load attempt.
_pause_factors: Optional[Dict] = None


def _load_pause_factors() -> Dict:
    """Return the parsed ``harvard_pause_factors.json`` rule data, loading it once.

    The file is looked up at ``../data/harvard_pause_factors.json`` relative to
    this module. The result (or the fallback) is cached in ``_pause_factors``,
    so the file is read at most once per process.

    Loading is best-effort: if the file is missing, unreadable, not valid JSON,
    or its top level is not a JSON object, a warning is logged and
    ``{"pause_factor_rules": []}`` is returned instead of raising.

    Returns:
        The cached rule dictionary.
    """
    global _pause_factors
    if _pause_factors is None:
        path = os.path.join(os.path.dirname(__file__), '..', 'data', 'harvard_pause_factors.json')
        log = logging.getLogger(__name__)
        loaded = None
        try:
            # Explicit UTF-8: JSON is UTF-8 by spec, and the platform default
            # encoding is not UTF-8 everywhere.
            with open(path, encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            log.warning("Could not load pause factor rules from %s: %s", path, exc)
        else:
            if not isinstance(loaded, dict):
                # Callers use .get() on the result, so anything but a dict is unusable.
                log.warning("Pause factor rules in %s are not a JSON object; ignoring", path)
                loaded = None
        _pause_factors = loaded if loaded is not None else {"pause_factor_rules": []}
    return _pause_factors
def _to_number(value: object) -> float:
    """Coerce a raw profile field to a number; missing or unparseable values become 0."""
    if not value:
        return 0
    if isinstance(value, (int, float)):
        return value
    if isinstance(value, str):
        text = value.strip()
        try:
            return int(text)
        except ValueError:
            pass
        try:
            return float(text)
        except ValueError:
            return 0
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0


def _targets_top_schools(target_schools: Optional[List[str]]) -> bool:
    """Return True when any target school name matches the Harvard-tier keyword list."""
    for school in target_schools or ():
        if not isinstance(school, str):
            continue
        name = school.lower()
        if any(key in name for key in ('harvard', 'yale', 'princeton', 'columbia')):
            return True
        # A bare 'penn' substring also matches Penn State / Pennsylvania State
        # University, which must not switch on the Harvard-tier rules.
        if 'penn' in name and 'penn state' not in name and 'pennsylvania state' not in name:
            return True
    return False


def _overall_risk_level(severities: List[str]) -> str:
    """Fold per-factor severities into a single overall risk level."""
    if 'critical' in severities:
        return 'critical'
    high_count = severities.count('high')
    if high_count >= 2:
        return 'high'
    if high_count == 1:
        return 'medium'
    # NOTE(review): results containing only 'medium' factors are reported as
    # 'low' -- kept as in the original logic; confirm this is intended.
    return 'low'


def _factor(pf_id: str, category: str, description: str, severity: str,
            source: str, recommendation: str) -> Dict:
    """Build one triggered-factor record with the keys report formatting reads."""
    return {
        'id': pf_id,
        'category': category,
        'description': description,
        'severity': severity,
        'source': source,
        'recommendation': recommendation,
    }


def check_student_profile(student: Dict, target_schools: Optional[List[str]] = None) -> Dict:
    """Run the Pause Factor early-warning checks against a student profile.

    All thresholds are hard-coded in this function; no rule file is consulted.
    Numeric fields that are missing, falsy, or unparseable are treated as 0,
    which disables the checks that depend on them.

    Args:
        student: Profile dict. Keys read: ``sat``, ``gpa``, ``n_activities``,
            ``llm_act_mean``, ``ps2_overall``, ``academic_depth``,
            ``major_cat``, ``hs_cat``.
        target_schools: Target school names. PF-001 and PF-010 only fire when
            one of them matches Harvard / Yale / Princeton / Columbia / Penn
            (Penn State is excluded).

    Returns:
        A dict with:
            triggered_factors: list of dicts with keys ``id``, ``category``,
                ``description``, ``severity``, ``source``, ``recommendation``,
                in the order the checks ran.
            risk_level: ``'critical'`` if any factor is critical (no check
                here emits that severity), ``'high'`` for two or more high
                factors, ``'medium'`` for exactly one, otherwise ``'low'``.
            warning_summary: one-line summary string.
            recommendations: de-duplicated recommendations, in trigger order.
            source_note: fixed provenance string.
    """
    student = student or {}
    sat = _to_number(student.get('sat'))
    gpa = _to_number(student.get('gpa'))
    n_activities = _to_number(student.get('n_activities'))
    activity_power = _to_number(student.get('llm_act_mean'))
    ps_overall = _to_number(student.get('ps2_overall'))
    academic_depth = _to_number(student.get('academic_depth'))
    major_cat = student.get('major_cat', '') or ''
    hs_cat = student.get('hs_cat', '') or ''

    # Whether the Harvard/T5-specific thresholds apply.
    is_harvard_target = _targets_top_schools(target_schools)
    triggered: List[Dict] = []

    # PF-001: absolute test score too low in a Harvard-tier pool.
    if 0 < sat < 1450 and is_harvard_target:
        triggered.append(_factor(
            'PF-001',
            '学术标化',
            f'SAT {sat} 在 Harvard/T5 申请池中偏低(中位数 1580+),是最常见的 Pause Factor 之一',
            'high',
            '[Harvard Casebook Case #1/#3/#6]',
            '重新备考 SAT 到 1500+ ,或通过 AP/IB 高分、研究经历、竞赛奖项来补充学术能力信号',
        ))

    # PF-010: GPA too low for a T10 application.
    if 0 < gpa < 3.7 and is_harvard_target:
        triggered.append(_factor(
            'PF-010',
            '学术记录',
            f'GPA {gpa} 在 T10 申请池中偏低(中位数 3.9+),可能触发学术能力质疑',
            'high',
            '[Harvard Casebook Case #2/#4]',
            '确保课程难度足够高(AP/IB),并在文书中解释 GPA 下降的客观原因',
        ))

    # PF-002: many activities with low average strength ("wide but shallow").
    if n_activities >= 8 and activity_power < 0.5:
        triggered.append(_factor(
            'PF-002',
            '课外活动定位',
            f'活动数量多({n_activities}个)但平均质量偏低(power={activity_power:.2f}),可能触发"宽而浅"的 Pause Factor',
            'medium',
            '[Harvard Casebook Case #1/#2]',
            '选择 1-2 个核心活动深化,建立清晰的校园贡献定位',
        ))

    # PF-003: high GPA paired with a low test score.
    pf003_hit = sat > 0 and gpa > 3.8 and sat < 1450
    if pf003_hit:
        triggered.append(_factor(
            'PF-003',
            '学术能力匹配',
            f'GPA {gpa} 较高但 SAT {sat} 偏低,招生官可能质疑学术能力是否能支撑顶校课程',
            'medium',
            '[Harvard Casebook Case #2]',
            '重新备考 SAT,或通过 AP/IB 高分来证明学术能力',
        ))

    # PF-004: strong all-rounder without a distinctive hook.
    if (sat >= 1500 and gpa >= 3.9 and n_activities >= 8
            and activity_power < 0.65 and academic_depth < 0.6):
        triggered.append(_factor(
            'PF-004',
            '竞争力定位',
            '学生整体很强(SAT/GPA 达标)但缺乏独特的 hook,在 Harvard 申请池中可能不够突出',
            'high',
            '[Harvard Casebook Case #3]',
            '找到一个真正独特的切入点(研究/竞赛/创业),建立不可替代的申请形象',
        ))

    # PF-007: low SAT read as a language-ability signal for listed school types.
    if 0 < sat < 1420 and hs_cat in ('国际高中', '公立高中', '私立高中'):
        triggered.append(_factor(
            'PF-007',
            '语言能力',
            f'SAT {sat} 偏低,可能引发国际生英语能力的 Pause Factor',
            'medium',
            '[Harvard Casebook Case #6]',
            '提升 SAT 成绩,或通过高质量的英语文书来展示语言能力',
        ))

    # PF-008: perfect GPA with an underwhelming SAT; skipped when PF-003
    # already reported the same GPA/SAT mismatch.
    if sat > 0 and gpa >= 4.0 and sat < 1480 and not pf003_hit:
        triggered.append(_factor(
            'PF-008',
            '成绩一致性',
            f'GPA {gpa}(满分)但 SAT {sat} 未达到预期,招生官可能质疑学校难度或存在"大鱼小池塘"效应',
            'medium',
            '[Harvard Casebook Case #7]',
            '确保课程难度(AP/IB)足够高,让 GPA 有说服力',
        ))

    # PF-009: STEM major with an SAT below the suggested 1520.
    stem_majors = ('Computer Science', 'Engineering', 'Physics', 'Mathematics', 'Statistics', 'Data Science')
    if major_cat in stem_majors and 0 < sat < 1520:
        triggered.append(_factor(
            'PF-009',
            '学术能力匹配',
            f'申请 {major_cat} 但 SAT {sat} 偏低(建议 ≥1520),可能引发学术能力匹配的 Pause Factor',
            'medium',
            '[Harvard Casebook Case #8]',
            '补充数学/CS 相关竞赛(AMC/USAMO/USACO)或研究经历来证明能力',
        ))

    # PF-006: low essay score used as an indirect signal of tone/image problems.
    if 0 < ps_overall < 0.4:
        triggered.append(_factor(
            'PF-006',
            '个人品质',
            f'文书质量评分偏低({ps_overall:.2f}),可能存在语气消极或个人形象不清晰的问题',
            'high',
            '[Harvard Casebook Case #5]',
            '重新审视文书语气,确保有成长弧线,避免停留在负面情绪中',
        ))

    risk_level = _overall_risk_level([t['severity'] for t in triggered])

    if not triggered:
        warning_summary = "✅ 未发现明显 Pause Factor 风险"
    else:
        warning_summary = f"⚠️ 发现 {len(triggered)} 个潜在 Pause Factor(风险等级:{risk_level})"

    # dict.fromkeys de-duplicates while keeping trigger order; a set would
    # return the strings in a hash-dependent order that differs between runs.
    recommendations = list(dict.fromkeys(t['recommendation'] for t in triggered))

    return {
        'triggered_factors': triggered,
        'risk_level': risk_level,
        'warning_summary': warning_summary,
        'recommendations': recommendations,
        'source_note': '[Harvard Casebook 2012, 11个真实Case]',
    }
def format_pause_factor_warning(check_result: Dict, school_name: str = "Harvard") -> str:
    """Render a Pause Factor check result as the warning paragraph for a gap_analysis report.

    Args:
        check_result: Result dict; reads ``triggered_factors`` and
            ``risk_level``. A missing or ``None`` ``risk_level`` is shown as
            ``LOW``.
        school_name: School name used in the paragraph heading.

    Returns:
        A Markdown-style string. With no triggered factors it is a single
        "no obvious risk" line. Otherwise it is a heading with the upper-cased
        risk level followed by one entry per factor: an icon line with
        category and id, the description, then the recommendation and source
        lines when the factor provides them. An unknown or missing severity
        gets the neutral icon.
    """
    triggered = check_result.get('triggered_factors', [])
    risk_level = check_result.get('risk_level') or 'low'
    if not triggered:
        return f"\n**{school_name} Pause Factor 检查**:✅ 未发现明显风险 [Harvard Casebook]"

    # Built once rather than on every loop iteration.
    severity_icons = {'critical': '🔴', 'high': '🟠', 'medium': '🟡', 'low': '🟢'}

    lines = [f"\n**{school_name} Pause Factor 预警** [Harvard Casebook 2012] — 风险等级:{risk_level.upper()}"]
    for t in triggered:
        severity_icon = severity_icons.get(t.get('severity'), '⚪')
        lines.append(f"\n{severity_icon} **{t['category']}**({t['id']})")
        lines.append(f" {t['description']}")
        # Recommendation and source are omitted rather than crashing when a
        # factor record does not carry them.
        if 'recommendation' in t:
            lines.append(f" → 建议:{t['recommendation']}")
        if 'source' in t:
            lines.append(f" 来源:{t['source']}")
    return '\n'.join(lines)