"""
parser.py — ATG EOD report parser

Claude does two things:
1. Verifies this is a genuine EOD report (not a random bot mention)
2. Extracts all structured fields

Falls back to regex if Claude is unavailable.
"""

import os
import re
import json
from dataclasses import dataclass, field

# Phrases that, when found in the blockers or confidence text, mark the
# report as needing attention.
RISK_FLAG_KEYWORDS = [
    "stuck", "blocked", "can't proceed", "need help", "behind",
    "delayed", "not sure", "struggling", "at risk", "won't finish",
    "unable to",
]

# Slack-style user mention, e.g. "<@U123ABC>".
_BOT_MENTION_RE = re.compile(r'<@[A-Z0-9]+>')

# Explanations people write instead of a real chat URL.
_NOT_A_LINK_PHRASES = (
    "cant", "can't", "cannot", "not available", "n/a", "na",
    "not applicable", "internal", "vscode", "vs code", "editor",
    "inline", "no link", "not shared", "private", "local",
)

# The phrases are matched as whole alphanumeric tokens, not raw substrings:
# a plain substring test finds "na" inside "openai" and therefore rejects
# every https://chat.openai.com/... link.
_NOT_A_LINK_RE = re.compile(
    r"(?<![a-z0-9])(?:"
    + "|".join(re.escape(phrase) for phrase in _NOT_A_LINK_PHRASES)
    + r")(?![a-z0-9])"
)

# Wording in "what I changed" that suggests the AI output was used verbatim.
# Shared by _score and _flags so the two cannot drift apart.
_COPY_PASTE_SIGNALS = (
    "nothing", "used as is", "no changes", "accepted all", "kept everything",
)

# (ParsedReport attribute, key in the extracted dict)
_FIELD_KEYS = (
    ("name", "name"),
    ("report_date", "date"),
    ("solution", "solution"),
    ("tasks_in_progress", "tasks_in_progress"),
    ("blockers", "blockers"),
    ("learned", "learned"),
    ("ai_tool", "ai_tool"),
    ("ai_what_asked", "ai_what_asked"),
    ("ai_changes", "ai_changes"),
    ("ai_chat_link", "ai_chat_link"),
    ("plan_tomorrow", "plan_tomorrow"),
    ("plan_week", "plan_week"),
    ("confidence", "confidence"),
)

# Fields that must be present for a report to count as well-formed.
# ai_chat_link is also mandatory but is validated separately as a URL.
_REQUIRED_FIELDS = (
    "name", "solution", "learned", "ai_tool", "plan_tomorrow", "confidence",
)


@dataclass
class ParsedReport:
    """Structured result of parsing one EOD report message."""

    raw_text: str
    is_eod_report: bool = False     # Claude verified this is an actual EOD report
    name: str = ""
    report_date: str = ""
    solution: str = ""              # WHAT I SOLVED TODAY
    tasks_in_progress: str = ""
    blockers: str = ""
    learned: str = ""
    ai_tool: str = ""
    ai_what_asked: str = ""         # what they asked the AI
    ai_changes: str = ""            # what they changed from AI output
    ai_chat_link: str = ""
    plan_tomorrow: str = ""
    plan_week: str = ""
    confidence: str = ""
    format_valid: bool = False
    missing_fields: list = field(default_factory=list)
    quality_score: int = 0
    quality_flags: list = field(default_factory=list)
    contains_risk_flag: bool = False


def _as_text(value) -> str:
    """Coerce an extracted value to str (the model may return null or a list)."""
    if value is None:
        return ""
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        return "\n".join(_as_text(item) for item in value)
    return str(value)


def _as_bool(value) -> bool:
    """Interpret an extracted flag; the string "false" must not count as true."""
    if isinstance(value, str):
        return value.strip().lower() in ("true", "yes", "1")
    return bool(value)


def _is_valid_chat_link(link: str) -> bool:
    """Return True if *link* is an http(s) URL rather than an explanation."""
    candidate = link.strip().lower()
    for scheme in ("http://", "https://"):
        if candidate.startswith(scheme):
            if len(candidate) == len(scheme):
                return False  # bare scheme, nothing after it
            return _NOT_A_LINK_RE.search(candidate) is None
    return False


def parse_report(text: str) -> ParsedReport:
    """
    Parse EOD report.
    Claude verifies + extracts. Falls back to regex if Claude fails.

    Args:
        text: Raw message text, possibly containing a bot mention tag.

    Returns:
        A ParsedReport. If the message is judged not to be an EOD report,
        only ``raw_text`` and ``is_eod_report`` are populated. Otherwise all
        extracted fields, ``missing_fields``, ``format_valid``, the quality
        flags and the risk flag are filled in; ``quality_score`` is computed
        only when the format is valid.
    """
    # Strip bot mention tag before parsing
    clean_text = _BOT_MENTION_RE.sub('', text).strip()
    report = ParsedReport(raw_text=clean_text)

    try:
        extracted = _extract_with_claude(clean_text)
    except Exception as e:
        # Deliberate best-effort boundary: any failure (missing package,
        # missing API key, network error, malformed JSON) uses the regex
        # fallback, which assumes the message is an EOD report.
        print(f"[parser] Claude failed: {e} — using regex fallback")
        extracted = _extract_with_regex(clean_text)

    # If Claude says this is not an EOD report — stop here
    report.is_eod_report = _as_bool(extracted.get("is_eod_report", False))
    if not report.is_eod_report:
        print("[parser] Claude determined this is not an EOD report — ignoring")
        return report

    for attr, key in _FIELD_KEYS:
        setattr(report, attr, _as_text(extracted.get(key, "")))

    # Validate mandatory fields
    missing = []
    for name in _REQUIRED_FIELDS:
        value = getattr(report, name)
        if not value or len(value.strip()) < 3:
            missing.append(name)

    # AI chat link — strictly required, must be a real URL starting with http(s)
    if not _is_valid_chat_link(report.ai_chat_link):
        missing.append("ai_chat_link")

    report.missing_fields = missing
    report.format_valid = not missing

    if report.format_valid:
        report.quality_score = _score(report)

    report.quality_flags = _flags(report)
    report.contains_risk_flag = _check_risk(report)
    return report


def _extract_with_claude(text: str) -> dict:
    """
    Claude does two things in one call:
    1. Decides if this is a real EOD report
    2. Extracts all fields if it is

    Returns:
        The decoded JSON object from the model's reply.

    Raises:
        ImportError / KeyError if the anthropic package or the
        ANTHROPIC_API_KEY environment variable is missing, whatever the
        client raises on request failure, and ValueError if the reply is
        not a JSON object (json.JSONDecodeError is a ValueError subclass).
    """
    import anthropic

    client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

    prompt = f"""You are processing a message sent to an intern management bot.

First decide: is this a genuine EOD (End of Day) work report from an intern?
- YES if: it contains work tasks, learning, plans, blockers — even if format is incomplete
- NO if: it's a question, random message, test, greeting, or anything other than a work report

If YES, extract all available fields.
If NO, return {{"is_eod_report": false}} and nothing else.

Fields to extract (return empty string "" if not present):
- is_eod_report: true or false
- name: person's full name (ignore @ Slack tags)
- date: report date
- solution: content under WHAT I SOLVED TODAY (outcome, not tasks)
- tasks_in_progress: tasks in progress
- blockers: blockers
- learned: what they learned
- ai_tool: AI tool(s) used
- ai_what_asked: what they asked the AI
- ai_changes: what they changed or rejected from AI output
- ai_chat_link: shared chat URL — must start with http:// or https://. Return "" if value is N/A, none, not applicable, internal, "cant provide", "not available", "used in vscode", "used in editor", "copilot in vscode", or any explanation instead of a real URL. Only return an actual URL.
- plan_tomorrow: plan for tomorrow
- plan_week: plan for the week
- confidence: selected confidence level — one of: Crushing it, On track, Need help, Stuck

Return ONLY valid JSON. No prose. No markdown fences.

Message:
{text}"""

    message = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1000,
        messages=[{"role": "user", "content": prompt}],
    )

    raw = message.content[0].text.strip()
    # The model sometimes wraps its answer in a markdown fence despite the
    # instruction not to.
    if raw.startswith("```"):
        raw = re.sub(r"```(?:json)?", "", raw).replace("```", "").strip()

    data = json.loads(raw)
    if not isinstance(data, dict):
        # A list/string/number reply would crash the caller's .get() lookups;
        # raising here routes it to the regex fallback instead.
        raise ValueError(
            f"expected a JSON object from Claude, got {type(data).__name__}"
        )
    return data


def _extract_with_regex(text: str) -> dict:
    """
    Fallback regex extraction.

    Slices the message by the section headings of the report template and
    returns a dict with the same keys the Claude extractor produces.
    ``is_eod_report`` is always True: this path cannot judge the message.
    ``name`` and ``date`` are present only when the header line matched.
    """
    result = {"is_eod_report": True}

    header = re.search(
        r"EOD REPORT\s*[—\-]+\s*(.+?)\s*[—\-]+\s*(.+)", text, re.IGNORECASE
    )
    if header:
        result["name"] = _BOT_MENTION_RE.sub('', header.group(1)).strip()
        result["date"] = header.group(2).strip()

    # (result key, heading that opens the section, heading(s) that close it)
    sections = [
        ("solution", r"WHAT I SOLVED TODAY", r"TASKS COMPLETED|TASKS IN PROGRESS|BLOCKERS"),
        ("tasks_in_progress", r"TASKS IN PROGRESS", r"BLOCKERS|WHAT I LEARNED"),
        ("blockers", r"BLOCKERS", r"WHAT I LEARNED"),
        ("learned", r"WHAT I LEARNED TODAY", r"AI USAGE|PLAN FOR"),
        ("ai_usage", r"AI USAGE TODAY", r"PLAN FOR TOMORROW"),
        ("plan_tomorrow", r"PLAN FOR TOMORROW", r"PLAN FOR THE WEEK|CONFIDENCE"),
        ("plan_week", r"PLAN FOR THE WEEK", r"CONFIDENCE"),
        ("confidence_raw", r"CONFIDENCE LEVEL", r"━━━|$"),
    ]

    for key, start_pat, end_pat in sections:
        start = re.search(start_pat, text, re.IGNORECASE)
        if not start:
            result[key] = ""
            continue
        rest = text[start.end():]
        end = re.search(end_pat, rest, re.IGNORECASE)
        result[key] = (rest[:end.start()] if end else rest).strip()

    # Parse AI sub-fields
    ai = result.pop("ai_usage", "")
    # The template labels are "What I asked it:" and "What I changed from its
    # output:", so extra label text must be allowed before the colon.
    sub_fields = (
        ("ai_tool", r"tool used\s*:\s*(.+)"),
        ("ai_what_asked", r"what i asked[^:\n]*:\s*(.+)"),
        ("ai_changes", r"what i changed[^:\n]*:\s*(.+)"),
    )
    for key, pattern in sub_fields:
        found = re.search(pattern, ai, re.IGNORECASE)
        result[key] = found.group(1).strip() if found else ""

    # Extract first valid URL from the chat link line
    link = re.search(r"chat link\s*:\s*(.+)", ai, re.IGNORECASE)
    raw_link_line = link.group(1).strip() if link else ""
    urls = re.findall(r'https?://[^\s]+', raw_link_line)
    raw_link = urls[0] if urls else raw_link_line
    placeholders = ("n/a", "none", "not applicable", "internal", "-", "")
    result["ai_chat_link"] = "" if raw_link.lower() in placeholders else raw_link

    # Parse confidence
    conf_raw = result.pop("confidence_raw", "")
    # MULTILINE lets "$" end the capture at a line break, so checkboxes
    # written one per line are handled as well as the single-line template.
    marked = re.search(
        r"\[x\]\s*(.+?)(?:\[|$)", conf_raw, re.IGNORECASE | re.MULTILINE
    )
    result["confidence"] = marked.group(1).strip() if marked else conf_raw[:30].strip()

    return result


def _score(report: ParsedReport) -> int:
    """Return a 1–5 quality score; starts at 3 and is adjusted up or down."""
    score = 3

    # Reward solution section
    if len(report.solution) > 80:
        score += 1

    # Reward detailed learning
    if len(report.learned) > 80:
        score += 1

    # Reward AI critique
    if len(report.ai_changes) > 50:
        score += 1

    # Penalise missing/fake chat link
    if not report.ai_chat_link.strip():
        score -= 1

    # Penalise copy-paste signals
    changes = report.ai_changes.lower()
    if any(signal in changes for signal in _COPY_PASTE_SIGNALS):
        score -= 2

    return max(1, min(5, score))


def _flags(report: ParsedReport) -> list:
    """Return the list of quality-flag identifiers that apply to *report*."""
    flags = []

    if not report.solution or len(report.solution.strip()) < 20:
        flags.append("no_solution_stated")

    if not report.ai_chat_link.strip():
        flags.append("no_ai_chat_link")

    changes = report.ai_changes.lower()
    if any(signal in changes for signal in _COPY_PASTE_SIGNALS):
        flags.append("possible_copy_paste")

    if len(report.learned) < 30:
        flags.append("shallow_learning")

    if not report.plan_week or len(report.plan_week.strip()) < 10:
        flags.append("no_weekly_plan")

    conf = report.confidence.lower()
    if "need help" in conf:
        flags.append("needs_help")
    if "stuck" in conf:
        flags.append("stuck")

    return flags


def _check_risk(report: ParsedReport) -> bool:
    """Return True if the blockers or confidence text contains a risk keyword."""
    check = " ".join([report.blockers, report.confidence]).lower()
    return any(kw in check for kw in RISK_FLAG_KEYWORDS)


def format_missing_fields_message(missing_fields: list, report_type: str = "atg") -> str:
    """
    Build the reply that tells the author which required fields are missing.

    Args:
        missing_fields: Field keys as produced in ParsedReport.missing_fields.
            Keys without a friendly label are shown as-is.
        report_type: Accepted for interface compatibility; not used here.

    Returns:
        A message listing the missing fields followed by the report template.
    """
    field_labels = {
        "name": "Your name in the header",
        "solution": "WHAT I SOLVED TODAY — outcome not tasks",
        "learned": "WHAT I LEARNED TODAY",
        "ai_tool": "AI USAGE TODAY → Tool used",
        "ai_chat_link": "AI USAGE TODAY → Chat link — must be a real https:// URL. If you used Copilot in VS Code, export or screenshot the conversation and share via a link.",
        "plan_tomorrow": "PLAN FOR TOMORROW",
        "confidence": "CONFIDENCE LEVEL",
    }
    fields_str = "\n".join(
        f"• {field_labels.get(name, name)}" for name in missing_fields
    )

    return (
        "Your report is missing required fields:\n\n"
        f"{fields_str}\n\n"
        "Use this format:\n\n"
        "```\n"
        "@intern-management-agent\n"
        "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
        "EOD REPORT — Your Name — DD Mon YYYY\n"
        "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
        "WHAT I SOLVED TODAY\n"
        "- What problem did you actually solve? (outcome, not tasks)\n\n"
        "TASKS IN PROGRESS\n"
        "- What you started — expected completion: [date]\n\n"
        "BLOCKERS\n"
        "- What is stopping you and who you need / None\n\n"
        "WHAT I LEARNED TODAY\n"
        "- Specific concept — how will you apply it tomorrow?\n\n"
        "AI USAGE TODAY\n"
        "Tool used: Claude / ChatGPT / Copilot\n"
        "What I asked it: [specific task]\n"
        "What I changed from its output: [what you modified and why]\n"
        "Chat link: [mandatory — paste shared URL]\n\n"
        "PLAN FOR TOMORROW\n"
        "- Task with expected output\n\n"
        "PLAN FOR THE WEEK\n"
        "- What you aim to complete by end of week\n\n"
        "CONFIDENCE LEVEL\n"
        "> [x] Crushing it [ ] On track [ ] Need help [ ] Stuck\n"
        "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
        "```\n\n"
        "Tag @intern-management-agent when you resubmit."
    )