Spaces:
Running
Running
| """ | |
| parser.py — ATG EOD report parser | |
| Claude does two things: | |
| 1. Verifies this is a genuine EOD report (not a random bot mention) | |
| 2. Extracts all structured fields | |
| Falls back to regex if Claude is unavailable. | |
| """ | |
| import os | |
| import re | |
| import json | |
| from dataclasses import dataclass, field | |
# Lower-case phrases that mark a report as "at risk". They are matched as
# plain substrings against the lower-cased blockers + confidence text.
RISK_FLAG_KEYWORDS = [
    "stuck",
    "blocked",
    "can't proceed",
    "need help",
    "behind",
    "delayed",
    "not sure",
    "struggling",
    "at risk",
    "won't finish",
    "unable to",
]
@dataclass
class ParsedReport:
    """Structured result of parsing one EOD report message.

    Only ``raw_text`` is required; every other field starts at an empty or
    falsy default and is filled in by ``parse_report``.

    The ``@dataclass`` decorator is required: the class is constructed with
    ``ParsedReport(raw_text=...)`` and relies on ``field(default_factory=list)``
    to give each instance its own list.
    """

    # Message text with the bot mention tag stripped.
    raw_text: str
    # Claude verified this is an actual EOD report
    is_eod_report: bool = False

    # --- Extracted report content -------------------------------------
    name: str = ""
    report_date: str = ""
    solution: str = ""  # WHAT I SOLVED TODAY
    tasks_in_progress: str = ""
    blockers: str = ""
    learned: str = ""
    ai_tool: str = ""
    ai_what_asked: str = ""  # what they asked the AI
    ai_changes: str = ""  # what they changed from AI output
    ai_chat_link: str = ""
    plan_tomorrow: str = ""
    plan_week: str = ""
    confidence: str = ""

    # --- Validation / scoring results ---------------------------------
    # True when no mandatory field is missing.
    format_valid: bool = False
    # Keys of the mandatory fields that were missing or too short.
    missing_fields: list = field(default_factory=list)
    quality_score: int = 0
    quality_flags: list = field(default_factory=list)
    contains_risk_flag: bool = False
def _as_text(value) -> str:
    """Coerce an extracted field value to ``str`` (``None`` becomes ``""``)."""
    if value is None:
        return ""
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        # A model may return bullet lists as JSON arrays; keep one item per line.
        return "\n".join(str(item) for item in value)
    return str(value)


def _is_valid_chat_link(link: str) -> bool:
    """Return True if *link* is a single http(s) URL with a dotted host name."""
    from urllib.parse import urlsplit

    candidate = link.strip()
    # An explanation such as "https://... cant share" is not a bare URL.
    if not candidate or any(ch.isspace() for ch in candidate):
        return False
    try:
        parts = urlsplit(candidate)
    except ValueError:
        return False
    if parts.scheme.lower() not in ("http", "https"):
        return False
    # Requiring a dot rejects placeholders like "https://n/a" and
    # non-shareable hosts like "http://localhost".
    return "." in (parts.hostname or "")


def parse_report(text: str) -> ParsedReport:
    """
    Parse EOD report. Claude verifies + extracts.
    Falls back to regex if Claude fails.

    Args:
        text: Raw message text; bot mention tags (``<@U123>``) are stripped.

    Returns:
        A ``ParsedReport``. If the message is judged not to be an EOD report,
        only ``raw_text`` and ``is_eod_report`` are set. Otherwise the
        extracted fields, ``missing_fields`` and ``format_valid`` are filled
        in, and the quality score, flags and risk flag are computed only when
        the format is valid.
    """
    # Strip bot mention tag before parsing
    clean_text = re.sub(r'<@[A-Z0-9]+>', '', text).strip()
    report = ParsedReport(raw_text=clean_text)

    try:
        extracted = _extract_with_claude(clean_text)
        if not isinstance(extracted, dict):
            # Valid JSON that is not an object cannot be read field-by-field.
            raise TypeError(f"expected a JSON object, got {type(extracted).__name__}")
    except Exception as e:
        print(f"[parser] Claude failed: {e} — using regex fallback")
        extracted = _extract_with_regex(clean_text)
        extracted["is_eod_report"] = True  # regex fallback assumes it is

    # If Claude says this is not an EOD report — stop here
    verdict = extracted.get("is_eod_report", False)
    if isinstance(verdict, str):
        # Guard against a stringified boolean such as "false".
        verdict = verdict.strip().lower() in ("true", "yes")
    report.is_eod_report = bool(verdict)
    if not report.is_eod_report:
        print("[parser] Claude determined this is not an EOD report — ignoring")
        return report

    # (report attribute, extracted key); only the date is named differently.
    field_map = (
        ("name", "name"),
        ("report_date", "date"),
        ("solution", "solution"),
        ("tasks_in_progress", "tasks_in_progress"),
        ("blockers", "blockers"),
        ("learned", "learned"),
        ("ai_tool", "ai_tool"),
        ("ai_what_asked", "ai_what_asked"),
        ("ai_changes", "ai_changes"),
        ("ai_chat_link", "ai_chat_link"),
        ("plan_tomorrow", "plan_tomorrow"),
        ("plan_week", "plan_week"),
        ("confidence", "confidence"),
    )
    for attr, key in field_map:
        # Coerce so that a JSON null or a non-string value cannot crash
        # the .strip() calls below.
        setattr(report, attr, _as_text(extracted.get(key)))

    # Validate mandatory fields: each needs at least 3 non-blank characters.
    required = ("name", "solution", "learned", "ai_tool", "plan_tomorrow", "confidence")
    missing = [key for key in required if len(getattr(report, key).strip()) < 3]

    # AI chat link — strictly required, must be a real http(s) URL.
    # A substring denylist is deliberately not applied to URLs: short tokens
    # such as "na" occur inside legitimate hosts (e.g. "chat.openai.com").
    if not _is_valid_chat_link(report.ai_chat_link):
        missing.append("ai_chat_link")

    report.missing_fields = missing
    report.format_valid = not missing
    if report.format_valid:
        report.quality_score = _score(report)
        report.quality_flags = _flags(report)
        report.contains_risk_flag = _check_risk(report)
    return report
def _extract_with_claude(text: str) -> dict:
    """
    Claude does two things in one call:
    1. Decides if this is a real EOD report
    2. Extracts all fields if it is

    Args:
        text: Message text to classify and extract from.

    Returns:
        The JSON object returned by the model, as a dict.

    Raises:
        KeyError: if ``ANTHROPIC_API_KEY`` is not set in the environment.
        json.JSONDecodeError: if the reply is not valid JSON.
        ValueError: if the reply is valid JSON but not an object.
        Errors raised by the ``anthropic`` client propagate unchanged.
    """
    import anthropic

    client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
    prompt = f"""You are processing a message sent to an intern management bot.
First decide: is this a genuine EOD (End of Day) work report from an intern?
- YES if: it contains work tasks, learning, plans, blockers — even if format is incomplete
- NO if: it's a question, random message, test, greeting, or anything other than a work report
If YES, extract all available fields.
If NO, return {{"is_eod_report": false}} and nothing else.
Fields to extract (return empty string "" if not present):
- is_eod_report: true or false
- name: person's full name (ignore @ Slack tags)
- date: report date
- solution: content under WHAT I SOLVED TODAY (outcome, not tasks)
- tasks_in_progress: tasks in progress
- blockers: blockers
- learned: what they learned
- ai_tool: AI tool(s) used
- ai_what_asked: what they asked the AI
- ai_changes: what they changed or rejected from AI output
- ai_chat_link: shared chat URL — must start with http:// or https://. Return "" if value is N/A, none, not applicable, internal, "cant provide", "not available", "used in vscode", "used in editor", "copilot in vscode", or any explanation instead of a real URL. Only return an actual URL.
- plan_tomorrow: plan for tomorrow
- plan_week: plan for the week
- confidence: selected confidence level — one of: Crushing it, On track, Need help, Stuck
Return ONLY valid JSON. No prose. No markdown fences.
Message:
{text}"""
    message = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1000,
        messages=[{"role": "user", "content": prompt}],
    )
    raw = message.content[0].text.strip()
    # The model is told not to use fences, but strip them if it does anyway.
    if raw.startswith("```"):
        raw = re.sub(r"```(?:json)?", "", raw).strip()

    data = json.loads(raw)
    if not isinstance(data, dict):
        # Callers read fields with .get(); raising here lets them fall back
        # to the regex path instead of crashing later.
        raise ValueError(f"expected a JSON object, got {type(data).__name__}")
    return data
| def _extract_with_regex(text: str) -> dict: | |
| """Fallback regex extraction.""" | |
| result = {"is_eod_report": True} | |
| header = re.search(r"EOD REPORT\s*[β\-]+\s*(.+?)\s*[β\-]+\s*(.+)", text, re.IGNORECASE) | |
| if header: | |
| result["name"] = re.sub(r'<@[A-Z0-9]+>', '', header.group(1)).strip() | |
| result["date"] = header.group(2).strip() | |
| sections = [ | |
| ("solution", r"WHAT I SOLVED TODAY", r"TASKS COMPLETED|TASKS IN PROGRESS|BLOCKERS"), | |
| ("tasks_in_progress",r"TASKS IN PROGRESS", r"BLOCKERS|WHAT I LEARNED"), | |
| ("blockers", r"BLOCKERS", r"WHAT I LEARNED"), | |
| ("learned", r"WHAT I LEARNED TODAY", r"AI USAGE|PLAN FOR"), | |
| ("ai_usage", r"AI USAGE TODAY", r"PLAN FOR TOMORROW"), | |
| ("plan_tomorrow", r"PLAN FOR TOMORROW", r"PLAN FOR THE WEEK|CONFIDENCE"), | |
| ("plan_week", r"PLAN FOR THE WEEK", r"CONFIDENCE"), | |
| ("confidence_raw", r"CONFIDENCE LEVEL", r"βββ|$"), | |
| ] | |
| for key, start_pat, end_pat in sections: | |
| start = re.search(start_pat, text, re.IGNORECASE) | |
| if not start: | |
| result[key] = "" | |
| continue | |
| end = re.search(end_pat, text[start.end():], re.IGNORECASE) | |
| content = text[start.end(): start.end() + end.start()].strip() if end else text[start.end():].strip() | |
| result[key] = content | |
| # Parse AI sub-fields | |
| ai = result.pop("ai_usage", "") | |
| tool = re.search(r"tool used\s*:\s*(.+)", ai, re.IGNORECASE) | |
| asked = re.search(r"what i asked\s*:\s*(.+)", ai, re.IGNORECASE) | |
| changed = re.search(r"what i changed\s*:\s*(.+)", ai, re.IGNORECASE) | |
| link = re.search(r"chat link\s*:\s*(.+)", ai, re.IGNORECASE) | |
| result["ai_tool"] = tool.group(1).strip() if tool else "" | |
| result["ai_what_asked"] = asked.group(1).strip() if asked else "" | |
| result["ai_changes"] = changed.group(1).strip() if changed else "" | |
| # Extract first valid URL from the chat link line | |
| raw_link_line = link.group(1).strip() if link else "" | |
| urls = re.findall(r'https?://[^\s]+', raw_link_line) | |
| raw_link = urls[0] if urls else raw_link_line | |
| result["ai_chat_link"] = "" if raw_link.lower() in ("n/a", "none", "not applicable", "internal", "-", "") else raw_link | |
| # Parse confidence | |
| conf_raw = result.pop("confidence_raw", "") | |
| marked = re.search(r"\[x\]\s*(.+?)(?:\[|$)", conf_raw, re.IGNORECASE) | |
| result["confidence"] = marked.group(1).strip() if marked else conf_raw[:30].strip() | |
| return result | |
| def _score(report: ParsedReport) -> int: | |
| score = 3 | |
| # Reward solution section | |
| if len(report.solution) > 80: score += 1 | |
| # Reward detailed learning | |
| if len(report.learned) > 80: score += 1 | |
| # Reward AI critique | |
| if len(report.ai_changes) > 50: score += 1 | |
| # Penalise missing/fake chat link | |
| link = report.ai_chat_link.strip().lower() | |
| if not link: | |
| score -= 1 | |
| # Penalise copy-paste signals | |
| if report.ai_changes: | |
| lower = report.ai_changes.lower() | |
| if any(x in lower for x in ["nothing", "used as is", "no changes", "accepted all", "kept everything"]): | |
| score -= 2 | |
| return max(1, min(5, score)) | |
| def _flags(report: ParsedReport) -> list: | |
| flags = [] | |
| if not report.solution or len(report.solution.strip()) < 20: | |
| flags.append("no_solution_stated") | |
| link = report.ai_chat_link.strip().lower() | |
| if not link: | |
| flags.append("no_ai_chat_link") | |
| if report.ai_changes: | |
| lower = report.ai_changes.lower() | |
| if any(x in lower for x in ["nothing", "used as is", "no changes", "accepted all"]): | |
| flags.append("possible_copy_paste") | |
| if len(report.learned) < 30: | |
| flags.append("shallow_learning") | |
| if not report.plan_week or len(report.plan_week.strip()) < 10: | |
| flags.append("no_weekly_plan") | |
| conf = report.confidence.lower() | |
| if "need help" in conf: | |
| flags.append("needs_help") | |
| if "stuck" in conf: | |
| flags.append("stuck") | |
| return flags | |
def _check_risk(report: ParsedReport) -> bool:
    """Return True if the blockers or confidence text contains a risk keyword.

    Both fields are joined with a space and lower-cased, then each entry of
    ``RISK_FLAG_KEYWORDS`` is tested as a plain substring.
    """
    haystack = " ".join((report.blockers, report.confidence)).lower()
    for keyword in RISK_FLAG_KEYWORDS:
        if keyword in haystack:
            return True
    return False
def format_missing_fields_message(missing_fields: list, report_type: str = "atg") -> str:
    """Build the reply sent when a report is missing mandatory fields.

    Args:
        missing_fields: Keys of the missing fields. Known keys are shown with
            a human-readable label; unknown keys are shown as-is.
        report_type: Accepted for interface compatibility; not used here.

    Returns:
        A message listing the missing fields as bullets, followed by the full
        report template inside a code fence and a resubmission reminder.
    """
    field_labels = {
        "name": "Your name in the header",
        "solution": "WHAT I SOLVED TODAY — outcome not tasks",
        "learned": "WHAT I LEARNED TODAY",
        "ai_tool": "AI USAGE TODAY — Tool used",
        "ai_chat_link": "AI USAGE TODAY — Chat link — must be a real https:// URL. If you used Copilot in VS Code, export or screenshot the conversation and share via a link.",
        "plan_tomorrow": "PLAN FOR TOMORROW",
        "confidence": "CONFIDENCE LEVEL",
    }
    missing_labels = [field_labels.get(f, f) for f in missing_fields]
    fields_str = "\n".join(f"• {label}" for label in missing_labels)

    # Horizontal rule used around the header and at the end of the template.
    rule = "━" * 40
    return (
        f"Your report is missing required fields:\n\n"
        f"{fields_str}\n\n"
        f"Use this format:\n\n"
        f"```\n"
        f"@intern-management-agent\n"
        f"{rule}\n"
        f"EOD REPORT — Your Name — DD Mon YYYY\n"
        f"{rule}\n\n"
        f"WHAT I SOLVED TODAY\n"
        f"- What problem did you actually solve? (outcome, not tasks)\n\n"
        f"TASKS IN PROGRESS\n"
        f"- What you started — expected completion: [date]\n\n"
        f"BLOCKERS\n"
        f"- What is stopping you and who you need / None\n\n"
        f"WHAT I LEARNED TODAY\n"
        f"- Specific concept — how will you apply it tomorrow?\n\n"
        f"AI USAGE TODAY\n"
        f"Tool used: Claude / ChatGPT / Copilot\n"
        f"What I asked it: [specific task]\n"
        f"What I changed from its output: [what you modified and why]\n"
        f"Chat link: [mandatory — paste shared URL]\n\n"
        f"PLAN FOR TOMORROW\n"
        f"- Task with expected output\n\n"
        f"PLAN FOR THE WEEK\n"
        f"- What you aim to complete by end of week\n\n"
        f"CONFIDENCE LEVEL\n"
        f"> [x] Crushing it [ ] On track [ ] Need help [ ] Stuck\n"
        f"{rule}\n"
        f"```\n\n"
        f"Tag @intern-management-agent when you resubmit."
    )