Spaces:

banao-tech
/

interns_manager

Running

File size: 12,798 Bytes

2a1d323
8b32013
5d6c69a
 
 
8b32013
2a1d323
 
8b32013
2a1d323
8b32013
2a1d323
 
 
 
9060ca4
 
 
 
 
2a1d323
 
 
 
5d6c69a
2a1d323
9060ca4
5d6c69a
9060ca4
 
d16ec3b
9060ca4
5d6c69a
 
9060ca4
 
5d6c69a
9060ca4
2a1d323
 
d16ec3b
2a1d323
 
 
 
 
5d6c69a
 
 
 
 
 
 
9060ca4
8b32013
5d6c69a
8b32013
5d6c69a
 
 
 
 
 
 
 
 
8b32013
 
 
5d6c69a
8b32013
 
 
 
5d6c69a
 
8b32013
 
5d6c69a
8b32013
 
5d6c69a
9060ca4
 
5d6c69a
9060ca4
 
 
 
2a1d323
5d6c69a
1560291
a57eee1
1560291
a57eee1
 
 
 
 
 
 
 
 
 
1560291
 
d16ec3b
 
2a1d323
d16ec3b
9060ca4
 
 
d16ec3b
 
 
 
8b32013
5d6c69a
 
 
 
 
8b32013
5d6c69a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a57eee1
5d6c69a
 
 
 
 
 
 
8b32013
9060ca4
8b32013
 
 
 
 
 
 
 
 
9060ca4
8b32013
9060ca4
 
8b32013
5d6c69a
 
9060ca4
8b32013
 
5d6c69a
8b32013
9060ca4
8b32013
5d6c69a
 
 
 
 
 
 
 
8b32013
9060ca4
8b32013
 
 
 
 
 
 
 
9060ca4
5d6c69a
8b32013
 
5d6c69a
 
8b32013
 
5d6c69a
 
2527c2a
 
 
 
 
5d6c69a
9060ca4
8b32013
 
5d6c69a
 
8b32013
 
9060ca4
 
 
d16ec3b
8b32013
5d6c69a
 
2a1d323
5d6c69a
9060ca4
 
5d6c69a
 
 
 
9060ca4
5d6c69a
 
 
 
 
 
 
 
9060ca4
2a1d323
 
 
9060ca4
2a1d323
 
5d6c69a
 
 
9060ca4
5d6c69a
9060ca4
d16ec3b
5d6c69a
 
 
 
 
d16ec3b
9060ca4
 
5d6c69a
 
9060ca4
 
 
 
 
 
 
2a1d323
 
 
9060ca4
5d6c69a
 
9060ca4
 
 
 
5d6c69a
 
 
 
a57eee1
5d6c69a
 
9060ca4
d16ec3b
2a1d323
 
 
 
 
 
5d6c69a
9060ca4
03580b5
9060ca4
 
 
5d6c69a
 
22a7169
5d6c69a
22a7169
5d6c69a
22a7169
5d6c69a
9060ca4
5d6c69a
 
 
 
22a7169
5d6c69a
22a7169
 
5d6c69a
9060ca4
 
22a7169
36420a4
2a1d323

"""
parser.py — ATG EOD report parser
Claude does two things:
  1. Verifies this is a genuine EOD report (not a random bot mention)
  2. Extracts all structured fields
Falls back to regex if Claude is unavailable.
"""

import os
import re
import json
from dataclasses import dataclass, field


# Lower-case phrases that, when found as substrings of a report's blockers
# or confidence text, mark the report as carrying a risk flag.
RISK_FLAG_KEYWORDS = [
    "stuck",
    "blocked",
    "can't proceed",
    "need help",
    "behind",
    "delayed",
    "not sure",
    "struggling",
    "at risk",
    "won't finish",
    "unable to",
]


@dataclass
class ParsedReport:
    """Result of parsing one message sent to the bot.

    Only ``raw_text`` is required. Every other field starts at an empty /
    falsy default and is filled in by ``parse_report``; when the message is
    judged not to be an EOD report, everything except ``raw_text`` keeps its
    default.
    """

    raw_text: str                 # message text with bot-mention tags removed
    is_eod_report: bool = False   # Claude verified this is an actual EOD report
    name: str = ""                # reporter's name from the header
    report_date: str = ""         # date as written in the report (kept as text)
    solution: str = ""            # WHAT I SOLVED TODAY
    tasks_in_progress: str = ""   # TASKS IN PROGRESS
    blockers: str = ""            # BLOCKERS
    learned: str = ""             # WHAT I LEARNED TODAY
    ai_tool: str = ""             # AI USAGE TODAY -> tool used
    ai_what_asked: str = ""       # what they asked the AI
    ai_changes: str = ""          # what they changed from AI output
    ai_chat_link: str = ""        # shared chat URL
    plan_tomorrow: str = ""       # PLAN FOR TOMORROW
    plan_week: str = ""           # PLAN FOR THE WEEK
    confidence: str = ""          # selected CONFIDENCE LEVEL option
    format_valid: bool = False    # True when no mandatory field is missing
    missing_fields: list = field(default_factory=list)  # keys of mandatory fields that failed validation
    quality_score: int = 0        # set from _score() only when format_valid; 0 otherwise
    quality_flags: list = field(default_factory=list)   # string flags from _flags(), only when format_valid
    contains_risk_flag: bool = False  # result of _check_risk(), only when format_valid


def _as_text(value) -> str:
    """Coerce one extracted field to a string.

    The model is asked for strings but may return ``null``, a list of bullet
    items, or a number; downstream code calls ``str`` methods on every field.
    """
    if value is None:
        return ""
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        return "\n".join(str(item) for item in value)
    return str(value)


def _is_valid_chat_link(link: str) -> bool:
    """Return True when *link* is a single http(s) URL with a dotted host."""
    from urllib.parse import urlparse

    candidate = link.strip()
    # An explanation such as "https:// not available" is not a URL.
    if not candidate or any(ch.isspace() for ch in candidate):
        return False
    try:
        parts = urlparse(candidate)
        host = parts.hostname or ""
    except ValueError:
        # urlparse rejects malformed authorities (e.g. an unbalanced "[").
        return False
    # Requiring a dot in the host rejects placeholders like "https://n/a"
    # without substring-matching words inside otherwise valid URLs.
    return parts.scheme in ("http", "https") and "." in host


def parse_report(text: str) -> ParsedReport:
    """
    Parse an EOD report message into a ParsedReport.

    Claude verifies that the message is a report and extracts its fields;
    if that call fails, or returns something other than a JSON object, the
    regex extractor is used and the message is assumed to be a report.

    Args:
        text: Raw message text, possibly containing ``<@ID>`` mention tags.

    Returns:
        A ParsedReport. When the message is not an EOD report only
        ``raw_text`` is populated. Quality score, flags and the risk flag
        are computed only when every mandatory field is present.
    """
    # Strip bot mention tag before parsing
    clean_text = re.sub(r'<@[A-Z0-9]+>', '', text).strip()
    report = ParsedReport(raw_text=clean_text)

    try:
        extracted = _extract_with_claude(clean_text)
        if not isinstance(extracted, dict):
            # Valid JSON that is not an object would crash on .get() below.
            raise ValueError(
                f"expected a JSON object, got {type(extracted).__name__}"
            )
    except Exception as e:
        print(f"[parser] Claude failed: {e} — using regex fallback")
        extracted = _extract_with_regex(clean_text)
        extracted["is_eod_report"] = True  # regex fallback assumes it is

    # If Claude says this is not an EOD report — stop here
    verdict = extracted.get("is_eod_report", False)
    if isinstance(verdict, str):
        # A quoted "false" is truthy; interpret the text instead.
        verdict = verdict.strip().lower() in ("true", "yes", "1")
    report.is_eod_report = bool(verdict)
    if not report.is_eod_report:
        print("[parser] Claude determined this is not an EOD report — ignoring")
        return report

    report.name = _as_text(extracted.get("name", ""))
    report.report_date = _as_text(extracted.get("date", ""))
    report.solution = _as_text(extracted.get("solution", ""))
    report.tasks_in_progress = _as_text(extracted.get("tasks_in_progress", ""))
    report.blockers = _as_text(extracted.get("blockers", ""))
    report.learned = _as_text(extracted.get("learned", ""))
    report.ai_tool = _as_text(extracted.get("ai_tool", ""))
    report.ai_what_asked = _as_text(extracted.get("ai_what_asked", ""))
    report.ai_changes = _as_text(extracted.get("ai_changes", ""))
    report.ai_chat_link = _as_text(extracted.get("ai_chat_link", ""))
    report.plan_tomorrow = _as_text(extracted.get("plan_tomorrow", ""))
    report.plan_week = _as_text(extracted.get("plan_week", ""))
    report.confidence = _as_text(extracted.get("confidence", ""))

    # Validate mandatory fields: each needs at least 3 non-blank characters.
    required = {
        "name": report.name,
        "solution": report.solution,
        "learned": report.learned,
        "ai_tool": report.ai_tool,
        "plan_tomorrow": report.plan_tomorrow,
        "confidence": report.confidence,
    }
    missing = [key for key, value in required.items() if len(value.strip()) < 3]

    # AI chat link — strictly required, must be a real http(s) URL.
    # The URL is validated structurally: substring-matching words such as
    # "na" or "local" rejected genuine links (e.g. anything on openai.com).
    if not _is_valid_chat_link(report.ai_chat_link):
        missing.append("ai_chat_link")

    report.missing_fields = missing
    report.format_valid = not missing

    if report.format_valid:
        report.quality_score = _score(report)
        report.quality_flags = _flags(report)
        report.contains_risk_flag = _check_risk(report)

    return report


def _extract_with_claude(text: str) -> dict:
    """
    Claude does two things in one call:
    1. Decides if this is a real EOD report
    2. Extracts all fields if it is

    Args:
        text: Message text with mention tags already removed.

    Returns:
        The decoded JSON object from the model's reply.

    Raises:
        KeyError: ANTHROPIC_API_KEY is not set.
        ValueError: The reply is empty, is not valid JSON
            (json.JSONDecodeError is a ValueError), or is not a JSON object.
        Exception: Anything raised by the anthropic client.
    """
    # Imported lazily so the module still loads (and the regex fallback
    # still works) when the anthropic package is not installed.
    import anthropic
    client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

    prompt = f"""You are processing a message sent to an intern management bot.

First decide: is this a genuine EOD (End of Day) work report from an intern?
- YES if: it contains work tasks, learning, plans, blockers — even if format is incomplete
- NO if: it's a question, random message, test, greeting, or anything other than a work report

If YES, extract all available fields.
If NO, return {{"is_eod_report": false}} and nothing else.

Fields to extract (return empty string "" if not present):
- is_eod_report: true or false
- name: person's full name (ignore @ Slack tags)
- date: report date
- solution: content under WHAT I SOLVED TODAY (outcome, not tasks)
- tasks_in_progress: tasks in progress
- blockers: blockers
- learned: what they learned
- ai_tool: AI tool(s) used
- ai_what_asked: what they asked the AI
- ai_changes: what they changed or rejected from AI output
- ai_chat_link: shared chat URL — must start with http:// or https://. Return "" if value is N/A, none, not applicable, internal, "cant provide", "not available", "used in vscode", "used in editor", "copilot in vscode", or any explanation instead of a real URL. Only return an actual URL.
- plan_tomorrow: plan for tomorrow
- plan_week: plan for the week
- confidence: selected confidence level — one of: Crushing it, On track, Need help, Stuck

Return ONLY valid JSON. No prose. No markdown fences.

Message:
{text}"""

    message = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1000,
        messages=[{"role": "user", "content": prompt}],
    )

    if not message.content:
        raise ValueError("Claude returned an empty response")

    raw = message.content[0].text.strip()
    if raw.startswith("```"):
        # The model sometimes wraps JSON in a markdown fence despite the prompt.
        raw = re.sub(r"```(?:json)?", "", raw).replace("```", "").strip()

    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        # Tolerate prose around the object: retry on the outermost {...} span.
        first, last = raw.find("{"), raw.rfind("}")
        if first == -1 or last <= first:
            raise
        data = json.loads(raw[first:last + 1])

    if not isinstance(data, dict):
        # Callers use .get(); a list/str/number must trigger their fallback.
        raise ValueError(
            f"expected a JSON object from Claude, got {type(data).__name__}"
        )
    return data


def _extract_with_regex(text: str) -> dict:
    """Fallback regex extraction for the templated EOD report layout.

    Args:
        text: Message text with mention tags already removed.

    Returns:
        Dict with ``is_eod_report`` (always True) and the section fields
        ``solution``, ``tasks_in_progress``, ``blockers``, ``learned``,
        ``plan_tomorrow``, ``plan_week``, ``ai_tool``, ``ai_what_asked``,
        ``ai_changes``, ``ai_chat_link`` and ``confidence``. Sections that
        are not found are "". ``name`` and ``date`` are present only when
        the "EOD REPORT — name — date" header is found.
    """
    result = {"is_eod_report": True}

    # Header separators: em dash, en dash or hyphen(s).
    dash = r"[—–\-]+"
    # Prefer separators surrounded by spaces on the header line, so that a
    # hyphenated name ("Mary-Jane") is not split at its hyphen; fall back to
    # the loose form for headers written without spaces.
    header = (
        re.search(rf"EOD REPORT\s*{dash}\s*(.+?)[ \t]+{dash}[ \t]+(.+)", text, re.IGNORECASE)
        or re.search(rf"EOD REPORT\s*{dash}\s*(.+?)\s*{dash}\s*(.+)", text, re.IGNORECASE)
    )
    if header:
        result["name"] = re.sub(r'<@[A-Z0-9]+>', '', header.group(1)).strip()
        result["date"] = header.group(2).strip()

    # (result key, heading that opens the section, heading(s) that close it)
    sections = [
        ("solution",         r"WHAT I SOLVED TODAY",      r"TASKS COMPLETED|TASKS IN PROGRESS|BLOCKERS"),
        ("tasks_in_progress",r"TASKS IN PROGRESS",         r"BLOCKERS|WHAT I LEARNED"),
        ("blockers",         r"BLOCKERS",                  r"WHAT I LEARNED"),
        ("learned",          r"WHAT I LEARNED TODAY",      r"AI USAGE|PLAN FOR"),
        ("ai_usage",         r"AI USAGE TODAY",            r"PLAN FOR TOMORROW"),
        ("plan_tomorrow",    r"PLAN FOR TOMORROW",         r"PLAN FOR THE WEEK|CONFIDENCE"),
        ("plan_week",        r"PLAN FOR THE WEEK",         r"CONFIDENCE"),
        ("confidence_raw",   r"CONFIDENCE LEVEL",          r"━━━|$"),
    ]

    for key, start_pat, end_pat in sections:
        start = re.search(start_pat, text, re.IGNORECASE)
        if not start:
            result[key] = ""
            continue
        tail = text[start.end():]
        end = re.search(end_pat, tail, re.IGNORECASE)
        # Without a closing heading the section runs to the end of the text.
        result[key] = (tail[:end.start()] if end else tail).strip()

    # Parse AI sub-fields. The labels in the report template are
    # "What I asked it:" and "What I changed from its output:", so allow
    # extra words between the label stem and the colon.
    ai = result.pop("ai_usage", "")
    tool = re.search(r"tool used\s*:\s*(.+)", ai, re.IGNORECASE)
    asked = re.search(r"what i asked[^:\n]*:\s*(.+)", ai, re.IGNORECASE)
    changed = re.search(r"what i changed[^:\n]*:\s*(.+)", ai, re.IGNORECASE)
    link = re.search(r"chat link\s*:\s*(.+)", ai, re.IGNORECASE)
    result["ai_tool"] = tool.group(1).strip() if tool else ""
    result["ai_what_asked"] = asked.group(1).strip() if asked else ""
    result["ai_changes"] = changed.group(1).strip() if changed else ""

    # Extract first valid URL from the chat link line. "<", ">" and "|" are
    # excluded so chat-client link markup such as <https://host/path|label>
    # does not leak into the URL.
    raw_link_line = link.group(1).strip() if link else ""
    urls = re.findall(r"https?://[^\s<>|]+", raw_link_line)
    raw_link = urls[0] if urls else raw_link_line
    result["ai_chat_link"] = "" if raw_link.lower() in ("n/a", "none", "not applicable", "internal", "-", "") else raw_link

    # Parse confidence: the text after the ticked "[x]" box, up to the next
    # box or the end of that line. MULTILINE lets "$" match at a line end,
    # so one-option-per-line checklists work as well as the one-line form.
    conf_raw = result.pop("confidence_raw", "")
    marked = re.search(r"\[x\]\s*(.+?)(?:\[|$)", conf_raw, re.IGNORECASE | re.MULTILINE)
    result["confidence"] = marked.group(1).strip() if marked else conf_raw[:30].strip()

    return result


def _score(report: ParsedReport) -> int:
    """Rate a report from 1 to 5.

    Starts at 3, adds a point each for a long solution (> 80 chars), long
    learning notes (> 80 chars) and a long AI critique (> 50 chars), then
    subtracts 1 for an empty chat link and 2 when the critique reads like
    the AI output was taken verbatim. The result is clamped to 1..5.
    """
    # (text, length it must exceed) pairs that each earn one bonus point
    length_bonuses = (
        (report.solution, 80),
        (report.learned, 80),
        (report.ai_changes, 50),
    )
    total = 3 + sum(1 for value, threshold in length_bonuses if len(value) > threshold)

    # Penalise missing/fake chat link
    if report.ai_chat_link.strip().lower() == "":
        total -= 1

    # Penalise copy-paste signals
    verbatim_markers = ("nothing", "used as is", "no changes", "accepted all", "kept everything")
    critique = report.ai_changes.lower() if report.ai_changes else ""
    if any(marker in critique for marker in verbatim_markers):
        total -= 2

    if total < 1:
        return 1
    if total > 5:
        return 5
    return total


def _flags(report: ParsedReport) -> list:
    """Collect quality flags for a report.

    Returns:
        A list of flag strings, in this order when present:
        ``no_solution_stated``, ``no_ai_chat_link``, ``possible_copy_paste``,
        ``shallow_learning``, ``no_weekly_plan``, ``needs_help``, ``stuck``.
    """
    flags = []

    if not report.solution or len(report.solution.strip()) < 20:
        flags.append("no_solution_stated")

    if not report.ai_chat_link.strip():
        flags.append("no_ai_chat_link")

    # Same phrase list that _score penalises, so any report that loses
    # points for taking AI output verbatim is also flagged for review.
    copy_paste_phrases = (
        "nothing", "used as is", "no changes", "accepted all", "kept everything",
    )
    if report.ai_changes:
        lower = report.ai_changes.lower()
        if any(phrase in lower for phrase in copy_paste_phrases):
            flags.append("possible_copy_paste")

    if len(report.learned) < 30:
        flags.append("shallow_learning")

    if not report.plan_week or len(report.plan_week.strip()) < 10:
        flags.append("no_weekly_plan")

    # Confidence is matched by substring, so both flags can fire together.
    conf = report.confidence.lower()
    if "need help" in conf:
        flags.append("needs_help")
    if "stuck" in conf:
        flags.append("stuck")

    return flags


def _check_risk(report: ParsedReport) -> bool:
    """Return True when the blockers or confidence text contains a risk keyword.

    Matching is a case-insensitive substring test against RISK_FLAG_KEYWORDS.
    """
    haystack = (report.blockers + " " + report.confidence).lower()
    for keyword in RISK_FLAG_KEYWORDS:
        if keyword in haystack:
            return True
    return False


def format_missing_fields_message(missing_fields: list, report_type: str = "atg") -> str:
    """Build the reply sent when a report lacks mandatory fields.

    Args:
        missing_fields: Field keys that failed validation. Known keys are
            shown with a human-readable label; unknown keys are shown as-is.
        report_type: Accepted but not used by this function.

    Returns:
        A message listing the missing fields as bullets, followed by the
        full report template in a code fence and a resubmission reminder.
    """
    labels = {
        "name": "Your name in the header",
        "solution": "WHAT I SOLVED TODAY — outcome not tasks",
        "learned": "WHAT I LEARNED TODAY",
        "ai_tool": "AI USAGE TODAY → Tool used",
        "ai_chat_link": "AI USAGE TODAY → Chat link — must be a real https:// URL. If you used Copilot in VS Code, export or screenshot the conversation and share via a link.",
        "plan_tomorrow": "PLAN FOR TOMORROW",
        "confidence": "CONFIDENCE LEVEL",
    }

    bullets = []
    for key in missing_fields:
        bullets.append("• {}".format(labels.get(key, key)))
    fields_str = "\n".join(bullets)

    # Horizontal rule used three times in the template.
    rule = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

    # One entry per output line; empty entries become blank lines.
    lines = [
        "Your report is missing required fields:",
        "",
        fields_str,
        "",
        "Use this format:",
        "",
        "```",
        "@intern-management-agent",
        rule,
        "EOD REPORT — Your Name — DD Mon YYYY",
        rule,
        "",
        "WHAT I SOLVED TODAY",
        "- What problem did you actually solve? (outcome, not tasks)",
        "",
        "TASKS IN PROGRESS",
        "- What you started — expected completion: [date]",
        "",
        "BLOCKERS",
        "- What is stopping you and who you need / None",
        "",
        "WHAT I LEARNED TODAY",
        "- Specific concept — how will you apply it tomorrow?",
        "",
        "AI USAGE TODAY",
        "Tool used: Claude / ChatGPT / Copilot",
        "What I asked it: [specific task]",
        "What I changed from its output: [what you modified and why]",
        "Chat link: [mandatory — paste shared URL]",
        "",
        "PLAN FOR TOMORROW",
        "- Task with expected output",
        "",
        "PLAN FOR THE WEEK",
        "- What you aim to complete by end of week",
        "",
        "CONFIDENCE LEVEL",
        "> [x] Crushing it  [ ] On track  [ ] Need help  [ ] Stuck",
        rule,
        "```",
        "",
        "Tag @intern-management-agent when you resubmit.",
    ]
    return "\n".join(lines)