"""
parser.py — ATG EOD report parser
Claude does two things:
  1. Verifies this is a genuine EOD report (not a random bot mention)
  2. Extracts all structured fields
Falls back to regex if Claude is unavailable.
"""

import os
import re
import json
from dataclasses import dataclass, field


# Lower-case phrases that mark a report as "at risk". _check_risk() looks for
# them as plain substrings of the lower-cased blockers + confidence text.
RISK_FLAG_KEYWORDS = [
    "stuck",
    "blocked",
    "can't proceed",
    "need help",
    "behind",
    "delayed",
    "not sure",
    "struggling",
    "at risk",
    "won't finish",
    "unable to",
]


@dataclass
class ParsedReport:
    """Result of parsing one EOD report message.

    Only ``raw_text`` is required; every other field starts at an empty /
    falsy default and is filled in by ``parse_report``.
    """

    # The message text with bot-mention tags already stripped.
    raw_text: str
    # True once the message has been accepted as an actual EOD report.
    is_eod_report: bool = False

    # --- Header ---
    name: str = ""
    report_date: str = ""

    # --- Report sections ---
    # Content of the WHAT I SOLVED TODAY section.
    solution: str = ""
    tasks_in_progress: str = ""
    blockers: str = ""
    learned: str = ""

    # --- AI usage ---
    ai_tool: str = ""
    # What the author asked the AI tool.
    ai_what_asked: str = ""
    # What the author changed from the AI output.
    ai_changes: str = ""
    ai_chat_link: str = ""

    # --- Plans and self-assessment ---
    plan_tomorrow: str = ""
    plan_week: str = ""
    confidence: str = ""

    # --- Validation and scoring results ---
    # True when no mandatory field is missing.
    format_valid: bool = False
    # Keys of the mandatory fields that were absent or invalid.
    missing_fields: list = field(default_factory=list)
    quality_score: int = 0
    quality_flags: list = field(default_factory=list)
    contains_risk_flag: bool = False


def _as_text(value) -> str:
    """Coerce one extracted field value to a string.

    The JSON produced by the model may contain ``null``, lists or numbers
    where a string is expected; the validation below calls ``str`` methods
    on every field, so anything else is normalised here.
    """
    if value is None:
        return ""
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        return ", ".join(_as_text(item) for item in value)
    return str(value)


def _is_valid_chat_link(value: str) -> bool:
    """Return True when *value* begins with an http(s) URL with a dotted host.

    Only the first whitespace-delimited token is inspected, so a URL followed
    by a short remark is still accepted while "https://not available" is not.
    """
    from urllib.parse import urlsplit

    tokens = value.split()
    if not tokens:
        return False
    try:
        parts = urlsplit(tokens[0])
        host = parts.hostname or ""
    except ValueError:
        # urlsplit / hostname raise ValueError on malformed netlocs.
        return False
    return parts.scheme in ("http", "https") and "." in host.strip(".")


def parse_report(text: str) -> ParsedReport:
    """
    Parse an EOD report message into a ParsedReport.

    Claude verifies that the message is an EOD report and extracts the
    fields; if that call fails for any reason (or does not return a JSON
    object) the regex extractor is used instead and the message is assumed
    to be a report.

    Args:
        text: Raw message text; Slack-style ``<@ID>`` mention tags are
            stripped before parsing.

    Returns:
        A ParsedReport. When the message is not an EOD report only
        ``raw_text`` and ``is_eod_report`` are set. Otherwise all extracted
        fields are filled, ``missing_fields`` / ``format_valid`` reflect the
        mandatory-field check, and the quality score, quality flags and risk
        flag are computed only when the format is valid.
    """
    # Strip bot mention tag before parsing
    clean_text = re.sub(r'<@[A-Z0-9]+>', '', text).strip()
    report = ParsedReport(raw_text=clean_text)

    try:
        extracted = _extract_with_claude(clean_text)
        if not isinstance(extracted, dict):
            # e.g. the model answered with a JSON list or bare string.
            raise TypeError(
                f"expected a JSON object, got {type(extracted).__name__}"
            )
    except Exception as e:
        # Deliberate catch-all: any failure on the Claude path (missing key,
        # network, bad JSON) must degrade to the regex parser, not crash.
        print(f"[parser] Claude failed: {e} — using regex fallback")
        extracted = _extract_with_regex(clean_text)
        extracted["is_eod_report"] = True  # regex fallback assumes it is

    # If Claude says this is not an EOD report — stop here.
    # A string such as "false" would be truthy, so interpret it explicitly.
    verdict = extracted.get("is_eod_report", False)
    if isinstance(verdict, str):
        verdict = verdict.strip().lower() in ("true", "yes")
    report.is_eod_report = bool(verdict)
    if not report.is_eod_report:
        print("[parser] Claude determined this is not an EOD report — ignoring")
        return report

    # ParsedReport attribute -> key in the extracted dict.
    attr_to_key = {
        "name": "name",
        "report_date": "date",
        "solution": "solution",
        "tasks_in_progress": "tasks_in_progress",
        "blockers": "blockers",
        "learned": "learned",
        "ai_tool": "ai_tool",
        "ai_what_asked": "ai_what_asked",
        "ai_changes": "ai_changes",
        "ai_chat_link": "ai_chat_link",
        "plan_tomorrow": "plan_tomorrow",
        "plan_week": "plan_week",
        "confidence": "confidence",
    }
    for attr, key in attr_to_key.items():
        setattr(report, attr, _as_text(extracted.get(key, "")))

    # Validate mandatory fields: each needs at least 3 non-blank characters.
    required = {
        "name": report.name,
        "solution": report.solution,
        "learned": report.learned,
        "ai_tool": report.ai_tool,
        "plan_tomorrow": report.plan_tomorrow,
        "confidence": report.confidence,
    }
    missing = [k for k, v in required.items() if len(v.strip()) < 3]

    # AI chat link — strictly required, must be a real http(s) URL.
    # The URL is validated structurally rather than by scanning for
    # placeholder phrases: substring matching rejected genuine links
    # (e.g. "na" occurs inside "chat.openai.com"), and placeholder text
    # never parses as an http(s) URL with a dotted host anyway.
    if not _is_valid_chat_link(report.ai_chat_link):
        missing.append("ai_chat_link")

    report.missing_fields = missing
    report.format_valid = not missing

    if report.format_valid:
        report.quality_score = _score(report)
        report.quality_flags = _flags(report)
        report.contains_risk_flag = _check_risk(report)

    return report


def _extract_with_claude(text: str) -> dict:
    """
    Claude does two things in one call:
    1. Decides if this is a real EOD report
    2. Extracts all fields if it is

    Args:
        text: The message text to classify and extract from.

    Returns:
        The JSON object returned by the model, as a dict. It always carries
        an ``is_eod_report`` key when the model follows the prompt.

    Raises:
        KeyError: ``ANTHROPIC_API_KEY`` is not set in the environment.
        ValueError: The reply was truncated, is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError``), or is valid JSON
            but not an object.
        Exception: Anything raised by the ``anthropic`` client itself.
    """
    import anthropic
    client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

    prompt = f"""You are processing a message sent to an intern management bot.

First decide: is this a genuine EOD (End of Day) work report from an intern?
- YES if: it contains work tasks, learning, plans, blockers — even if format is incomplete
- NO if: it's a question, random message, test, greeting, or anything other than a work report

If YES, extract all available fields.
If NO, return {{"is_eod_report": false}} and nothing else.

Fields to extract (return empty string "" if not present):
- is_eod_report: true or false
- name: person's full name (ignore @ Slack tags)
- date: report date
- solution: content under WHAT I SOLVED TODAY (outcome, not tasks)
- tasks_in_progress: tasks in progress
- blockers: blockers
- learned: what they learned
- ai_tool: AI tool(s) used
- ai_what_asked: what they asked the AI
- ai_changes: what they changed or rejected from AI output
- ai_chat_link: shared chat URL — must start with http:// or https://. Return "" if value is N/A, none, not applicable, internal, "cant provide", "not available", "used in vscode", "used in editor", "copilot in vscode", or any explanation instead of a real URL. Only return an actual URL.
- plan_tomorrow: plan for tomorrow
- plan_week: plan for the week
- confidence: selected confidence level — one of: Crushing it, On track, Need help, Stuck

Return ONLY valid JSON. No prose. No markdown fences.

Message:
{text}"""

    message = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1000,
        messages=[{"role": "user", "content": prompt}],
    )

    # A reply cut off at max_tokens is incomplete JSON; say so explicitly
    # instead of surfacing an opaque decode error.
    if getattr(message, "stop_reason", None) == "max_tokens":
        raise ValueError("Claude response was truncated at max_tokens")

    raw = message.content[0].text.strip()

    # Despite the prompt, the model sometimes wraps the JSON in markdown
    # fences or adds a sentence around it. Parse the outermost {...} span so
    # the wrapping is ignored and backticks inside values are left intact.
    first_brace = raw.find("{")
    last_brace = raw.rfind("}")
    if first_brace != -1 and last_brace > first_brace:
        raw = raw[first_brace:last_brace + 1]

    data = json.loads(raw)
    if not isinstance(data, dict):
        raise ValueError(
            f"expected a JSON object from Claude, got {type(data).__name__}"
        )
    return data


def _extract_with_regex(text: str) -> dict:
    """Fallback regex extraction, used when the Claude call fails.

    Locates each section by its upper-case heading (case-insensitive) and
    takes the text up to the next expected heading.

    Args:
        text: Report text with mention tags already stripped.

    Returns:
        A dict with ``is_eod_report`` (always True), ``name`` / ``date``
        (only when a header line is found), one entry per report section,
        the four AI sub-fields and ``confidence``. Absent values are "".
    """
    result = {"is_eod_report": True}

    # Header: "EOD REPORT — Name — Date". First try separators that are
    # either a dash character (— / –) or a hyphen with whitespace on both
    # sides, so a hyphenated name such as "Mary-Jane" is not split in two.
    # Fall back to the looser original pattern for "EOD REPORT-Name-Date".
    header = re.search(
        r"EOD REPORT\s*(?:[—–]+|\s-+\s)\s*(.+?)\s*(?:[—–]+|\s-+\s)\s*(.+)",
        text,
        re.IGNORECASE,
    ) or re.search(
        r"EOD REPORT\s*[—\-]+\s*(.+?)\s*[—\-]+\s*(.+)", text, re.IGNORECASE
    )
    if header:
        result["name"] = re.sub(r'<@[A-Z0-9]+>', '', header.group(1)).strip()
        result["date"] = header.group(2).strip()

    # (result key, heading that opens the section, headings that close it)
    sections = [
        ("solution",         r"WHAT I SOLVED TODAY",      r"TASKS COMPLETED|TASKS IN PROGRESS|BLOCKERS"),
        ("tasks_in_progress",r"TASKS IN PROGRESS",         r"BLOCKERS|WHAT I LEARNED"),
        ("blockers",         r"BLOCKERS",                  r"WHAT I LEARNED"),
        ("learned",          r"WHAT I LEARNED TODAY",      r"AI USAGE|PLAN FOR"),
        ("ai_usage",         r"AI USAGE TODAY",            r"PLAN FOR TOMORROW"),
        ("plan_tomorrow",    r"PLAN FOR TOMORROW",         r"PLAN FOR THE WEEK|CONFIDENCE"),
        ("plan_week",        r"PLAN FOR THE WEEK",         r"CONFIDENCE"),
        ("confidence_raw",   r"CONFIDENCE LEVEL",          r"━━━|$"),
    ]

    for key, start_pat, end_pat in sections:
        start = re.search(start_pat, text, re.IGNORECASE)
        if not start:
            result[key] = ""
            continue
        rest = text[start.end():]
        end = re.search(end_pat, rest, re.IGNORECASE)
        # No closing heading found: the section runs to the end of the text.
        result[key] = (rest[:end.start()] if end else rest).strip()

    # Parse AI sub-fields
    ai = result.pop("ai_usage", "")

    def _labelled(label_pattern: str) -> str:
        """Return the rest of the line following a "Label:" in the AI block."""
        # [ \t]* (not \s*) after the colon keeps an empty value from
        # swallowing the following line.
        found = re.search(label_pattern + r"[ \t]*(.*)", ai, re.IGNORECASE)
        return found.group(1).strip() if found else ""

    result["ai_tool"] = _labelled(r"tool used\s*:")
    # The report template labels these "What I asked it:" and "What I changed
    # from its output:", so allow extra words between the stem and the colon.
    result["ai_what_asked"] = _labelled(r"what i asked[^:\n]*:")
    result["ai_changes"] = _labelled(r"what i changed[^:\n]*:")

    # Extract first valid URL from the chat link line. Slack wraps links as
    # <url> or <url|label>, so stop at '<', '>' and '|' as well as whitespace.
    raw_link_line = _labelled(r"chat link\s*:")
    urls = re.findall(r"https?://[^\s<>|]+", raw_link_line, re.IGNORECASE)
    raw_link = urls[0] if urls else raw_link_line
    placeholders = ("n/a", "none", "not applicable", "internal", "-", "")
    result["ai_chat_link"] = "" if raw_link.lower() in placeholders else raw_link

    # Parse confidence: the text after the ticked checkbox, up to the next
    # checkbox or the end of that line (MULTILINE, so one-option-per-line
    # layouts work too).
    conf_raw = result.pop("confidence_raw", "")
    marked = re.search(
        r"\[\s*[x✓✔]\s*\][ \t]*(.+?)(?:\[|$)",
        conf_raw,
        re.IGNORECASE | re.MULTILINE,
    )
    if marked:
        result["confidence"] = marked.group(1).strip()
    elif re.search(r"\[\s*\]", conf_raw):
        # Checkboxes are present but none is ticked: nothing was selected.
        result["confidence"] = ""
    else:
        # Free-text answer without checkboxes.
        result["confidence"] = conf_raw[:30].strip()

    return result


def _score(report: ParsedReport) -> int:
    """Rate a report's quality on a 1-5 scale.

    Starts from 3, adds one point each for a long solution (> 80 chars),
    long learning notes (> 80 chars) and a substantive AI critique
    (> 50 chars), subtracts one for an empty chat link and two when the AI
    critique reads like the output was accepted unchanged. The total is
    clamped to the range 1..5.
    """
    # Each satisfied length check is worth one point.
    bonus = sum((
        len(report.solution) > 80,
        len(report.learned) > 80,
        len(report.ai_changes) > 50,
    ))

    penalty = 0

    # Missing chat link costs a point.
    if report.ai_chat_link.strip() == "":
        penalty += 1

    # Phrases signalling the AI output was taken as-is cost two points.
    copy_paste_phrases = ("nothing", "used as is", "no changes", "accepted all", "kept everything")
    critique = report.ai_changes.lower() if report.ai_changes else ""
    for phrase in copy_paste_phrases:
        if critique and phrase in critique:
            penalty += 2
            break

    total = 3 + bonus - penalty
    if total < 1:
        return 1
    if total > 5:
        return 5
    return total


def _flags(report: ParsedReport) -> list:
    """Collect quality flags for a report.

    Possible flags, in the order they are appended:
      - ``no_solution_stated``: solution is empty or under 20 characters.
      - ``no_ai_chat_link``: chat link is blank.
      - ``possible_copy_paste``: the AI critique contains a phrase that
        suggests the output was accepted unchanged.
      - ``shallow_learning``: learning notes are under 30 characters.
      - ``no_weekly_plan``: weekly plan is empty or under 10 characters.
      - ``needs_help`` / ``stuck``: taken from the confidence level.

    Returns:
        A list of flag strings (possibly empty).
    """
    flags = []

    if not report.solution or len(report.solution.strip()) < 20:
        flags.append("no_solution_stated")

    link = report.ai_chat_link.strip().lower()
    if not link:
        flags.append("no_ai_chat_link")

    if report.ai_changes:
        lower = report.ai_changes.lower()
        # Same phrase list that _score penalises; "kept everything" was
        # missing here, so such a report lost points without being flagged.
        copy_paste_phrases = (
            "nothing", "used as is", "no changes", "accepted all",
            "kept everything",
        )
        if any(phrase in lower for phrase in copy_paste_phrases):
            flags.append("possible_copy_paste")

    if len(report.learned) < 30:
        flags.append("shallow_learning")

    if not report.plan_week or len(report.plan_week.strip()) < 10:
        flags.append("no_weekly_plan")

    conf = report.confidence.lower()
    if "need help" in conf:
        flags.append("needs_help")
    if "stuck" in conf:
        flags.append("stuck")

    return flags


def _check_risk(report: ParsedReport) -> bool:
    """Return True if the blockers or confidence text contains a risk keyword.

    Matching is a case-insensitive substring test against RISK_FLAG_KEYWORDS.
    """
    haystack = f"{report.blockers} {report.confidence}".lower()
    for keyword in RISK_FLAG_KEYWORDS:
        if keyword in haystack:
            return True
    return False


def format_missing_fields_message(missing_fields: list, report_type: str = "atg") -> str:
    """Build the reply that tells the author which required fields are missing.

    Args:
        missing_fields: Field keys that failed validation. Known keys are
            shown with a human-readable label; unknown keys are shown as-is.
        report_type: Accepted but not used by this function.

    Returns:
        A message listing the missing fields as bullets, followed by the
        full report template inside a code fence and a resubmission hint.
    """
    labels_by_key = {
        "name":           "Your name in the header",
        "solution":       "WHAT I SOLVED TODAY — outcome not tasks",
        "learned":        "WHAT I LEARNED TODAY",
        "ai_tool":        "AI USAGE TODAY → Tool used",
        "ai_chat_link":   "AI USAGE TODAY → Chat link — must be a real https:// URL. If you used Copilot in VS Code, export or screenshot the conversation and share via a link.",
        "plan_tomorrow":  "PLAN FOR TOMORROW",
        "confidence":     "CONFIDENCE LEVEL",
    }

    bullets = []
    for key in missing_fields:
        bullets.append("• " + labels_by_key.get(key, key))
    bullet_block = "\n".join(bullets)

    # Horizontal rule used three times in the template.
    rule = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

    # One entry per output line; "" entries are the blank separator lines.
    lines = [
        "Your report is missing required fields:",
        "",
        bullet_block,
        "",
        "Use this format:",
        "",
        "```",
        "@intern-management-agent",
        rule,
        "EOD REPORT — Your Name — DD Mon YYYY",
        rule,
        "",
        "WHAT I SOLVED TODAY",
        "- What problem did you actually solve? (outcome, not tasks)",
        "",
        "TASKS IN PROGRESS",
        "- What you started — expected completion: [date]",
        "",
        "BLOCKERS",
        "- What is stopping you and who you need / None",
        "",
        "WHAT I LEARNED TODAY",
        "- Specific concept — how will you apply it tomorrow?",
        "",
        "AI USAGE TODAY",
        "Tool used: Claude / ChatGPT / Copilot",
        "What I asked it: [specific task]",
        "What I changed from its output: [what you modified and why]",
        "Chat link: [mandatory — paste shared URL]",
        "",
        "PLAN FOR TOMORROW",
        "- Task with expected output",
        "",
        "PLAN FOR THE WEEK",
        "- What you aim to complete by end of week",
        "",
        "CONFIDENCE LEVEL",
        "> [x] Crushing it  [ ] On track  [ ] Need help  [ ] Stuck",
        rule,
        "```",
        "",
        "Tag @intern-management-agent when you resubmit.",
    ]
    return "\n".join(lines)