File size: 12,798 Bytes
2a1d323
8b32013
5d6c69a
 
 
8b32013
2a1d323
 
8b32013
2a1d323
8b32013
2a1d323
 
 
 
9060ca4
 
 
 
 
2a1d323
 
 
 
5d6c69a
2a1d323
9060ca4
5d6c69a
9060ca4
 
d16ec3b
9060ca4
5d6c69a
 
9060ca4
 
5d6c69a
9060ca4
2a1d323
 
d16ec3b
2a1d323
 
 
 
 
5d6c69a
 
 
 
 
 
 
9060ca4
8b32013
5d6c69a
8b32013
5d6c69a
 
 
 
 
 
 
 
 
8b32013
 
 
5d6c69a
8b32013
 
 
 
5d6c69a
 
8b32013
 
5d6c69a
8b32013
 
5d6c69a
9060ca4
 
5d6c69a
9060ca4
 
 
 
2a1d323
5d6c69a
1560291
a57eee1
1560291
a57eee1
 
 
 
 
 
 
 
 
 
1560291
 
d16ec3b
 
2a1d323
d16ec3b
9060ca4
 
 
d16ec3b
 
 
 
8b32013
5d6c69a
 
 
 
 
8b32013
5d6c69a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a57eee1
5d6c69a
 
 
 
 
 
 
8b32013
9060ca4
8b32013
 
 
 
 
 
 
 
 
9060ca4
8b32013
9060ca4
 
8b32013
5d6c69a
 
9060ca4
8b32013
 
5d6c69a
8b32013
9060ca4
8b32013
5d6c69a
 
 
 
 
 
 
 
8b32013
9060ca4
8b32013
 
 
 
 
 
 
 
9060ca4
5d6c69a
8b32013
 
5d6c69a
 
8b32013
 
5d6c69a
 
2527c2a
 
 
 
 
5d6c69a
9060ca4
8b32013
 
5d6c69a
 
8b32013
 
9060ca4
 
 
d16ec3b
8b32013
5d6c69a
 
2a1d323
5d6c69a
9060ca4
 
5d6c69a
 
 
 
9060ca4
5d6c69a
 
 
 
 
 
 
 
9060ca4
2a1d323
 
 
9060ca4
2a1d323
 
5d6c69a
 
 
9060ca4
5d6c69a
9060ca4
d16ec3b
5d6c69a
 
 
 
 
d16ec3b
9060ca4
 
5d6c69a
 
9060ca4
 
 
 
 
 
 
2a1d323
 
 
9060ca4
5d6c69a
 
9060ca4
 
 
 
5d6c69a
 
 
 
a57eee1
5d6c69a
 
9060ca4
d16ec3b
2a1d323
 
 
 
 
 
5d6c69a
9060ca4
03580b5
9060ca4
 
 
5d6c69a
 
22a7169
5d6c69a
22a7169
5d6c69a
22a7169
5d6c69a
9060ca4
5d6c69a
 
 
 
22a7169
5d6c69a
22a7169
 
5d6c69a
9060ca4
 
22a7169
36420a4
2a1d323
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
"""
parser.py — ATG EOD report parser
Claude does two things:
  1. Verifies this is a genuine EOD report (not a random bot mention)
  2. Extracts all structured fields
Falls back to regex if Claude is unavailable.
"""

import os
import re
import json
from dataclasses import dataclass, field


# Phrases that mark a report as "at risk". _check_risk() tests each entry as a
# plain substring of the lower-cased blockers + confidence text, so every entry
# here must itself be lower-case.
RISK_FLAG_KEYWORDS = [
    "stuck", "blocked", "can't proceed", "need help",
    "behind", "delayed", "not sure", "struggling",
    "at risk", "won't finish", "unable to",
]


@dataclass
class ParsedReport:
    """Structured result of parsing a single EOD report message.

    Only ``raw_text`` is mandatory. Every other attribute starts out empty
    (``""``, ``False``, ``0`` or a fresh list) and is populated by
    ``parse_report`` as extraction, validation and scoring proceed.
    """

    # -- input / classification ------------------------------------------
    raw_text: str
    # Verdict of the extractor: is this message an actual EOD report?
    is_eod_report: bool = False

    # -- header -----------------------------------------------------------
    name: str = ""
    report_date: str = ""

    # -- report sections --------------------------------------------------
    # Content of the "WHAT I SOLVED TODAY" section.
    solution: str = ""
    tasks_in_progress: str = ""
    blockers: str = ""
    learned: str = ""

    # -- AI usage ---------------------------------------------------------
    ai_tool: str = ""
    # What the author asked the AI tool to do.
    ai_what_asked: str = ""
    # What the author changed in (or rejected from) the AI output.
    ai_changes: str = ""
    ai_chat_link: str = ""

    # -- plans and self-assessment ----------------------------------------
    plan_tomorrow: str = ""
    plan_week: str = ""
    confidence: str = ""

    # -- validation and quality results -----------------------------------
    format_valid: bool = False
    missing_fields: list = field(default_factory=list)
    quality_score: int = 0
    quality_flags: list = field(default_factory=list)
    contains_risk_flag: bool = False


def _as_text(value) -> str:
    """Coerce one extracted field to ``str`` so later ``.strip()`` calls are safe.

    JSON ``null`` becomes ``""``; a JSON array becomes one line per item;
    any other non-string value is passed through ``str()``.
    """
    if value is None:
        return ""
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        return "\n".join(_as_text(item) for item in value)
    return str(value)


def _is_real_url(value: str) -> bool:
    """Return True if *value* is a single http(s) URL with a dotted host name.

    Free-text explanations ("can't share", "used in VS Code", "N/A") fail
    because they contain whitespace or have no http(s) scheme; placeholders
    such as ``https://n/a`` or ``http://localhost`` fail the dotted-host check.
    """
    from urllib.parse import urlsplit

    candidate = value.strip()
    if not candidate or any(ch.isspace() for ch in candidate):
        return False
    try:
        parts = urlsplit(candidate)
        host = parts.hostname or ""
    except ValueError:
        # urlsplit rejects malformed authorities such as "http://[".
        return False
    return parts.scheme in ("http", "https") and "." in host.strip(".")


def parse_report(text: str) -> ParsedReport:
    """
    Parse an EOD report message into a ``ParsedReport``.

    Claude verifies that the message is an EOD report and extracts the
    fields; if that call fails for any reason (or returns something that is
    not a JSON object) the regex extractor is used instead and the message
    is assumed to be a report.

    Args:
        text: Raw message text; Slack mention tags (``<@U123>``) are removed
            before parsing.

    Returns:
        A ``ParsedReport``. When the message is not an EOD report only
        ``raw_text`` and ``is_eod_report`` are set. Otherwise all extracted
        fields plus ``missing_fields`` / ``format_valid`` are set, and the
        quality score, quality flags and risk flag are computed only when
        the format is valid.
    """
    # Strip bot mention tag before parsing
    clean_text = re.sub(r'<@[A-Z0-9]+>', '', text).strip()
    report = ParsedReport(raw_text=clean_text)

    try:
        extracted = _extract_with_claude(clean_text)
        if not isinstance(extracted, dict):
            # Valid JSON that is not an object cannot be read field by field;
            # treat it like any other Claude failure.
            raise TypeError(
                f"expected a JSON object, got {type(extracted).__name__}"
            )
    except Exception as e:  # deliberate best-effort boundary: never fail the parse
        print(f"[parser] Claude failed: {e} — using regex fallback")
        extracted = _extract_with_regex(clean_text)
        extracted["is_eod_report"] = True  # regex fallback assumes it is

    # If Claude says this is not an EOD report — stop here
    verdict = extracted.get("is_eod_report", False)
    if isinstance(verdict, str):
        # Guard against the flag coming back as the string "false".
        verdict = verdict.strip().lower() in ("true", "yes")
    report.is_eod_report = bool(verdict)
    if not report.is_eod_report:
        print("[parser] Claude determined this is not an EOD report — ignoring")
        return report

    # ParsedReport attribute -> key in the extracted dict.
    attr_to_key = {
        "name": "name",
        "report_date": "date",
        "solution": "solution",
        "tasks_in_progress": "tasks_in_progress",
        "blockers": "blockers",
        "learned": "learned",
        "ai_tool": "ai_tool",
        "ai_what_asked": "ai_what_asked",
        "ai_changes": "ai_changes",
        "ai_chat_link": "ai_chat_link",
        "plan_tomorrow": "plan_tomorrow",
        "plan_week": "plan_week",
        "confidence": "confidence",
    }
    for attr, key in attr_to_key.items():
        # Coerce so that null / list / numeric JSON values cannot crash the
        # string handling below.
        setattr(report, attr, _as_text(extracted.get(key, "")))

    # Validate mandatory fields
    required = {
        "name": report.name,
        "solution": report.solution,
        "learned": report.learned,
        "ai_tool": report.ai_tool,
        "plan_tomorrow": report.plan_tomorrow,
        "confidence": report.confidence,
    }
    missing = [k for k, v in required.items() if not v or len(v.strip()) < 3]

    # AI chat link — strictly required, must be a real http(s) URL.
    # Validated structurally rather than with a substring denylist, which
    # rejected genuine links that merely contained "na", "local", "editor"...
    if not _is_real_url(report.ai_chat_link):
        missing.append("ai_chat_link")

    report.missing_fields = missing
    report.format_valid = not missing

    if report.format_valid:
        report.quality_score = _score(report)
        report.quality_flags = _flags(report)
        report.contains_risk_flag = _check_risk(report)

    return report


def _extract_with_claude(text: str) -> dict:
    """
    Ask Claude, in a single call, to classify and extract an EOD report.

    Claude does two things in one call:
    1. Decides if this is a real EOD report
    2. Extracts all fields if it is

    Args:
        text: Message text with the bot mention already removed.

    Returns:
        The decoded JSON object. It contains ``is_eod_report`` and, for a
        genuine report, the extracted fields named in the prompt.

    Raises:
        KeyError: ``ANTHROPIC_API_KEY`` is not set in the environment.
        ValueError: Claude returned no content, invalid JSON
            (``json.JSONDecodeError`` is a ``ValueError``), or JSON that is
            not an object.
        Exception: Anything raised by the ``anthropic`` client itself.
    """
    import anthropic
    client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

    # NOTE(review): `text` is untrusted user input interpolated straight into
    # the prompt, so a message can try to steer the model's answer. The caller
    # re-validates the extracted fields, but treat the result as untrusted.
    prompt = f"""You are processing a message sent to an intern management bot.

First decide: is this a genuine EOD (End of Day) work report from an intern?
- YES if: it contains work tasks, learning, plans, blockers — even if format is incomplete
- NO if: it's a question, random message, test, greeting, or anything other than a work report

If YES, extract all available fields.
If NO, return {{"is_eod_report": false}} and nothing else.

Fields to extract (return empty string "" if not present):
- is_eod_report: true or false
- name: person's full name (ignore @ Slack tags)
- date: report date
- solution: content under WHAT I SOLVED TODAY (outcome, not tasks)
- tasks_in_progress: tasks in progress
- blockers: blockers
- learned: what they learned
- ai_tool: AI tool(s) used
- ai_what_asked: what they asked the AI
- ai_changes: what they changed or rejected from AI output
- ai_chat_link: shared chat URL — must start with http:// or https://. Return "" if value is N/A, none, not applicable, internal, "cant provide", "not available", "used in vscode", "used in editor", "copilot in vscode", or any explanation instead of a real URL. Only return an actual URL.
- plan_tomorrow: plan for tomorrow
- plan_week: plan for the week
- confidence: selected confidence level — one of: Crushing it, On track, Need help, Stuck

Return ONLY valid JSON. No prose. No markdown fences.

Message:
{text}"""

    message = client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1000,
        messages=[{"role": "user", "content": prompt}],
    )

    if not message.content:
        raise ValueError("Claude returned an empty response")

    raw = message.content[0].text.strip()
    # The prompt forbids markdown fences, but strip them if they appear anyway.
    if raw.startswith("```"):
        raw = re.sub(r"```(?:json)?", "", raw).replace("```", "").strip()

    parsed = json.loads(raw)
    if not isinstance(parsed, dict):
        # A bare list/string/number is valid JSON but unusable by the caller,
        # which reads fields with .get(); fail here so it can fall back.
        raise ValueError(
            f"Claude returned JSON of type {type(parsed).__name__}, expected an object"
        )
    return parsed


def _extract_with_regex(text: str) -> dict:
    """Fallback regex extraction."""
    result = {"is_eod_report": True}

    header = re.search(r"EOD REPORT\s*[β€”\-]+\s*(.+?)\s*[β€”\-]+\s*(.+)", text, re.IGNORECASE)
    if header:
        result["name"] = re.sub(r'<@[A-Z0-9]+>', '', header.group(1)).strip()
        result["date"] = header.group(2).strip()

    sections = [
        ("solution",         r"WHAT I SOLVED TODAY",      r"TASKS COMPLETED|TASKS IN PROGRESS|BLOCKERS"),
        ("tasks_in_progress",r"TASKS IN PROGRESS",         r"BLOCKERS|WHAT I LEARNED"),
        ("blockers",         r"BLOCKERS",                  r"WHAT I LEARNED"),
        ("learned",          r"WHAT I LEARNED TODAY",      r"AI USAGE|PLAN FOR"),
        ("ai_usage",         r"AI USAGE TODAY",            r"PLAN FOR TOMORROW"),
        ("plan_tomorrow",    r"PLAN FOR TOMORROW",         r"PLAN FOR THE WEEK|CONFIDENCE"),
        ("plan_week",        r"PLAN FOR THE WEEK",         r"CONFIDENCE"),
        ("confidence_raw",   r"CONFIDENCE LEVEL",          r"━━━|$"),
    ]

    for key, start_pat, end_pat in sections:
        start = re.search(start_pat, text, re.IGNORECASE)
        if not start:
            result[key] = ""
            continue
        end = re.search(end_pat, text[start.end():], re.IGNORECASE)
        content = text[start.end(): start.end() + end.start()].strip() if end else text[start.end():].strip()
        result[key] = content

    # Parse AI sub-fields
    ai = result.pop("ai_usage", "")
    tool = re.search(r"tool used\s*:\s*(.+)", ai, re.IGNORECASE)
    asked = re.search(r"what i asked\s*:\s*(.+)", ai, re.IGNORECASE)
    changed = re.search(r"what i changed\s*:\s*(.+)", ai, re.IGNORECASE)
    link = re.search(r"chat link\s*:\s*(.+)", ai, re.IGNORECASE)
    result["ai_tool"] = tool.group(1).strip() if tool else ""
    result["ai_what_asked"] = asked.group(1).strip() if asked else ""
    result["ai_changes"] = changed.group(1).strip() if changed else ""

    # Extract first valid URL from the chat link line
    raw_link_line = link.group(1).strip() if link else ""
    urls = re.findall(r'https?://[^\s]+', raw_link_line)
    raw_link = urls[0] if urls else raw_link_line
    result["ai_chat_link"] = "" if raw_link.lower() in ("n/a", "none", "not applicable", "internal", "-", "") else raw_link

    # Parse confidence
    conf_raw = result.pop("confidence_raw", "")
    marked = re.search(r"\[x\]\s*(.+?)(?:\[|$)", conf_raw, re.IGNORECASE)
    result["confidence"] = marked.group(1).strip() if marked else conf_raw[:30].strip()

    return result


def _score(report: ParsedReport) -> int:
    score = 3

    # Reward solution section
    if len(report.solution) > 80: score += 1

    # Reward detailed learning
    if len(report.learned) > 80: score += 1

    # Reward AI critique
    if len(report.ai_changes) > 50: score += 1

    # Penalise missing/fake chat link
    link = report.ai_chat_link.strip().lower()
    if not link:
        score -= 1

    # Penalise copy-paste signals
    if report.ai_changes:
        lower = report.ai_changes.lower()
        if any(x in lower for x in ["nothing", "used as is", "no changes", "accepted all", "kept everything"]):
            score -= 2

    return max(1, min(5, score))


def _flags(report: ParsedReport) -> list:
    flags = []

    if not report.solution or len(report.solution.strip()) < 20:
        flags.append("no_solution_stated")

    link = report.ai_chat_link.strip().lower()
    if not link:
        flags.append("no_ai_chat_link")

    if report.ai_changes:
        lower = report.ai_changes.lower()
        if any(x in lower for x in ["nothing", "used as is", "no changes", "accepted all"]):
            flags.append("possible_copy_paste")

    if len(report.learned) < 30:
        flags.append("shallow_learning")

    if not report.plan_week or len(report.plan_week.strip()) < 10:
        flags.append("no_weekly_plan")

    conf = report.confidence.lower()
    if "need help" in conf:
        flags.append("needs_help")
    if "stuck" in conf:
        flags.append("stuck")

    return flags


def _check_risk(report: ParsedReport) -> bool:
    """Return True if the blockers or confidence text contains a risk phrase.

    The two fields are joined with a space, lower-cased, and searched for each
    entry of ``RISK_FLAG_KEYWORDS`` as a plain substring.
    """
    haystack = (report.blockers + " " + report.confidence).lower()
    for keyword in RISK_FLAG_KEYWORDS:
        if keyword in haystack:
            return True
    return False


def format_missing_fields_message(missing_fields: list, report_type: str = "atg") -> str:
    """Build the reply sent when a report lacks required fields.

    Args:
        missing_fields: Field keys as produced by ``parse_report`` (for
            example ``"name"`` or ``"ai_chat_link"``). Keys without a known
            label are shown verbatim.
        report_type: Accepted for interface compatibility; the message does
            not currently vary by report type.

    Returns:
        A message with one bullet per missing field, followed by the full
        EOD report template inside a code fence and a resubmission hint.
    """
    # Human-readable label for each field key. The dash, arrow and bullet
    # characters below were previously stored as mojibake and reached users
    # garbled; they are restored to the intended Unicode characters.
    field_labels = {
        "name":           "Your name in the header",
        "solution":       "WHAT I SOLVED TODAY — outcome not tasks",
        "learned":        "WHAT I LEARNED TODAY",
        "ai_tool":        "AI USAGE TODAY → Tool used",
        "ai_chat_link":   "AI USAGE TODAY → Chat link — must be a real https:// URL. If you used Copilot in VS Code, export or screenshot the conversation and share via a link.",
        "plan_tomorrow":  "PLAN FOR TOMORROW",
        "confidence":     "CONFIDENCE LEVEL",
    }

    fields_str = "\n".join(
        f"• {field_labels.get(key, key)}" for key in missing_fields
    )

    return (
        "Your report is missing required fields:\n\n"
        f"{fields_str}\n\n"
        "Use this format:\n\n"
        "```\n"
        "@intern-management-agent\n"
        "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
        "EOD REPORT — Your Name — DD Mon YYYY\n"
        "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
        "WHAT I SOLVED TODAY\n"
        "- What problem did you actually solve? (outcome, not tasks)\n\n"
        "TASKS IN PROGRESS\n"
        "- What you started — expected completion: [date]\n\n"
        "BLOCKERS\n"
        "- What is stopping you and who you need / None\n\n"
        "WHAT I LEARNED TODAY\n"
        "- Specific concept — how will you apply it tomorrow?\n\n"
        "AI USAGE TODAY\n"
        "Tool used: Claude / ChatGPT / Copilot\n"
        "What I asked it: [specific task]\n"
        "What I changed from its output: [what you modified and why]\n"
        "Chat link: [mandatory — paste shared URL]\n\n"
        "PLAN FOR TOMORROW\n"
        "- Task with expected output\n\n"
        "PLAN FOR THE WEEK\n"
        "- What you aim to complete by end of week\n\n"
        "CONFIDENCE LEVEL\n"
        "> [x] Crushing it  [ ] On track  [ ] Need help  [ ] Stuck\n"
        "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
        "```\n\n"
        "Tag @intern-management-agent when you resubmit."
    )