j-js committed on
Commit
e614e8a
·
verified ·
1 Parent(s): 9e0a5c1

Update conversation_logic.py

Browse files
Files changed (1) hide show
  1. conversation_logic.py +352 -1274
conversation_logic.py CHANGED
@@ -1,1315 +1,393 @@
1
  from __future__ import annotations
2
 
3
  import re
4
- from typing import Any, Dict, List, Optional, Set
5
-
6
- from context_parser import detect_intent, intent_to_help_mode
7
- from formatting import format_reply, format_explainer_response
8
- from generator_engine import GeneratorEngine
9
- from models import RetrievedChunk, SolverResult
10
- from quant_solver import is_quant_question
11
- from solver_router import route_solver
12
- from explainers.explainer_router import route_explainer
13
- from question_classifier import classify_question, normalize_category
14
- from retrieval_engine import RetrievalEngine
15
- from utils import normalize_spaces
16
-
17
-
18
# Intent labels grouped under "retrieval allowed".
# NOTE(review): not referenced by any function visible in this part of the
# module; should_retrieve() hard-codes its own intent sets — confirm usage.
RETRIEVAL_ALLOWED_INTENTS = {
    "walkthrough",
    "step_by_step",
    "explain",
    "method",
    "hint",
    "definition",
    "concept",
    "instruction",
}

# Regexes searched by _is_direct_solve_request() against the normalised
# (lower-cased, whitespace-collapsed) user text.
DIRECT_SOLVE_PATTERNS = [
    r"\bsolve\b",
    r"\bwhat is\b",
    r"\bfind\b",
    r"\bgive (?:me )?the answer\b",
    r"\bjust the answer\b",
    r"\banswer only\b",
    r"\bcalculate\b",
]

# Topic -> vocabulary that _infer_structure_terms() adds when the topic is known.
STRUCTURE_KEYWORDS = {
    "algebra": ["equation", "solve", "isolate", "variable", "linear", "expression", "unknown", "algebra"],
    "percent": ["percent", "%", "percentage", "increase", "decrease"],
    "ratio": ["ratio", "proportion", "part", "share"],
    "statistics": ["mean", "median", "mode", "range", "average"],
    "probability": ["probability", "chance", "odds"],
    "geometry": ["triangle", "circle", "angle", "area", "perimeter", "radius", "diameter"],
    "number_theory": ["integer", "odd", "even", "prime", "divisible", "factor", "multiple", "remainder"],
    "sequence": ["sequence", "geometric", "arithmetic", "term", "series"],
    "quant": ["equation", "solve", "value", "integer", "ratio", "percent"],
    "data": ["data", "mean", "median", "trend", "chart", "table", "correlation"],
    "verbal": ["grammar", "meaning", "author", "argument", "sentence", "word"],
}

# Intent -> phrases returned by _intent_keywords() (used as a scoring bonus
# in _score_chunk()).
INTENT_KEYWORDS = {
    "walkthrough": ["walkthrough", "work through", "step by step", "full working"],
    "step_by_step": ["step", "first step", "next step", "step by step"],
    "explain": ["explain", "why", "understand"],
    "method": ["method", "approach", "how do i solve", "how to solve", "equation", "formula"],
    "hint": ["hint", "nudge", "clue", "what do i do"],
    "definition": ["define", "definition", "what does", "what is meant by", "meaning"],
    "concept": ["concept", "idea", "principle", "rule"],
    "instruction": ["how do i", "how to", "what should i do first", "what step", "first step"],
}

# Topic -> phrases treated as off-topic; _infer_mismatch_terms() returns the
# ones that do not appear in the question, and _score_chunk() penalises
# chunks containing them.
MISMATCH_TERMS = {
    "algebra": ["absolute value", "modulus", "square root", "quadratic", "inequality", "roots", "parabola"],
    "percent": ["triangle", "circle", "prime", "absolute value"],
    "ratio": ["absolute value", "quadratic", "circle"],
    "statistics": ["absolute value", "prime", "triangle"],
    "probability": ["absolute value", "circle area", "quadratic"],
    "geometry": ["absolute value", "prime", "median salary"],
    "number_theory": ["circle", "triangle", "median salary"],
}
73
-
74
def detect_help_mode(text: str) -> str:
    """Classify a user message into a help mode via substring matching.

    The message is lower-cased and stripped, then tested against phrase
    groups in priority order; the first group with a hit decides the mode.

    Args:
        text: Raw user message. ``None`` or empty is treated as "".

    Returns:
        One of "definition", "explain", "step_by_step", "answer", "hint"
        or "walkthrough"; "explain" when nothing matches.
    """
    low = (text or "").lower().strip()

    def mentions(*phrases: str) -> bool:
        return any(phrase in low for phrase in phrases)

    # This phrase contains "what is", so it has to be tested before the
    # generic definition check or it can never be reached.
    if mentions("what is the question asking"):
        return "explain"

    if mentions("what does", "what is", "define", "meaning of"):
        return "definition"

    if mentions("explain", "break down", "help me understand"):
        return "explain"

    if mentions("step by step", "steps", "walk me through"):
        return "step_by_step"

    if mentions("how do i", "how to", "approach this", "method"):
        return "answer"

    if mentions("hint", "nudge"):
        return "hint"

    if mentions("walkthrough", "work through"):
        return "walkthrough"

    return "explain"
96
-
97
def _normalize_classified_topic(topic: Optional[str], category: Optional[str], question_text: str) -> str:
    """Resolve a classifier topic into a concrete topic label.

    A specific classifier topic is returned as-is (except that "ratio" is
    rewritten to "algebra" when the question looks algebraic and contains no
    ``a:b`` form). Generic or missing topics are inferred from keywords in the
    question, and finally from the normalised category.

    Args:
        topic: Topic label from the classifier; may be ``None``.
        category: Category label, normalised via ``normalize_category``.
        question_text: The question wording; ``None`` is treated as "".

    Returns:
        A lower-case topic label, or "general" when nothing can be inferred.
    """
    label = (topic or "").strip().lower()
    question = (question_text or "").lower()
    category_name = normalize_category(category)

    ratio_form = re.search(r"\b\d+\s*:\s*\d+\b", question) is not None
    algebra_form = (
        "=" in question
        or re.search(r"\b[xyz]\b", question) is not None
        or re.search(r"\d+[a-z]\b", question) is not None
        or re.search(r"\b[a-z]\s*[\+\-\*/=]", question) is not None
    )

    # A "ratio" label on something that is really an equation is re-tagged.
    if label == "ratio" and algebra_form and not ratio_form:
        label = "algebra"

    if label not in {"general_quant", "general", "unknown", ""}:
        return label

    def mentions(*needles: str) -> bool:
        return any(needle in question for needle in needles)

    if mentions("%", "percent"):
        return "percent"
    if ratio_form or mentions("ratio"):
        return "ratio"
    if mentions("probability", "chosen at random", "odds", "chance"):
        return "probability"
    if mentions("divisible", "remainder", "prime", "factor"):
        return "number_theory"
    if mentions("circle", "triangle", "perimeter", "area", "circumference"):
        return "geometry"
    if mentions("mean", "median", "average", "sales", "revenue"):
        if category_name == "Quantitative":
            return "statistics"
        return "data"
    if algebra_form or mentions("what is x", "what is y", "integer"):
        return "algebra"

    # Nothing in the wording helped; fall back to the category.
    if category_name == "DataInsight":
        return "data"
    if category_name == "Verbal":
        return "verbal"
    if category_name == "Quantitative":
        return "quant"

    return "general"
 
 
 
 
139
 
140
 
141
- def _teaching_lines(chunks: List[RetrievedChunk]) -> List[str]:
142
- lines: List[str] = []
143
- for chunk in chunks:
144
- text = (chunk.text or "").strip().replace("\n", " ")
145
- if len(text) > 220:
146
- text = text[:217].rstrip() + "…"
147
- topic = chunk.topic or "general"
148
- lines.append(f"- {topic}: {text}")
149
  return lines
150
 
151
 
152
- def _safe_steps(steps: List[str]) -> List[str]:
153
- cleaned: List[str] = []
154
- banned_patterns = [
155
- r"\bthe answer is\b",
156
- r"\banswer:\b",
157
- r"\bthat gives\b",
158
- r"\btherefore\b",
159
- r"\bso x\s*=",
160
- r"\bso y\s*=",
161
- r"\bx\s*=",
162
- r"\by\s*=",
163
- r"\bresult is\b",
164
- ]
165
 
166
- for step in steps:
167
- s = (step or "").strip()
168
- lowered = s.lower()
169
- if any(re.search(p, lowered) for p in banned_patterns):
170
- continue
171
- cleaned.append(s)
172
 
173
- return cleaned
 
174
 
175
 
176
- def _normalize_text(text: str) -> str:
177
- return re.sub(r"\s+", " ", (text or "").strip().lower())
178
-
179
-
180
- def _extract_keywords(text: str) -> Set[str]:
181
- raw = re.findall(r"[a-zA-Z][a-zA-Z0-9_+-]*", (text or "").lower())
182
- stop = {
183
- "the", "a", "an", "is", "are", "to", "of", "for", "and", "or", "in", "on", "at", "by", "this", "that",
184
- "it", "be", "do", "i", "me", "my", "you", "how", "what", "why", "give", "show", "please", "can",
185
- }
186
- return {w for w in raw if len(w) > 2 and w not in stop}
187
-
188
-
189
- def _safe_meta_list(items: Any) -> List[str]:
190
- if not items:
191
  return []
192
- if isinstance(items, list):
193
- return [str(x).strip() for x in items if str(x).strip()]
194
- if isinstance(items, tuple):
195
- return [str(x).strip() for x in items if str(x).strip()]
196
- if isinstance(items, str):
197
- text = items.strip()
198
  return [text] if text else []
199
  return []
200
 
201
 
202
- def _safe_meta_text(value: Any) -> Optional[str]:
203
- if value is None:
204
- return None
205
- text = str(value).strip()
206
- return text or None
207
 
208
 
209
def _extract_explainer_scaffold(explainer_result: Any) -> Dict[str, Any]:
    """Copy the ``scaffold`` attribute of an explainer result into a plain dict.

    Returns ``{}`` when the result has no ``scaffold`` (or it is ``None``).
    Otherwise text fields are cleaned with ``_safe_meta_text``, list fields
    with ``_safe_meta_list``, and ``answer_hidden`` is coerced to ``bool``
    (defaulting to ``True`` when the attribute is absent).
    """
    scaffold = getattr(explainer_result, "scaffold", None)
    if scaffold is None:
        return {}

    def text_field(name: str) -> Optional[str]:
        return _safe_meta_text(getattr(scaffold, name, None))

    def list_field(name: str) -> List[str]:
        return _safe_meta_list(getattr(scaffold, name, []))

    return {
        "concept": text_field("concept"),
        "ask": text_field("ask"),
        "givens": list_field("givens"),
        "target": text_field("target"),
        "setup_actions": list_field("setup_actions"),
        "intermediate_steps": list_field("intermediate_steps"),
        "first_move": text_field("first_move"),
        "next_hint": text_field("next_hint"),
        "common_traps": list_field("common_traps"),
        "variables_to_define": list_field("variables_to_define"),
        "equations_to_form": list_field("equations_to_form"),
        "answer_hidden": bool(getattr(scaffold, "answer_hidden", True)),
    }
229
-
230
-
231
def _build_scaffold_reply(
    intent: str,
    help_mode: str,
    scaffold: Dict[str, Any],
    summary: Optional[str],
    teaching_points: List[str],
    verbosity: float,
    transparency: float,
) -> Optional[str]:
    """Build a reply from explainer scaffold data, shaped by intent/help mode.

    Args:
        intent: Detected intent label (e.g. "hint", "walkthrough").
        help_mode: Help mode; takes precedence over ``intent`` when truthy.
        scaffold: Scaffold dict (keys such as "ask", "first_move",
            "setup_actions", ...). May be empty.
        summary: Optional explainer summary text.
        teaching_points: Optional explainer teaching points.
        verbosity: Controls how many steps are shown; thresholds below.
        transparency: Controls which extra details (variables, equations,
            next hint, traps) are appended in the method/concept/explain branch.

    Returns:
        The reply text, or ``None`` when there is nothing usable for the
        selected branch.
    """
    if not scaffold and not summary and not teaching_points:
        return None

    # Re-sanitise every scaffold field; a falsy scaffold yields empty values.
    ask = _safe_meta_text(scaffold.get("ask")) if scaffold else None
    first_move = _safe_meta_text(scaffold.get("first_move")) if scaffold else None
    next_hint = _safe_meta_text(scaffold.get("next_hint")) if scaffold else None
    setup_actions = _safe_meta_list(scaffold.get("setup_actions", [])) if scaffold else []
    intermediate_steps = _safe_meta_list(scaffold.get("intermediate_steps", [])) if scaffold else []
    variables_to_define = _safe_meta_list(scaffold.get("variables_to_define", [])) if scaffold else []
    equations_to_form = _safe_meta_list(scaffold.get("equations_to_form", [])) if scaffold else []
    common_traps = _safe_meta_list(scaffold.get("common_traps", [])) if scaffold else []

    target_mode = help_mode or intent

    # Hint: a single nudge, first available source wins.
    if target_mode == "hint" or intent == "hint":
        if first_move:
            return first_move
        if next_hint:
            return next_hint
        if setup_actions:
            return setup_actions[0]
        if ask:
            return ask
        if teaching_points:
            return teaching_points[0]
        return None

    # Instruction: only the first step, with a fixed prefix.
    if target_mode == "instruction" or intent == "instruction":
        if first_move:
            return f"First step: {first_move}"
        if setup_actions:
            return f"First step: {setup_actions[0]}"
        if ask:
            return f"First, identify this: {ask}"
        return None

    # Definition: prefer the summary, then a teaching point, then the ask.
    if target_mode == "definition" or intent == "definition":
        if summary:
            return summary
        if teaching_points:
            return f"Here is the idea in context:\n- {teaching_points[0]}"
        if ask:
            return ask
        return None

    # Walkthrough / step-by-step: an ordered bullet list capped by verbosity.
    if target_mode in {"walkthrough", "step_by_step"} or intent in {"walkthrough", "step_by_step"}:
        # NOTE(review): `lines` is never used in this branch.
        lines: List[str] = []

        sequence: List[str] = []
        if ask:
            sequence.append(f"Identify this first: {ask}")
        sequence.extend(setup_actions)
        sequence.extend(intermediate_steps)

        # first_move goes to the front and next_hint to the end, unless the
        # same text is already present.
        if first_move and first_move not in sequence:
            sequence.insert(0, first_move)

        if next_hint and next_hint not in sequence:
            sequence.append(next_hint)

        if not sequence and summary:
            sequence.append(summary)

        if not sequence and teaching_points:
            sequence.extend(teaching_points[:3])

        if not sequence:
            return None

        # Verbosity bands: <0.25 -> 1 item, <0.6 -> 2, <0.85 -> 4, else 6.
        if verbosity < 0.25:
            shown = sequence[:1]
        elif verbosity < 0.6:
            shown = sequence[:2]
        elif verbosity < 0.85:
            shown = sequence[:4]
        else:
            shown = sequence[:6]

        return "\n".join(f"- {s}" for s in shown)

    # Method / concept / explain: summary plus core steps, with optional
    # extras unlocked by increasing transparency.
    if target_mode in {"method", "concept", "explain"} or intent in {"method", "concept", "explain"}:
        lines: List[str] = []

        if summary:
            lines.append(summary)

        if ask:
            lines.append(f"Start by identifying: {ask}")

        core_steps: List[str] = []
        if first_move:
            core_steps.append(first_move)
        core_steps.extend(setup_actions[:3])

        if transparency >= 0.45:
            core_steps.extend(intermediate_steps[:2])

        if core_steps:
            # 1 step below 0.35 verbosity, 3 below 0.75, otherwise 5.
            lines.extend(core_steps[:1] if verbosity < 0.35 else core_steps[:3] if verbosity < 0.75 else core_steps[:5])

        if transparency >= 0.55 and variables_to_define:
            lines.append(f"Useful variable setup: {variables_to_define[0]}")

        if transparency >= 0.6 and equations_to_form:
            lines.append(f"Key equation: {equations_to_form[0]}")

        if transparency >= 0.65 and next_hint:
            lines.append(f"Next idea: {next_hint}")

        if (transparency >= 0.75 or verbosity >= 0.75) and common_traps:
            lines.append(f"Watch out for: {common_traps[0]}")

        if not lines and teaching_points:
            lines.extend(teaching_points[:2])

        if not lines:
            return None

        # A single line is returned bare; multiple lines are bulleted unless
        # they already start with "- ".
        return "\n".join(f"- {s}" if not s.startswith("- ") and len(lines) > 1 else s for s in lines)

    # generic fallback
    if first_move:
        return first_move
    if setup_actions:
        return setup_actions[0]
    if summary:
        return summary
    if teaching_points:
        return teaching_points[0]

    return None
371
 
 
 
 
 
372
 
373
def _compose_reply(
    result: SolverResult,
    intent: str,
    verbosity: float,
    category: Optional[str] = None,
) -> str:
    """Compose the user-facing reply for a solver result.

    Steps are first filtered through ``_safe_steps``. A scaffold-based reply
    is built up front from ``result.meta`` and used by several branches; the
    intent then selects how much of the steps/scaffold is shown.

    Args:
        result: Solver result; ``steps``, ``topic``, ``meta`` and
            ``help_mode`` are read from it.
        intent: Detected intent label.
        verbosity: Controls how many steps are shown.
        category: Optional category, used only for the final
            "need more information" wording.

    Returns:
        The reply text (never ``None``).
    """
    steps = _safe_steps(result.steps or [])
    topic = (result.topic or "").lower().strip()
    meta = result.meta or {}

    # Transparency is fixed at 0.5 here; non-dict meta yields empty inputs.
    scaffold_reply = _build_scaffold_reply(
        intent=intent,
        help_mode=result.help_mode,
        scaffold=meta.get("scaffold", {}) if isinstance(meta, dict) else {},
        summary=_safe_meta_text(meta.get("explainer_summary")) if isinstance(meta, dict) else None,
        teaching_points=_safe_meta_list(meta.get("explainer_teaching_points", [])) if isinstance(meta, dict) else [],
        verbosity=verbosity,
        transparency=0.5,
    )

    def topic_hint_fallback() -> str:
        # One-line hint chosen purely from the topic label.
        if topic == "algebra":
            return "Solve for the variable."
        if topic == "percent":
            return "Find the percent relationship first."
        if topic == "ratio":
            return "Set up the ratio relationship."
        if topic == "probability":
            return "Identify the total possible outcomes first."
        if topic == "statistics":
            return "Work out which measure the question is asking for."
        if topic == "geometry":
            return "Focus on the figure relationships first."
        if topic == "number_theory":
            return "Use the number properties in the question."
        return "Focus on the main relationship first."

    def topic_method_fallback() -> str:
        # Prefer the scaffold reply; otherwise a canned two-bullet method
        # chosen from the topic label.
        if scaffold_reply:
            return scaffold_reply

        if topic == "algebra":
            return "\n".join([
                "- Treat it as an equation.",
                "- Undo operations on both sides to isolate the variable.",
            ])
        if topic == "percent":
            return "\n".join([
                "- Identify whether you need the part, the whole, or the percent.",
                "- Then set up the percent relationship carefully.",
            ])
        if topic == "ratio":
            return "\n".join([
                "- Identify which quantities are being compared.",
                "- Keep the ratio in the correct order throughout.",
            ])
        if topic == "probability":
            return "\n".join([
                "- Identify what counts as a successful outcome.",
                "- Then compare favorable outcomes to total possible outcomes.",
            ])
        if topic == "statistics":
            return "\n".join([
                "- Identify which statistic the question is asking for.",
                "- Then use the relevant values only.",
            ])
        if topic == "geometry":
            return "\n".join([
                "- Identify the relevant shape properties.",
                "- Then use the relationships given in the diagram or wording.",
            ])
        if topic == "number_theory":
            return "\n".join([
                "- Identify the relevant number property.",
                "- Then apply the divisibility or factor rule carefully.",
            ])
        return "I can explain the method, but I do not have enough structured steps yet."

    if intent == "hint":
        if scaffold_reply:
            return scaffold_reply

        if steps:
            # Map the first step to a generic hint instead of quoting it.
            first = steps[0].lower()

            if "equation" in first or "=" in first:
                return "Treat it as an equation."
            if "isolate" in first or "variable" in first or "solve" in first:
                return "Solve for the variable."
            if "percent" in first:
                return "Find the percent relationship first."
            if "ratio" in first:
                return "Set up the ratio relationship."
            if "probability" in first:
                return "Identify the total possible outcomes first."

        return topic_hint_fallback()

    if intent == "instruction":
        if scaffold_reply:
            return scaffold_reply
        if steps:
            return f"First step: {steps[0]}"
        return "First, identify the key relationship or comparison in the question."

    if intent == "definition":
        if scaffold_reply:
            return scaffold_reply
        if steps:
            return f"Here is the idea in context:\n- {steps[0]}"
        return "This is asking for the meaning of the term or idea in the question."

    if intent in {"walkthrough", "step_by_step", "explain", "method", "concept"}:
        if not steps:
            return topic_method_fallback()

        # Boilerplate steps are dropped when there is more than one step.
        generic_lines = {
            "solve for the variable.",
            "treat it as an equation.",
            "identify the quantity the question wants.",
            "focus on the relationship in the question.",
        }

        meaningful_steps = []
        for s in steps:
            clean = (s or "").strip()
            if not clean:
                continue
            if clean.lower() in generic_lines and len(steps) > 1:
                continue
            meaningful_steps.append(clean)

        # If everything was filtered out, fall back to the unfiltered steps.
        if not meaningful_steps:
            meaningful_steps = steps

        # Verbosity bands: <0.25 -> 1 step, <0.6 -> 2, <0.85 -> 3, else all.
        if verbosity < 0.25:
            shown_steps = meaningful_steps[:1]
        elif verbosity < 0.6:
            shown_steps = meaningful_steps[:2]
        elif verbosity < 0.85:
            shown_steps = meaningful_steps[:3]
        else:
            shown_steps = meaningful_steps

        return "\n".join(f"- {s}" for s in shown_steps)

    # Any other intent: show at most two steps.
    if steps:
        if verbosity < 0.35:
            shown_steps = steps[:1]
        else:
            shown_steps = steps[:2]

        if len(shown_steps) == 1:
            return shown_steps[0]

        return "\n".join(f"- {s}" for s in shown_steps)

    if scaffold_reply:
        return scaffold_reply

    # Nothing to show: ask for more input, worded per category.
    if normalize_category(category) == "Verbal":
        return "I can help analyse the wording or logic, but I need the full question text to guide you properly."

    if normalize_category(category) == "DataInsight":
        return "I can help reason through the data, but I need the full question or chart details to guide you properly."

    return "I can help with this, but I need the full question text to guide you properly."
540
-
541
-
542
def is_explainer_request(text: str) -> bool:
    """Return True when *text* contains a phrase asking for an explanation.

    Matching is a case-insensitive substring test against a fixed phrase list.
    """
    message = (text or "").strip().lower()

    signal_phrases = (
        "how do i solve",
        "how to solve",
        "explain this",
        "walk me through",
        "walkthrough",
        "show me how",
        "what is the method",
        "how would you do this",
        "help me understand",
        "what's the approach",
        "what is the approach",
        "how should i think about this",
        "what is this asking",
        "how do i approach this",
        "can you explain",
        "explain how",
        "explain why",
        "break this down",
        "question breakdown",
        "what should i identify first",
        "what do i do first",
        "what is the first move",
    )

    for phrase in signal_phrases:
        if phrase in message:
            return True
    return False
571
-
572
-
573
- def _infer_structure_terms(question_text: str, topic: Optional[str], question_type: Optional[str]) -> List[str]:
574
- terms: List[str] = []
575
-
576
- if topic and topic in STRUCTURE_KEYWORDS:
577
- terms.extend(STRUCTURE_KEYWORDS[topic])
578
-
579
- if question_type:
580
- terms.extend(question_type.replace("_", " ").split())
581
-
582
- q = (question_text or "").lower()
583
- if "=" in q:
584
- terms.extend(["equation", "solve"])
585
- if "x" in q or "y" in q:
586
- terms.extend(["variable", "isolate"])
587
- if "/" in q or "divide" in q:
588
- terms.extend(["divide", "undo operations"])
589
- if "*" in q or "times" in q or "multiply" in q:
590
- terms.extend(["multiply", "undo operations"])
591
- if "%" in q or "percent" in q:
592
- terms.extend(["percent", "percentage"])
593
- if "ratio" in q or re.search(r"\b\d+\s*:\s*\d+\b", q):
594
- terms.extend(["ratio", "proportion"])
595
- if "mean" in q or "average" in q:
596
- terms.extend(["mean", "average"])
597
- if "median" in q:
598
- terms.extend(["median"])
599
- if "probability" in q or "odds" in q or "chance" in q:
600
- terms.extend(["probability", "outcome", "event"])
601
- if "remainder" in q or "divisible" in q:
602
- terms.extend(["remainder", "divisible"])
603
-
604
- return list(dict.fromkeys(terms))
605
-
606
-
607
- def _infer_mismatch_terms(topic: Optional[str], question_text: str) -> List[str]:
608
- if not topic or topic not in MISMATCH_TERMS:
609
- return []
610
- q = (question_text or "").lower()
611
- return [term for term in MISMATCH_TERMS[topic] if term not in q]
612
-
613
 
614
def _intent_keywords(intent: str) -> List[str]:
    """Look up the phrases registered for *intent* in ``INTENT_KEYWORDS`` (``[]`` if none)."""
    try:
        return INTENT_KEYWORDS[intent]
    except KeyError:
        return []
 
 
 
616
 
 
 
617
 
618
- def _is_direct_solve_request(text: str, intent: str) -> bool:
619
- if intent == "answer":
620
- return True
621
-
622
- t = _normalize_text(text)
623
- if any(re.search(p, t) for p in DIRECT_SOLVE_PATTERNS):
624
- if not any(word in t for word in ["how", "explain", "why", "method", "hint", "define", "definition", "step"]):
625
- return True
626
- return False
627
-
628
-
629
def should_retrieve(
    intent: str,
    solved: bool,
    raw_user_text: str,
    category: Optional[str] = None,
    domain: Optional[str] = None,
    topic: Optional[str] = None,
) -> bool:
    """Decide whether teaching material should be retrieved for this turn.

    Rules, in order:
      * "hint" never retrieves.
      * In the "quant" domain, only teaching intents retrieve, and only when
        the topic is specific (not empty/general/unknown/general_quant).
      * Outside quant, teaching, definition and instruction intents retrieve.
      * Otherwise retrieve only for unsolved Verbal/DataInsight questions.
    """
    category_name = normalize_category(category)
    domain_key = (domain or "").strip().lower()
    topic_key = (topic or "").strip().lower()

    teaching_intents = {"walkthrough", "step_by_step", "method", "explain", "concept"}

    if intent == "hint":
        return False

    if domain_key == "quant":
        if intent not in teaching_intents:
            return False
        return topic_key not in {"", "general", "unknown", "general_quant"}

    if intent in teaching_intents or intent in {"definition", "instruction"}:
        return True

    if _is_direct_solve_request(raw_user_text, intent):
        return (not solved) and category_name in {"Verbal", "DataInsight"}

    if solved:
        return False
    return category_name in {"Verbal", "DataInsight"}
659
-
660
-
661
def _score_chunk(
    chunk: RetrievedChunk,
    intent: str,
    topic: Optional[str],
    question_text: str,
    question_type: Optional[str] = None,
) -> float:
    """Score how relevant a retrieved chunk is to the current question.

    The chunk's topic and text are joined and lower-cased, then:
      * +4.0 if the chunk topic equals *topic*, else +2.0 if *topic* appears
        anywhere in the joined text;
      * +1.5 per structure term present;
      * +1.2 per intent keyword present;
      * +0.4 per question keyword present, capped at 3.0;
      * -2.5 per mismatch term present.
    """
    haystack = f"{chunk.topic} {chunk.text}".lower()
    total = 0.0

    if topic:
        wanted = topic.lower()
        own_topic = (chunk.topic or "").lower()
        if own_topic == wanted:
            total += 4.0
        elif wanted in haystack:
            total += 2.0

    weighted_terms = (
        (1.5, _infer_structure_terms(question_text, topic, question_type)),
        (1.2, _intent_keywords(intent)),
    )
    for bonus, terms in weighted_terms:
        for term in terms:
            if term.lower() in haystack:
                total += bonus

    shared = [kw for kw in _extract_keywords(question_text) if kw in haystack]
    total += min(len(shared) * 0.4, 3.0)

    for off_topic in _infer_mismatch_terms(topic, question_text):
        if off_topic.lower() in haystack:
            total -= 2.5

    return total
694
-
695
-
696
- def _filter_retrieved_chunks(
697
- chunks: List[RetrievedChunk],
698
- intent: str,
699
- topic: Optional[str],
700
- question_text: str,
701
- question_type: Optional[str] = None,
702
- min_score: float = 3.2,
703
- max_chunks: int = 3,
704
- ) -> List[RetrievedChunk]:
705
- scored: List[tuple[float, RetrievedChunk]] = []
706
- normalized_topic = (topic or "").lower()
707
-
708
- for chunk in chunks:
709
- chunk_topic = (chunk.topic or "").lower()
710
-
711
- if normalized_topic and normalized_topic not in {"general", "unknown", "general_quant"}:
712
- if chunk_topic == "general":
713
- continue
714
-
715
- s = _score_chunk(chunk, intent, topic, question_text, question_type)
716
- if s >= min_score:
717
- scored.append((s, chunk))
718
-
719
- scored.sort(key=lambda x: x[0], reverse=True)
720
- filtered = [chunk for _, chunk in scored[:max_chunks]]
721
- if filtered:
722
- return filtered
723
-
724
- fallback: List[tuple[float, RetrievedChunk]] = []
725
- for chunk in chunks:
726
- s = _score_chunk(chunk, intent, topic, question_text, question_type)
727
- if s >= 2.0:
728
- fallback.append((s, chunk))
729
-
730
- fallback.sort(key=lambda x: x[0], reverse=True)
731
- return [chunk for _, chunk in fallback[:max_chunks]]
732
-
733
-
734
def _build_retrieval_query(
    raw_user_text: str,
    question_text: str,
    intent: str,
    topic: Optional[str],
    solved: bool,
    question_type: Optional[str] = None,
    category: Optional[str] = None,
) -> str:
    """Assemble the search query sent to the retrieval engine.

    The query starts with the question text; if that is empty, the raw user
    text is used with one leading request wrapper (e.g. "how do i solve")
    removed. The normalised category (unless "General"), topic, question type
    and an intent-specific phrase are then appended, separated by spaces.
    """
    raw = (raw_user_text or "").strip()
    question = (question_text or "").strip()
    parts: List[str] = []

    if question:
        parts.append(question)
    elif raw:
        lowered = raw.lower()
        wrappers = (
            "how do i solve",
            "how to solve",
            "solve",
            "can you solve",
            "walk me through",
            "explain",
            "help me solve",
            "show me how to solve",
        )
        # Only the first matching wrapper is stripped.
        matched = next((w for w in wrappers if lowered.startswith(w)), None)
        cleaned = raw if matched is None else raw[len(matched):].strip(" :.-?")
        parts.append(cleaned if cleaned else raw)

    category_name = normalize_category(category)
    if category_name and category_name != "General":
        parts.append(category_name)

    if topic:
        parts.append(topic)

    if question_type:
        parts.append(question_type.replace("_", " "))

    if intent in {"definition", "concept"}:
        intent_phrase = "definition concept explanation"
    elif intent in {"walkthrough", "step_by_step", "method", "instruction"}:
        intent_phrase = "equation solving method isolate variable worked example"
    elif intent == "hint":
        intent_phrase = "equation solving hint first step isolate variable"
    elif intent == "explain":
        intent_phrase = "equation solving explanation reasoning"
    elif not solved:
        intent_phrase = "teaching explanation method"
    else:
        intent_phrase = None

    if intent_phrase is not None:
        parts.append(intent_phrase)

    return " ".join(parts).strip()
797
-
798
-
799
def _fallback_more_info_reply(
    category: Optional[str],
    topic: Optional[str],
    intent: str,
) -> str:
    """Return a "please send the full question" message suited to the category.

    Quantitative questions (by category or by topic) get a step-by-step
    wording for guidance intents and a plain wording otherwise; DataInsight
    and Verbal have their own wording; anything else gets a generic request.
    """
    category_name = normalize_category(category)
    quant_topics = {
        "algebra", "percent", "ratio", "probability", "number_theory", "geometry", "statistics", "quant"
    }
    guided_intents = {"walkthrough", "step_by_step", "method", "explain", "hint", "instruction"}

    if category_name == "Quantitative" or topic in quant_topics:
        paste_request = "Please paste the complete problem, and include the answer choices if there are any."
        if intent in guided_intents:
            return "I need the full question wording to guide this properly step by step. " + paste_request
        return "I need the full question wording to help properly. " + paste_request

    if category_name == "DataInsight":
        return (
            "I need the full chart, table, or question wording to help properly. "
            "Please send the complete prompt and any answer choices."
        )

    if category_name == "Verbal":
        return (
            "I need the full passage, sentence, or question wording to help properly. "
            "Please paste the complete text and any answer choices."
        )

    return (
        "I need a bit more information to help properly. "
        "Please send the full question or exact wording."
    )
835
 
 
 
 
836
 
837
- def _is_bad_generated_reply(text: str, user_text: str = "") -> bool:
838
- t = (text or "").strip()
839
- tl = t.lower()
840
- ul = (user_text or "").strip().lower()
841
-
842
- if not t:
843
- return True
844
-
845
- if len(t) < 12:
846
- return True
847
-
848
- bad_exact = {
849
- "0",
850
- "formula",
851
- "formula formula",
852
- "the answer",
853
- "answer only",
854
- "unknown",
855
- "none",
856
- "n/a",
857
- }
858
- if tl in bad_exact:
859
- return True
860
-
861
- bad_substrings = [
862
- "if the problem is not fully solvable",
863
- "if the problem is not fully solvable from the parse",
864
- "give the test a chance to solve it",
865
- "use the formula formula",
866
- "cannot parse alone yet",
867
- "i cannot parse",
868
- "current parse alone",
869
- "from the parse alone",
870
- ]
871
- if any(b in tl for b in bad_substrings):
872
- return True
873
-
874
- banned_answer_patterns = [
875
- r"\bthe answer is\b",
876
- r"\banswer:\b",
877
- r"\bx\s*=",
878
- r"\by\s*=",
879
- r"\btherefore\b",
880
- r"\bthat gives\b",
881
- r"\bresult is\b",
882
- ]
883
- if any(re.search(p, tl) for p in banned_answer_patterns):
884
- return True
885
-
886
- words = re.findall(r"\b\w+\b", tl)
887
- if len(words) >= 4:
888
- unique_ratio = len(set(words)) / max(1, len(words))
889
- if unique_ratio < 0.45:
890
- return True
891
-
892
- user_keywords = _extract_keywords(ul)
893
- gen_keywords = _extract_keywords(tl)
894
- if user_keywords and gen_keywords:
895
- overlap = user_keywords.intersection(gen_keywords)
896
- if len(overlap) == 0 and len(t) < 180:
897
- return True
898
-
899
- nonsense_patterns = [
900
- r"\bformula\s+formula\b",
901
- r"\btest\s+a\s+chance\s+to\s+solve\b",
902
- r"^[\W_]*\d+[\W_]*$",
903
- ]
904
- if any(re.search(p, tl) for p in nonsense_patterns):
905
- return True
906
-
907
- return False
908
-
909
-
910
def _clean_teaching_text(text: str) -> str:
    """Flatten a teaching snippet to one tidy line of at most 160 characters.

    Newlines become spaces, spacing is normalised via ``normalize_spaces``,
    leading bullet/numbering characters are removed, and text longer than
    160 characters is cut to 157 characters plus "...".

    NOTE(review): ``_looks_question_specific`` rejects any text containing
    "...", so lines truncated here appear to be discarded when both are used
    together — confirm whether that is intended.
    """
    flattened = (text or "").replace("\n", " ").strip()
    cleaned = re.sub(r"^[\-\•\*\d\.\)\s]+", "", normalize_spaces(flattened))
    if len(cleaned) <= 160:
        return cleaned
    return cleaned[:157].rstrip() + "..."
916
-
917
-
918
- def _looks_question_specific(text: str, question_text: str) -> bool:
919
- t = (text or "").strip().lower()
920
- q = (question_text or "").strip().lower()
921
-
922
- if not t:
923
- return True
924
-
925
- banned_phrases = [
926
- "the correct answer",
927
- "answer choice",
928
- "statement 1",
929
- "statement 2",
930
- "option a",
931
- "option b",
932
- "option c",
933
- "option d",
934
- "option e",
935
- "try choice",
936
- "plug in numbers",
937
- "backsolving",
938
- "working backwards",
939
- "chapter",
940
- "note:",
941
- ]
942
- if any(p in t for p in banned_phrases):
943
- return True
944
-
945
- if "gmat" in t[:25]:
946
- return True
947
-
948
- if "..." in t:
949
- return True
950
-
951
- if len(re.findall(r"\d+", t)) >= 3:
952
- q_numbers = set(re.findall(r"\d+", q))
953
- t_numbers = set(re.findall(r"\d+", t))
954
- if t_numbers and t_numbers != q_numbers and len(t_numbers - q_numbers) >= 1:
955
- return True
956
-
957
- q_vars = set(re.findall(r"\b[a-z]\b", q))
958
- t_vars = set(re.findall(r"\b[a-z]\b", t))
959
- allowed_vars = q_vars | {"x", "y"}
960
-
961
- if t_vars and q_vars:
962
- extra_vars = t_vars - allowed_vars
963
- if len(extra_vars) >= 1:
964
- return True
965
- if re.search(r"\bset\s+[a-z]\s+equal\s+to\b", t):
966
- return True
967
-
968
- if re.search(r"\bsolve for [a-z]\b", t) and q_vars:
969
- mentioned = set(re.findall(r"\b[a-z]\b", t))
970
- if mentioned - q_vars:
971
- return True
972
-
973
- if len(t.split()) > 35:
974
- return True
975
-
976
- return False
977
-
978
-
979
def _pick_teaching_line(
    chunks: List[RetrievedChunk],
    current_reply: str,
    question_text: str,
    topic: Optional[str] = None,
) -> Optional[str]:
    """Choose the single retrieved line most worth appending to the reply.

    Each chunk's text is cleaned with ``_clean_teaching_text`` and dropped if
    ``_looks_question_specific`` flags it. Survivors are scored: +3 for an
    exact topic match (or +2 when the requested topic is a substring of the
    chunk topic), +1 per topic phrase present, +1.2 per keyword not already in
    the reply, -0.8 per keyword that repeats the reply, and -2 for lines
    shorter than five words. The first highest-scoring line is returned,
    provided its score is at least 2.5; otherwise ``None``.

    NOTE(review): ``_clean_teaching_text`` appends "..." when it truncates and
    ``_looks_question_specific`` rejects any text containing "...", so chunks
    longer than 160 characters can never be selected — confirm this is
    intended.
    """
    if not chunks:
        return None

    already_said = _extract_keywords(current_reply)
    wanted_topic = (topic or "").lower().strip()

    topic_phrases = {
        "algebra": ["equation", "isolate", "variable", "undo operations", "inverse operation"],
        "percent": ["percent", "percentage", "base", "rate", "original value"],
        "ratio": ["ratio", "proportion", "part", "share"],
        "probability": ["probability", "outcome", "event", "sample space"],
        "statistics": ["mean", "median", "average", "distribution"],
        "geometry": ["angle", "triangle", "circle", "area", "perimeter"],
        "number_theory": ["integer", "divisible", "remainder", "factor", "multiple", "prime"],
    }
    wanted_phrases = topic_phrases.get(wanted_topic, [])

    winner: Optional[str] = None
    winner_score = float("-inf")

    for chunk in chunks:
        raw = (chunk.text or "").strip()
        candidate = _clean_teaching_text(raw) if raw else ""
        if not candidate:
            continue

        lowered = candidate.lower()
        source_topic = (chunk.topic or "").lower().strip()

        if _looks_question_specific(lowered, question_text):
            continue

        keywords = _extract_keywords(lowered)
        fresh = len(keywords - already_said)
        repeated = len(keywords & already_said)

        if not wanted_topic:
            topic_bonus = 0.0
        elif source_topic == wanted_topic:
            topic_bonus = 3.0
        elif wanted_topic in source_topic:
            topic_bonus = 2.0
        else:
            topic_bonus = 0.0

        phrase_bonus = float(sum(1 for phrase in wanted_phrases if phrase in lowered))

        score = topic_bonus + phrase_bonus + 1.2 * fresh - 0.8 * repeated
        if len(candidate.split()) < 5:
            # Very short fragments rarely carry a usable idea.
            score -= 2.0

        if score > winner_score:
            winner_score = score
            winner = candidate

    return winner if winner_score >= 2.5 else None
1052
-
1053
-
1054
class ConversationEngine:
    """Builds a tutoring reply for one user turn.

    Holds an optional retriever and an optional generator; both may be
    ``None``, in which case the corresponding stages are skipped.
    """

    def __init__(
        self,
        retriever: Optional[RetrievalEngine] = None,
        generator: Optional[GeneratorEngine] = None,
        **kwargs,
    ) -> None:
        # Extra keyword arguments are accepted and ignored.
        self.retriever = retriever
        self.generator = generator

    def generate_response(
        self,
        raw_user_text: Optional[str] = None,
        tone: float = 0.5,
        verbosity: float = 0.5,
        transparency: float = 0.5,
        intent: Optional[str] = None,
        help_mode: Optional[str] = None,
        retrieval_context: Optional[List[RetrievedChunk]] = None,
        chat_history: Optional[List[Dict[str, Any]]] = None,
        question_text: Optional[str] = None,
        options_text: Optional[List[str]] = None,
        **kwargs,
    ) -> SolverResult:
        """Produce a ``SolverResult`` whose ``reply`` holds the formatted response.

        Stages, in order: classify the question, try the explainer (returning
        immediately if it understood the input), run the quant solver, compose
        a base reply, optionally enrich it with a retrieved teaching line,
        optionally replace it with generator output for non-quant questions,
        fall back to a "more info" reply, and format the text.

        ``answer_letter``, ``answer_value`` and ``internal_answer`` are set to
        ``None`` on every returned result. ``kwargs`` is only read for the
        optional ``"category"`` key.
        """
        # The solver works on the question text when given, else the raw message.
        solver_input = (question_text or raw_user_text or "").strip()
        user_text = (raw_user_text or "").strip()

        reply: Optional[str] = None
        selected_chunks: List[RetrievedChunk] = []

        category = normalize_category(kwargs.get("category"))
        classification = classify_question(question_text=solver_input, category=category)
        inferred_category = normalize_category(classification.get("category") or category)

        question_topic = _normalize_classified_topic(
            classification.get("topic"),
            inferred_category,
            solver_input,
        )
        question_type = classification.get("type")

        # Explicit arguments win over values detected from the text.
        resolved_intent = intent or detect_intent(user_text, help_mode)
        resolved_help_mode = help_mode or intent_to_help_mode(resolved_intent)

        is_quant = inferred_category == "Quantitative" or is_quant_question(solver_input)

        # Default "unsolved" result, replaced below if the solver returns one.
        result = SolverResult(
            domain="quant" if is_quant else "general",
            solved=False,
            help_mode=resolved_help_mode,
            answer_letter=None,
            answer_value=None,
            topic=question_topic,
            used_retrieval=False,
            used_generator=False,
            internal_answer=None,
            steps=[],
            teaching_chunks=[],
            meta={},
        )

        # 1. Try explainer early so scaffold is available even when solver is weak
        explainer_result = route_explainer(solver_input)
        explainer_understood = bool(explainer_result is not None and getattr(explainer_result, "understood", False))
        explainer_scaffold = _extract_explainer_scaffold(explainer_result) if explainer_understood else {}
        explainer_summary = getattr(explainer_result, "summary", None) if explainer_understood else None
        explainer_teaching_points = _safe_meta_list(
            getattr(explainer_result, "teaching_points", [])
        ) if explainer_understood else []

        # 1a. Explicit explainer request returns scaffold-rich explainer response
        # NOTE(review): this condition is the same as `explainer_understood` and
        # does not look at the intent, so the function returns here whenever the
        # explainer understood the input; every later `explainer_understood`
        # branch is then unreachable. Confirm whether an intent check is missing.
        if explainer_result is not None and getattr(explainer_result, "understood", False):
            reply = format_explainer_response(
                result=explainer_result,
                tone=tone,
                verbosity=verbosity,
                transparency=transparency,
            )

            result.domain = "quant" if inferred_category == "Quantitative" or is_quant_question(solver_input) else "general"
            result.solved = False
            # NOTE(review): detect_help_mode is not among the imports visible at
            # the top of the file — confirm it is defined in this module.
            result.help_mode = detect_help_mode(user_text or solver_input)
            result.topic = explainer_result.topic
            result.answer_letter = None
            result.answer_value = None
            result.internal_answer = None
            result.reply = reply
            result.meta = {
                "intent": "explain_question",
                "question_text": solver_input,
                "used_explainer": True,
            }
            return result

        # 2. normal solver path
        # NOTE(review): nesting below was reconstructed from a listing with
        # collapsed indentation; the statements up to `result.domain = "quant"`
        # are assumed to belong to `if is_quant:` — confirm against the file.
        if is_quant:
            solved_result = route_solver(solver_input)

            if solved_result is not None:
                result = solved_result

            result.help_mode = resolved_help_mode

            # Replace a missing or generic solver topic with a more specific one.
            if not result.topic or result.topic in {"general_quant", "general", "unknown"}:
                result.topic = getattr(explainer_result, "topic", None) if explainer_understood else question_topic

            result.domain = "quant"

        # 2a. Attach explainer scaffold into result meta so generic paths can use it
        if result.meta is None:
            result.meta = {}

        if explainer_understood:
            result.meta["explainer_used"] = True
            result.meta["bridge_ready"] = bool(getattr(explainer_result, "meta", {}).get("bridge_ready", False))
            result.meta["hint_style"] = getattr(explainer_result, "meta", {}).get("hint_style")
            result.meta["explainer_summary"] = explainer_summary
            result.meta["explainer_teaching_points"] = explainer_teaching_points
            result.meta["scaffold"] = explainer_scaffold

        # 3. compose base reply
        reply = _compose_reply(
            result=result,
            intent=resolved_intent,
            verbosity=verbosity,
            category=inferred_category,
        )

        # 4. optional retrieval
        allow_retrieval = should_retrieve(
            intent=resolved_intent,
            solved=bool(result.solved),
            raw_user_text=user_text or solver_input,
            category=inferred_category,
            domain=result.domain,
            topic=result.topic,
        )

        # Only short replies (under 220 characters) are enriched with retrieval.
        if allow_retrieval and reply and len(reply) < 220:
            if retrieval_context:
                # Caller-supplied chunks take precedence over a fresh search.
                filtered = _filter_retrieved_chunks(
                    chunks=retrieval_context,
                    intent=resolved_intent,
                    topic=result.topic,
                    question_text=solver_input,
                    question_type=question_type,
                )
                if filtered:
                    selected_chunks = filtered
                    result.used_retrieval = True
                    result.teaching_chunks = filtered

            elif self.retriever is not None:
                retrieved = self.retriever.search(
                    query=_build_retrieval_query(
                        raw_user_text=user_text,
                        question_text=solver_input,
                        intent=resolved_intent,
                        topic=result.topic,
                        solved=bool(result.solved),
                        question_type=question_type,
                        category=inferred_category,
                    ),
                    topic=result.topic or "",
                    intent=resolved_intent,
                    k=6,
                )
                filtered = _filter_retrieved_chunks(
                    chunks=retrieved,
                    intent=resolved_intent,
                    topic=result.topic,
                    question_text=solver_input,
                    question_type=question_type,
                )
                if filtered:
                    selected_chunks = filtered
                    result.used_retrieval = True
                    result.teaching_chunks = filtered

            # A teaching line is appended only for the explanatory help modes.
            if selected_chunks and resolved_help_mode in {"walkthrough", "step_by_step", "method", "explain", "concept"}:
                teaching_line = _pick_teaching_line(
                    chunks=selected_chunks,
                    current_reply=reply,
                    question_text=solver_input,
                    topic=result.topic,
                )
                if teaching_line:
                    reply = f"{reply}\n\nKey idea: {teaching_line}"

        # 5. generator only for non-quant
        should_try_generator = (
            self.generator is not None
            and not result.solved
            and resolved_help_mode not in {"hint", "instruction"}
            and result.domain != "quant"
        )

        if should_try_generator:
            try:
                generated = self.generator.generate(
                    user_text=user_text or solver_input,
                    question_text=solver_input,
                    topic=result.topic or "",
                    intent=resolved_intent,
                    retrieval_context=selected_chunks,
                    chat_history=chat_history or [],
                )

                if generated and generated.strip():
                    candidate = generated.strip()

                    # Generated text replaces the composed reply only if it passes the quality filter.
                    if not _is_bad_generated_reply(candidate, user_text or solver_input):
                        reply = candidate
                        result.used_generator = True
                    else:
                        reply = _fallback_more_info_reply(
                            category=inferred_category,
                            topic=result.topic,
                            intent=resolved_intent,
                        )
                else:
                    reply = _fallback_more_info_reply(
                        category=inferred_category,
                        topic=result.topic,
                        intent=resolved_intent,
                    )

            except Exception:
                # Any generator failure is swallowed and replaced by the fallback reply.
                reply = _fallback_more_info_reply(
                    category=inferred_category,
                    topic=result.topic,
                    intent=resolved_intent,
                )

        # 6. final fallback
        if not reply:
            reply = _fallback_more_info_reply(
                category=inferred_category,
                topic=result.topic,
                intent=resolved_intent,
            )

        reply = format_reply(reply, tone, verbosity, transparency, resolved_help_mode)

        # Answer fields are cleared on the returned result even if the solver set them.
        result.answer_letter = None
        result.answer_value = None
        result.internal_answer = None
        result.reply = reply
        result.help_mode = resolved_help_mode

        # Merge request/classification details over whatever meta the solver produced.
        final_meta = dict(result.meta or {})
        final_meta.update({
            "intent": resolved_intent,
            "question_text": question_text or "",
            "options_count": len(options_text or []),
            "category": inferred_category,
            "question_type": question_type,
            "classified_topic": question_topic,
        })
        result.meta = final_meta

        return result
 
1
  from __future__ import annotations
2
 
3
  import re
4
+ from typing import Any, List, Optional
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
 
 
6
 
7
def style_prefix(tone: float) -> str:
    """Return the opening sentence that matches *tone*.

    Tones below 0.2 get no opener; the opener becomes progressively warmer at
    the 0.2, 0.45 and 0.75 boundaries.
    """
    # (exclusive upper bound, opener) pairs, checked from the coolest tone up.
    tiers = (
        (0.2, ""),
        (0.45, "Let’s solve it efficiently."),
        (0.75, "Let’s work through it."),
    )
    for upper_bound, opener in tiers:
        if tone < upper_bound:
            return opener
    return "You’ve got this — let’s solve it cleanly."
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ def _normalize_key(text: str) -> str:
18
+ text = (text or "").strip().lower()
19
+ text = text.replace("’", "'")
20
+ text = re.sub(r"\s+", " ", text)
21
+ return text
22
 
23
 
24
+ def _clean_lines(core: str) -> list[str]:
25
+ lines = []
26
+ for line in (core or "").splitlines():
27
+ cleaned = line.strip()
28
+ if cleaned:
29
+ lines.append(cleaned)
 
 
30
  return lines
31
 
32
 
33
+ def _dedupe_lines(lines: list[str]) -> list[str]:
34
+ seen = set()
35
+ output = []
36
+ for line in lines:
37
+ key = _normalize_key(line)
38
+ if key and key not in seen:
39
+ seen.add(key)
40
+ output.append(line.strip())
41
+ return output
 
 
 
 
42
 
 
 
 
 
 
 
43
 
44
+ def _coerce_string(value: Any) -> str:
45
+ return (value or "").strip() if isinstance(value, str) else ""
46
 
47
 
48
+ def _coerce_list(value: Any) -> List[str]:
49
+ if not value:
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  return []
51
+ if isinstance(value, list):
52
+ return [str(v).strip() for v in value if str(v).strip()]
53
+ if isinstance(value, tuple):
54
+ return [str(v).strip() for v in value if str(v).strip()]
55
+ if isinstance(value, str):
56
+ text = value.strip()
57
  return [text] if text else []
58
  return []
59
 
60
 
61
+ def _safe_append(lines: List[str], value: str) -> None:
62
+ text = (value or "").strip()
63
+ if text:
64
+ lines.append(text)
 
65
 
66
 
67
+ def _limit_steps(steps: List[str], verbosity: float, minimum: int = 1) -> List[str]:
68
+ if not steps:
69
+ return []
70
+ if verbosity < 0.25:
71
+ limit = minimum
72
+ elif verbosity < 0.5:
73
+ limit = max(minimum, 2)
74
+ elif verbosity < 0.75:
75
+ limit = max(minimum, 3)
76
+ else:
77
+ limit = max(minimum, 5)
78
+ return steps[:limit]
79
+
80
+
81
+ def _why_line(topic: str) -> str:
82
+ topic = (topic or "").lower()
83
+
84
+ if topic == "algebra":
85
+ return "Why: algebra works by keeping the relationship balanced while undoing the operations attached to the variable."
86
+ if topic == "percent":
87
+ return "Why: percent questions depend on choosing the correct base before doing any calculation."
88
+ if topic == "ratio":
89
+ return "Why: ratio questions depend on preserving the comparison and using one shared scale factor."
90
+ if topic == "probability":
91
+ return "Why: probability compares successful outcomes to all possible outcomes."
92
+ if topic == "statistics":
93
+ return "Why: the right method depends on which summary measure the question actually asks for."
94
+ if topic == "geometry":
95
+ return "Why: geometry depends on the relationships between the parts of the figure."
96
+ if topic == "number_theory":
97
+ return "Why: number properties follow fixed rules about divisibility, factors, and remainders."
98
+ return "Why: start with the structure of the problem before calculating."
99
+
100
+
101
+ def _extract_topic_from_text(text: str, fallback: Optional[str] = None) -> str:
102
+ low = (text or "").lower()
103
+ if fallback:
104
+ return fallback
105
+ if any(word in low for word in ["equation", "variable", "isolate", "algebra"]):
106
+ return "algebra"
107
+ if any(word in low for word in ["percent", "percentage", "%"]):
108
+ return "percent"
109
+ if any(word in low for word in ["ratio", "proportion"]):
110
+ return "ratio"
111
+ if any(word in low for word in ["probability", "outcome", "chance", "odds"]):
112
+ return "probability"
113
+ if any(word in low for word in ["mean", "median", "average"]):
114
+ return "statistics"
115
+ if any(word in low for word in ["triangle", "circle", "angle", "area", "perimeter"]):
116
+ return "geometry"
117
+ if any(word in low for word in ["integer", "factor", "multiple", "prime", "remainder"]):
118
+ return "number_theory"
119
+ return "general"
120
 
121
+
122
def _format_answer_mode(
    lines: List[str],
    topic: str,
    tone: float,
    verbosity: float,
    transparency: float,
) -> str:
    """Render *lines* as a labelled "Answer path" list.

    The tone opener (if any) comes first. At least two steps are requested
    from ``_limit_steps``; the first three get the labels "What to identify",
    "First move" and "Next step", and any further ones "Keep in mind". A
    "Why:" line for *topic* is appended when *transparency* is 0.8 or higher.
    """
    rendered: List[str] = []

    opener = style_prefix(tone)
    if opener:
        rendered.extend([opener, ""])

    steps = _limit_steps(lines, verbosity, minimum=2)
    if steps:
        rendered.append("Answer path:")
        labels = ("What to identify", "First move", "Next step")
        for index, step in enumerate(steps):
            label = labels[index] if index < len(labels) else "Keep in mind"
            rendered.append(f"- {label}: {step}")

    if transparency >= 0.8:
        rendered.extend(["", _why_line(topic)])

    return "\n".join(rendered).strip()
152
+
153
+
154
def format_reply(
    core: str,
    tone: float,
    verbosity: float,
    transparency: float,
    help_mode: str,
    hint_stage: int = 0,
    topic: Optional[str] = None,
) -> str:
    """Format the raw reply text *core* for display.

    *core* is split into trimmed lines and passed through ``_dedupe_lines``.
    The layout depends on *help_mode*: "answer" delegates to
    ``_format_answer_mode``; "hint" shows only the first line under "Hint:";
    the walkthrough, method and concept families show a heading followed by
    bullets; any other mode shows plain bullets. A "Why:" line is appended
    once *transparency* reaches the threshold for that mode. ``hint_stage``
    is accepted but not used here.

    Returns the tone opener, or a generic prompt if there is none, when
    *core* has no usable content.
    """
    empty_reply = "Start with the structure of the problem."
    opener = style_prefix(tone)
    body = (core or "").strip()

    if not body:
        return opener or empty_reply

    unique_lines = _dedupe_lines(_clean_lines(body))
    if not unique_lines:
        return opener or empty_reply

    resolved_topic = _extract_topic_from_text(body, topic)

    if help_mode == "answer":
        return _format_answer_mode(unique_lines, resolved_topic, tone, verbosity, transparency)

    visible = _limit_steps(unique_lines, verbosity, minimum=1)

    # help_mode -> (heading, minimum transparency for the "Why:" line)
    layouts = {
        "hint": ("Hint:", 0.8),
        "instruction": ("First step:", 0.8),
        "step_by_step": ("Walkthrough:", 0.8),
        "walkthrough": ("Walkthrough:", 0.8),
        "method": ("Method:", 0.75),
        "explain": ("Explanation:", 0.75),
        "concept": ("Key idea:", 0.75),
        "definition": ("Key idea:", 0.75),
    }
    heading, why_threshold = layouts.get(help_mode, (None, 0.85))

    if help_mode == "hint":
        # A hint reveals a single step regardless of verbosity.
        visible = visible[:1]

    rendered: List[str] = [opener, ""] if opener else []
    if heading is not None:
        rendered.append(heading)
    rendered.extend(f"- {line}" for line in visible)

    if transparency >= why_threshold:
        rendered.extend(["", _why_line(resolved_topic)])

    return "\n".join(rendered).strip()
226
+
227
+
228
+ def _get_scaffold(result: Any):
229
+ return getattr(result, "scaffold", None)
230
+
231
+
232
def _staged_scaffold_lines(
    result: Any,
    hint_stage: int,
    verbosity: float,
    transparency: float,
) -> List[str]:
    """Build the scaffold section of an explainer reply for a given hint stage.

    The scaffold is read from ``result`` via ``_get_scaffold``; an empty list
    is returned when there is none. *hint_stage* is clamped to 0..3 and each
    stage reveals more:

    * 0 - core idea (only when *transparency* >= 0.75), what to identify, and
      a first move (falling back to the first hint-ladder entry).
    * 1 - what to identify, set-up path, first move, and a next hint (falling
      back to the second hint-ladder entry).
    * 2 - stage 1 content plus intermediate steps and up to two variables and
      equations.
    * 3 - as stage 2 with up to three variables and equations, plus up to
      four common traps.

    Sections are separated by empty strings; each section is a heading
    followed by "- " bullets.
    """
    rendered: List[str] = []
    scaffold = _get_scaffold(result)
    if scaffold is None:
        return rendered

    level = min(max(int(hint_stage), 0), 3)

    def text_field(name: str) -> str:
        return _coerce_string(getattr(scaffold, name, ""))

    def list_field(name: str) -> List[str]:
        return _coerce_list(getattr(scaffold, name, []))

    def add_section(heading: str, bullets: List[str], gap_before: bool = True) -> None:
        if gap_before:
            rendered.append("")
        rendered.append(heading)
        rendered.extend(f"- {bullet}" for bullet in bullets)

    concept = text_field("concept")
    ask = text_field("ask")
    first_move = text_field("first_move")
    next_hint = text_field("next_hint")
    setup_actions = list_field("setup_actions")
    intermediate_steps = list_field("intermediate_steps")
    variables_to_define = list_field("variables_to_define")
    equations_to_form = list_field("equations_to_form")
    common_traps = list_field("common_traps")
    hint_ladder = list_field("hint_ladder")

    if concept and level == 0 and transparency >= 0.75:
        add_section("Core idea:", [concept], gap_before=False)
        rendered.append("")

    if ask:
        add_section("What to identify first:", [ask], gap_before=False)

    if level == 0:
        opening = first_move or (hint_ladder[0] if hint_ladder else "")
        if opening:
            add_section("First move:", [opening])
        return rendered

    # From here on the stage is at least 1, so at least two set-up steps show.
    if setup_actions:
        add_section("Set-up path:", _limit_steps(setup_actions, verbosity, minimum=2))

    if first_move:
        add_section("First move:", [first_move])

    if level == 1:
        follow_up = next_hint or (hint_ladder[1] if len(hint_ladder) >= 2 else "")
        if follow_up:
            add_section("Next hint:", [follow_up])
        return rendered

    if intermediate_steps:
        add_section("How to build it:", _limit_steps(intermediate_steps, verbosity, minimum=2))

    if next_hint:
        add_section("Next hint:", [next_hint])

    # Stage 2 shows two variables/equations, stage 3 shows three.
    cap = 2 if level == 2 else 3
    if variables_to_define:
        add_section("Variables to define:", variables_to_define[:cap])
    if equations_to_form:
        add_section("Equations to form:", equations_to_form[:cap])

    if level == 2:
        return rendered

    if common_traps:
        add_section("Watch out for:", common_traps[:4])

    return rendered
341
+
342
+
343
def format_explainer_response(
    result: Any,
    tone: float,
    verbosity: float,
    transparency: float,
    hint_stage: int = 0,
) -> str:
    """Render an explainer result as a "Question breakdown" reply.

    Returns a request for the full question wording when *result* is missing
    or its ``understood`` attribute is falsy. Otherwise the reply is assembled
    from the tone opener, the result's summary, the staged scaffold lines, the
    teaching points (shown when *verbosity* >= 0.55 or *hint_stage* >= 2) and,
    when *transparency* >= 0.8, a "Why:" line. The assembled lines are passed
    through ``_dedupe_lines`` before being joined with newlines.
    """
    if not result or not getattr(result, "understood", False):
        return "I can help explain what the question is asking, but I need the full wording of the question."

    parts: List[str] = []

    opener = style_prefix(tone)
    if opener:
        parts.extend([opener, ""])

    parts.extend(["Question breakdown:", ""])

    summary = _coerce_string(getattr(result, "summary", ""))
    if summary:
        parts.append(summary)

    staged = _staged_scaffold_lines(
        result=result,
        hint_stage=hint_stage,
        verbosity=verbosity,
        transparency=transparency,
    )
    if staged:
        if summary:
            parts.append("")
        parts.extend(staged)

    teaching_points = _coerce_list(getattr(result, "teaching_points", []))
    if teaching_points and (verbosity >= 0.55 or hint_stage >= 2):
        parts.extend(["", "Key teaching points:"])
        parts.extend(
            f"- {point}" for point in _limit_steps(teaching_points, verbosity, minimum=2)
        )

    # The result's own topic wins; the text is only scanned when it has none.
    topic_source = f"{summary} {' '.join(teaching_points)}"
    topic = _extract_topic_from_text(topic_source, getattr(result, "topic", None))

    if transparency >= 0.8:
        parts.extend(["", _why_line(topic)])

    return "\n".join(_dedupe_lines(parts)).strip()