XQ committed on
Commit
fdc3773
·
1 Parent(s): 9915876

Fix LLM output

Browse files
src/agent/intent_classifier.py CHANGED
@@ -11,7 +11,8 @@ from src.models import IntentType
11
 
12
  logger = logging.getLogger(__name__)
13
 
14
- _THINK_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
 
15
 
16
  _VALID_INTENTS = {intent.value for intent in IntentType}
17
 
@@ -58,7 +59,8 @@ class IntentClassifier:
58
  Returns:
59
  The classified IntentType.
60
  """
61
- raw = _THINK_RE.sub("", self._chain.invoke({"query": query})).strip().lower()
 
62
  logger.debug("Raw classification result: %s", raw)
63
 
64
  if raw in _VALID_INTENTS:
 
11
 
12
  logger = logging.getLogger(__name__)
13
 
14
+ _THINK_CLOSED_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
15
+ _THINK_UNCLOSED_RE = re.compile(r"<think>.*", re.DOTALL)
16
 
17
  _VALID_INTENTS = {intent.value for intent in IntentType}
18
 
 
59
  Returns:
60
  The classified IntentType.
61
  """
62
+ _raw_out = self._chain.invoke({"query": query})
63
+ raw = _THINK_UNCLOSED_RE.sub("", _THINK_CLOSED_RE.sub("", _raw_out)).strip().lower()
64
  logger.debug("Raw classification result: %s", raw)
65
 
66
  if raw in _VALID_INTENTS:
src/agent/plan_and_execute.py CHANGED
@@ -41,6 +41,7 @@ _MAX_STEPS = 6
41
  # ------------------------------------------------------------------
42
 
43
  _PLANNER_PROMPT = (
 
44
  "You are a planning assistant for the University of Copenhagen (KU) document system.\n\n"
45
  "Given a user question, produce a JSON list of 1–4 steps needed to answer it.\n"
46
  "Each step is an object with:\n"
@@ -51,8 +52,9 @@ _PLANNER_PROMPT = (
51
  "- For simple factual questions: 1 search step is enough.\n"
52
  "- For comparison questions: use multi_search or separate search steps.\n"
53
  "- For document overview requests: use summarize.\n"
 
54
  "- Always end with the steps needed; do NOT include a final 'answer' step.\n\n"
55
- "Reply with ONLY the JSON array, nothing else.\n\n"
56
  "Examples:\n"
57
  'Question: "What is the exam policy?"\n'
58
  '[{"action": "search", "detail": "KU eksamensregler"}]\n\n'
@@ -61,6 +63,10 @@ _PLANNER_PROMPT = (
61
  '{"action": "search", "detail": "ferieregler administrativt personale"}]\n\n'
62
  'Question: "Summarize the AI policy document"\n'
63
  '[{"action": "summarize", "detail": "ku_ai_policy.pdf"}]\n\n'
 
 
 
 
64
  "Now plan for this question:\n"
65
  )
66
 
@@ -447,7 +453,19 @@ class PlanAndExecuteRouter:
447
  # Helpers
448
  # ------------------------------------------------------------------
449
 
450
- _THINK_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
 
 
 
 
 
 
 
 
 
 
 
 
451
 
452
 
453
  def _extract_content(result: object) -> str:
@@ -470,7 +488,6 @@ def _extract_content(result: object) -> str:
470
  content = result
471
 
472
  if isinstance(content, list):
473
- # content can be list[str | dict]; extract text from each block
474
  parts: list[str] = []
475
  for block in content:
476
  if isinstance(block, str):
@@ -481,7 +498,7 @@ def _extract_content(result: object) -> str:
481
  else:
482
  text = str(content)
483
 
484
- return _THINK_RE.sub("", text).strip()
485
 
486
 
487
  def _parse_plan(raw: str) -> list[PlanStep]:
 
41
  # ------------------------------------------------------------------
42
 
43
  _PLANNER_PROMPT = (
44
+ "/no_think\n"
45
  "You are a planning assistant for the University of Copenhagen (KU) document system.\n\n"
46
  "Given a user question, produce a JSON list of 1–4 steps needed to answer it.\n"
47
  "Each step is an object with:\n"
 
52
  "- For simple factual questions: 1 search step is enough.\n"
53
  "- For comparison questions: use multi_search or separate search steps.\n"
54
  "- For document overview requests: use summarize.\n"
55
+ "- For questions with multiple aspects: use 2–4 separate steps.\n"
56
  "- Always end with the steps needed; do NOT include a final 'answer' step.\n\n"
57
+ "Reply with ONLY the JSON array, nothing else. No explanation, no thinking.\n\n"
58
  "Examples:\n"
59
  'Question: "What is the exam policy?"\n'
60
  '[{"action": "search", "detail": "KU eksamensregler"}]\n\n'
 
63
  '{"action": "search", "detail": "ferieregler administrativt personale"}]\n\n'
64
  'Question: "Summarize the AI policy document"\n'
65
  '[{"action": "summarize", "detail": "ku_ai_policy.pdf"}]\n\n'
66
+ 'Question: "Which documents are about AI? Summarize and find the rules for written exams"\n'
67
+ '[{"action": "list_docs", "detail": "list all available documents"}, '
68
+ '{"action": "search", "detail": "AI dokumenter KU"}, '
69
+ '{"action": "search", "detail": "regler skriftlige opgaver eksamen GAI"}]\n\n'
70
  "Now plan for this question:\n"
71
  )
72
 
 
453
  # Helpers
454
  # ------------------------------------------------------------------
455
 
456
+ _THINK_CLOSED_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
457
+ _THINK_UNCLOSED_RE = re.compile(r"<think>.*", re.DOTALL)
458
+
459
+
460
+ def _strip_think(text: str) -> str:
461
+ """Remove ``<think>`` blocks — both closed and unclosed.
462
+
463
+ Some models (Qwen3) always emit ``<think>...</think>``; others may
464
+ leave the tag unclosed. This handles both cases.
465
+ """
466
+ text = _THINK_CLOSED_RE.sub("", text)
467
+ text = _THINK_UNCLOSED_RE.sub("", text)
468
+ return text.strip()
469
 
470
 
471
  def _extract_content(result: object) -> str:
 
488
  content = result
489
 
490
  if isinstance(content, list):
 
491
  parts: list[str] = []
492
  for block in content:
493
  if isinstance(block, str):
 
498
  else:
499
  text = str(content)
500
 
501
+ return _strip_think(text)
502
 
503
 
504
  def _parse_plan(raw: str) -> list[PlanStep]:
src/agent/router.py CHANGED
@@ -25,7 +25,15 @@ from src.retrieval.reranker import Reranker
25
 
26
  logger = logging.getLogger(__name__)
27
 
28
- _THINK_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
 
 
 
 
 
 
 
 
29
 
30
 
31
  def _extract_content(result: object) -> str:
@@ -49,7 +57,7 @@ def _extract_content(result: object) -> str:
49
  else:
50
  text = str(content)
51
 
52
- return _THINK_RE.sub("", text).strip()
53
 
54
 
55
  # Reranker confidence below this triggers a query-broadening retry.
 
25
 
26
  logger = logging.getLogger(__name__)
27
 
28
+ _THINK_CLOSED_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
29
+ _THINK_UNCLOSED_RE = re.compile(r"<think>.*", re.DOTALL)
30
+
31
+
32
+ def _strip_think(text: str) -> str:
33
+ """Remove ``<think>`` blocks — both closed and unclosed."""
34
+ text = _THINK_CLOSED_RE.sub("", text)
35
+ text = _THINK_UNCLOSED_RE.sub("", text)
36
+ return text.strip()
37
 
38
 
39
  def _extract_content(result: object) -> str:
 
57
  else:
58
  text = str(content)
59
 
60
+ return _strip_think(text)
61
 
62
 
63
  # Reranker confidence below this triggers a query-broadening retry.
src/agent/tools.py CHANGED
@@ -14,19 +14,21 @@ from src.retrieval.vector_store import VectorStore
14
 
15
  logger = logging.getLogger(__name__)
16
 
17
- _THINK_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
 
 
 
 
 
 
 
 
18
 
19
 
20
  def _extract_content(result: object) -> str:
21
  """Extract plain text from an LLM invoke result.
22
 
23
  Handles AIMessage (content: str or list), plain strings, etc.
24
-
25
- Args:
26
- result: Return value of ``llm.invoke()`` or ``chain.invoke()``.
27
-
28
- Returns:
29
- Cleaned text with ``<think>`` blocks removed.
30
  """
31
  if hasattr(result, "content"):
32
  content = result.content
@@ -44,7 +46,7 @@ def _extract_content(result: object) -> str:
44
  else:
45
  text = str(content)
46
 
47
- return _THINK_RE.sub("", text).strip()
48
 
49
 
50
  @dataclass
 
14
 
15
  logger = logging.getLogger(__name__)
16
 
17
+ _THINK_CLOSED_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
18
+ _THINK_UNCLOSED_RE = re.compile(r"<think>.*", re.DOTALL)
19
+
20
+
21
+ def _strip_think(text: str) -> str:
22
+ """Remove ``<think>`` blocks — both closed and unclosed."""
23
+ text = _THINK_CLOSED_RE.sub("", text)
24
+ text = _THINK_UNCLOSED_RE.sub("", text)
25
+ return text.strip()
26
 
27
 
28
  def _extract_content(result: object) -> str:
29
  """Extract plain text from an LLM invoke result.
30
 
31
  Handles AIMessage (content: str or list), plain strings, etc.
 
 
 
 
 
 
32
  """
33
  if hasattr(result, "content"):
34
  content = result.content
 
46
  else:
47
  text = str(content)
48
 
49
+ return _strip_think(text)
50
 
51
 
52
  @dataclass
src/api/routes.py CHANGED
@@ -26,14 +26,29 @@ logger = logging.getLogger(__name__)
26
  router = APIRouter()
27
 
28
 
29
- def _is_rate_limit_error(exc_str: str) -> bool:
30
- """Check whether an exception string indicates a rate-limit / quota error."""
31
- lower = exc_str.lower()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  return (
33
- "429" in exc_str
34
- or "resource_exhausted" in lower
35
- or "rate" in lower
36
- or "too many requests" in lower
 
37
  )
38
 
39
 
@@ -201,7 +216,7 @@ async def query_documents(request: QueryRequest) -> QueryResponse:
201
  response = _query_router.route(query=request.question, top_k=request.top_k)
202
  except Exception as exc:
203
  exc_str = str(exc)
204
- if _is_rate_limit_error(exc_str):
205
  logger.warning("Rate limit / quota exhausted: %s", exc_str)
206
  raise HTTPException(
207
  status_code=429,
@@ -258,7 +273,7 @@ async def query_stream(request: QueryRequest) -> StreamingResponse:
258
  event_queue.put(event)
259
  except Exception as exc:
260
  exc_str = str(exc)
261
- if _is_rate_limit_error(exc_str):
262
  event_queue.put({"step": "error", "code": 429, "message": exc_str})
263
  else:
264
  event_queue.put({"step": "error", "code": 500, "message": exc_str})
 
26
  router = APIRouter()
27
 
28
 
29
+ def _is_rate_limit_error(exc: str | Exception) -> bool:
30
+ """Check whether an exception indicates a rate-limit / quota error.
31
+
32
+ Walks the full cause chain so wrapped exceptions (e.g. LangGraph
33
+ wrapping an upstream 429) are still detected.
34
+ """
35
+ texts: list[str] = []
36
+ if isinstance(exc, Exception):
37
+ current: BaseException | None = exc
38
+ while current is not None:
39
+ texts.append(str(current))
40
+ texts.append(type(current).__name__)
41
+ current = current.__cause__
42
+ else:
43
+ texts.append(exc)
44
+
45
+ blob = " ".join(texts).lower()
46
  return (
47
+ "429" in blob
48
+ or "resource_exhausted" in blob
49
+ or "rate limit" in blob
50
+ or "rate_limit" in blob
51
+ or "too many requests" in blob
52
  )
53
 
54
 
 
216
  response = _query_router.route(query=request.question, top_k=request.top_k)
217
  except Exception as exc:
218
  exc_str = str(exc)
219
+ if _is_rate_limit_error(exc):
220
  logger.warning("Rate limit / quota exhausted: %s", exc_str)
221
  raise HTTPException(
222
  status_code=429,
 
273
  event_queue.put(event)
274
  except Exception as exc:
275
  exc_str = str(exc)
276
+ if _is_rate_limit_error(exc):
277
  event_queue.put({"step": "error", "code": 429, "message": exc_str})
278
  else:
279
  event_queue.put({"step": "error", "code": 500, "message": exc_str})