Spaces:
Sleeping
Sleeping
XQ committed on
Commit ·
fdc3773
1
Parent(s): 9915876
Fix LLM output
Browse files
- src/agent/intent_classifier.py +4 -2
- src/agent/plan_and_execute.py +21 -4
- src/agent/router.py +10 -2
- src/agent/tools.py +10 -8
- src/api/routes.py +24 -9
src/agent/intent_classifier.py
CHANGED
|
@@ -11,7 +11,8 @@ from src.models import IntentType
|
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
| 14 |
-
|
|
|
|
| 15 |
|
| 16 |
_VALID_INTENTS = {intent.value for intent in IntentType}
|
| 17 |
|
|
@@ -58,7 +59,8 @@ class IntentClassifier:
|
|
| 58 |
Returns:
|
| 59 |
The classified IntentType.
|
| 60 |
"""
|
| 61 |
-
|
|
|
|
| 62 |
logger.debug("Raw classification result: %s", raw)
|
| 63 |
|
| 64 |
if raw in _VALID_INTENTS:
|
|
|
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
| 14 |
+
_THINK_CLOSED_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
|
| 15 |
+
_THINK_UNCLOSED_RE = re.compile(r"<think>.*", re.DOTALL)
|
| 16 |
|
| 17 |
_VALID_INTENTS = {intent.value for intent in IntentType}
|
| 18 |
|
|
|
|
| 59 |
Returns:
|
| 60 |
The classified IntentType.
|
| 61 |
"""
|
| 62 |
+
_raw_out = self._chain.invoke({"query": query})
|
| 63 |
+
raw = _THINK_UNCLOSED_RE.sub("", _THINK_CLOSED_RE.sub("", _raw_out)).strip().lower()
|
| 64 |
logger.debug("Raw classification result: %s", raw)
|
| 65 |
|
| 66 |
if raw in _VALID_INTENTS:
|
src/agent/plan_and_execute.py
CHANGED
|
@@ -41,6 +41,7 @@ _MAX_STEPS = 6
|
|
| 41 |
# ------------------------------------------------------------------
|
| 42 |
|
| 43 |
_PLANNER_PROMPT = (
|
|
|
|
| 44 |
"You are a planning assistant for the University of Copenhagen (KU) document system.\n\n"
|
| 45 |
"Given a user question, produce a JSON list of 1–4 steps needed to answer it.\n"
|
| 46 |
"Each step is an object with:\n"
|
|
@@ -51,8 +52,9 @@ _PLANNER_PROMPT = (
|
|
| 51 |
"- For simple factual questions: 1 search step is enough.\n"
|
| 52 |
"- For comparison questions: use multi_search or separate search steps.\n"
|
| 53 |
"- For document overview requests: use summarize.\n"
|
|
|
|
| 54 |
"- Always end with the steps needed; do NOT include a final 'answer' step.\n\n"
|
| 55 |
-
"Reply with ONLY the JSON array, nothing else.\n\n"
|
| 56 |
"Examples:\n"
|
| 57 |
'Question: "What is the exam policy?"\n'
|
| 58 |
'[{"action": "search", "detail": "KU eksamensregler"}]\n\n'
|
|
@@ -61,6 +63,10 @@ _PLANNER_PROMPT = (
|
|
| 61 |
'{"action": "search", "detail": "ferieregler administrativt personale"}]\n\n'
|
| 62 |
'Question: "Summarize the AI policy document"\n'
|
| 63 |
'[{"action": "summarize", "detail": "ku_ai_policy.pdf"}]\n\n'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
"Now plan for this question:\n"
|
| 65 |
)
|
| 66 |
|
|
@@ -447,7 +453,19 @@ class PlanAndExecuteRouter:
|
|
| 447 |
# Helpers
|
| 448 |
# ------------------------------------------------------------------
|
| 449 |
|
| 450 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 451 |
|
| 452 |
|
| 453 |
def _extract_content(result: object) -> str:
|
|
@@ -470,7 +488,6 @@ def _extract_content(result: object) -> str:
|
|
| 470 |
content = result
|
| 471 |
|
| 472 |
if isinstance(content, list):
|
| 473 |
-
# content can be list[str | dict]; extract text from each block
|
| 474 |
parts: list[str] = []
|
| 475 |
for block in content:
|
| 476 |
if isinstance(block, str):
|
|
@@ -481,7 +498,7 @@ def _extract_content(result: object) -> str:
|
|
| 481 |
else:
|
| 482 |
text = str(content)
|
| 483 |
|
| 484 |
-
return
|
| 485 |
|
| 486 |
|
| 487 |
def _parse_plan(raw: str) -> list[PlanStep]:
|
|
|
|
| 41 |
# ------------------------------------------------------------------
|
| 42 |
|
| 43 |
_PLANNER_PROMPT = (
|
| 44 |
+
"/no_think\n"
|
| 45 |
"You are a planning assistant for the University of Copenhagen (KU) document system.\n\n"
|
| 46 |
"Given a user question, produce a JSON list of 1–4 steps needed to answer it.\n"
|
| 47 |
"Each step is an object with:\n"
|
|
|
|
| 52 |
"- For simple factual questions: 1 search step is enough.\n"
|
| 53 |
"- For comparison questions: use multi_search or separate search steps.\n"
|
| 54 |
"- For document overview requests: use summarize.\n"
|
| 55 |
+
"- For questions with multiple aspects: use 2–4 separate steps.\n"
|
| 56 |
"- Always end with the steps needed; do NOT include a final 'answer' step.\n\n"
|
| 57 |
+
"Reply with ONLY the JSON array, nothing else. No explanation, no thinking.\n\n"
|
| 58 |
"Examples:\n"
|
| 59 |
'Question: "What is the exam policy?"\n'
|
| 60 |
'[{"action": "search", "detail": "KU eksamensregler"}]\n\n'
|
|
|
|
| 63 |
'{"action": "search", "detail": "ferieregler administrativt personale"}]\n\n'
|
| 64 |
'Question: "Summarize the AI policy document"\n'
|
| 65 |
'[{"action": "summarize", "detail": "ku_ai_policy.pdf"}]\n\n'
|
| 66 |
+
'Question: "Which documents are about AI? Summarize and find the rules for written exams"\n'
|
| 67 |
+
'[{"action": "list_docs", "detail": "list all available documents"}, '
|
| 68 |
+
'{"action": "search", "detail": "AI dokumenter KU"}, '
|
| 69 |
+
'{"action": "search", "detail": "regler skriftlige opgaver eksamen GAI"}]\n\n'
|
| 70 |
"Now plan for this question:\n"
|
| 71 |
)
|
| 72 |
|
|
|
|
| 453 |
# Helpers
|
| 454 |
# ------------------------------------------------------------------
|
| 455 |
|
| 456 |
+
_THINK_CLOSED_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
|
| 457 |
+
_THINK_UNCLOSED_RE = re.compile(r"<think>.*", re.DOTALL)
|
| 458 |
+
|
| 459 |
+
|
| 460 |
+
def _strip_think(text: str) -> str:
|
| 461 |
+
"""Remove ``<think>`` blocks — both closed and unclosed.
|
| 462 |
+
|
| 463 |
+
Some models (Qwen3) always emit ``<think>...</think>``; others may
|
| 464 |
+
leave the tag unclosed. This handles both cases.
|
| 465 |
+
"""
|
| 466 |
+
text = _THINK_CLOSED_RE.sub("", text)
|
| 467 |
+
text = _THINK_UNCLOSED_RE.sub("", text)
|
| 468 |
+
return text.strip()
|
| 469 |
|
| 470 |
|
| 471 |
def _extract_content(result: object) -> str:
|
|
|
|
| 488 |
content = result
|
| 489 |
|
| 490 |
if isinstance(content, list):
|
|
|
|
| 491 |
parts: list[str] = []
|
| 492 |
for block in content:
|
| 493 |
if isinstance(block, str):
|
|
|
|
| 498 |
else:
|
| 499 |
text = str(content)
|
| 500 |
|
| 501 |
+
return _strip_think(text)
|
| 502 |
|
| 503 |
|
| 504 |
def _parse_plan(raw: str) -> list[PlanStep]:
|
src/agent/router.py
CHANGED
|
@@ -25,7 +25,15 @@ from src.retrieval.reranker import Reranker
|
|
| 25 |
|
| 26 |
logger = logging.getLogger(__name__)
|
| 27 |
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
|
| 31 |
def _extract_content(result: object) -> str:
|
|
@@ -49,7 +57,7 @@ def _extract_content(result: object) -> str:
|
|
| 49 |
else:
|
| 50 |
text = str(content)
|
| 51 |
|
| 52 |
-
return
|
| 53 |
|
| 54 |
|
| 55 |
# Reranker confidence below this triggers a query-broadening retry.
|
|
|
|
| 25 |
|
| 26 |
logger = logging.getLogger(__name__)
|
| 27 |
|
| 28 |
+
_THINK_CLOSED_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
|
| 29 |
+
_THINK_UNCLOSED_RE = re.compile(r"<think>.*", re.DOTALL)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _strip_think(text: str) -> str:
|
| 33 |
+
"""Remove ``<think>`` blocks — both closed and unclosed."""
|
| 34 |
+
text = _THINK_CLOSED_RE.sub("", text)
|
| 35 |
+
text = _THINK_UNCLOSED_RE.sub("", text)
|
| 36 |
+
return text.strip()
|
| 37 |
|
| 38 |
|
| 39 |
def _extract_content(result: object) -> str:
|
|
|
|
| 57 |
else:
|
| 58 |
text = str(content)
|
| 59 |
|
| 60 |
+
return _strip_think(text)
|
| 61 |
|
| 62 |
|
| 63 |
# Reranker confidence below this triggers a query-broadening retry.
|
src/agent/tools.py
CHANGED
|
@@ -14,19 +14,21 @@ from src.retrieval.vector_store import VectorStore
|
|
| 14 |
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
def _extract_content(result: object) -> str:
|
| 21 |
"""Extract plain text from an LLM invoke result.
|
| 22 |
|
| 23 |
Handles AIMessage (content: str or list), plain strings, etc.
|
| 24 |
-
|
| 25 |
-
Args:
|
| 26 |
-
result: Return value of ``llm.invoke()`` or ``chain.invoke()``.
|
| 27 |
-
|
| 28 |
-
Returns:
|
| 29 |
-
Cleaned text with ``<think>`` blocks removed.
|
| 30 |
"""
|
| 31 |
if hasattr(result, "content"):
|
| 32 |
content = result.content
|
|
@@ -44,7 +46,7 @@ def _extract_content(result: object) -> str:
|
|
| 44 |
else:
|
| 45 |
text = str(content)
|
| 46 |
|
| 47 |
-
return
|
| 48 |
|
| 49 |
|
| 50 |
@dataclass
|
|
|
|
| 14 |
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
|
| 17 |
+
_THINK_CLOSED_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
|
| 18 |
+
_THINK_UNCLOSED_RE = re.compile(r"<think>.*", re.DOTALL)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _strip_think(text: str) -> str:
|
| 22 |
+
"""Remove ``<think>`` blocks — both closed and unclosed."""
|
| 23 |
+
text = _THINK_CLOSED_RE.sub("", text)
|
| 24 |
+
text = _THINK_UNCLOSED_RE.sub("", text)
|
| 25 |
+
return text.strip()
|
| 26 |
|
| 27 |
|
| 28 |
def _extract_content(result: object) -> str:
|
| 29 |
"""Extract plain text from an LLM invoke result.
|
| 30 |
|
| 31 |
Handles AIMessage (content: str or list), plain strings, etc.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
"""
|
| 33 |
if hasattr(result, "content"):
|
| 34 |
content = result.content
|
|
|
|
| 46 |
else:
|
| 47 |
text = str(content)
|
| 48 |
|
| 49 |
+
return _strip_think(text)
|
| 50 |
|
| 51 |
|
| 52 |
@dataclass
|
src/api/routes.py
CHANGED
|
@@ -26,14 +26,29 @@ logger = logging.getLogger(__name__)
|
|
| 26 |
router = APIRouter()
|
| 27 |
|
| 28 |
|
| 29 |
-
def _is_rate_limit_error(
|
| 30 |
-
"""Check whether an exception
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
return (
|
| 33 |
-
"429" in
|
| 34 |
-
or "resource_exhausted" in
|
| 35 |
-
or "rate" in
|
| 36 |
-
or "
|
|
|
|
| 37 |
)
|
| 38 |
|
| 39 |
|
|
@@ -201,7 +216,7 @@ async def query_documents(request: QueryRequest) -> QueryResponse:
|
|
| 201 |
response = _query_router.route(query=request.question, top_k=request.top_k)
|
| 202 |
except Exception as exc:
|
| 203 |
exc_str = str(exc)
|
| 204 |
-
if _is_rate_limit_error(
|
| 205 |
logger.warning("Rate limit / quota exhausted: %s", exc_str)
|
| 206 |
raise HTTPException(
|
| 207 |
status_code=429,
|
|
@@ -258,7 +273,7 @@ async def query_stream(request: QueryRequest) -> StreamingResponse:
|
|
| 258 |
event_queue.put(event)
|
| 259 |
except Exception as exc:
|
| 260 |
exc_str = str(exc)
|
| 261 |
-
if _is_rate_limit_error(
|
| 262 |
event_queue.put({"step": "error", "code": 429, "message": exc_str})
|
| 263 |
else:
|
| 264 |
event_queue.put({"step": "error", "code": 500, "message": exc_str})
|
|
|
|
| 26 |
router = APIRouter()
|
| 27 |
|
| 28 |
|
| 29 |
+
def _is_rate_limit_error(exc: str | Exception) -> bool:
|
| 30 |
+
"""Check whether an exception indicates a rate-limit / quota error.
|
| 31 |
+
|
| 32 |
+
Walks the full cause chain so wrapped exceptions (e.g. LangGraph
|
| 33 |
+
wrapping an upstream 429) are still detected.
|
| 34 |
+
"""
|
| 35 |
+
texts: list[str] = []
|
| 36 |
+
if isinstance(exc, Exception):
|
| 37 |
+
current: BaseException | None = exc
|
| 38 |
+
while current is not None:
|
| 39 |
+
texts.append(str(current))
|
| 40 |
+
texts.append(type(current).__name__)
|
| 41 |
+
current = current.__cause__
|
| 42 |
+
else:
|
| 43 |
+
texts.append(exc)
|
| 44 |
+
|
| 45 |
+
blob = " ".join(texts).lower()
|
| 46 |
return (
|
| 47 |
+
"429" in blob
|
| 48 |
+
or "resource_exhausted" in blob
|
| 49 |
+
or "rate limit" in blob
|
| 50 |
+
or "rate_limit" in blob
|
| 51 |
+
or "too many requests" in blob
|
| 52 |
)
|
| 53 |
|
| 54 |
|
|
|
|
| 216 |
response = _query_router.route(query=request.question, top_k=request.top_k)
|
| 217 |
except Exception as exc:
|
| 218 |
exc_str = str(exc)
|
| 219 |
+
if _is_rate_limit_error(exc):
|
| 220 |
logger.warning("Rate limit / quota exhausted: %s", exc_str)
|
| 221 |
raise HTTPException(
|
| 222 |
status_code=429,
|
|
|
|
| 273 |
event_queue.put(event)
|
| 274 |
except Exception as exc:
|
| 275 |
exc_str = str(exc)
|
| 276 |
+
if _is_rate_limit_error(exc):
|
| 277 |
event_queue.put({"step": "error", "code": 429, "message": exc_str})
|
| 278 |
else:
|
| 279 |
event_queue.put({"step": "error", "code": 500, "message": exc_str})
|