Spaces:

XQ
/

Dokumentassistent

Sleeping

App Files

XQ committed on Apr 7

Commit

fdc3773

1 Parent(s): 9915876

Fix LLM output

Browse files

Files changed (5) hide show

src/agent/intent_classifier.py +4 -2
src/agent/plan_and_execute.py +21 -4
src/agent/router.py +10 -2
src/agent/tools.py +10 -8
src/api/routes.py +24 -9

src/agent/intent_classifier.py CHANGED Viewed

@@ -11,7 +11,8 @@ from src.models import IntentType
 logger = logging.getLogger(__name__)
-_THINK_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
 _VALID_INTENTS = {intent.value for intent in IntentType}
@@ -58,7 +59,8 @@ class IntentClassifier:
         Returns:
             The classified IntentType.
         """
-        raw = _THINK_RE.sub("", self._chain.invoke({"query": query})).strip().lower()
         logger.debug("Raw classification result: %s", raw)
         if raw in _VALID_INTENTS:

 logger = logging.getLogger(__name__)
+_THINK_CLOSED_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
+_THINK_UNCLOSED_RE = re.compile(r"<think>.*", re.DOTALL)
 _VALID_INTENTS = {intent.value for intent in IntentType}
         Returns:
             The classified IntentType.
         """
+        _raw_out = self._chain.invoke({"query": query})
+        raw = _THINK_UNCLOSED_RE.sub("", _THINK_CLOSED_RE.sub("", _raw_out)).strip().lower()
         logger.debug("Raw classification result: %s", raw)
         if raw in _VALID_INTENTS:

src/agent/plan_and_execute.py CHANGED Viewed

@@ -41,6 +41,7 @@ _MAX_STEPS = 6
 # ------------------------------------------------------------------
 _PLANNER_PROMPT = (
     "You are a planning assistant for the University of Copenhagen (KU) document system.\n\n"
     "Given a user question, produce a JSON list of 1–4 steps needed to answer it.\n"
     "Each step is an object with:\n"
@@ -51,8 +52,9 @@ _PLANNER_PROMPT = (
     "- For simple factual questions: 1 search step is enough.\n"
     "- For comparison questions: use multi_search or separate search steps.\n"
     "- For document overview requests: use summarize.\n"
     "- Always end with the steps needed; do NOT include a final 'answer' step.\n\n"
-    "Reply with ONLY the JSON array, nothing else.\n\n"
     "Examples:\n"
     'Question: "What is the exam policy?"\n'
     '[{"action": "search", "detail": "KU eksamensregler"}]\n\n'
@@ -61,6 +63,10 @@ _PLANNER_PROMPT = (
     '{"action": "search", "detail": "ferieregler administrativt personale"}]\n\n'
     'Question: "Summarize the AI policy document"\n'
     '[{"action": "summarize", "detail": "ku_ai_policy.pdf"}]\n\n'
     "Now plan for this question:\n"
 )
@@ -447,7 +453,19 @@ class PlanAndExecuteRouter:
 # Helpers
 # ------------------------------------------------------------------
-_THINK_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
 def _extract_content(result: object) -> str:
@@ -470,7 +488,6 @@ def _extract_content(result: object) -> str:
         content = result
     if isinstance(content, list):
-        # content can be list[str | dict]; extract text from each block
         parts: list[str] = []
         for block in content:
             if isinstance(block, str):
@@ -481,7 +498,7 @@ def _extract_content(result: object) -> str:
     else:
         text = str(content)
-    return _THINK_RE.sub("", text).strip()
 def _parse_plan(raw: str) -> list[PlanStep]:

 # ------------------------------------------------------------------
 _PLANNER_PROMPT = (
+    "/no_think\n"
     "You are a planning assistant for the University of Copenhagen (KU) document system.\n\n"
     "Given a user question, produce a JSON list of 1–4 steps needed to answer it.\n"
     "Each step is an object with:\n"
     "- For simple factual questions: 1 search step is enough.\n"
     "- For comparison questions: use multi_search or separate search steps.\n"
     "- For document overview requests: use summarize.\n"
+    "- For questions with multiple aspects: use 2–4 separate steps.\n"
     "- Always end with the steps needed; do NOT include a final 'answer' step.\n\n"
+    "Reply with ONLY the JSON array, nothing else. No explanation, no thinking.\n\n"
     "Examples:\n"
     'Question: "What is the exam policy?"\n'
     '[{"action": "search", "detail": "KU eksamensregler"}]\n\n'
     '{"action": "search", "detail": "ferieregler administrativt personale"}]\n\n'
     'Question: "Summarize the AI policy document"\n'
     '[{"action": "summarize", "detail": "ku_ai_policy.pdf"}]\n\n'
+    'Question: "Which documents are about AI? Summarize and find the rules for written exams"\n'
+    '[{"action": "list_docs", "detail": "list all available documents"}, '
+    '{"action": "search", "detail": "AI dokumenter KU"}, '
+    '{"action": "search", "detail": "regler skriftlige opgaver eksamen GAI"}]\n\n'
     "Now plan for this question:\n"
 )
 # Helpers
 # ------------------------------------------------------------------
+_THINK_CLOSED_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
+_THINK_UNCLOSED_RE = re.compile(r"<think>.*", re.DOTALL)
+def _strip_think(text: str) -> str:
+    """Remove ``<think>`` blocks — both closed and unclosed.
+    Some models (Qwen3) always emit ``<think>...</think>``; others may
+    leave the tag unclosed.  This handles both cases.
+    """
+    text = _THINK_CLOSED_RE.sub("", text)
+    text = _THINK_UNCLOSED_RE.sub("", text)
+    return text.strip()
 def _extract_content(result: object) -> str:
         content = result
     if isinstance(content, list):
         parts: list[str] = []
         for block in content:
             if isinstance(block, str):
     else:
         text = str(content)
+    return _strip_think(text)
 def _parse_plan(raw: str) -> list[PlanStep]:

src/agent/router.py CHANGED Viewed

@@ -25,7 +25,15 @@ from src.retrieval.reranker import Reranker
 logger = logging.getLogger(__name__)
-_THINK_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
 def _extract_content(result: object) -> str:
@@ -49,7 +57,7 @@ def _extract_content(result: object) -> str:
     else:
         text = str(content)
-    return _THINK_RE.sub("", text).strip()
 # Reranker confidence below this triggers a query-broadening retry.

 logger = logging.getLogger(__name__)
+_THINK_CLOSED_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
+_THINK_UNCLOSED_RE = re.compile(r"<think>.*", re.DOTALL)
+def _strip_think(text: str) -> str:
+    """Remove ``<think>`` blocks — both closed and unclosed."""
+    text = _THINK_CLOSED_RE.sub("", text)
+    text = _THINK_UNCLOSED_RE.sub("", text)
+    return text.strip()
 def _extract_content(result: object) -> str:
     else:
         text = str(content)
+    return _strip_think(text)
 # Reranker confidence below this triggers a query-broadening retry.

src/agent/tools.py CHANGED Viewed

@@ -14,19 +14,21 @@ from src.retrieval.vector_store import VectorStore
 logger = logging.getLogger(__name__)
-_THINK_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
 def _extract_content(result: object) -> str:
     """Extract plain text from an LLM invoke result.
     Handles AIMessage (content: str or list), plain strings, etc.
-    Args:
-        result: Return value of ``llm.invoke()`` or ``chain.invoke()``.
-    Returns:
-        Cleaned text with ``<think>`` blocks removed.
     """
     if hasattr(result, "content"):
         content = result.content
@@ -44,7 +46,7 @@ def _extract_content(result: object) -> str:
     else:
         text = str(content)
-    return _THINK_RE.sub("", text).strip()
 @dataclass

 logger = logging.getLogger(__name__)
+_THINK_CLOSED_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
+_THINK_UNCLOSED_RE = re.compile(r"<think>.*", re.DOTALL)
+def _strip_think(text: str) -> str:
+    """Remove ``<think>`` blocks — both closed and unclosed."""
+    text = _THINK_CLOSED_RE.sub("", text)
+    text = _THINK_UNCLOSED_RE.sub("", text)
+    return text.strip()
 def _extract_content(result: object) -> str:
     """Extract plain text from an LLM invoke result.
     Handles AIMessage (content: str or list), plain strings, etc.
     """
     if hasattr(result, "content"):
         content = result.content
     else:
         text = str(content)
+    return _strip_think(text)
 @dataclass

src/api/routes.py CHANGED Viewed

@@ -26,14 +26,29 @@ logger = logging.getLogger(__name__)
 router = APIRouter()
-def _is_rate_limit_error(exc_str: str) -> bool:
-    """Check whether an exception string indicates a rate-limit / quota error."""
-    lower = exc_str.lower()
     return (
-        "429" in exc_str
-        or "resource_exhausted" in lower
-        or "rate" in lower
-        or "too many requests" in lower
     )
@@ -201,7 +216,7 @@ async def query_documents(request: QueryRequest) -> QueryResponse:
         response = _query_router.route(query=request.question, top_k=request.top_k)
     except Exception as exc:
         exc_str = str(exc)
-        if _is_rate_limit_error(exc_str):
             logger.warning("Rate limit / quota exhausted: %s", exc_str)
             raise HTTPException(
                 status_code=429,
@@ -258,7 +273,7 @@ async def query_stream(request: QueryRequest) -> StreamingResponse:
                 event_queue.put(event)
         except Exception as exc:
             exc_str = str(exc)
-            if _is_rate_limit_error(exc_str):
                 event_queue.put({"step": "error", "code": 429, "message": exc_str})
             else:
                 event_queue.put({"step": "error", "code": 500, "message": exc_str})

 router = APIRouter()
+def _is_rate_limit_error(exc: str | Exception) -> bool:
+    """Check whether an exception indicates a rate-limit / quota error.
+    Walks the full cause chain so wrapped exceptions (e.g. LangGraph
+    wrapping an upstream 429) are still detected.
+    """
+    texts: list[str] = []
+    if isinstance(exc, Exception):
+        current: BaseException | None = exc
+        while current is not None:
+            texts.append(str(current))
+            texts.append(type(current).__name__)
+            current = current.__cause__
+    else:
+        texts.append(exc)
+    blob = " ".join(texts).lower()
     return (
+        "429" in blob
+        or "resource_exhausted" in blob
+        or "rate limit" in blob
+        or "rate_limit" in blob
+        or "too many requests" in blob
     )
         response = _query_router.route(query=request.question, top_k=request.top_k)
     except Exception as exc:
         exc_str = str(exc)
+        if _is_rate_limit_error(exc):
             logger.warning("Rate limit / quota exhausted: %s", exc_str)
             raise HTTPException(
                 status_code=429,
                 event_queue.put(event)
         except Exception as exc:
             exc_str = str(exc)
+            if _is_rate_limit_error(exc):
                 event_queue.put({"step": "error", "code": 429, "message": exc_str})
             else:
                 event_queue.put({"step": "error", "code": 500, "message": exc_str})