Spaces:

fabioantonini
/

grapholab

Running

Fabio Antonini Claude Sonnet 4.6 committed on Apr 5

Commit

167611e

1 Parent(s): 3559e28

fix: render NER table correctly in agent chat

- core/agent.py: extract markdown tables from tool results verbatim and
append to final response, bypassing the LLM which collapses newlines;
strip mangled pipe-rows from LLM answer when clean tables are available;
add remarkGfm-compatible _extract_tables() helper
- frontend/AgentProjectPage.tsx: enable remark-gfm plugin on ReactMarkdown
so markdown tables are rendered as HTML tables (was missing, all tables
rendered as plain text)
- backend/routers/agent.py: improve project context note to prevent LLM
from using bare filenames as paths

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (3) hide show

backend/routers/agent.py +6 -1
core/agent.py +64 -1
frontend/src/pages/AgentProjectPage.tsx +2 -1

backend/routers/agent.py CHANGED Viewed

@@ -469,7 +469,12 @@ async def chat(
         )
         all_docs = doc_result.scalars().all()
         if all_docs:
-            ctx_lines.append("Documenti disponibili nel progetto:")
             for d in all_docs:
                 ctx_lines.append(f"  - {d.filename} (id={d.id})")
         # Previous chats summary (titles only — keep context short)

         )
         all_docs = doc_result.scalars().all()
         if all_docs:
+            ctx_lines.append(
+                "Documenti caricati nel progetto (solo a scopo informativo — "
+                "i percorsi reali dei file ti vengono forniti nel messaggio utente "
+                "nel formato [file: /percorso/assoluto]. "
+                "NON usare il solo nome del file come percorso: usa SEMPRE il percorso [file: ...] iniettato nel messaggio):"
+            )
             for d in all_docs:
                 ctx_lines.append(f"  - {d.filename} (id={d.id})")
         # Previous chats summary (titles only — keep context short)

core/agent.py CHANGED Viewed

@@ -42,6 +42,8 @@ FORENSIC_SYSTEM_PROMPT = (
     "nel formato [file: /percorso/al/file]. Usali come argomenti degli strumenti.\n"
     "Se la richiesta riguarda sia la trascrizione che altre analisi (NER, date, ecc.), "
     "trascrivi prima il testo e poi usa il testo risultante come input per gli altri strumenti.\n"
     "Al termine di ogni risposta, fornisci un breve riepilogo delle analisi effettuate."
 )
@@ -543,10 +545,56 @@ def agent_stream(
     accumulated = ""
     tool_log: list[str] = []
     image_blocks: list[str] = []  # image markdown extracted from tool results
     import re as _re
     _img_md_re = _re.compile(r'!\[.*?\]\(/api/agent/images/[^\)]+\)')
     try:
         for chunk in agent.stream(
             {"messages": messages},
@@ -570,7 +618,16 @@ def agent_stream(
                     elif content:
                         # Final answer from the agent
                         accumulated = content
-                        # Append any images extracted from tool results
                         if image_blocks:
                             accumulated += "\n\n" + "\n\n".join(image_blocks)
                         if tool_log:
@@ -591,6 +648,12 @@ def agent_stream(
                     for img_md in _img_md_re.findall(content):
                         if img_md not in image_blocks:
                             image_blocks.append(img_md)
                     short = content[:120] + ("…" if len(content) > 120 else "")
                     if tool_log:
                         tool_log[-1] = tool_log[-1].rstrip("…*") + " ✅*"

     "nel formato [file: /percorso/al/file]. Usali come argomenti degli strumenti.\n"
     "Se la richiesta riguarda sia la trascrizione che altre analisi (NER, date, ecc.), "
     "trascrivi prima il testo e poi usa il testo risultante come input per gli altri strumenti.\n"
+    "Quando uno strumento restituisce una tabella Markdown (righe con | ... |), "
+    "includila SEMPRE integralmente nella risposta senza riscriverla come testo.\n"
     "Al termine di ogni risposta, fornisci un breve riepilogo delle analisi effettuate."
 )
     accumulated = ""
     tool_log: list[str] = []
     image_blocks: list[str] = []  # image markdown extracted from tool results
+    table_blocks: list[str] = []  # markdown tables extracted from tool results
     import re as _re
+    import logging as _logging
     _img_md_re = _re.compile(r'!\[.*?\]\(/api/agent/images/[^\)]+\)')
+    def _extract_tables(text: str) -> list[str]:
+        """Extract markdown tables from tool result text.
+        Handles both properly-newlined tables and collapsed single-line tables
+        (LangGraph sometimes strips newlines from tool result strings).
+        """
+        tables: list[str] = []
+        # ── Case 1: multi-line table (normal case) ────────────────────────────
+        current: list[str] = []
+        for line in text.splitlines():
+            stripped = line.strip()
+            if stripped.startswith("|") and stripped.endswith("|"):
+                current.append(stripped)
+            else:
+                if len(current) >= 3:
+                    tables.append("\n".join(current))
+                current = []
+        if len(current) >= 3:
+            tables.append("\n".join(current))
+        if tables:
+            return tables
+        # ── Case 2: collapsed single-line table ───────────────────────────────
+        # Detect a line with many pipes and a separator chunk like |---|
+        for line in text.splitlines():
+            stripped = line.strip()
+            if stripped.count("|") >= 6 and "|--" in stripped:
+                # Split on " | " boundary keeping the outer pipes
+                # e.g. "| A | B | C | |---|---|---| | x | y | z |"
+                # Reconstruct by splitting at separator pattern
+                parts = _re.split(r'(?=\|[-:| ]+\|)', stripped)
+                rows: list[str] = []
+                for part in parts:
+                    part = part.strip()
+                    if part.startswith("|") and part.endswith("|"):
+                        rows.append(part)
+                if len(rows) >= 3:
+                    tables.append("\n".join(rows))
+                    break
+        return tables
     try:
         for chunk in agent.stream(
             {"messages": messages},
                     elif content:
                         # Final answer from the agent
                         accumulated = content
+                        # If we extracted clean tables from tool results, strip
+                        # any mangled pipe-rows the LLM may have written inline
+                        # (qwen3 collapses table newlines into a single line)
+                        if table_blocks:
+                            clean_lines = [
+                                ln for ln in accumulated.splitlines()
+                                if ln.count("|") < 4  # keep narrative, drop pipe-heavy lines
+                            ]
+                            accumulated = "\n".join(clean_lines).strip()
+                            accumulated += "\n\n" + "\n\n".join(table_blocks)
                         if image_blocks:
                             accumulated += "\n\n" + "\n\n".join(image_blocks)
                         if tool_log:
                     for img_md in _img_md_re.findall(content):
                         if img_md not in image_blocks:
                             image_blocks.append(img_md)
+                    # Extract markdown tables from tool result so they are
+                    # appended verbatim to the final response (LLM tends to
+                    # collapse table rows onto a single line when rewriting)
+                    for tbl in _extract_tables(content):
+                        if tbl not in table_blocks:
+                            table_blocks.append(tbl)
                     short = content[:120] + ("…" if len(content) > 120 else "")
                     if tool_log:
                         tool_log[-1] = tool_log[-1].rstrip("…*") + " ✅*"

frontend/src/pages/AgentProjectPage.tsx CHANGED Viewed

@@ -14,6 +14,7 @@ import {
 import { cn } from "@/lib/utils"
 import { useAuthStore } from "@/store/auth"
 import ReactMarkdown from "react-markdown"
 import { api, agentProjectsApi, type AgentChat, type AgentMessage, type Document } from "@/lib/api"
 // ── AuthImage ────────────────────────────────────────────────────────────────
@@ -494,7 +495,7 @@ export default function AgentProjectPage() {
                         )}
                         {!isLiveStreaming && (
                           <div className="prose prose-sm max-w-none dark:prose-invert">
-                            <ReactMarkdown components={{ img: ({ src, alt }) => src ? <AuthImage src={src} alt={alt ?? ""} /> : null }}>
                               {main}
                             </ReactMarkdown>
                           </div>

 import { cn } from "@/lib/utils"
 import { useAuthStore } from "@/store/auth"
 import ReactMarkdown from "react-markdown"
+import remarkGfm from "remark-gfm"
 import { api, agentProjectsApi, type AgentChat, type AgentMessage, type Document } from "@/lib/api"
 // ── AuthImage ────────────────────────────────────────────────────────────────
                         )}
                         {!isLiveStreaming && (
                           <div className="prose prose-sm max-w-none dark:prose-invert">
+                            <ReactMarkdown remarkPlugins={[remarkGfm]} components={{ img: ({ src, alt }) => src ? <AuthImage src={src} alt={alt ?? ""} /> : null }}>
                               {main}
                             </ReactMarkdown>
                           </div>