Fabio Antonini Claude Sonnet 4.6 committed on
Commit
167611e
·
1 Parent(s): 3559e28

fix: render NER table correctly in agent chat

Browse files

- core/agent.py: extract markdown tables from tool results verbatim and
append to final response, bypassing the LLM which collapses newlines;
strip mangled pipe-rows from LLM answer when clean tables are available;
add remarkGfm-compatible _extract_tables() helper
- frontend/AgentProjectPage.tsx: enable remark-gfm plugin on ReactMarkdown
so markdown tables are rendered as HTML tables (was missing, all tables
rendered as plain text)
- backend/routers/agent.py: improve project context note to prevent LLM
from using bare filenames as paths

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

backend/routers/agent.py CHANGED
@@ -469,7 +469,12 @@ async def chat(
469
  )
470
  all_docs = doc_result.scalars().all()
471
  if all_docs:
472
- ctx_lines.append("Documenti disponibili nel progetto:")
 
 
 
 
 
473
  for d in all_docs:
474
  ctx_lines.append(f" - {d.filename} (id={d.id})")
475
  # Previous chats summary (titles only — keep context short)
 
469
  )
470
  all_docs = doc_result.scalars().all()
471
  if all_docs:
472
+ ctx_lines.append(
473
+ "Documenti caricati nel progetto (solo a scopo informativo — "
474
+ "i percorsi reali dei file ti vengono forniti nel messaggio utente "
475
+ "nel formato [file: /percorso/assoluto]. "
476
+ "NON usare il solo nome del file come percorso: usa SEMPRE il percorso [file: ...] iniettato nel messaggio):"
477
+ )
478
  for d in all_docs:
479
  ctx_lines.append(f" - {d.filename} (id={d.id})")
480
  # Previous chats summary (titles only — keep context short)
core/agent.py CHANGED
@@ -42,6 +42,8 @@ FORENSIC_SYSTEM_PROMPT = (
42
  "nel formato [file: /percorso/al/file]. Usali come argomenti degli strumenti.\n"
43
  "Se la richiesta riguarda sia la trascrizione che altre analisi (NER, date, ecc.), "
44
  "trascrivi prima il testo e poi usa il testo risultante come input per gli altri strumenti.\n"
 
 
45
  "Al termine di ogni risposta, fornisci un breve riepilogo delle analisi effettuate."
46
  )
47
 
@@ -543,10 +545,56 @@ def agent_stream(
543
  accumulated = ""
544
  tool_log: list[str] = []
545
  image_blocks: list[str] = [] # image markdown extracted from tool results
 
546
 
547
  import re as _re
 
548
  _img_md_re = _re.compile(r'!\[.*?\]\(/api/agent/images/[^\)]+\)')
549
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
550
  try:
551
  for chunk in agent.stream(
552
  {"messages": messages},
@@ -570,7 +618,16 @@ def agent_stream(
570
  elif content:
571
  # Final answer from the agent
572
  accumulated = content
573
- # Append any images extracted from tool results
 
 
 
 
 
 
 
 
 
574
  if image_blocks:
575
  accumulated += "\n\n" + "\n\n".join(image_blocks)
576
  if tool_log:
@@ -591,6 +648,12 @@ def agent_stream(
591
  for img_md in _img_md_re.findall(content):
592
  if img_md not in image_blocks:
593
  image_blocks.append(img_md)
 
 
 
 
 
 
594
  short = content[:120] + ("…" if len(content) > 120 else "")
595
  if tool_log:
596
  tool_log[-1] = tool_log[-1].rstrip("…*") + " ✅*"
 
42
  "nel formato [file: /percorso/al/file]. Usali come argomenti degli strumenti.\n"
43
  "Se la richiesta riguarda sia la trascrizione che altre analisi (NER, date, ecc.), "
44
  "trascrivi prima il testo e poi usa il testo risultante come input per gli altri strumenti.\n"
45
+ "Quando uno strumento restituisce una tabella Markdown (righe con | ... |), "
46
+ "includila SEMPRE integralmente nella risposta senza riscriverla come testo.\n"
47
  "Al termine di ogni risposta, fornisci un breve riepilogo delle analisi effettuate."
48
  )
49
 
 
545
  accumulated = ""
546
  tool_log: list[str] = []
547
  image_blocks: list[str] = [] # image markdown extracted from tool results
548
+ table_blocks: list[str] = [] # markdown tables extracted from tool results
549
 
550
  import re as _re
551
+ import logging as _logging
552
  _img_md_re = _re.compile(r'!\[.*?\]\(/api/agent/images/[^\)]+\)')
553
 
554
+ def _extract_tables(text: str) -> list[str]:
555
+ """Extract markdown tables from tool result text.
556
+
557
+ Handles both properly-newlined tables and collapsed single-line tables
558
+ (LangGraph sometimes strips newlines from tool result strings).
559
+ """
560
+ tables: list[str] = []
561
+
562
+ # ── Case 1: multi-line table (normal case) ────────────────────────────
563
+ current: list[str] = []
564
+ for line in text.splitlines():
565
+ stripped = line.strip()
566
+ if stripped.startswith("|") and stripped.endswith("|"):
567
+ current.append(stripped)
568
+ else:
569
+ if len(current) >= 3:
570
+ tables.append("\n".join(current))
571
+ current = []
572
+ if len(current) >= 3:
573
+ tables.append("\n".join(current))
574
+
575
+ if tables:
576
+ return tables
577
+
578
+ # ── Case 2: collapsed single-line table ───────────────────────────────
579
+ # Detect a line with many pipes and a separator chunk like |---|
580
+ for line in text.splitlines():
581
+ stripped = line.strip()
582
+ if stripped.count("|") >= 6 and "|--" in stripped:
583
+ # Split on " | " boundary keeping the outer pipes
584
+ # e.g. "| A | B | C | |---|---|---| | x | y | z |"
585
+ # Reconstruct by splitting at separator pattern
586
+ parts = _re.split(r'(?=\|[-:| ]+\|)', stripped)
587
+ rows: list[str] = []
588
+ for part in parts:
589
+ part = part.strip()
590
+ if part.startswith("|") and part.endswith("|"):
591
+ rows.append(part)
592
+ if len(rows) >= 3:
593
+ tables.append("\n".join(rows))
594
+ break
595
+
596
+ return tables
597
+
598
  try:
599
  for chunk in agent.stream(
600
  {"messages": messages},
 
618
  elif content:
619
  # Final answer from the agent
620
  accumulated = content
621
+ # If we extracted clean tables from tool results, strip
622
+ # any mangled pipe-rows the LLM may have written inline
623
+ # (qwen3 collapses table newlines into a single line)
624
+ if table_blocks:
625
+ clean_lines = [
626
+ ln for ln in accumulated.splitlines()
627
+ if ln.count("|") < 4 # keep narrative, drop pipe-heavy lines
628
+ ]
629
+ accumulated = "\n".join(clean_lines).strip()
630
+ accumulated += "\n\n" + "\n\n".join(table_blocks)
631
  if image_blocks:
632
  accumulated += "\n\n" + "\n\n".join(image_blocks)
633
  if tool_log:
 
648
  for img_md in _img_md_re.findall(content):
649
  if img_md not in image_blocks:
650
  image_blocks.append(img_md)
651
+ # Extract markdown tables from tool result so they are
652
+ # appended verbatim to the final response (LLM tends to
653
+ # collapse table rows onto a single line when rewriting)
654
+ for tbl in _extract_tables(content):
655
+ if tbl not in table_blocks:
656
+ table_blocks.append(tbl)
657
  short = content[:120] + ("…" if len(content) > 120 else "")
658
  if tool_log:
659
  tool_log[-1] = tool_log[-1].rstrip("…*") + " ✅*"
frontend/src/pages/AgentProjectPage.tsx CHANGED
@@ -14,6 +14,7 @@ import {
14
  import { cn } from "@/lib/utils"
15
  import { useAuthStore } from "@/store/auth"
16
  import ReactMarkdown from "react-markdown"
 
17
  import { api, agentProjectsApi, type AgentChat, type AgentMessage, type Document } from "@/lib/api"
18
 
19
  // ── AuthImage ────────────────────────────────────────────────────────────────
@@ -494,7 +495,7 @@ export default function AgentProjectPage() {
494
  )}
495
  {!isLiveStreaming && (
496
  <div className="prose prose-sm max-w-none dark:prose-invert">
497
- <ReactMarkdown components={{ img: ({ src, alt }) => src ? <AuthImage src={src} alt={alt ?? ""} /> : null }}>
498
  {main}
499
  </ReactMarkdown>
500
  </div>
 
14
  import { cn } from "@/lib/utils"
15
  import { useAuthStore } from "@/store/auth"
16
  import ReactMarkdown from "react-markdown"
17
+ import remarkGfm from "remark-gfm"
18
  import { api, agentProjectsApi, type AgentChat, type AgentMessage, type Document } from "@/lib/api"
19
 
20
  // ── AuthImage ────────────────────────────────────────────────────────────────
 
495
  )}
496
  {!isLiveStreaming && (
497
  <div className="prose prose-sm max-w-none dark:prose-invert">
498
+ <ReactMarkdown remarkPlugins={[remarkGfm]} components={{ img: ({ src, alt }) => src ? <AuthImage src={src} alt={alt ?? ""} /> : null }}>
499
  {main}
500
  </ReactMarkdown>
501
  </div>