Spaces:
Running
Running
Fabio Antonini Claude Sonnet 4.6 committed on
Commit ·
167611e
1
Parent(s): 3559e28
fix: render NER table correctly in agent chat
Browse files
- core/agent.py: extract markdown tables from tool results verbatim and
append to final response, bypassing the LLM which collapses newlines;
strip mangled pipe-rows from LLM answer when clean tables are available;
add remarkGfm-compatible _extract_tables() helper
- frontend/AgentProjectPage.tsx: enable remark-gfm plugin on ReactMarkdown
so markdown tables are rendered as HTML tables (was missing, all tables
rendered as plain text)
- backend/routers/agent.py: improve project context note to prevent LLM
from using bare filenames as paths
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- backend/routers/agent.py +6 -1
- core/agent.py +64 -1
- frontend/src/pages/AgentProjectPage.tsx +2 -1
backend/routers/agent.py
CHANGED
|
@@ -469,7 +469,12 @@ async def chat(
|
|
| 469 |
)
|
| 470 |
all_docs = doc_result.scalars().all()
|
| 471 |
if all_docs:
|
| 472 |
-
ctx_lines.append(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 473 |
for d in all_docs:
|
| 474 |
ctx_lines.append(f" - {d.filename} (id={d.id})")
|
| 475 |
# Previous chats summary (titles only — keep context short)
|
|
|
|
| 469 |
)
|
| 470 |
all_docs = doc_result.scalars().all()
|
| 471 |
if all_docs:
|
| 472 |
+
ctx_lines.append(
|
| 473 |
+
"Documenti caricati nel progetto (solo a scopo informativo — "
|
| 474 |
+
"i percorsi reali dei file ti vengono forniti nel messaggio utente "
|
| 475 |
+
"nel formato [file: /percorso/assoluto]. "
|
| 476 |
+
"NON usare il solo nome del file come percorso: usa SEMPRE il percorso [file: ...] iniettato nel messaggio):"
|
| 477 |
+
)
|
| 478 |
for d in all_docs:
|
| 479 |
ctx_lines.append(f" - {d.filename} (id={d.id})")
|
| 480 |
# Previous chats summary (titles only — keep context short)
|
core/agent.py
CHANGED
|
@@ -42,6 +42,8 @@ FORENSIC_SYSTEM_PROMPT = (
|
|
| 42 |
"nel formato [file: /percorso/al/file]. Usali come argomenti degli strumenti.\n"
|
| 43 |
"Se la richiesta riguarda sia la trascrizione che altre analisi (NER, date, ecc.), "
|
| 44 |
"trascrivi prima il testo e poi usa il testo risultante come input per gli altri strumenti.\n"
|
|
|
|
|
|
|
| 45 |
"Al termine di ogni risposta, fornisci un breve riepilogo delle analisi effettuate."
|
| 46 |
)
|
| 47 |
|
|
@@ -543,10 +545,56 @@ def agent_stream(
|
|
| 543 |
accumulated = ""
|
| 544 |
tool_log: list[str] = []
|
| 545 |
image_blocks: list[str] = [] # image markdown extracted from tool results
|
|
|
|
| 546 |
|
| 547 |
import re as _re
|
|
|
|
| 548 |
_img_md_re = _re.compile(r'!\[.*?\]\(/api/agent/images/[^\)]+\)')
|
| 549 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 550 |
try:
|
| 551 |
for chunk in agent.stream(
|
| 552 |
{"messages": messages},
|
|
@@ -570,7 +618,16 @@ def agent_stream(
|
|
| 570 |
elif content:
|
| 571 |
# Final answer from the agent
|
| 572 |
accumulated = content
|
| 573 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 574 |
if image_blocks:
|
| 575 |
accumulated += "\n\n" + "\n\n".join(image_blocks)
|
| 576 |
if tool_log:
|
|
@@ -591,6 +648,12 @@ def agent_stream(
|
|
| 591 |
for img_md in _img_md_re.findall(content):
|
| 592 |
if img_md not in image_blocks:
|
| 593 |
image_blocks.append(img_md)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 594 |
short = content[:120] + ("…" if len(content) > 120 else "")
|
| 595 |
if tool_log:
|
| 596 |
tool_log[-1] = tool_log[-1].rstrip("…*") + " ✅*"
|
|
|
|
| 42 |
"nel formato [file: /percorso/al/file]. Usali come argomenti degli strumenti.\n"
|
| 43 |
"Se la richiesta riguarda sia la trascrizione che altre analisi (NER, date, ecc.), "
|
| 44 |
"trascrivi prima il testo e poi usa il testo risultante come input per gli altri strumenti.\n"
|
| 45 |
+
"Quando uno strumento restituisce una tabella Markdown (righe con | ... |), "
|
| 46 |
+
"includila SEMPRE integralmente nella risposta senza riscriverla come testo.\n"
|
| 47 |
"Al termine di ogni risposta, fornisci un breve riepilogo delle analisi effettuate."
|
| 48 |
)
|
| 49 |
|
|
|
|
| 545 |
accumulated = ""
|
| 546 |
tool_log: list[str] = []
|
| 547 |
image_blocks: list[str] = [] # image markdown extracted from tool results
|
| 548 |
+
table_blocks: list[str] = [] # markdown tables extracted from tool results
|
| 549 |
|
| 550 |
import re as _re
|
| 551 |
+
import logging as _logging
|
| 552 |
_img_md_re = _re.compile(r'!\[.*?\]\(/api/agent/images/[^\)]+\)')
|
| 553 |
|
| 554 |
+
def _extract_tables(text: str) -> list[str]:
|
| 555 |
+
"""Extract markdown tables from tool result text.
|
| 556 |
+
|
| 557 |
+
Handles both properly-newlined tables and collapsed single-line tables
|
| 558 |
+
(LangGraph sometimes strips newlines from tool result strings).
|
| 559 |
+
"""
|
| 560 |
+
tables: list[str] = []
|
| 561 |
+
|
| 562 |
+
# ── Case 1: multi-line table (normal case) ────────────────────────────
|
| 563 |
+
current: list[str] = []
|
| 564 |
+
for line in text.splitlines():
|
| 565 |
+
stripped = line.strip()
|
| 566 |
+
if stripped.startswith("|") and stripped.endswith("|"):
|
| 567 |
+
current.append(stripped)
|
| 568 |
+
else:
|
| 569 |
+
if len(current) >= 3:
|
| 570 |
+
tables.append("\n".join(current))
|
| 571 |
+
current = []
|
| 572 |
+
if len(current) >= 3:
|
| 573 |
+
tables.append("\n".join(current))
|
| 574 |
+
|
| 575 |
+
if tables:
|
| 576 |
+
return tables
|
| 577 |
+
|
| 578 |
+
# ── Case 2: collapsed single-line table ───────────────────────────────
|
| 579 |
+
# Detect a line with many pipes and a separator chunk like |---|
|
| 580 |
+
for line in text.splitlines():
|
| 581 |
+
stripped = line.strip()
|
| 582 |
+
if stripped.count("|") >= 6 and "|--" in stripped:
|
| 583 |
+
# Split on " | " boundary keeping the outer pipes
|
| 584 |
+
# e.g. "| A | B | C | |---|---|---| | x | y | z |"
|
| 585 |
+
# Reconstruct by splitting at separator pattern
|
| 586 |
+
parts = _re.split(r'(?=\|[-:| ]+\|)', stripped)
|
| 587 |
+
rows: list[str] = []
|
| 588 |
+
for part in parts:
|
| 589 |
+
part = part.strip()
|
| 590 |
+
if part.startswith("|") and part.endswith("|"):
|
| 591 |
+
rows.append(part)
|
| 592 |
+
if len(rows) >= 3:
|
| 593 |
+
tables.append("\n".join(rows))
|
| 594 |
+
break
|
| 595 |
+
|
| 596 |
+
return tables
|
| 597 |
+
|
| 598 |
try:
|
| 599 |
for chunk in agent.stream(
|
| 600 |
{"messages": messages},
|
|
|
|
| 618 |
elif content:
|
| 619 |
# Final answer from the agent
|
| 620 |
accumulated = content
|
| 621 |
+
# If we extracted clean tables from tool results, strip
|
| 622 |
+
# any mangled pipe-rows the LLM may have written inline
|
| 623 |
+
# (qwen3 collapses table newlines into a single line)
|
| 624 |
+
if table_blocks:
|
| 625 |
+
clean_lines = [
|
| 626 |
+
ln for ln in accumulated.splitlines()
|
| 627 |
+
if ln.count("|") < 4 # keep narrative, drop pipe-heavy lines
|
| 628 |
+
]
|
| 629 |
+
accumulated = "\n".join(clean_lines).strip()
|
| 630 |
+
accumulated += "\n\n" + "\n\n".join(table_blocks)
|
| 631 |
if image_blocks:
|
| 632 |
accumulated += "\n\n" + "\n\n".join(image_blocks)
|
| 633 |
if tool_log:
|
|
|
|
| 648 |
for img_md in _img_md_re.findall(content):
|
| 649 |
if img_md not in image_blocks:
|
| 650 |
image_blocks.append(img_md)
|
| 651 |
+
# Extract markdown tables from tool result so they are
|
| 652 |
+
# appended verbatim to the final response (LLM tends to
|
| 653 |
+
# collapse table rows onto a single line when rewriting)
|
| 654 |
+
for tbl in _extract_tables(content):
|
| 655 |
+
if tbl not in table_blocks:
|
| 656 |
+
table_blocks.append(tbl)
|
| 657 |
short = content[:120] + ("…" if len(content) > 120 else "")
|
| 658 |
if tool_log:
|
| 659 |
tool_log[-1] = tool_log[-1].rstrip("…*") + " ✅*"
|
frontend/src/pages/AgentProjectPage.tsx
CHANGED
|
@@ -14,6 +14,7 @@ import {
|
|
| 14 |
import { cn } from "@/lib/utils"
|
| 15 |
import { useAuthStore } from "@/store/auth"
|
| 16 |
import ReactMarkdown from "react-markdown"
|
|
|
|
| 17 |
import { api, agentProjectsApi, type AgentChat, type AgentMessage, type Document } from "@/lib/api"
|
| 18 |
|
| 19 |
// ── AuthImage ────────────────────────────────────────────────────────────────
|
|
@@ -494,7 +495,7 @@ export default function AgentProjectPage() {
|
|
| 494 |
)}
|
| 495 |
{!isLiveStreaming && (
|
| 496 |
<div className="prose prose-sm max-w-none dark:prose-invert">
|
| 497 |
-
<ReactMarkdown components={{ img: ({ src, alt }) => src ? <AuthImage src={src} alt={alt ?? ""} /> : null }}>
|
| 498 |
{main}
|
| 499 |
</ReactMarkdown>
|
| 500 |
</div>
|
|
|
|
| 14 |
import { cn } from "@/lib/utils"
|
| 15 |
import { useAuthStore } from "@/store/auth"
|
| 16 |
import ReactMarkdown from "react-markdown"
|
| 17 |
+
import remarkGfm from "remark-gfm"
|
| 18 |
import { api, agentProjectsApi, type AgentChat, type AgentMessage, type Document } from "@/lib/api"
|
| 19 |
|
| 20 |
// ── AuthImage ────────────────────────────────────────────────────────────────
|
|
|
|
| 495 |
)}
|
| 496 |
{!isLiveStreaming && (
|
| 497 |
<div className="prose prose-sm max-w-none dark:prose-invert">
|
| 498 |
+
<ReactMarkdown remarkPlugins={[remarkGfm]} components={{ img: ({ src, alt }) => src ? <AuthImage src={src} alt={alt ?? ""} /> : null }}>
|
| 499 |
{main}
|
| 500 |
</ReactMarkdown>
|
| 501 |
</div>
|