import re
from io import BytesIO
from typing import Any, Dict, List, Tuple
from docx import Document
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Pt, RGBColor
# ---------------------------------------------------------------------------
# Markdown table helpers
# ---------------------------------------------------------------------------
# A complete table row: starts and ends with a pipe; group 1 is everything
# between the outer pipes.
_TABLE_ROW_RE = re.compile(r"^\|(.+)\|$")
# A header delimiter row such as "|---|:---:|---:|".  Every cell must hold at
# least one dash (optionally wrapped in alignment colons and spaces), so that
# a data row made only of blank cells ("|   |   |") is not mistaken for a
# delimiter and dropped.
_SEPARATOR_RE = re.compile(r"^\|(?: *:?-+:? *\|)+$")
# Inline markup: *text* / **text** (captured in group 1) or `text` (group 2).
_INLINE_MD_RE = re.compile(r"\*{1,2}([^*]+)\*{1,2}|`([^`]+)`")
def _strip_inline_md(text: str) -> str:
    """Return *text* with bold/italic asterisks and code backticks removed.

    Only the markers are dropped; the text they wrap is kept.
    """

    def _unwrap(match):
        # Exactly one of the two groups is set, depending on which
        # alternative of the pattern matched.
        emphasised, code = match.groups()
        return emphasised or code

    return _INLINE_MD_RE.sub(_unwrap, text)
def _is_table_separator(line: str) -> bool:
    """Tell whether *line* is a table delimiter row (e.g. ``|---|---|``).

    Surrounding whitespace is ignored before matching.
    """
    candidate = line.strip()
    return _SEPARATOR_RE.match(candidate) is not None
# A pipe that is NOT preceded by a backslash, i.e. a real cell boundary.
_UNESCAPED_PIPE_RE = re.compile(r"(?<!\\)\|")


def _parse_table_rows(lines: List[str]) -> List[List[str]]:
    """Convert markdown table lines into a list of rows (lists of cell strings).

    Delimiter rows (as recognised by ``_is_table_separator``) and lines that
    do not look like a table row are skipped.  Each cell is stripped of
    surrounding whitespace and of inline markdown markers.

    An escaped pipe (``\\|``) inside a cell is treated as a literal ``|``
    rather than as a column boundary.

    Args:
        lines: Raw lines belonging to one markdown table.

    Returns:
        One list of cell strings per data/header row, in input order.
    """
    rows: List[List[str]] = []
    for line in lines:
        if _is_table_separator(line):
            continue
        row_match = _TABLE_ROW_RE.match(line.strip())
        if row_match is None:
            continue
        rows.append(
            [
                # Unescape "\|" only after splitting so it cannot create a
                # spurious column.
                _strip_inline_md(cell.replace("\\|", "|").strip())
                for cell in _UNESCAPED_PIPE_RE.split(row_match.group(1))
            ]
        )
    return rows
def _shade_cell(cell, hex_color: str) -> None:
    """Give a table cell a solid background fill.

    A new ``w:shd`` element carrying *hex_color* as its fill is appended to
    the cell's properties element (created if the cell has none yet).
    """
    shading = OxmlElement("w:shd")
    attributes = (
        ("w:val", "clear"),
        ("w:color", "auto"),
        ("w:fill", hex_color),
    )
    for attr_name, attr_value in attributes:
        shading.set(qn(attr_name), attr_value)
    cell._tc.get_or_add_tcPr().append(shading)
def _add_markdown_table(doc: Document, lines: List[str]) -> None:
    """Render a markdown table as a formatted Word table.

    The first parsed row is styled as a header (bold white text on a blue
    fill).  Rows with fewer cells than the widest row are padded with empty
    cells.  An empty paragraph is added after the table for spacing.  Nothing
    is added when *lines* contains no parsable row.

    Args:
        doc: Document the table is appended to.
        lines: Raw markdown lines of a single table.
    """
    rows = _parse_table_rows(lines)
    if not rows:
        return

    max_cols = max(len(r) for r in rows)
    table = doc.add_table(rows=len(rows), cols=max_cols)
    table.style = "Table Grid"

    for r_idx, (table_row, row) in enumerate(zip(table.rows, rows)):
        # Fetch the row's cells once: the ``cells`` accessor does real work
        # on each access, so reading it per cell is wasteful.
        cells = table_row.cells
        padded = row + [""] * (max_cols - len(row))
        is_header = r_idx == 0
        for cell, cell_text in zip(cells, padded):
            run = cell.paragraphs[0].add_run(cell_text)
            if is_header:
                run.bold = True
                run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
                _shade_cell(cell, "2E74B5")  # blue header

    doc.add_paragraph()  # spacing after table
# ---------------------------------------------------------------------------
# Content block splitter
# ---------------------------------------------------------------------------
def _split_into_blocks(content: str) -> List[Tuple[str, Any]]:
    """
    Partition markdown *content* into consecutive runs of table and non-table
    lines.

    A line counts as a table line when, once stripped, it is longer than two
    characters and both starts and ends with ``|``.  Each run becomes either
    ``("table", [raw lines])`` or ``("text", "lines joined with newlines")``,
    in document order.
    """
    result: List[Tuple[str, Any]] = []
    pending: List[str] = []
    pending_is_table = False

    def flush() -> None:
        # Emit the run collected so far (if any) and start a fresh one.
        if not pending:
            return
        if pending_is_table:
            result.append(("table", list(pending)))
        else:
            result.append(("text", "\n".join(pending)))
        pending.clear()

    for raw_line in content.split("\n"):
        candidate = raw_line.strip()
        looks_like_row = (
            len(candidate) > 2
            and candidate.startswith("|")
            and candidate.endswith("|")
        )
        if looks_like_row != pending_is_table:
            # The kind of line changed: close the current run.
            flush()
            pending_is_table = looks_like_row
        pending.append(raw_line)

    flush()
    return result
def _add_content(doc: Document, content: str) -> None:
    """Append *content* to *doc*, turning markdown tables into Word tables.

    Table blocks are rendered through ``_add_markdown_table``; every other
    block is stripped and, when non-empty, added as a single paragraph.
    Empty *content* adds nothing.
    """
    if not content:
        return

    for kind, payload in _split_into_blocks(content):
        if kind == "table":
            _add_markdown_table(doc, payload)
            continue
        paragraph_text = payload.strip()
        if paragraph_text:
            doc.add_paragraph(paragraph_text)
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def build_conversation_docx(messages: List[Dict[str, Any]]) -> bytes:
    """Build a .docx file from chat messages and return raw bytes.

    Each message with non-empty ``"content"`` contributes a level-2 heading
    holding its ``"role"``, the content itself (markdown tables become Word
    tables) and, when ``"references"`` is non-empty, a "Referencias:" label
    followed by the references.  Messages whose content is empty are skipped
    entirely, including their references.

    Args:
        messages: Chat messages; the keys read are ``"role"``, ``"content"``
            and ``"references"``.  Missing or falsy values are treated as
            empty strings.

    Returns:
        The serialized .docx document.
    """
    doc = Document()
    doc.add_heading("Conversa Chatbot NORM ⚛", level=1)

    for msg in messages:
        role = str(msg.get("role") or "")
        content = str(msg.get("content") or "").strip()
        if not content:
            continue

        doc.add_heading(role, level=2)
        _add_content(doc, content)

        references = str(msg.get("references") or "").strip()
        if references:
            # Normalise every line-boundary character (CR, CRLF, VT, FF,
            # U+2028, ...) to "\n".  splitlines() is used instead of a
            # literal so that no raw line-break character has to live inside
            # a string in the source.
            cleaned_refs = "\n".join(references.splitlines())
            doc.add_paragraph("Referencias:")
            doc.add_paragraph(cleaned_refs)

    buffer = BytesIO()
    doc.save(buffer)
    # getvalue() returns the whole buffer regardless of the stream position.
    return buffer.getvalue()
def build_single_response_docx(message: Dict[str, Any]) -> bytes:
    """Build a .docx file for a single assistant response and return raw bytes.

    The document holds a title heading, the message ``"content"`` (markdown
    tables become Word tables) and, when ``"references"`` is non-empty, a
    "Referencias:" label followed by the references.

    Args:
        message: The response; the keys read are ``"content"`` and
            ``"references"``.  Missing or falsy values are treated as empty
            strings.

    Returns:
        The serialized .docx document.
    """
    doc = Document()
    doc.add_heading("Ultima resposta do chatbot ⚛", level=1)

    content = str(message.get("content") or "").strip()
    _add_content(doc, content)

    references = str(message.get("references") or "").strip()
    if references:
        # Normalise every line-boundary character (CR, CRLF, VT, FF,
        # U+2028, ...) to "\n".  splitlines() is used instead of a literal
        # so that no raw line-break character has to live inside a string in
        # the source.
        cleaned_refs = "\n".join(references.splitlines())
        doc.add_paragraph("Referencias:")
        doc.add_paragraph(cleaned_refs)

    buffer = BytesIO()
    doc.save(buffer)
    # getvalue() returns the whole buffer regardless of the stream position.
    return buffer.getvalue()