"""Export chatbot conversations to Word (.docx) documents.

Markdown tables found in message content are rendered as real Word tables;
all other content is added as plain paragraphs.
"""

import re
from io import BytesIO
from typing import Any, Dict, List, Tuple

from docx import Document
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Pt, RGBColor

# ---------------------------------------------------------------------------
# Markdown table helpers
# ---------------------------------------------------------------------------

_TABLE_ROW_RE = re.compile(r"^\|(.+)\|$")
_SEPARATOR_RE = re.compile(r"^\|[-:| ]+\|$")
_INLINE_MD_RE = re.compile(r"\*{1,2}([^*]+)\*{1,2}|`([^`]+)`")

# Background fill (hex RGB) applied to the header row of rendered tables.
_HEADER_FILL_HEX = "2E74B5"


def _strip_inline_md(text: str) -> str:
    """Remove common inline markdown markers (bold, italic, code) from *text*."""
    # Exactly one of the two groups participates in any match: group 1 for
    # ``*...*`` / ``**...**`` spans, group 2 for backtick code spans.
    return _INLINE_MD_RE.sub(lambda m: m.group(1) or m.group(2), text)


def _is_table_separator(line: str) -> bool:
    """Return True if *line* is a markdown table delimiter row (``|---|:-:|``).

    A delimiter row must contain at least one hyphen. Without that check a
    data row made only of empty cells (``|   |   |``) also matches the
    separator pattern and would be silently dropped from the table.
    """
    stripped = line.strip()
    return "-" in stripped and bool(_SEPARATOR_RE.match(stripped))


def _parse_table_rows(lines: List[str]) -> List[List[str]]:
    """Convert markdown table lines into a list of rows (lists of cell strings).

    Delimiter rows and lines that do not look like ``|...|`` are skipped.
    Cell text is whitespace-trimmed and stripped of inline markdown markers.
    """
    rows: List[List[str]] = []
    for line in lines:
        if _is_table_separator(line):
            continue
        match = _TABLE_ROW_RE.match(line.strip())
        if match is None:
            continue
        rows.append(
            [_strip_inline_md(cell.strip()) for cell in match.group(1).split("|")]
        )
    return rows


def _shade_cell(cell, hex_color: str) -> None:
    """Apply a background fill colour (hex RGB, e.g. ``"2E74B5"``) to *cell*."""
    tc_pr = cell._tc.get_or_add_tcPr()
    shading = OxmlElement("w:shd")
    shading.set(qn("w:val"), "clear")
    shading.set(qn("w:color"), "auto")
    shading.set(qn("w:fill"), hex_color)
    tc_pr.append(shading)


def _add_markdown_table(doc: Document, lines: List[str]) -> None:
    """Render a markdown table as a formatted Word table appended to *doc*.

    The first parsed row is styled as a header (bold white text on a blue
    fill). Rows shorter than the widest row are padded with empty cells.
    Nothing is added when *lines* contains no parsable table row.
    """
    rows = _parse_table_rows(lines)
    if not rows:
        return

    max_cols = max(len(row) for row in rows)
    table = doc.add_table(rows=len(rows), cols=max_cols)
    table.style = "Table Grid"

    for r_idx, (row, table_row) in enumerate(zip(rows, table.rows)):
        # Fetch the row's cells once: python-docx rebuilds this sequence on
        # every access, so reading it per cell is needlessly expensive.
        cells = table_row.cells
        is_header = r_idx == 0
        for c_idx in range(max_cols):
            cell_text = row[c_idx] if c_idx < len(row) else ""
            cell = cells[c_idx]
            run = cell.paragraphs[0].add_run(cell_text)
            if is_header:
                run.bold = True
                run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
                _shade_cell(cell, _HEADER_FILL_HEX)  # blue header

    doc.add_paragraph()  # spacing after table


# ---------------------------------------------------------------------------
# Content block splitter
# ---------------------------------------------------------------------------


def _split_into_blocks(content: str) -> List[Tuple[str, Any]]:
    """Split markdown content into ordered ``("text", str)`` / ``("table", list[str])`` blocks.

    A line belongs to a table when, after trimming, it starts and ends with
    ``|`` and is longer than two characters. Consecutive table lines form one
    table block; the lines between tables are joined with ``"\\n"`` into one
    text block.
    """
    blocks: List[Tuple[str, Any]] = []
    text_lines: List[str] = []
    table_lines: List[str] = []
    in_table = False

    for line in content.split("\n"):
        stripped = line.strip()
        is_table_line = (
            len(stripped) > 2
            and stripped.startswith("|")
            and stripped.endswith("|")
        )

        if is_table_line:
            if not in_table:
                # Entering a table: flush the text collected so far.
                if text_lines:
                    blocks.append(("text", "\n".join(text_lines)))
                    text_lines = []
                in_table = True
            table_lines.append(line)
            continue

        if in_table:
            # Leaving a table: emit it and start a fresh accumulator.
            blocks.append(("table", table_lines))
            table_lines = []
            in_table = False
        text_lines.append(line)

    # Flush whichever accumulator is still open at end of input.
    if in_table and table_lines:
        blocks.append(("table", table_lines))
    elif text_lines:
        blocks.append(("text", "\n".join(text_lines)))

    return blocks


def _add_content(doc: Document, content: str) -> None:
    """Add message content to *doc*, converting markdown tables to Word tables.

    Text blocks that are empty after trimming are skipped.
    """
    if not content:
        return

    for block_type, data in _split_into_blocks(content):
        if block_type == "table":
            _add_markdown_table(doc, data)
            continue
        text = data.strip()
        if text:
            doc.add_paragraph(text)


def _add_references(doc: Document, raw_references: Any) -> None:
    """Append a "Referencias:" section to *doc* when references are present.

    *raw_references* is coerced to ``str`` and trimmed; a falsy or blank value
    adds nothing.
    """
    references = str(raw_references or "").strip()
    if not references:
        return

    # str.splitlines() recognises every line-boundary character (LF, CR, CRLF,
    # vertical tab, form feed, U+2028, ...), so line breaks are normalised to
    # "\n" without keeping an invisible separator literal in this source file.
    cleaned_refs = "\n".join(references.splitlines())
    doc.add_paragraph("Referencias:")
    doc.add_paragraph(cleaned_refs)


def _document_bytes(doc: Document) -> bytes:
    """Serialise *doc* to an in-memory buffer and return the raw .docx bytes."""
    buffer = BytesIO()
    doc.save(buffer)
    # getvalue() returns the entire buffer regardless of the stream position.
    return buffer.getvalue()


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def build_conversation_docx(messages: List[Dict[str, Any]]) -> bytes:
    """Build a .docx file from chat messages and return its raw bytes.

    Each message is a mapping read through the keys ``"role"``, ``"content"``
    and ``"references"``. Messages whose content is empty after trimming are
    skipped entirely; the others get a level-2 heading with the role, their
    content, and an optional references section.
    """
    doc = Document()
    doc.add_heading("Conversa Chatbot NORM ⚛", level=1)

    for msg in messages:
        role = str(msg.get("role") or "")
        content = str(msg.get("content") or "").strip()
        if not content:
            continue

        doc.add_heading(role, level=2)
        _add_content(doc, content)
        _add_references(doc, msg.get("references"))

    return _document_bytes(doc)


def build_single_response_docx(message: Dict[str, Any]) -> bytes:
    """Build a .docx file for a single assistant response and return its raw bytes.

    Reads the ``"content"`` and ``"references"`` keys of *message*; either may
    be missing or empty, in which case that part is omitted.
    """
    doc = Document()
    doc.add_heading("Ultima resposta do chatbot ⚛", level=1)

    content = str(message.get("content") or "").strip()
    _add_content(doc, content)
    _add_references(doc, message.get("references"))

    return _document_bytes(doc)