| import re |
| from io import BytesIO |
| from typing import Any, Dict, List, Tuple |
|
|
| from docx import Document |
| from docx.oxml import OxmlElement |
| from docx.oxml.ns import qn |
| from docx.shared import Pt, RGBColor |
|
|
|
|
| |
| |
| |
|
|
| _TABLE_ROW_RE = re.compile(r"^\|(.+)\|$") |
| _SEPARATOR_RE = re.compile(r"^\|[-:| ]+\|$") |
| _INLINE_MD_RE = re.compile(r"\*{1,2}([^*]+)\*{1,2}|`([^`]+)`") |
|
|
|
|
def _strip_inline_md(text: str) -> str:
    """Return *text* with inline markdown markers removed.

    Every match of ``_INLINE_MD_RE`` is replaced by the text it captured:
    group 1 for asterisk-delimited spans, group 2 for backtick code spans.
    """

    def _captured_text(match):
        emphasised = match.group(1)
        code = match.group(2)
        return emphasised or code

    return _INLINE_MD_RE.sub(_captured_text, text)
|
|
|
|
def _is_table_separator(line: str) -> bool:
    """Tell whether *line*, ignoring surrounding whitespace, matches ``_SEPARATOR_RE``."""
    trimmed = line.strip()
    return _SEPARATOR_RE.match(trimmed) is not None
|
|
|
|
def _parse_table_rows(lines: List[str]) -> List[List[str]]:
    """Turn markdown table lines into rows of cell strings.

    Separator lines are skipped, and so is any line that does not match
    ``_TABLE_ROW_RE`` once stripped. The text between the outer pipes is
    split on ``|``; each cell is stripped of surrounding whitespace and of
    inline markdown markers.
    """
    parsed: List[List[str]] = []
    for raw in lines:
        if _is_table_separator(raw):
            continue
        row_match = _TABLE_ROW_RE.match(raw.strip())
        if row_match is None:
            continue
        inner = row_match.group(1)
        parsed.append([_strip_inline_md(piece.strip()) for piece in inner.split("|")])
    return parsed
|
|
|
|
def _shade_cell(cell, hex_color: str) -> None:
    """Give *cell* a background fill of *hex_color*.

    A new ``w:shd`` element (``val="clear"``, ``color="auto"``,
    ``fill=hex_color``) is appended to the cell's ``tcPr`` properties
    element, which is created if the cell does not have one yet.
    """
    cell_props = cell._tc.get_or_add_tcPr()
    shading = OxmlElement("w:shd")
    for attr_name, attr_value in (
        ("w:val", "clear"),
        ("w:color", "auto"),
        ("w:fill", hex_color),
    ):
        shading.set(qn(attr_name), attr_value)
    cell_props.append(shading)
|
|
|
|
def _add_markdown_table(doc: Document, lines: List[str]) -> None:
    """Render a markdown table as a formatted Word table.

    Args:
        doc: Document the table is appended to.
        lines: Raw markdown table lines; they are parsed with
            ``_parse_table_rows``.

    Nothing is added when parsing yields no rows. The table is as wide as
    the longest parsed row; shorter rows are padded with empty cells. The
    first row is rendered as a header (bold white text on a ``2E74B5``
    fill). An empty paragraph is appended after the table.
    """
    rows = _parse_table_rows(lines)
    if not rows:
        return

    max_cols = max(len(r) for r in rows)
    table = doc.add_table(rows=len(rows), cols=max_cols)
    table.style = "Table Grid"

    for r_idx, (row, tr) in enumerate(zip(rows, table.rows)):
        is_header = r_idx == 0
        # ``tr.cells`` is a computed property, so read it once per row
        # instead of once per cell.
        cells = tr.cells
        for c_idx in range(max_cols):
            # Rows shorter than the widest row are padded with empty text.
            cell_text = row[c_idx] if c_idx < len(row) else ""
            cell = cells[c_idx]
            run = cell.paragraphs[0].add_run(cell_text)
            if is_header:
                run.bold = True
                run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
                _shade_cell(cell, "2E74B5")

    # Empty paragraph after the table.
    doc.add_paragraph()
|
|
|
|
| |
| |
| |
|
|
| def _split_into_blocks(content: str) -> List[Tuple[str, Any]]: |
| """ |
| Split markdown content into alternating ("text", str) and ("table", list[str]) |
| blocks so each can be rendered appropriately. |
| """ |
| blocks: List[Tuple[str, Any]] = [] |
| text_lines: List[str] = [] |
| table_lines: List[str] = [] |
| in_table = False |
|
|
| for line in content.split("\n"): |
| stripped = line.strip() |
| is_table_line = ( |
| stripped.startswith("|") |
| and stripped.endswith("|") |
| and len(stripped) > 2 |
| ) |
|
|
| if is_table_line: |
| if not in_table: |
| if text_lines: |
| blocks.append(("text", "\n".join(text_lines))) |
| text_lines = [] |
| in_table = True |
| table_lines.append(line) |
| else: |
| if in_table: |
| blocks.append(("table", list(table_lines))) |
| table_lines = [] |
| in_table = False |
| text_lines.append(line) |
|
|
| if in_table and table_lines: |
| blocks.append(("table", table_lines)) |
| elif text_lines: |
| blocks.append(("text", "\n".join(text_lines))) |
|
|
| return blocks |
|
|
|
|
def _add_content(doc: Document, content: str) -> None:
    """Append *content* to *doc*, rendering markdown tables as Word tables.

    Falsy content is ignored. Table blocks are handed to
    ``_add_markdown_table``; every other block becomes one paragraph of its
    stripped text, and blocks that are empty after stripping are skipped.
    """
    if not content:
        return
    for kind, payload in _split_into_blocks(content):
        if kind != "table":
            paragraph_text = payload.strip()
            if paragraph_text:
                doc.add_paragraph(paragraph_text)
            continue
        _add_markdown_table(doc, payload)
|
|
|
|
| |
| |
| |
|
|
def build_conversation_docx(messages: List[Dict[str, Any]]) -> bytes:
    """Serialise a list of chat messages to a .docx file and return its bytes.

    The document opens with a level-1 title. Each message whose ``content``
    is non-empty after stripping contributes a level-2 heading with its
    ``role``, then its content, then (when ``references`` is non-empty) a
    "Referencias:" paragraph followed by the references with ``<br>``
    replaced by newlines. Messages without content are skipped entirely.
    """
    document = Document()
    document.add_heading("Conversa Chatbot NORM ⚛", level=1)

    for entry in messages:
        speaker = str(entry.get("role") or "")
        body = str(entry.get("content") or "").strip()
        if body:
            document.add_heading(speaker, level=2)
            _add_content(document, body)

            refs = str(entry.get("references") or "").strip()
            if refs:
                document.add_paragraph("Referencias:")
                document.add_paragraph(refs.replace("<br>", "\n"))

    stream = BytesIO()
    document.save(stream)
    return stream.getvalue()
|
|
|
|
def build_single_response_docx(message: Dict[str, Any]) -> bytes:
    """Serialise one assistant response to a .docx file and return its bytes.

    The document holds a level-1 title, the message's stripped ``content``
    and, when ``references`` is non-empty after stripping, a "Referencias:"
    paragraph followed by the references with ``<br>`` replaced by newlines.
    """
    document = Document()
    document.add_heading("Ultima resposta do chatbot ⚛", level=1)

    body = str(message.get("content") or "").strip()
    _add_content(document, body)

    refs = str(message.get("references") or "").strip()
    if refs:
        document.add_paragraph("Referencias:")
        document.add_paragraph(refs.replace("<br>", "\n"))

    stream = BytesIO()
    document.save(stream)
    return stream.getvalue()
|
|