"""Export endpoints (JSON + Excel workbook).""" from __future__ import annotations import io import json from datetime import datetime from typing import Any, Dict, List, Optional from fastapi import APIRouter, HTTPException from fastapi.responses import Response from pydantic import BaseModel, Field router = APIRouter(prefix="", tags=["export"]) class ExportMessage(BaseModel): role: str persona: Optional[str] = None time: Optional[str] = None text: str class ExportRequest(BaseModel): conversation_id: str = Field(..., description="Conversation id from the UI session") exported_at: Optional[str] = Field(default=None, description="ISO timestamp from the client") messages: List[ExportMessage] resources: Dict[str, Any] = Field(default_factory=dict, description="Resource agent output + evidence_catalog") def _filename(conversation_id: str, exported_at: str, ext: str) -> str: safe_ts = exported_at.replace(":", "-") return f"converta_{conversation_id}_{safe_ts}.{ext.lstrip('.')}" def _safe_join(values: Any, sep: str = "; ") -> str: if not values: return "" if isinstance(values, list): return sep.join([str(v) for v in values if v is not None]) return str(values) def _extract_evidence_ids(evidence: Any) -> List[str]: if not evidence or not isinstance(evidence, list): return [] evidence_ids: List[str] = [] for item in evidence: if isinstance(item, dict): ev_id = item.get("evidence_id") if isinstance(ev_id, str) and ev_id: evidence_ids.append(ev_id) return evidence_ids def _export_json_bytes(payload: ExportRequest) -> bytes: exported_at = payload.exported_at or datetime.now().isoformat() export_obj = { "conversation_id": payload.conversation_id, "exported_at": exported_at, "messages": [m.model_dump() for m in payload.messages], "resources": payload.resources, } return json.dumps(export_obj, ensure_ascii=False, indent=2).encode("utf-8") def _export_xlsx_bytes(payload: ExportRequest) -> bytes: from openpyxl import Workbook from openpyxl.styles import Alignment, Font exported_at = payload.exported_at or datetime.now().isoformat() wb = Workbook() header_font = Font(bold=True) header_alignment = Alignment(wrap_text=True, vertical="top") cell_wrap = Alignment(wrap_text=True, vertical="top") def add_sheet(title: str, headers: List[str]): ws = wb.create_sheet(title=title) ws.append(headers) for col_idx in range(1, len(headers) + 1): cell = ws.cell(row=1, column=col_idx) cell.font = header_font cell.alignment = header_alignment ws.freeze_panes = "A2" return ws default = wb.active wb.remove(default) ws_meta = add_sheet("Metadata", ["key", "value"]) schema_version = payload.resources.get("schema_version") analysis_prompt_version = payload.resources.get("analysis_prompt_version") ws_meta.append(["conversation_id", payload.conversation_id]) ws_meta.append(["exported_at", exported_at]) if schema_version is not None: ws_meta.append(["schema_version", str(schema_version)]) if analysis_prompt_version is not None: ws_meta.append(["analysis_prompt_version", str(analysis_prompt_version)]) ws_tx = add_sheet("Transcript", ["message_index", "time", "role", "persona", "text"]) for idx, msg in enumerate(payload.messages): ws_tx.append([idx, msg.time or "", msg.role, msg.persona or "", msg.text]) evidence_catalog = payload.resources.get("evidence_catalog") or {} ws_ev = add_sheet("EvidenceCatalog", ["evidence_id", "message_index", "sentence_index", "sentence_text"]) if isinstance(evidence_catalog, dict): def sort_key(item): _, val = item if not isinstance(val, dict): return (10**9, 10**9) mi = val.get("message_index") si = val.get("sentence_index") return (mi if isinstance(mi, int) else 10**9, si if isinstance(si, int) else 10**9) for evidence_id, entry in sorted(evidence_catalog.items(), key=sort_key): if not isinstance(entry, dict): continue ws_ev.append( [ str(evidence_id), entry.get("message_index", ""), entry.get("sentence_index", ""), entry.get("text", "") or "", ] ) ws_hs = add_sheet("Themes", ["index", "code", "summary", "confidence", "evidence_ids"]) themes = payload.resources.get("themes") if not isinstance(themes, list): themes = payload.resources.get("health_situations") or [] if isinstance(themes, list): for idx, item in enumerate(themes): if not isinstance(item, dict): continue evidence_ids = _extract_evidence_ids(item.get("evidence")) ws_hs.append( [ idx, item.get("code", "") or "", item.get("summary", "") or "", item.get("confidence", ""), _safe_join(evidence_ids), ] ) ws_care = add_sheet("CareExperience", ["tone", "summary", "confidence", "reasons", "evidence_ids"]) care = payload.resources.get("care_experience") or {} if isinstance(care, dict): for tone in ("positive", "mixed", "negative", "neutral"): box = care.get(tone) or {} if not isinstance(box, dict): continue evidence_ids = _extract_evidence_ids(box.get("evidence")) ws_care.append( [ tone, box.get("summary", "") or "", box.get("confidence", ""), _safe_join(box.get("reasons") or [], sep=" | "), _safe_join(evidence_ids), ] ) ws_td = add_sheet("TopDownCodes", ["category", "index", "code", "summary", "confidence", "evidence_ids"]) td_book = payload.resources.get("top_down_codebook") if isinstance(td_book, dict) and isinstance(td_book.get("categories"), list): for cat in td_book.get("categories") or []: if not isinstance(cat, dict): continue label = cat.get("label") or cat.get("category_id") or "Category" arr = cat.get("items") or [] if not isinstance(arr, list): continue for idx, item in enumerate(arr): if not isinstance(item, dict): continue evidence_ids = _extract_evidence_ids(item.get("evidence")) ws_td.append( [ str(label), idx, item.get("code", "") or "", item.get("summary", "") or "", item.get("confidence", ""), _safe_join(evidence_ids), ] ) else: top_down = payload.resources.get("top_down_codes") or {} if isinstance(top_down, dict): for category, arr in top_down.items(): if not isinstance(arr, list): continue for idx, item in enumerate(arr): if not isinstance(item, dict): continue evidence_ids = _extract_evidence_ids(item.get("evidence")) ws_td.append( [ str(category), idx, item.get("code", "") or "", item.get("summary", "") or "", item.get("confidence", ""), _safe_join(evidence_ids), ] ) for ws in wb.worksheets: for row in ws.iter_rows(min_row=2): for cell in row: cell.alignment = cell_wrap buf = io.BytesIO() wb.save(buf) return buf.getvalue() @router.post("/export/json") async def export_json(payload: ExportRequest) -> Response: exported_at = payload.exported_at or datetime.now().isoformat() data = _export_json_bytes(payload) filename = _filename(payload.conversation_id, exported_at, "json") headers = {"Content-Disposition": f'attachment; filename="{filename}"'} return Response(content=data, media_type="application/json", headers=headers) @router.post("/export/xlsx") async def export_xlsx(payload: ExportRequest) -> Response: try: data = _export_xlsx_bytes(payload) except Exception as e: raise HTTPException(status_code=500, detail=f"openpyxl not available: {e}") exported_at = payload.exported_at or datetime.now().isoformat() filename = _filename(payload.conversation_id, exported_at, "xlsx") headers = {"Content-Disposition": f'attachment; filename="{filename}"'} return Response( content=data, media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", headers=headers, )