# ConverTA/backend/api/export_routes.py
# MikelWL
# Analysis frameworks: themes, top-down instructions, category descriptions
# 2568a5f
"""Export endpoints (JSON + Excel workbook)."""
from __future__ import annotations
import io
import json
from datetime import datetime
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, HTTPException
from fastapi.responses import Response
from pydantic import BaseModel, Field
# Router that collects the export endpoints defined in this module. It adds no
# path prefix of its own and tags its routes as "export".
router = APIRouter(prefix="", tags=["export"])
class ExportMessage(BaseModel):
    """A single transcript message included in an export request."""

    # Field order is significant: it is the key order produced by
    # ``model_dump()`` and therefore the key order in the JSON export.

    # Speaker role (required, free-form string).
    role: str
    # Optional persona label; written as an empty cell in the workbook
    # when missing.
    persona: Optional[str] = Field(default=None)
    # Optional timestamp string; stored as received, never parsed here.
    time: Optional[str] = Field(default=None)
    # Message body (required).
    text: str
class ExportRequest(BaseModel):
    """Request body shared by the JSON and Excel export endpoints."""

    # Required identifier; also used as part of the download filename.
    conversation_id: str = Field(
        ...,
        description="Conversation id from the UI session",
    )
    # When absent, the server substitutes its own current time.
    exported_at: Optional[str] = Field(
        default=None,
        description="ISO timestamp from the client",
    )
    # Transcript messages, exported in the order given.
    messages: List[ExportMessage]
    # Free-form analysis output; the exporters read it with ``.get`` and
    # type-check each entry before use.
    resources: Dict[str, Any] = Field(
        default_factory=dict,
        description="Resource agent output + evidence_catalog",
    )
def _filename(conversation_id: str, exported_at: str, ext: str) -> str:
    """Build the download name ``converta_<conversation_id>_<timestamp>.<ext>``.

    ``conversation_id`` and ``exported_at`` come from the client, and the
    result is embedded in a quoted ``Content-Disposition`` header by the
    export endpoints. Any character outside ``[A-Za-z0-9._+-]`` is therefore
    replaced with ``_`` so that quotes, path separators, line breaks or
    non-ASCII text cannot corrupt the header or the saved file name.
    Ordinary ids and ISO timestamps come out exactly as before.

    Args:
        conversation_id: Conversation identifier supplied by the client.
        exported_at: Timestamp string; ``:`` is turned into ``-``.
        ext: File extension, with or without a leading dot.

    Returns:
        The sanitised file name.
    """

    def _sanitize(part: str) -> str:
        # Keep a conservative, header-safe ASCII subset; '+' is kept so that
        # timezone offsets such as "+00-00" survive.
        return "".join(
            ch if ch.isascii() and (ch.isalnum() or ch in "._+-") else "_"
            for ch in part
        )

    safe_id = _sanitize(conversation_id)
    safe_ts = _sanitize(exported_at.replace(":", "-"))
    return f"converta_{safe_id}_{safe_ts}.{ext.lstrip('.')}"
def _safe_join(values: Any, sep: str = "; ") -> str:
    """Flatten *values* into a single display string.

    A non-empty list is joined with *sep*, skipping ``None`` entries and
    converting the rest with ``str``. Any other truthy value is returned as
    ``str(values)``. Every falsy value (``None``, ``[]``, ``""``, ``0``)
    yields the empty string.
    """
    if isinstance(values, list) and values:
        return sep.join(str(entry) for entry in values if entry is not None)
    return str(values) if values else ""
def _extract_evidence_ids(evidence: Any) -> List[str]:
    """Collect the ``evidence_id`` values from a list of evidence dicts.

    Anything that is not a list yields ``[]``. Within the list, entries that
    are not dicts are ignored, as are ids that are missing, empty, or not
    strings. Order of the remaining ids is preserved.
    """
    if not isinstance(evidence, list):
        return []
    candidates = (
        entry.get("evidence_id") for entry in evidence if isinstance(entry, dict)
    )
    return [ev_id for ev_id in candidates if isinstance(ev_id, str) and ev_id]
def _export_json_bytes(payload: ExportRequest) -> bytes:
    """Serialize *payload* as a pretty-printed, UTF-8 encoded JSON document.

    The document has the keys ``conversation_id``, ``exported_at``,
    ``messages`` and ``resources``, in that order. If the request carries no
    ``exported_at`` value, the server's current local time in ISO format is
    used instead. Non-ASCII characters are written as-is, not escaped.
    """
    timestamp = payload.exported_at
    if not timestamp:
        timestamp = datetime.now().isoformat()

    document: Dict[str, Any] = {}
    document["conversation_id"] = payload.conversation_id
    document["exported_at"] = timestamp
    document["messages"] = [message.model_dump() for message in payload.messages]
    document["resources"] = payload.resources

    rendered = json.dumps(document, indent=2, ensure_ascii=False)
    return rendered.encode("utf-8")
def _export_xlsx_bytes(payload: ExportRequest) -> bytes:
    """Render *payload* as an Excel workbook and return the ``.xlsx`` bytes.

    The workbook has six sheets, always in this order and always present
    (possibly with a header row only): ``Metadata``, ``Transcript``,
    ``EvidenceCatalog``, ``Themes``, ``CareExperience`` and ``TopDownCodes``.
    Header rows are bold and frozen; every data cell wraps text.

    Everything written comes from the client request, so each value is
    normalised before it reaches openpyxl: control characters that openpyxl
    rejects are removed, containers are stringified instead of raising, and
    strings that start with ``=`` are stored as text rather than as formulas.

    Args:
        payload: The export request. ``payload.resources`` is read with
            ``.get`` and every entry is type-checked before use, so missing
            or malformed sections simply produce no rows.

    Returns:
        The serialized workbook.

    Raises:
        ImportError: If openpyxl is not installed (it is imported lazily).
    """
    from openpyxl import Workbook
    from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
    from openpyxl.styles import Alignment, Font

    exported_at = payload.exported_at or datetime.now().isoformat()
    resources = payload.resources
    # Sort position for catalog entries that lack an integer index, so they
    # are listed after every properly indexed entry.
    unknown_position = 10**9

    wb = Workbook()
    header_font = Font(bold=True)
    header_alignment = Alignment(wrap_text=True, vertical="top")
    cell_wrap = Alignment(wrap_text=True, vertical="top")

    def cell_value(value: Any) -> Any:
        """Return *value* in a form openpyxl accepts as a cell value."""
        if value is None or isinstance(value, (bool, int, float)):
            return value
        # Lists/dicts in a scalar slot would make openpyxl raise ValueError.
        text = value if isinstance(value, str) else str(value)
        # Control characters would make openpyxl raise IllegalCharacterError.
        return ILLEGAL_CHARACTERS_RE.sub("", text)

    def add_sheet(title: str, headers: List[str]) -> Any:
        """Create a sheet with a bold header row that stays visible on scroll."""
        ws = wb.create_sheet(title=title)
        ws.append(headers)
        for col_idx in range(1, len(headers) + 1):
            cell = ws.cell(row=1, column=col_idx)
            cell.font = header_font
            cell.alignment = header_alignment
        ws.freeze_panes = "A2"
        return ws

    def add_row(ws: Any, values: List[Any]) -> None:
        """Append one data row after normalising every value."""
        ws.append([cell_value(value) for value in values])

    def code_row(index: int, item: Dict[str, Any]) -> List[Any]:
        """Columns shared by Themes and TopDownCodes rows."""
        return [
            index,
            item.get("code", "") or "",
            item.get("summary", "") or "",
            item.get("confidence", ""),
            _safe_join(_extract_evidence_ids(item.get("evidence"))),
        ]

    def add_code_rows(ws: Any, category: Any, items: List[Any]) -> None:
        """Write one TopDownCodes row per dict in *items* under *category*."""
        for index, item in enumerate(items):
            if isinstance(item, dict):
                add_row(ws, [str(category)] + code_row(index, item))

    # Drop the sheet openpyxl creates by default so only named sheets remain.
    wb.remove(wb.active)

    # --- Metadata -----------------------------------------------------
    ws_meta = add_sheet("Metadata", ["key", "value"])
    add_row(ws_meta, ["conversation_id", payload.conversation_id])
    add_row(ws_meta, ["exported_at", exported_at])
    for key in ("schema_version", "analysis_prompt_version"):
        version = resources.get(key)
        if version is not None:
            add_row(ws_meta, [key, str(version)])

    # --- Transcript ---------------------------------------------------
    ws_tx = add_sheet("Transcript", ["message_index", "time", "role", "persona", "text"])
    for idx, msg in enumerate(payload.messages):
        add_row(ws_tx, [idx, msg.time or "", msg.role, msg.persona or "", msg.text])

    # --- EvidenceCatalog ----------------------------------------------
    ws_ev = add_sheet(
        "EvidenceCatalog",
        ["evidence_id", "message_index", "sentence_index", "sentence_text"],
    )
    evidence_catalog = resources.get("evidence_catalog") or {}
    if isinstance(evidence_catalog, dict):

        def sort_key(pair: Any) -> Any:
            # Order by (message_index, sentence_index); malformed entries last.
            _, val = pair
            if not isinstance(val, dict):
                return (unknown_position, unknown_position)
            mi = val.get("message_index")
            si = val.get("sentence_index")
            return (
                mi if isinstance(mi, int) else unknown_position,
                si if isinstance(si, int) else unknown_position,
            )

        for evidence_id, entry in sorted(evidence_catalog.items(), key=sort_key):
            if not isinstance(entry, dict):
                continue
            add_row(
                ws_ev,
                [
                    str(evidence_id),
                    entry.get("message_index", ""),
                    entry.get("sentence_index", ""),
                    entry.get("text", "") or "",
                ],
            )

    # --- Themes -------------------------------------------------------
    ws_themes = add_sheet("Themes", ["index", "code", "summary", "confidence", "evidence_ids"])
    themes = resources.get("themes")
    if not isinstance(themes, list):
        # Fall back to the "health_situations" key when "themes" is absent
        # or not a list.
        themes = resources.get("health_situations") or []
    if isinstance(themes, list):
        for idx, item in enumerate(themes):
            if isinstance(item, dict):
                add_row(ws_themes, code_row(idx, item))

    # --- CareExperience -----------------------------------------------
    ws_care = add_sheet(
        "CareExperience",
        ["tone", "summary", "confidence", "reasons", "evidence_ids"],
    )
    care = resources.get("care_experience") or {}
    if isinstance(care, dict):
        # One row per tone, in fixed order; a missing tone gives a blank row.
        for tone in ("positive", "mixed", "negative", "neutral"):
            box = care.get(tone) or {}
            if not isinstance(box, dict):
                continue
            add_row(
                ws_care,
                [
                    tone,
                    box.get("summary", "") or "",
                    box.get("confidence", ""),
                    _safe_join(box.get("reasons") or [], sep=" | "),
                    _safe_join(_extract_evidence_ids(box.get("evidence"))),
                ],
            )

    # --- TopDownCodes -------------------------------------------------
    ws_td = add_sheet(
        "TopDownCodes",
        ["category", "index", "code", "summary", "confidence", "evidence_ids"],
    )
    td_book = resources.get("top_down_codebook")
    if isinstance(td_book, dict) and isinstance(td_book.get("categories"), list):
        # Codebook shape: {"categories": [{"label"/"category_id", "items"}]}.
        for cat in td_book["categories"]:
            if not isinstance(cat, dict):
                continue
            label = cat.get("label") or cat.get("category_id") or "Category"
            items = cat.get("items") or []
            if isinstance(items, list):
                add_code_rows(ws_td, label, items)
    else:
        # Alternative shape: {"<category>": [items]} under "top_down_codes".
        top_down = resources.get("top_down_codes") or {}
        if isinstance(top_down, dict):
            for category, items in top_down.items():
                if isinstance(items, list):
                    add_code_rows(ws_td, category, items)

    # --- Final pass over all data cells -------------------------------
    for ws in wb.worksheets:
        for row in ws.iter_rows(min_row=2):
            for cell in row:
                cell.alignment = cell_wrap
                # openpyxl marks any string starting with "=" as a formula;
                # exported text must never be evaluated by the spreadsheet.
                if cell.data_type == "f":
                    cell.data_type = "s"

    buf = io.BytesIO()
    wb.save(buf)
    return buf.getvalue()
@router.post("/export/json")
async def export_json(payload: ExportRequest) -> Response:
    """Return the conversation export as a downloadable JSON attachment.

    The export timestamp is resolved once (client value, else the server's
    current time) and pinned on a copy of the request, so the ``exported_at``
    field inside the document and the timestamp in the file name always
    agree, even when the client sent none.
    """
    exported_at = payload.exported_at or datetime.now().isoformat()
    # Without this, the serializer would call datetime.now() a second time
    # and could record a different instant than the file name shows.
    stamped = payload.model_copy(update={"exported_at": exported_at})
    data = _export_json_bytes(stamped)
    filename = _filename(stamped.conversation_id, exported_at, "json")
    headers = {"Content-Disposition": f'attachment; filename="{filename}"'}
    return Response(content=data, media_type="application/json", headers=headers)
@router.post("/export/xlsx")
async def export_xlsx(payload: ExportRequest) -> Response:
    """Return the conversation export as a downloadable Excel workbook.

    The export timestamp is resolved once (client value, else the server's
    current time) and pinned on a copy of the request, so the Metadata sheet
    and the file name always show the same instant.

    Raises:
        HTTPException: 500 if openpyxl cannot be imported, or if building the
            workbook fails for any other reason. The two cases carry
            different ``detail`` messages.
    """
    exported_at = payload.exported_at or datetime.now().isoformat()
    stamped = payload.model_copy(update={"exported_at": exported_at})
    try:
        data = _export_xlsx_bytes(stamped)
    except ImportError as e:
        # The workbook builder imports openpyxl lazily; only this case means
        # the dependency is actually missing.
        raise HTTPException(status_code=500, detail=f"openpyxl not available: {e}") from e
    except Exception as e:
        # Any other failure is a workbook-building problem and must not be
        # reported as a missing dependency.
        raise HTTPException(status_code=500, detail=f"Excel export failed: {e}") from e
    filename = _filename(stamped.conversation_id, exported_at, "xlsx")
    headers = {"Content-Disposition": f'attachment; filename="{filename}"'}
    return Response(
        content=data,
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        headers=headers,
    )