File size: 9,318 Bytes
66700c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7fd580
 
 
 
 
66700c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7fd580
66700c2
 
 
 
 
 
 
a7fd580
66700c2
 
a7fd580
 
 
66700c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7fd580
66700c2
 
 
 
 
 
 
 
 
 
 
a7fd580
 
 
 
 
 
 
 
66700c2
2568a5f
 
 
 
 
 
66700c2
 
 
a7fd580
 
 
 
 
 
 
 
 
66700c2
 
 
 
 
 
 
 
 
a7fd580
 
 
 
 
 
 
 
 
66700c2
 
b5897db
 
 
 
 
 
 
66700c2
 
 
 
 
 
a7fd580
 
b5897db
a7fd580
 
 
 
 
 
 
b5897db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66700c2
 
 
 
 
 
 
 
a7fd580
66700c2
a7fd580
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66700c2
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
"""Export endpoints (JSON + Excel workbook)."""

from __future__ import annotations

import io
import json
from datetime import datetime
from typing import Any, Dict, List, Optional

from fastapi import APIRouter, HTTPException
from fastapi.responses import Response
from pydantic import BaseModel, Field

# Router collecting the export endpoints defined in this module; it is created
# with an empty path prefix and tags its routes as "export".
router = APIRouter(prefix="", tags=["export"])


class ExportMessage(BaseModel):
    """A single transcript message included in an export request."""

    # Required; written verbatim to the "role" column of the Transcript sheet.
    role: str
    # Optional; exported as an empty cell in the workbook when missing.
    persona: Optional[str] = None
    # Optional, kept as the string supplied by the client (no parsing is done here).
    time: Optional[str] = None
    # Required message body.
    text: str


class ExportRequest(BaseModel):
    """Request body shared by the JSON and Excel export endpoints."""

    # Required identifier; it is also embedded in the download filename.
    conversation_id: str = Field(..., description="Conversation id from the UI session")
    # Optional; when missing or empty the exporters fall back to
    # datetime.now().isoformat() (naive server-local time).
    exported_at: Optional[str] = Field(default=None, description="ISO timestamp from the client")
    # Transcript messages, exported in the order given.
    messages: List[ExportMessage]
    # Free-form analysis output. The exporters read keys such as
    # "evidence_catalog", "themes", "care_experience" and "top_down_codebook";
    # defaults to an empty dict.
    resources: Dict[str, Any] = Field(default_factory=dict, description="Resource agent output + evidence_catalog")


def _filename(conversation_id: str, exported_at: str, ext: str) -> str:
    safe_ts = exported_at.replace(":", "-")
    return f"converta_{conversation_id}_{safe_ts}.{ext.lstrip('.')}"


def _safe_join(values: Any, sep: str = "; ") -> str:
    if not values:
        return ""
    if isinstance(values, list):
        return sep.join([str(v) for v in values if v is not None])
    return str(values)


def _extract_evidence_ids(evidence: Any) -> List[str]:
    if not evidence or not isinstance(evidence, list):
        return []
    evidence_ids: List[str] = []
    for item in evidence:
        if isinstance(item, dict):
            ev_id = item.get("evidence_id")
            if isinstance(ev_id, str) and ev_id:
                evidence_ids.append(ev_id)
    return evidence_ids


def _export_json_bytes(payload: ExportRequest) -> bytes:
    exported_at = payload.exported_at or datetime.now().isoformat()
    export_obj = {
        "conversation_id": payload.conversation_id,
        "exported_at": exported_at,
        "messages": [m.model_dump() for m in payload.messages],
        "resources": payload.resources,
    }
    return json.dumps(export_obj, ensure_ascii=False, indent=2).encode("utf-8")


# Control characters that openpyxl refuses to store in cell text (it raises
# IllegalCharacterError for them). Mapped to None so str.translate drops them.
_XLSX_ILLEGAL_CHARS = dict.fromkeys([*range(0x00, 0x09), 0x0B, 0x0C, *range(0x0E, 0x20)])

# Sort position for evidence entries without an integer index (sorts them last).
_XLSX_SORT_LAST = 10**9


def _xlsx_cell(value: Any) -> Any:
    """Coerce *value* into something openpyxl can store in a cell.

    ``None`` and numbers/booleans pass through unchanged. Strings lose the
    control characters openpyxl rejects. Anything else (lists or dicts from the
    free-form ``resources`` payload) is rendered as compact JSON text instead
    of making the whole export fail.
    """
    if value is None or isinstance(value, (bool, int, float)):
        return value
    if not isinstance(value, str):
        value = json.dumps(value, ensure_ascii=False, default=str)
    return value.translate(_XLSX_ILLEGAL_CHARS)


def _coded_item_cells(index: int, item: Dict[str, Any]) -> List[Any]:
    """Return the ``index, code, summary, confidence, evidence_ids`` cells of a coded item."""
    return [
        index,
        item.get("code", "") or "",
        item.get("summary", "") or "",
        item.get("confidence", ""),
        _safe_join(_extract_evidence_ids(item.get("evidence"))),
    ]


def _export_xlsx_bytes(payload: ExportRequest) -> bytes:
    """Render the export request as an Excel workbook and return its bytes.

    The workbook contains, in order, the sheets ``Metadata``, ``Transcript``,
    ``EvidenceCatalog``, ``Themes``, ``CareExperience`` and ``TopDownCodes``.
    Each has a bold, frozen header row and wrapped, top-aligned cells. Sections
    of ``payload.resources`` that are missing or have an unexpected type are
    skipped, leaving only the header row on that sheet.

    Every data cell is passed through ``_xlsx_cell``, and cells that openpyxl
    typed as formulas (text starting with ``=``) are forced back to plain
    strings, so user-supplied text is never evaluated by the spreadsheet
    application.

    Args:
        payload: The validated export request.

    Returns:
        The serialized ``.xlsx`` file content.

    Raises:
        ImportError: If openpyxl is not installed (it is imported lazily here).
    """
    from openpyxl import Workbook
    from openpyxl.styles import Alignment, Font

    exported_at = payload.exported_at or datetime.now().isoformat()
    resources = payload.resources

    wb = Workbook()
    # Drop the default sheet so the workbook only holds the named sheets below.
    wb.remove(wb.active)

    header_font = Font(bold=True)
    header_alignment = Alignment(wrap_text=True, vertical="top")
    cell_wrap = Alignment(wrap_text=True, vertical="top")

    def add_sheet(title: str, headers: List[str]):
        """Create a sheet with a styled header row and the pane frozen below it."""
        ws = wb.create_sheet(title=title)
        ws.append(headers)
        for col_idx in range(1, len(headers) + 1):
            cell = ws.cell(row=1, column=col_idx)
            cell.font = header_font
            cell.alignment = header_alignment
        ws.freeze_panes = "A2"
        return ws

    def add_row(ws, values: List[Any]) -> None:
        """Append one data row, coercing every value to an Excel-safe cell."""
        ws.append([_xlsx_cell(value) for value in values])

    # --- Metadata -------------------------------------------------------
    ws_meta = add_sheet("Metadata", ["key", "value"])
    add_row(ws_meta, ["conversation_id", payload.conversation_id])
    add_row(ws_meta, ["exported_at", exported_at])
    for meta_key in ("schema_version", "analysis_prompt_version"):
        meta_value = resources.get(meta_key)
        if meta_value is not None:
            add_row(ws_meta, [meta_key, str(meta_value)])

    # --- Transcript -----------------------------------------------------
    ws_tx = add_sheet("Transcript", ["message_index", "time", "role", "persona", "text"])
    for idx, msg in enumerate(payload.messages):
        add_row(ws_tx, [idx, msg.time or "", msg.role, msg.persona or "", msg.text])

    # --- EvidenceCatalog ------------------------------------------------
    evidence_catalog = resources.get("evidence_catalog") or {}
    ws_ev = add_sheet("EvidenceCatalog", ["evidence_id", "message_index", "sentence_index", "sentence_text"])
    if isinstance(evidence_catalog, dict):

        def sort_key(item):
            # Order by (message_index, sentence_index); malformed entries go last.
            _, val = item
            if not isinstance(val, dict):
                return (_XLSX_SORT_LAST, _XLSX_SORT_LAST)
            mi = val.get("message_index")
            si = val.get("sentence_index")
            return (
                mi if isinstance(mi, int) else _XLSX_SORT_LAST,
                si if isinstance(si, int) else _XLSX_SORT_LAST,
            )

        for evidence_id, entry in sorted(evidence_catalog.items(), key=sort_key):
            if not isinstance(entry, dict):
                continue
            add_row(
                ws_ev,
                [
                    str(evidence_id),
                    entry.get("message_index", ""),
                    entry.get("sentence_index", ""),
                    entry.get("text", "") or "",
                ],
            )

    # --- Themes (falls back to the "health_situations" key) ---------------
    ws_hs = add_sheet("Themes", ["index", "code", "summary", "confidence", "evidence_ids"])
    themes = resources.get("themes")
    if not isinstance(themes, list):
        themes = resources.get("health_situations") or []
    if isinstance(themes, list):
        for idx, item in enumerate(themes):
            if isinstance(item, dict):
                add_row(ws_hs, _coded_item_cells(idx, item))

    # --- CareExperience -------------------------------------------------
    ws_care = add_sheet("CareExperience", ["tone", "summary", "confidence", "reasons", "evidence_ids"])
    care = resources.get("care_experience") or {}
    if isinstance(care, dict):
        # One row per tone is always written, even when the tone has no data.
        for tone in ("positive", "mixed", "negative", "neutral"):
            box = care.get(tone) or {}
            if not isinstance(box, dict):
                continue
            add_row(
                ws_care,
                [
                    tone,
                    box.get("summary", "") or "",
                    box.get("confidence", ""),
                    _safe_join(box.get("reasons") or [], sep=" | "),
                    _safe_join(_extract_evidence_ids(box.get("evidence"))),
                ],
            )

    # --- TopDownCodes ---------------------------------------------------
    ws_td = add_sheet("TopDownCodes", ["category", "index", "code", "summary", "confidence", "evidence_ids"])

    def add_top_down_items(category_label: str, items: Any) -> None:
        """Write one row per dict item under *category_label*; ignore non-lists."""
        if not isinstance(items, list):
            return
        for idx, item in enumerate(items):
            if isinstance(item, dict):
                add_row(ws_td, [category_label, *_coded_item_cells(idx, item)])

    td_book = resources.get("top_down_codebook")
    if isinstance(td_book, dict) and isinstance(td_book.get("categories"), list):
        for cat in td_book.get("categories") or []:
            if not isinstance(cat, dict):
                continue
            label = cat.get("label") or cat.get("category_id") or "Category"
            add_top_down_items(str(label), cat.get("items") or [])
    else:
        # Fallback shape: a mapping of category name -> list of items.
        top_down = resources.get("top_down_codes") or {}
        if isinstance(top_down, dict):
            for category, arr in top_down.items():
                add_top_down_items(str(category), arr)

    for ws in wb.worksheets:
        for row in ws.iter_rows(min_row=2):
            for cell in row:
                # openpyxl types text starting with "=" as a formula; keep
                # exported text literal so it is never evaluated.
                if cell.data_type == "f":
                    cell.data_type = "s"
                cell.alignment = cell_wrap

    buf = io.BytesIO()
    wb.save(buf)
    return buf.getvalue()


@router.post("/export/json")
async def export_json(payload: ExportRequest) -> Response:
    """Return the conversation export as a downloadable JSON attachment.

    The export timestamp is resolved once (client value, else the current
    local time) and written back onto a copy of the payload. This keeps the
    timestamp inside the JSON document identical to the one in the filename.
    """
    exported_at = payload.exported_at or datetime.now().isoformat()
    # Stamp a copy so the serializer does not take a second, later "now".
    stamped = payload.model_copy(update={"exported_at": exported_at})
    data = _export_json_bytes(stamped)
    filename = _filename(payload.conversation_id, exported_at, "json")
    headers = {"Content-Disposition": f'attachment; filename="{filename}"'}
    return Response(content=data, media_type="application/json", headers=headers)


@router.post("/export/xlsx")
async def export_xlsx(payload: ExportRequest) -> Response:
    """Return the conversation export as a downloadable Excel workbook.

    The export timestamp is resolved once (client value, else the current
    local time) and written back onto a copy of the payload. This keeps the
    timestamp inside the workbook identical to the one in the filename.

    Raises:
        HTTPException: 500 with "openpyxl not available" when the optional
            dependency cannot be imported, and 500 with "Excel export failed"
            for any other error while building the workbook.
    """
    exported_at = payload.exported_at or datetime.now().isoformat()
    # Stamp a copy so the workbook builder does not take a second, later "now".
    stamped = payload.model_copy(update={"exported_at": exported_at})

    try:
        data = _export_xlsx_bytes(stamped)
    except ImportError as e:
        raise HTTPException(status_code=500, detail=f"openpyxl not available: {e}") from e
    except Exception as e:
        # Top-level boundary: report the real failure instead of blaming a
        # missing dependency.
        raise HTTPException(status_code=500, detail=f"Excel export failed: {e}") from e

    filename = _filename(payload.conversation_id, exported_at, "xlsx")
    headers = {"Content-Disposition": f'attachment; filename="{filename}"'}
    return Response(
        content=data,
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        headers=headers,
    )