Spaces:

MikelWL
/

ConverTA

Sleeping

App Files Files Community

MikelWL committed on Jan 19

Commit

01da502

1 Parent(s): 3a61902

Upload: analyze pasted text and PDFs

Browse files

Files changed (5) hide show

backend/api/conversation_service.py +162 -144
backend/api/routes.py +195 -2
docs/roadmap.md +11 -5
frontend/pages/main_page.py +198 -37
requirements.txt +3 -0

backend/api/conversation_service.py CHANGED Viewed

@@ -78,6 +78,161 @@ def _normalize_confidence(value: Any) -> Optional[float]:
     return max(0.0, min(1.0, confidence))
 class ConversationStatus(Enum):
     """Status of managed conversations."""
     STARTING = "starting"
@@ -403,146 +558,14 @@ class ConversationService:
         conv_info = self.active_conversations.get(conversation_id)
         if not conv_info:
             return
-        llm_params = self._build_llm_parameters()
-        client_kwargs: Dict[str, Any] = {
-            "host": conv_info.host,
-            "model": conv_info.model,
-            **llm_params,
-        }
-        client = create_llm_client(conv_info.llm_backend, **client_kwargs)
-        schema_version = "7"
-        analysis_prompt_version = "v2"
-        evidence_catalog: Dict[str, Dict[str, Any]] = {}
-        for message in transcript:
-            message_index = message.get("index")
-            content = message.get("content", "") or ""
-            if not isinstance(message_index, int):
-                continue
-            for sentence_index, sentence in enumerate(_split_sentences(content)):
-                evidence_id = f"m{message_index}s{sentence_index}"
-                evidence_catalog[evidence_id] = {
-                    "message_index": message_index,
-                    "sentence_index": sentence_index,
-                    "text": sentence,
-                }
-        system_prompt = (
-            "You are a clinical research 'resource agent'. You are given a transcript of a simulated "
-            "health survey conversation between a surveyor and a patient. Your task is to extract "
-            "post-hoc insights as strict JSON for a UI.\n\n"
-            "Rules:\n"
-            "- Use ONLY the provided transcript.\n"
-            "- Output MUST be valid JSON only (no markdown, no backticks).\n"
-            "- Evidence must be selected from the provided evidence catalog by evidence_id.\n"
-            "- Do NOT invent quotes. Do NOT paraphrase evidence. Cite by evidence_id only.\n"
-            "- For care experience: do not duplicate the same evidence_id across positive/negative/mixed/neutral.\n"
-            "  If a sentence supports both positive and negative interpretations, put it in care_experience.mixed.\n"
-            "- confidence must be a number between 0 and 1.\n"
-            "- For health_situations: include a short code label (1-3 words) in addition to the longer summary.\n"
-            "- For top_down_codes categories: include a short code label (1-3 words) and cite evidence.\n"
-            "- Prefer fewer, higher-confidence items.\n"
-        )
-        evidence_catalog_json = json.dumps(evidence_catalog, ensure_ascii=False)
-        user_prompt = (
-            "Evidence catalog (JSON object mapping evidence_id -> sentence):\n"
-            f"{evidence_catalog_json}\n\n"
-            "Return JSON matching this schema:\n"
-            "{\n"
-            f"  \"schema_version\": \"{schema_version}\",\n"
-            f"  \"analysis_prompt_version\": \"{analysis_prompt_version}\",\n"
-            "  \"health_situations\": [\n"
-            "    {\n"
-            "      \"code\": string,  // 1-3 word label\n"
-            "      \"summary\": string,\n"
-            "      \"evidence\": [ {\"evidence_id\": string} ],\n"
-            "      \"confidence\": number  // 0..1\n"
-            "    }\n"
-            "  ],\n"
-            "  \"care_experience\": {\n"
-            "    \"positive\": {\n"
-            "      \"summary\": string,\n"
-            "      \"reasons\": [string],\n"
-            "      \"evidence\": [ {\"evidence_id\": string} ],\n"
-            "      \"confidence\": number  // 0..1\n"
-            "    },\n"
-            "    \"mixed\": {\n"
-            "      \"summary\": string,\n"
-            "      \"reasons\": [string],\n"
-            "      \"evidence\": [ {\"evidence_id\": string} ],\n"
-            "      \"confidence\": number  // 0..1\n"
-            "    },\n"
-            "    \"negative\": {\n"
-            "      \"summary\": string,\n"
-            "      \"reasons\": [string],\n"
-            "      \"evidence\": [ {\"evidence_id\": string} ],\n"
-            "      \"confidence\": number  // 0..1\n"
-            "    },\n"
-            "    \"neutral\": {\n"
-            "      \"summary\": string,\n"
-            "      \"reasons\": [string],\n"
-            "      \"evidence\": [ {\"evidence_id\": string} ],\n"
-            "      \"confidence\": number  // 0..1\n"
-            "    }\n"
-            "  }\n"
-            "  \"top_down_codes\": {\n"
-            "    \"symptoms_concerns\": [\n"
-            "      {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number}\n"
-            "    ],\n"
-            "    \"daily_management\": [\n"
-            "      {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number}\n"
-            "    ],\n"
-            "    \"barriers_constraints\": [\n"
-            "      {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number}\n"
-            "    ],\n"
-            "    \"support_resources\": [\n"
-            "      {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number}\n"
-            "    ]\n"
-            "  }\n"
-            "  \"top_down_codes\": {\n"
-            "    \"symptoms_concerns\": [ {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number  // 0..1 } ],\n"
-            "    \"daily_management\": [ {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number  // 0..1 } ],\n"
-            "    \"barriers_constraints\": [ {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number  // 0..1 } ],\n"
-            "    \"support_resources\": [ {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number  // 0..1 } ]\n"
-            "  }\n"
-            "}\n"
-        )
         try:
-            raw = await client.generate(prompt=user_prompt, system_prompt=system_prompt, temperature=0.2)
-            parsed = json.loads(raw)
-            parsed["evidence_catalog"] = evidence_catalog
-            parsed["analysis_prompt_version"] = analysis_prompt_version
-            for item in parsed.get("health_situations", []) or []:
-                normalized = _normalize_confidence(item.get("confidence"))
-                if normalized is not None:
-                    item["confidence"] = normalized
-            care_experience = parsed.get("care_experience") or {}
-            for key in ("positive", "mixed", "negative", "neutral"):
-                box = care_experience.get(key)
-                if isinstance(box, dict):
-                    normalized = _normalize_confidence(box.get("confidence"))
-                    if normalized is not None:
-                        box["confidence"] = normalized
-            top_down_codes = parsed.get("top_down_codes") or {}
-            for key in ("symptoms_concerns", "daily_management", "barriers_constraints", "support_resources"):
-                items = top_down_codes.get(key) or []
-                if not isinstance(items, list):
-                    continue
-                for item in items:
-                    if not isinstance(item, dict):
-                        continue
-                    normalized = _normalize_confidence(item.get("confidence"))
-                    if normalized is not None:
-                        item["confidence"] = normalized
             await self.websocket_manager.send_to_conversation(conversation_id, {
                 "type": "resource_agent_result",
                 "conversation_id": conversation_id,
@@ -564,11 +587,6 @@ class ConversationService:
                 "error": str(e),
                 "timestamp": datetime.now().isoformat(),
             })
-        finally:
-            try:
-                await client.close()
-            except Exception:
-                pass
     def _build_llm_parameters(self) -> Dict[str, Any]:
         """Prepare keyword arguments for LLM client creation."""

     return max(0.0, min(1.0, confidence))
+async def run_resource_agent_analysis(
+    *,
+    transcript: List[Dict[str, Any]],
+    llm_backend: str,
+    host: str,
+    model: str,
+    settings: AppSettings,
+) -> Dict[str, Any]:
+    """Run the resource agent analysis on an in-memory transcript and return parsed JSON.
+    Shared by the live conversation flow and ad-hoc analysis endpoints.
+    """
+    llm_params: Dict[str, Any] = {
+        "timeout": settings.llm.timeout,
+        "max_retries": settings.llm.max_retries,
+        "retry_delay": settings.llm.retry_delay,
+    }
+    if settings.llm.api_key:
+        llm_params["api_key"] = settings.llm.api_key
+    if settings.llm.site_url:
+        llm_params["site_url"] = settings.llm.site_url
+    if settings.llm.app_name:
+        llm_params["app_name"] = settings.llm.app_name
+    client = create_llm_client(
+        llm_backend,
+        host=host,
+        model=model,
+        **llm_params,
+    )
+    schema_version = "7"
+    analysis_prompt_version = "v2"
+    evidence_catalog: Dict[str, Dict[str, Any]] = {}
+    for message in transcript:
+        message_index = message.get("index")
+        content = message.get("content", "") or ""
+        if not isinstance(message_index, int):
+            continue
+        for sentence_index, sentence in enumerate(_split_sentences(content)):
+            evidence_id = f"m{message_index}s{sentence_index}"
+            evidence_catalog[evidence_id] = {
+                "message_index": message_index,
+                "sentence_index": sentence_index,
+                "text": sentence,
+            }
+    system_prompt = (
+        "You are a clinical research 'resource agent'. You are given a transcript of a simulated "
+        "health survey conversation between a surveyor and a patient. Your task is to extract "
+        "post-hoc insights as strict JSON for a UI.\n\n"
+        "Rules:\n"
+        "- Use ONLY the provided transcript.\n"
+        "- Output MUST be valid JSON only (no markdown, no backticks).\n"
+        "- Evidence must be selected from the provided evidence catalog by evidence_id.\n"
+        "- Do NOT invent quotes. Do NOT paraphrase evidence. Cite by evidence_id only.\n"
+        "- For care experience: do not duplicate the same evidence_id across positive/negative/mixed/neutral.\n"
+        "  If a sentence supports both positive and negative interpretations, put it in care_experience.mixed.\n"
+        "- confidence must be a number between 0 and 1.\n"
+        "- For health_situations: include a short code label (1-3 words) in addition to the longer summary.\n"
+        "- For top_down_codes categories: include a short code label (1-3 words) and cite evidence.\n"
+        "- Prefer fewer, higher-confidence items.\n"
+    )
+    evidence_catalog_json = json.dumps(evidence_catalog, ensure_ascii=False)
+    user_prompt = (
+        "Evidence catalog (JSON object mapping evidence_id -> sentence):\n"
+        f"{evidence_catalog_json}\n\n"
+        "Return JSON matching this schema:\n"
+        "{\n"
+        f"  \"schema_version\": \"{schema_version}\",\n"
+        f"  \"analysis_prompt_version\": \"{analysis_prompt_version}\",\n"
+        "  \"health_situations\": [\n"
+        "    {\n"
+        "      \"code\": string,  // 1-3 word label\n"
+        "      \"summary\": string,\n"
+        "      \"evidence\": [ {\"evidence_id\": string} ],\n"
+        "      \"confidence\": number  // 0..1\n"
+        "    }\n"
+        "  ],\n"
+        "  \"care_experience\": {\n"
+        "    \"positive\": {\n"
+        "      \"summary\": string,\n"
+        "      \"reasons\": [string],\n"
+        "      \"evidence\": [ {\"evidence_id\": string} ],\n"
+        "      \"confidence\": number  // 0..1\n"
+        "    },\n"
+        "    \"mixed\": {\n"
+        "      \"summary\": string,\n"
+        "      \"reasons\": [string],\n"
+        "      \"evidence\": [ {\"evidence_id\": string} ],\n"
+        "      \"confidence\": number  // 0..1\n"
+        "    },\n"
+        "    \"negative\": {\n"
+        "      \"summary\": string,\n"
+        "      \"reasons\": [string],\n"
+        "      \"evidence\": [ {\"evidence_id\": string} ],\n"
+        "      \"confidence\": number  // 0..1\n"
+        "    },\n"
+        "    \"neutral\": {\n"
+        "      \"summary\": string,\n"
+        "      \"reasons\": [string],\n"
+        "      \"evidence\": [ {\"evidence_id\": string} ],\n"
+        "      \"confidence\": number  // 0..1\n"
+        "    }\n"
+        "  }\n"
+        "  \"top_down_codes\": {\n"
+        "    \"symptoms_concerns\": [ {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number  // 0..1 } ],\n"
+        "    \"daily_management\": [ {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number  // 0..1 } ],\n"
+        "    \"barriers_constraints\": [ {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number  // 0..1 } ],\n"
+        "    \"support_resources\": [ {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number  // 0..1 } ]\n"
+        "  }\n"
+        "}\n"
+    )
+    try:
+        raw = await client.generate(prompt=user_prompt, system_prompt=system_prompt, temperature=0.2)
+        parsed = json.loads(raw)
+        parsed["evidence_catalog"] = evidence_catalog
+        parsed["analysis_prompt_version"] = analysis_prompt_version
+        for item in parsed.get("health_situations", []) or []:
+            normalized = _normalize_confidence(item.get("confidence"))
+            if normalized is not None:
+                item["confidence"] = normalized
+        care_experience = parsed.get("care_experience") or {}
+        for key in ("positive", "mixed", "negative", "neutral"):
+            box = care_experience.get(key)
+            if isinstance(box, dict):
+                normalized = _normalize_confidence(box.get("confidence"))
+                if normalized is not None:
+                    box["confidence"] = normalized
+        top_down_codes = parsed.get("top_down_codes") or {}
+        for key in ("symptoms_concerns", "daily_management", "barriers_constraints", "support_resources"):
+            items = top_down_codes.get(key) or []
+            if not isinstance(items, list):
+                continue
+            for item in items:
+                if not isinstance(item, dict):
+                    continue
+                normalized = _normalize_confidence(item.get("confidence"))
+                if normalized is not None:
+                    item["confidence"] = normalized
+        return parsed
+    finally:
+        try:
+            await client.close()
+        except Exception:
+            pass
 class ConversationStatus(Enum):
     """Status of managed conversations."""
     STARTING = "starting"
         conv_info = self.active_conversations.get(conversation_id)
         if not conv_info:
             return
         try:
+            parsed = await run_resource_agent_analysis(
+                transcript=transcript,
+                llm_backend=conv_info.llm_backend,
+                host=conv_info.host,
+                model=conv_info.model,
+                settings=self.settings,
+            )
             await self.websocket_manager.send_to_conversation(conversation_id, {
                 "type": "resource_agent_result",
                 "conversation_id": conversation_id,
                 "error": str(e),
                 "timestamp": datetime.now().isoformat(),
             })
     def _build_llm_parameters(self) -> Dict[str, Any]:
         """Prepare keyword arguments for LLM client creation."""

backend/api/routes.py CHANGED Viewed

@@ -19,7 +19,7 @@ Example:
     }
 """
-from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel, Field
 from typing import Any, Dict, List, Optional
 import logging
@@ -28,7 +28,10 @@ import json
 from datetime import datetime
 from fastapi.responses import Response
-from .conversation_service import get_conversation_service
 from backend.core.persona_system import PersonaSystem
 # Setup logging
@@ -96,6 +99,18 @@ class ExportRequest(BaseModel):
     resources: Dict[str, Any] = Field(default_factory=dict, description="Resource agent output + evidence_catalog")
 # Initialize persona system
 persona_system = PersonaSystem()
@@ -334,6 +349,115 @@ def _extract_evidence_ids(evidence: Any) -> List[str]:
     return evidence_ids
 @router.post("/export/json")
 async def export_json(payload: ExportRequest) -> Response:
     exported_at = payload.exported_at or datetime.now().isoformat()
@@ -486,3 +610,72 @@ async def export_xlsx(payload: ExportRequest) -> Response:
         media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
         headers=headers,
     )

     }
 """
+from fastapi import APIRouter, HTTPException, UploadFile, File, Form
 from pydantic import BaseModel, Field
 from typing import Any, Dict, List, Optional
 import logging
 from datetime import datetime
 from fastapi.responses import Response
+import re
+from config.settings import get_settings
+from .conversation_service import get_conversation_service, run_resource_agent_analysis
 from backend.core.persona_system import PersonaSystem
 # Setup logging
     resources: Dict[str, Any] = Field(default_factory=dict, description="Resource agent output + evidence_catalog")
+class AnalyzeTextRequest(BaseModel):
+    # Request body of the POST /analyze/text endpoint.
+    # `text` is the only required field (Field(...)); the endpoint additionally rejects
+    # whitespace-only text with a 400.
+    text: str = Field(..., description="Raw transcript text to analyze")
+    # When omitted, the endpoint generates an "analysis_<unix seconds>" id.
+    conversation_id: Optional[str] = Field(default=None, description="Optional client-generated id for this analysis run")
+    # Used as the persona label for transcript blocks that have no speaker label.
+    source_name: Optional[str] = Field(default=None, description="Optional label for the uploaded/pasted source")
+class AnalyzeTextResponse(BaseModel):
+    # Response returned by both analysis endpoints (/analyze/text and /analyze/file).
+    # Id supplied by the client, or the one generated by the endpoint.
+    conversation_id: str
+    # Parsed transcript messages, one ExportMessage (role, persona, time, text) per block.
+    messages: List[ExportMessage]
+    # Result dict returned by run_resource_agent_analysis.
+    resources: Dict[str, Any]
 # Initialize persona system
 persona_system = PersonaSystem()
     return evidence_ids
+def _parse_transcript_text(text: str, source_name: Optional[str]) -> List[Dict[str, Any]]:
+    normalized = (text or "").replace("\r\n", "\n").replace("\r", "\n").strip()
+    if not normalized:
+        return []
+    label = source_name or "Uploaded transcript"
+    lines = [line.rstrip() for line in normalized.split("\n")]
+    labeled = False
+    blocks: List[Dict[str, Any]] = []
+    current_role: Optional[str] = None
+    current_lines: List[str] = []
+    def flush():
+        nonlocal current_role, current_lines
+        content = "\n".join([l for l in current_lines]).strip()
+        if content:
+            role = current_role or "transcript"
+            persona = "Surveyor" if role == "surveyor" else ("Patient" if role == "patient" else label)
+            blocks.append({
+                "role": role,
+                "persona": persona,
+                "content": content,
+            })
+        current_role = None
+        current_lines = []
+    pattern = re.compile(r"^(surveyor|interviewer|patient|respondent)\s*:\s*(.*)$", re.IGNORECASE)
+    for line in lines:
+        stripped = line.strip()
+        if not stripped:
+            if current_lines:
+                current_lines.append("")
+            continue
+        match = pattern.match(stripped)
+        if match:
+            labeled = True
+            flush()
+            speaker = match.group(1).lower()
+            current_role = "surveyor" if speaker in ("surveyor", "interviewer") else "patient"
+            remainder = match.group(2).strip()
+            if remainder:
+                current_lines.append(remainder)
+            continue
+        if current_role is None:
+            current_role = "transcript"
+        current_lines.append(line)
+    flush()
+    if labeled:
+        return blocks
+    # If nothing was labeled, split by blank lines into paragraphs for better evidence traceability.
+    paragraphs = [p.strip() for p in re.split(r"\n\s*\n+", normalized) if p.strip()]
+    return [{
+        "role": "transcript",
+        "persona": label,
+        "content": p,
+    } for p in paragraphs] or [{
+        "role": "transcript",
+        "persona": label,
+        "content": normalized,
+    }]
+async def _analyze_from_text(*, text: str, conversation_id: str, source_name: Optional[str]) -> AnalyzeTextResponse:
+    settings = get_settings()
+    exported_at = datetime.now().isoformat()
+    parsed_messages = _parse_transcript_text(text, source_name)
+    if not parsed_messages:
+        raise HTTPException(status_code=400, detail="No content to analyze")
+    transcript: List[Dict[str, Any]] = []
+    ui_messages: List[ExportMessage] = []
+    for idx, msg in enumerate(parsed_messages):
+        transcript.append({
+            "index": idx,
+            "role": msg["role"],
+            "persona": msg.get("persona"),
+            "content": msg["content"],
+            "timestamp": exported_at,
+        })
+        ui_messages.append(ExportMessage(
+            role=msg["role"],
+            persona=msg.get("persona"),
+            time=exported_at,
+            text=msg["content"],
+        ))
+    resources = await run_resource_agent_analysis(
+        transcript=transcript,
+        llm_backend=settings.llm.backend,
+        host=settings.llm.host,
+        model=settings.llm.model,
+        settings=settings,
+    )
+    return AnalyzeTextResponse(
+        conversation_id=conversation_id,
+        messages=ui_messages,
+        resources=resources,
+    )
 @router.post("/export/json")
 async def export_json(payload: ExportRequest) -> Response:
     exported_at = payload.exported_at or datetime.now().isoformat()
         media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
         headers=headers,
     )
+@router.post("/analyze/text")
+async def analyze_text(payload: AnalyzeTextRequest) -> AnalyzeTextResponse:
+    if not isinstance(payload.text, str) or not payload.text.strip():
+        raise HTTPException(status_code=400, detail="text is required")
+    conversation_id = payload.conversation_id or f"analysis_{int(datetime.now().timestamp())}"
+    return await _analyze_from_text(
+        text=payload.text,
+        conversation_id=conversation_id,
+        source_name=payload.source_name,
+    )
+@router.post("/analyze/file")
+async def analyze_file(
+    file: UploadFile = File(...),
+    conversation_id: Optional[str] = Form(default=None),
+    source_name: Optional[str] = Form(default=None),
+) -> AnalyzeTextResponse:
+    data = await file.read()
+    if not data:
+        raise HTTPException(status_code=400, detail="Empty file")
+    inferred_name = source_name or file.filename or "Uploaded file"
+    cid = conversation_id or f"analysis_{int(datetime.now().timestamp())}"
+    filename = (file.filename or "").lower()
+    content_type = (file.content_type or "").lower()
+    is_pdf = filename.endswith(".pdf") or content_type == "application/pdf"
+    if is_pdf:
+        try:
+            from pypdf import PdfReader  # type: ignore
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=f"pypdf not available: {e}")
+        try:
+            reader = PdfReader(io.BytesIO(data))
+            chunks: List[str] = []
+            for page in reader.pages:
+                text = page.extract_text() or ""
+                text = text.strip()
+                if text:
+                    chunks.append(text)
+            extracted = "\n\n".join(chunks).strip()
+        except Exception as e:
+            raise HTTPException(status_code=400, detail=f"Failed to parse PDF: {e}")
+        if not extracted:
+            raise HTTPException(status_code=400, detail="No extractable text found in PDF")
+        return await _analyze_from_text(
+            text=extracted,
+            conversation_id=cid,
+            source_name=inferred_name,
+        )
+    # Best-effort: treat everything else as UTF-8 text.
+    decoded = data.decode("utf-8", errors="replace").strip()
+    if not decoded:
+        raise HTTPException(status_code=400, detail="No text content found in file")
+    return await _analyze_from_text(
+        text=decoded,
+        conversation_id=cid,
+        source_name=inferred_name,
+    )

docs/roadmap.md CHANGED Viewed

@@ -29,20 +29,26 @@ _Last updated: 2026-01-19_
 4. **Analysis on pasted/uploaded text**
    Add a panel to paste text or upload a file, run the same analysis pipeline, render results, and allow download.
-5. **Human ↔ Surveyor chat mode**
    Add a panel where a human chats as the patient with the surveyor agent (text input), while keeping the same analysis pipeline at end-of-session.
-6. **Persistent storage (HF Spaces `/data`)**
    Add a simple storage layer and persist runs (transcript + analysis) and user-created personas so they survive restarts/redeploys.
-7. **Run history browser**
    List prior runs, allow selecting one to reload transcript + analysis in the UI.
-8. **Configuration Panel (Expand Beyond MVP)**
    The UI already supports persona selection + per-role prompt additions (browser-local). Next steps: persona CRUD + validation, richer prompt/model settings, and server-side persistence.
-9. **Basic Test Coverage**
    Add smoke tests (mocked LLM responses) to prevent regressions in conversation flow and analysis schema parsing.
 ## Longer-Term Ideas

 4. **Analysis on pasted/uploaded text**
    Add a panel to paste text or upload a file, run the same analysis pipeline, render results, and allow download.
+   ✅ Implemented: “Upload Text” tab supports paste, text-file upload, and best-effort PDF text extraction; exports work (Excel + JSON).
+5. **Modularization / Separation of Concerns (refactor)**
+   Before adding more major UI modes, refactor to keep the codebase maintainable:
+   - Split the growing frontend UI logic (currently concentrated in `frontend/pages/main_page.py`) into smaller, focused modules/components.
+   - Split API routers so `backend/api/routes.py` doesn’t become a catch-all (e.g., separate export + analysis routes).
+6. **Human ↔ Surveyor chat mode**
    Add a panel where a human chats as the patient with the surveyor agent (text input), while keeping the same analysis pipeline at end-of-session.
+7. **Persistent storage (HF Spaces `/data`)**
    Add a simple storage layer and persist runs (transcript + analysis) and user-created personas so they survive restarts/redeploys.
+8. **Run history browser**
    List prior runs, allow selecting one to reload transcript + analysis in the UI.
+9. **Configuration Panel (Expand Beyond MVP)**
    The UI already supports persona selection + per-role prompt additions (browser-local). Next steps: persona CRUD + validation, richer prompt/model settings, and server-side persistence.
+10. **Basic Test Coverage**
    Add smoke tests (mocked LLM responses) to prevent regressions in conversation flow and analysis schema parsing.
 ## Longer-Term Ideas

frontend/pages/main_page.py CHANGED Viewed

@@ -74,6 +74,13 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
             return fetch(url, Object.assign({}, options || {}, { headers }));
         }
         function PageNav({ active, onChange }) {
             const base = "px-4 py-2 rounded-lg text-sm font-semibold border transition-colors";
             const activeCls = "bg-slate-900 text-white border-slate-900";
@@ -81,6 +88,7 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
             return (
                 <div className="flex gap-2">
                     <button type="button" onClick={() => onChange('main')} className={`${base} ${active === 'main' ? activeCls : inactiveCls}`}>Conversation</button>
                     <button type="button" onClick={() => onChange('config')} className={`${base} ${active === 'config' ? activeCls : inactiveCls}`}>Configuration</button>
                 </div>
             );
@@ -158,6 +166,13 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
             const [connectionStatus, setConnectionStatus] = useState('disconnected');
             const [stats, setStats] = useState({ sent: 0, received: 0 });
             const [highlightedEvidence, setHighlightedEvidence] = useState(null); // { evidence_id, message_index, sentence }
             const wsRef = useRef(null);
             const conversationIdRef = useRef(null);
@@ -165,6 +180,11 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
             const stickToBottomRef = useRef(true);
             const clearHighlightTimeoutRef = useRef(null);
             useEffect(() => {
                 if (!AUTH_ENABLED) return;
                 const token = loadSessionToken();
@@ -278,16 +298,16 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
             const getEvidenceSnippet = (evidence) => {
                 const evidenceId = evidence?.evidence_id;
-                if (!evidenceId || !resources?.evidence_catalog) {
                     return { label: 'Unknown', snippet: '' };
                 }
-                const entry = resources.evidence_catalog[evidenceId];
                 if (!entry) {
                     return { label: evidenceId, snippet: '' };
                 }
                 const idx = entry.message_index;
-                const msg = messages[idx];
                 const label = msg ? `${msg.role === 'surveyor' ? 'Surveyor' : 'Patient'} (${msg.persona})` : `Message #${idx}`;
                 return { label, snippet: entry.text || '' };
             };
@@ -296,7 +316,7 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
                 const evidenceId = typeof evidence === 'string' ? evidence : evidence?.evidence_id;
                 if (!evidenceId) return;
-                const entry = resources?.evidence_catalog?.[evidenceId];
                 const messageIndex = entry?.message_index;
                 if (typeof messageIndex !== 'number' || !Number.isFinite(messageIndex)) {
                     console.warn('Unknown evidence_id:', evidenceId, entry);
@@ -368,21 +388,125 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
                 }, 500);
             };
             const downloadExport = async (format) => {
-                if (!resources || resourceAgentStatus !== 'complete') return;
                 const conversationId = conversationIdRef.current || `react_conv_${Date.now()}`;
                 const exportedAt = new Date().toISOString();
                 const payload = {
                     conversation_id: conversationId,
                     exported_at: exportedAt,
-                    messages: (messages || []).map((m) => ({
                         role: m.role,
                         persona: m.persona,
                         time: m.time,
                         text: m.text
                     })),
-                    resources
                 };
                 const endpoint = format === 'xlsx' ? '/api/export/xlsx' : '/api/export/json';
@@ -485,7 +609,7 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
                                                         key={idx}
                                                         type="button"
                                                         onClick={() => jumpToEvidence(evidenceId)}
-                                                        disabled={!evidenceId || !resources?.evidence_catalog?.[evidenceId]}
                                                         className="w-full text-left text-xs text-slate-600 hover:bg-slate-100 rounded px-1 py-0.5 disabled:opacity-50 disabled:hover:bg-transparent"
                                                         title={evidenceId ? `Jump to ${evidenceId}` : 'Unknown evidence'}
                                                     >
@@ -513,7 +637,7 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
 		                        <div className="bg-white rounded-lg shadow-lg p-4 mb-6">
 		                            <div className="flex items-center justify-between gap-4">
 		                                <PageNav active={activePage} onChange={setActivePage} />
-		                                {activePage === 'main' && resourceAgentStatus === 'complete' && resources && (
 		                                    <div className="flex items-center gap-2">
 		                                        <button
 		                                            type="button"
@@ -552,19 +676,56 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
 	                            <div className="grid grid-cols-[2fr_1fr_2fr] gap-6 items-start">
 	                                <div className="bg-white rounded-lg shadow-lg p-6">
 	                                    <div className="flex items-center gap-2 mb-4">
-	                                        <span className="text-2xl">💬</span>
-	                                        <h2 className="text-xl font-bold text-slate-800">Live Conversation</h2>
-	                                        {conversationActive && <span className="ml-auto text-green-600 font-medium animate-pulse">● LIVE</span>}
 	                                    </div>
 	                                    <div ref={transcriptContainerRef} onScroll={onTranscriptScroll} className="space-y-3 h-96 overflow-y-auto bg-slate-50 p-4 rounded-lg">
-	                                        {messages.length === 0 && (
 	                                            <div className="text-center text-slate-400 py-20">
-	                                                {conversationActive
-	                                                    ? '🔄 Waiting for the first messages...'
-	                                                    : '👋 Click "Start" to begin. This panel streams conversation utterances in real time.'}
 	                                            </div>
 	                                        )}
-	                                        {messages.map((msg, idx) => (
 	                                            <div
 	                                                key={idx}
 	                                                id={`msg-${idx}`}
@@ -588,30 +749,30 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
 	                                    <div className="flex items-center gap-2 mb-4">
 	                                        <span className="text-2xl">📊</span>
 	                                        <h2 className="text-xl font-bold text-slate-800">Bottom-Up Findings</h2>
-	                                        {resourceAgentStatus === 'running' && (
 	                                            <span className="ml-auto text-purple-600 font-medium animate-pulse">● RUNNING</span>
 	                                        )}
-	                                        {((resources?.health_situations || []).length > 0) && (
 	                                            <span className="ml-auto bg-green-100 text-green-700 px-3 py-1 rounded-full text-sm font-medium">
-	                                                {(resources?.health_situations || []).length}
 	                                            </span>
 	                                        )}
 	                                    </div>
 	                                    <div className="space-y-2 max-h-[42rem] overflow-y-auto">
-	                                        {resourceAgentStatus !== 'complete' && (
 	                                            <p className="text-slate-400 text-center py-8 text-sm">
-	                                                {conversationActive
-	                                                    ? 'Runs automatically when the conversation completes...'
-	                                                    : 'Runs automatically when the conversation completes. Evidence-backed emergent themes (open coding).'}
 	                                            </p>
 	                                        )}
-	                                        {resourceAgentStatus === 'complete' && resources && (
 	                                            <>
-	                                                {(resources.health_situations || []).length === 0 ? (
 	                                                    <p className="text-slate-400 text-center py-8 text-sm">No findings detected.</p>
 	                                                ) : (
 	                                                    <div className="space-y-3">
-	                                                        {(resources.health_situations || []).map((item, idx) => (
 	                                                            <div key={idx} className="bg-slate-50 border border-slate-200 rounded-lg p-3">
 	                                                                <div className="flex items-center gap-2">
 	                                                                    <div className="font-semibold text-slate-800">
@@ -641,7 +802,7 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
 	                                                                                    key={evIdx}
 	                                                                                    type="button"
 	                                                                                    onClick={() => jumpToEvidence(evidenceId)}
-	                                                                                    disabled={!evidenceId || !resources?.evidence_catalog?.[evidenceId]}
 	                                                                                    className="w-full text-left text-xs text-slate-600 hover:bg-slate-100 rounded px-1 py-0.5 disabled:opacity-50 disabled:hover:bg-transparent"
 	                                                                                    title={evidenceId ? `Jump to ${evidenceId}` : 'Unknown evidence'}
 	                                                                                >
@@ -664,25 +825,25 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
 	                                    <div className="flex items-center gap-2 mb-4">
 	                                        <span className="text-2xl">📚</span>
 	                                        <h2 className="text-xl font-bold text-slate-800">Top-Down Coding</h2>
-	                                        {resourceAgentStatus === 'running' && (
 	                                            <span className="ml-auto text-purple-600 font-medium animate-pulse">● RUNNING</span>
 	                                        )}
 	                                    </div>
-	                                    {resourceAgentStatus !== 'complete' && (
 	                                        <p className="text-slate-400 text-center py-8 text-sm">
-	                                            {conversationActive
-	                                                ? 'Runs automatically when the conversation completes...'
-	                                                : 'Runs automatically when the conversation completes. Rubric + a priori codebook (top-down coding).'}
 	                                        </p>
 	                                    )}
-	                                    {resourceAgentStatus === 'complete' && (
 	                                        <div className="grid grid-cols-2 gap-4">
 	                                            <div className="space-y-3">
 	                                                <div className="text-lg font-extrabold text-slate-900 mb-2">Care experience rubric</div>
 	                                                {(() => {
-	                                                    const care = resources?.care_experience || {};
 	                                                    const positive = care.positive || null;
 	                                                    const mixed = care.mixed || null;
 	                                                    const negative = care.negative || null;
@@ -702,7 +863,7 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
 	                                            <div className="space-y-3">
 	                                                <div className="text-lg font-extrabold text-slate-900 mb-2">Top-down codebook categories</div>
 	                                                {(() => {
-	                                                    const td = resources?.top_down_codes || {};
 	                                                    const order = [
 	                                                        { key: 'symptoms_concerns', label: 'Symptoms/concerns', empty: 'No symptoms/concerns excerpts detected.' },
 	                                                        { key: 'daily_management', label: 'Daily management', empty: 'No daily management excerpts detected.' },
@@ -751,7 +912,7 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
 	                                                                                                            key={idx2}
 	                                                                                                            type="button"
 	                                                                                                            onClick={() => jumpToEvidence(evidenceId)}
-	                                                                                                            disabled={!evidenceId || !resources?.evidence_catalog?.[evidenceId]}
 	                                                                                                            className="w-full text-left text-xs text-slate-600 hover:bg-slate-100 rounded px-1 py-0.5 disabled:opacity-50 disabled:hover:bg-transparent"
 	                                                                                                            title={evidenceId ? `Jump to ${evidenceId}` : 'Unknown evidence'}
 	                                                                                                        >

             return fetch(url, Object.assign({}, options || {}, { headers }));
         }
+        async function authedFetchForm(url, formData) {
             // POST formData to url, adding an Authorization: Bearer header when
             // loadSessionToken() returns a token. Resolves to the fetch() Response;
             // the status is not inspected here, so callers must check res.ok.
+            const token = loadSessionToken();
             // Only Authorization is ever placed in headers; no Content-Type is set, so
             // for a FormData body fetch supplies the multipart boundary itself.
+            const headers = {};
+            if (token) headers['Authorization'] = `Bearer ${token}`;
+            return fetch(url, { method: 'POST', headers, body: formData });
+        }
         function PageNav({ active, onChange }) {
             const base = "px-4 py-2 rounded-lg text-sm font-semibold border transition-colors";
             const activeCls = "bg-slate-900 text-white border-slate-900";
             return (
                 <div className="flex gap-2">
                     <button type="button" onClick={() => onChange('main')} className={`${base} ${active === 'main' ? activeCls : inactiveCls}`}>Conversation</button>
+                    <button type="button" onClick={() => onChange('analyze')} className={`${base} ${active === 'analyze' ? activeCls : inactiveCls}`}>Upload Text</button>
                     <button type="button" onClick={() => onChange('config')} className={`${base} ${active === 'config' ? activeCls : inactiveCls}`}>Configuration</button>
                 </div>
             );
             const [connectionStatus, setConnectionStatus] = useState('disconnected');
             const [stats, setStats] = useState({ sent: 0, received: 0 });
             const [highlightedEvidence, setHighlightedEvidence] = useState(null); // { evidence_id, message_index, sentence }
+            const [analysisText, setAnalysisText] = useState('');
+            const [analysisSourceName, setAnalysisSourceName] = useState(null);
+            const [analysisBusy, setAnalysisBusy] = useState(false);
+            const [analysisMessages, setAnalysisMessages] = useState([]);
+            const [analysisResources, setAnalysisResources] = useState(null);
+            const [analysisStatus, setAnalysisStatus] = useState('idle'); // idle|running|complete|error
+            const [analysisError, setAnalysisError] = useState(null);
             const wsRef = useRef(null);
             const conversationIdRef = useRef(null);
             const stickToBottomRef = useRef(true);
             const clearHighlightTimeoutRef = useRef(null);
+            const activeMessages = activePage === 'analyze' ? analysisMessages : messages;
+            const activeResources = activePage === 'analyze' ? analysisResources : resources;
+            const activeStatus = activePage === 'analyze' ? analysisStatus : resourceAgentStatus;
+            const activeError = activePage === 'analyze' ? analysisError : resourceAgentError;
             useEffect(() => {
                 if (!AUTH_ENABLED) return;
                 const token = loadSessionToken();
             const getEvidenceSnippet = (evidence) => {
                 // Resolve an evidence reference to a { label, snippet } pair for display,
                 // looking it up in activeResources.evidence_catalog and activeMessages
                 // (the analysis or live-conversation data, depending on activePage).
                 const evidenceId = evidence?.evidence_id;
+                if (!evidenceId || !activeResources?.evidence_catalog) {
                     // No id, or no catalog loaded yet: nothing to resolve.
                     return { label: 'Unknown', snippet: '' };
                 }
+                const entry = activeResources.evidence_catalog[evidenceId];
                 if (!entry) {
                     // The id is not in the catalog: show the raw id with an empty snippet.
                     return { label: evidenceId, snippet: '' };
                 }
                 const idx = entry.message_index;
+                const msg = activeMessages[idx];
                 // Label by speaker role and persona when the message exists; otherwise
                 // fall back to the numeric message index.
                 const label = msg ? `${msg.role === 'surveyor' ? 'Surveyor' : 'Patient'} (${msg.persona})` : `Message #${idx}`;
                 return { label, snippet: entry.text || '' };
             };
                 const evidenceId = typeof evidence === 'string' ? evidence : evidence?.evidence_id;
                 if (!evidenceId) return;
+                const entry = activeResources?.evidence_catalog?.[evidenceId];
                 const messageIndex = entry?.message_index;
                 if (typeof messageIndex !== 'number' || !Number.isFinite(messageIndex)) {
                     console.warn('Unknown evidence_id:', evidenceId, entry);
                 }, 500);
             };
+            const loadTextFile = (file) => {
                 // Handle a file chosen in the upload input. Files whose name ends in .pdf are
                 // sent straight to runFileAnalysis; any other file is read as text into
                 // analysisText and its name stored in analysisSourceName.
+                if (!file) return;
+                const name = (file.name || '').toLowerCase();
+                if (name.endsWith('.pdf')) {
                     // The PDF branch does not touch analysisText or analysisSourceName.
+                    runFileAnalysis(file);
+                    return;
+                }
+                const reader = new FileReader();
                 // NOTE(review): only onload is assigned; no onerror handler is set, so a
                 // failed read leaves the analysis state unchanged - confirm this is intended.
+                reader.onload = (e) => {
                     // A missing or empty result is stored as an empty string.
+                    const content = (e && e.target && e.target.result) ? String(e.target.result) : '';
+                    setAnalysisText(content);
+                    setAnalysisSourceName(file.name || null);
+                };
+                reader.readAsText(file);
+            };
+            const runFileAnalysis = async (file) => {
                 // Upload file as multipart form data to /api/analyze/file and store the
                 // returned messages and resources in the analysis state. Does nothing when
                 // auth is enabled but the user is not authenticated, or when file is falsy.
+                if (AUTH_ENABLED && !authenticated) return;
+                if (!file) return;
+                setAnalysisBusy(true);
                 // Besides clearing earlier analysis results, this also resets the
                 // conversationActive, insights and routing state.
+                setConversationActive(false);
+                setInsights([]);
+                setRouting(null);
+                setAnalysisMessages([]);
+                setAnalysisResources(null);
+                setAnalysisStatus('running');
+                setAnalysisError(null);
                 // Client-generated id; it overwrites conversationIdRef.current, which
                 // downloadExport also reads when building its payload.
+                const conversationId = `analysis_${Date.now()}`;
+                conversationIdRef.current = conversationId;
+                try {
+                    const fd = new FormData();
+                    fd.append('file', file);
+                    fd.append('conversation_id', conversationId);
+                    if (file.name) fd.append('source_name', file.name);
+                    const res = await authedFetchForm('/api/analyze/file', fd);
+                    if (!res.ok) {
                         // Use the raw response body as the error message; fall back to a
                         // status-based message when the body is empty or unreadable.
+                        const msg = await res.text().catch(() => '');
+                        throw new Error(msg || `Analysis failed (${res.status})`);
+                    }
+                    const data = await res.json();
                     // Prefer the id returned by the server, if any, over the client one.
+                    conversationIdRef.current = data.conversation_id || conversationId;
+                    setAnalysisMessages(data.messages || []);
+                    setAnalysisResources(data.resources || null);
+                    setAnalysisStatus('complete');
+                    setAnalysisError(null);
                     // NOTE(review): presumably stops the transcript panel from auto-scrolling
                     // to the end of the loaded messages - confirm against the scroll logic.
+                    stickToBottomRef.current = false;
+                } catch (e) {
+                    setAnalysisStatus('error');
+                    setAnalysisError(e?.message || 'Analysis failed');
+                } finally {
                     // The busy flag is cleared on both success and failure.
+                    setAnalysisBusy(false);
+                }
+            };
+            const runTextAnalysis = async () => {
                 // Send the trimmed contents of analysisText as JSON to /api/analyze/text and
                 // store the returned messages and resources in the analysis state. Does
                 // nothing when auth is enabled but the user is not authenticated, or when
                 // the trimmed text is empty.
+                if (AUTH_ENABLED && !authenticated) return;
+                const text = (analysisText || '').trim();
+                if (!text) return;
+                setAnalysisBusy(true);
                 // Besides clearing earlier analysis results, this also resets the
                 // conversationActive, insights and routing state.
+                setConversationActive(false);
+                setInsights([]);
+                setRouting(null);
+                setAnalysisMessages([]);
+                setAnalysisResources(null);
+                setAnalysisStatus('running');
+                setAnalysisError(null);
                 // Client-generated id; it overwrites conversationIdRef.current, which
                 // downloadExport also reads when building its payload.
+                const conversationId = `analysis_${Date.now()}`;
+                conversationIdRef.current = conversationId;
+                try {
+                    const res = await authedFetch('/api/analyze/text', {
+                        method: 'POST',
+                        headers: { 'Content-Type': 'application/json' },
+                        body: JSON.stringify({
+                            conversation_id: conversationId,
                             // An undefined value makes JSON.stringify omit the key entirely
                             // when no source name has been recorded.
+                            source_name: analysisSourceName || undefined,
+                            text
+                        })
+                    });
+                    if (!res.ok) {
                         // Use the raw response body as the error message; fall back to a
                         // status-based message when the body is empty or unreadable.
+                        const msg = await res.text().catch(() => '');
+                        throw new Error(msg || `Analysis failed (${res.status})`);
+                    }
+                    const data = await res.json();
                     // Prefer the id returned by the server, if any, over the client one.
+                    conversationIdRef.current = data.conversation_id || conversationId;
+                    setAnalysisMessages(data.messages || []);
+                    setAnalysisResources(data.resources || null);
+                    setAnalysisStatus('complete');
+                    setAnalysisError(null);
                     // NOTE(review): presumably stops the transcript panel from auto-scrolling
                     // to the end of the loaded messages - confirm against the scroll logic.
+                    stickToBottomRef.current = false;
+                } catch (e) {
+                    setAnalysisStatus('error');
+                    setAnalysisError(e?.message || 'Analysis failed');
+                } finally {
                     // The busy flag is cleared on both success and failure.
+                    setAnalysisBusy(false);
+                }
+            };
             const downloadExport = async (format) => {
+                if (!activeResources || activeStatus !== 'complete') return;
                 const conversationId = conversationIdRef.current || `react_conv_${Date.now()}`;
                 const exportedAt = new Date().toISOString();
                 const payload = {
                     conversation_id: conversationId,
                     exported_at: exportedAt,
+                    messages: (activeMessages || []).map((m) => ({
                         role: m.role,
                         persona: m.persona,
                         time: m.time,
                         text: m.text
                     })),
+                    resources: activeResources
                 };
                 const endpoint = format === 'xlsx' ? '/api/export/xlsx' : '/api/export/json';
                                                         key={idx}
                                                         type="button"
                                                         onClick={() => jumpToEvidence(evidenceId)}
+                                                        disabled={!evidenceId || !activeResources?.evidence_catalog?.[evidenceId]}
                                                         className="w-full text-left text-xs text-slate-600 hover:bg-slate-100 rounded px-1 py-0.5 disabled:opacity-50 disabled:hover:bg-transparent"
                                                         title={evidenceId ? `Jump to ${evidenceId}` : 'Unknown evidence'}
                                                     >
 		                        <div className="bg-white rounded-lg shadow-lg p-4 mb-6">
 		                            <div className="flex items-center justify-between gap-4">
 		                                <PageNav active={activePage} onChange={setActivePage} />
+		                                {(activePage === 'main' || activePage === 'analyze') && activeStatus === 'complete' && activeResources && (
 		                                    <div className="flex items-center gap-2">
 		                                        <button
 		                                            type="button"
 	                            <div className="grid grid-cols-[2fr_1fr_2fr] gap-6 items-start">
 	                                <div className="bg-white rounded-lg shadow-lg p-6">
 	                                    <div className="flex items-center gap-2 mb-4">
+	                                        <span className="text-2xl">{activePage === 'analyze' ? '🧾' : '💬'}</span>
+	                                        <h2 className="text-xl font-bold text-slate-800">{activePage === 'analyze' ? 'Analyze Text' : 'Live Conversation'}</h2>
+	                                        {activePage === 'main' && conversationActive && <span className="ml-auto text-green-600 font-medium animate-pulse">● LIVE</span>}
+	                                        {activePage === 'analyze' && analysisBusy && <span className="ml-auto text-purple-600 font-medium animate-pulse">● RUNNING</span>}
 	                                    </div>
+	                                    {activePage === 'analyze' && (
+	                                        <div className="mb-4 space-y-3">
+	                                            <div className="flex items-center gap-3">
+	                                                <input
+	                                                    type="file"
+	                                                    accept=".txt,.md,.csv,.json,.pdf"
+	                                                    onChange={(e) => loadTextFile(e.target.files && e.target.files[0])}
+	                                                    className="text-sm"
+	                                                />
+	                                                {analysisSourceName && (
+	                                                    <div className="text-xs text-slate-500">Loaded: {analysisSourceName}</div>
+	                                                )}
+	                                            </div>
+	                                            <textarea
+	                                                className="w-full border border-slate-300 rounded-lg px-3 py-2 text-sm bg-white h-40"
+	                                                placeholder="Paste a transcript here (optionally with lines like 'Surveyor: ...' / 'Patient: ...')."
+	                                                value={analysisText}
+	                                                onChange={(e) => setAnalysisText(e.target.value)}
+	                                            />
+	                                            <div className="flex items-center gap-3">
+	                                                <button
+	                                                    type="button"
+	                                                    onClick={runTextAnalysis}
+	                                                    disabled={analysisBusy || !analysisText.trim() || (AUTH_ENABLED && !authenticated)}
+	                                                    className="bg-purple-600 hover:bg-purple-700 disabled:bg-slate-300 text-white px-4 py-2 rounded-lg text-sm font-semibold transition-all shadow"
+	                                                >
+	                                                    Run analysis
+	                                                </button>
+	                                                {analysisStatus === 'error' && analysisError && (
+	                                                    <div className="text-xs text-red-600">{analysisError}</div>
+	                                                )}
+	                                            </div>
+	                                        </div>
+	                                    )}
 	                                    <div ref={transcriptContainerRef} onScroll={onTranscriptScroll} className="space-y-3 h-96 overflow-y-auto bg-slate-50 p-4 rounded-lg">
+	                                        {activeMessages.length === 0 && (
 	                                            <div className="text-center text-slate-400 py-20">
+	                                                {activePage === 'main'
+	                                                    ? (conversationActive ? '🔄 Waiting for the first messages...' : '👋 Click "Start" to begin. This panel streams conversation utterances in real time.')
+	                                                    : 'Paste or upload text above, then click “Run analysis”.'}
 	                                            </div>
 	                                        )}
+	                                        {activeMessages.map((msg, idx) => (
 	                                            <div
 	                                                key={idx}
 	                                                id={`msg-${idx}`}
 	                                    <div className="flex items-center gap-2 mb-4">
 	                                        <span className="text-2xl">📊</span>
 	                                        <h2 className="text-xl font-bold text-slate-800">Bottom-Up Findings</h2>
+	                                        {activeStatus === 'running' && (
 	                                            <span className="ml-auto text-purple-600 font-medium animate-pulse">● RUNNING</span>
 	                                        )}
+	                                        {((activeResources?.health_situations || []).length > 0) && (
 	                                            <span className="ml-auto bg-green-100 text-green-700 px-3 py-1 rounded-full text-sm font-medium">
+	                                                {(activeResources?.health_situations || []).length}
 	                                            </span>
 	                                        )}
 	                                    </div>
 	                                    <div className="space-y-2 max-h-[42rem] overflow-y-auto">
+	                                        {activeStatus !== 'complete' && (
 	                                            <p className="text-slate-400 text-center py-8 text-sm">
+	                                                {activePage === 'main'
+	                                                    ? (conversationActive ? 'Runs automatically when the conversation completes...' : 'Runs automatically when the conversation completes. Evidence-backed emergent themes (open coding).')
+	                                                    : 'Runs when you click “Run analysis”.'}
 	                                            </p>
 	                                        )}
+	                                        {activeStatus === 'complete' && activeResources && (
 	                                            <>
+	                                                {(activeResources.health_situations || []).length === 0 ? (
 	                                                    <p className="text-slate-400 text-center py-8 text-sm">No findings detected.</p>
 	                                                ) : (
 	                                                    <div className="space-y-3">
+	                                                        {(activeResources.health_situations || []).map((item, idx) => (
 	                                                            <div key={idx} className="bg-slate-50 border border-slate-200 rounded-lg p-3">
 	                                                                <div className="flex items-center gap-2">
 	                                                                    <div className="font-semibold text-slate-800">
 	                                                                                    key={evIdx}
 	                                                                                    type="button"
 	                                                                                    onClick={() => jumpToEvidence(evidenceId)}
+	                                                                                    disabled={!evidenceId || !activeResources?.evidence_catalog?.[evidenceId]}
 	                                                                                    className="w-full text-left text-xs text-slate-600 hover:bg-slate-100 rounded px-1 py-0.5 disabled:opacity-50 disabled:hover:bg-transparent"
 	                                                                                    title={evidenceId ? `Jump to ${evidenceId}` : 'Unknown evidence'}
 	                                                                                >
 	                                    <div className="flex items-center gap-2 mb-4">
 	                                        <span className="text-2xl">📚</span>
 	                                        <h2 className="text-xl font-bold text-slate-800">Top-Down Coding</h2>
+	                                        {activeStatus === 'running' && (
 	                                            <span className="ml-auto text-purple-600 font-medium animate-pulse">● RUNNING</span>
 	                                        )}
 	                                    </div>
+	                                    {activeStatus !== 'complete' && (
 	                                        <p className="text-slate-400 text-center py-8 text-sm">
+	                                            {activePage === 'main'
+	                                                ? (conversationActive ? 'Runs automatically when the conversation completes...' : 'Runs automatically when the conversation completes. Rubric + a priori codebook (top-down coding).')
+	                                                : 'Runs when you click “Run analysis”.'}
 	                                        </p>
 	                                    )}
+	                                    {activeStatus === 'complete' && activeResources && (
 	                                        <div className="grid grid-cols-2 gap-4">
 	                                            <div className="space-y-3">
 	                                                <div className="text-lg font-extrabold text-slate-900 mb-2">Care experience rubric</div>
 	                                                {(() => {
+	                                                    const care = activeResources?.care_experience || {};
 	                                                    const positive = care.positive || null;
 	                                                    const mixed = care.mixed || null;
 	                                                    const negative = care.negative || null;
 	                                            <div className="space-y-3">
 	                                                <div className="text-lg font-extrabold text-slate-900 mb-2">Top-down codebook categories</div>
 	                                                {(() => {
+	                                                    const td = activeResources?.top_down_codes || {};
 	                                                    const order = [
 	                                                        { key: 'symptoms_concerns', label: 'Symptoms/concerns', empty: 'No symptoms/concerns excerpts detected.' },
 	                                                        { key: 'daily_management', label: 'Daily management', empty: 'No daily management excerpts detected.' },
 	                                                                                                            key={idx2}
 	                                                                                                            type="button"
 	                                                                                                            onClick={() => jumpToEvidence(evidenceId)}
+	                                                                                                            disabled={!evidenceId || !activeResources?.evidence_catalog?.[evidenceId]}
 	                                                                                                            className="w-full text-left text-xs text-slate-600 hover:bg-slate-100 rounded px-1 py-0.5 disabled:opacity-50 disabled:hover:bg-transparent"
 	                                                                                                            title={evidenceId ? `Jump to ${evidenceId}` : 'Unknown evidence'}
 	                                                                                                        >

requirements.txt CHANGED Viewed

@@ -44,3 +44,6 @@ pysbd>=0.3.4
 # Excel export (multi-sheet .xlsx)
 openpyxl>=3.1.2

 # Excel export (multi-sheet .xlsx)
 openpyxl>=3.1.2
+# PDF text extraction (best-effort)
+pypdf>=5.0.0