MikelWL committed on
Commit
01da502
·
1 Parent(s): 3a61902

Upload: analyze pasted text and PDFs

Browse files
backend/api/conversation_service.py CHANGED
@@ -78,6 +78,161 @@ def _normalize_confidence(value: Any) -> Optional[float]:
78
  return max(0.0, min(1.0, confidence))
79
 
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  class ConversationStatus(Enum):
82
  """Status of managed conversations."""
83
  STARTING = "starting"
@@ -403,146 +558,14 @@ class ConversationService:
403
  conv_info = self.active_conversations.get(conversation_id)
404
  if not conv_info:
405
  return
406
-
407
- llm_params = self._build_llm_parameters()
408
- client_kwargs: Dict[str, Any] = {
409
- "host": conv_info.host,
410
- "model": conv_info.model,
411
- **llm_params,
412
- }
413
- client = create_llm_client(conv_info.llm_backend, **client_kwargs)
414
-
415
- schema_version = "7"
416
- analysis_prompt_version = "v2"
417
-
418
- evidence_catalog: Dict[str, Dict[str, Any]] = {}
419
- for message in transcript:
420
- message_index = message.get("index")
421
- content = message.get("content", "") or ""
422
- if not isinstance(message_index, int):
423
- continue
424
- for sentence_index, sentence in enumerate(_split_sentences(content)):
425
- evidence_id = f"m{message_index}s{sentence_index}"
426
- evidence_catalog[evidence_id] = {
427
- "message_index": message_index,
428
- "sentence_index": sentence_index,
429
- "text": sentence,
430
- }
431
-
432
- system_prompt = (
433
- "You are a clinical research 'resource agent'. You are given a transcript of a simulated "
434
- "health survey conversation between a surveyor and a patient. Your task is to extract "
435
- "post-hoc insights as strict JSON for a UI.\n\n"
436
- "Rules:\n"
437
- "- Use ONLY the provided transcript.\n"
438
- "- Output MUST be valid JSON only (no markdown, no backticks).\n"
439
- "- Evidence must be selected from the provided evidence catalog by evidence_id.\n"
440
- "- Do NOT invent quotes. Do NOT paraphrase evidence. Cite by evidence_id only.\n"
441
- "- For care experience: do not duplicate the same evidence_id across positive/negative/mixed/neutral.\n"
442
- " If a sentence supports both positive and negative interpretations, put it in care_experience.mixed.\n"
443
- "- confidence must be a number between 0 and 1.\n"
444
- "- For health_situations: include a short code label (1-3 words) in addition to the longer summary.\n"
445
- "- For top_down_codes categories: include a short code label (1-3 words) and cite evidence.\n"
446
- "- Prefer fewer, higher-confidence items.\n"
447
- )
448
-
449
- evidence_catalog_json = json.dumps(evidence_catalog, ensure_ascii=False)
450
-
451
- user_prompt = (
452
- "Evidence catalog (JSON object mapping evidence_id -> sentence):\n"
453
- f"{evidence_catalog_json}\n\n"
454
- "Return JSON matching this schema:\n"
455
- "{\n"
456
- f" \"schema_version\": \"{schema_version}\",\n"
457
- f" \"analysis_prompt_version\": \"{analysis_prompt_version}\",\n"
458
- " \"health_situations\": [\n"
459
- " {\n"
460
- " \"code\": string, // 1-3 word label\n"
461
- " \"summary\": string,\n"
462
- " \"evidence\": [ {\"evidence_id\": string} ],\n"
463
- " \"confidence\": number // 0..1\n"
464
- " }\n"
465
- " ],\n"
466
- " \"care_experience\": {\n"
467
- " \"positive\": {\n"
468
- " \"summary\": string,\n"
469
- " \"reasons\": [string],\n"
470
- " \"evidence\": [ {\"evidence_id\": string} ],\n"
471
- " \"confidence\": number // 0..1\n"
472
- " },\n"
473
- " \"mixed\": {\n"
474
- " \"summary\": string,\n"
475
- " \"reasons\": [string],\n"
476
- " \"evidence\": [ {\"evidence_id\": string} ],\n"
477
- " \"confidence\": number // 0..1\n"
478
- " },\n"
479
- " \"negative\": {\n"
480
- " \"summary\": string,\n"
481
- " \"reasons\": [string],\n"
482
- " \"evidence\": [ {\"evidence_id\": string} ],\n"
483
- " \"confidence\": number // 0..1\n"
484
- " },\n"
485
- " \"neutral\": {\n"
486
- " \"summary\": string,\n"
487
- " \"reasons\": [string],\n"
488
- " \"evidence\": [ {\"evidence_id\": string} ],\n"
489
- " \"confidence\": number // 0..1\n"
490
- " }\n"
491
- " }\n"
492
- " \"top_down_codes\": {\n"
493
- " \"symptoms_concerns\": [\n"
494
- " {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number}\n"
495
- " ],\n"
496
- " \"daily_management\": [\n"
497
- " {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number}\n"
498
- " ],\n"
499
- " \"barriers_constraints\": [\n"
500
- " {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number}\n"
501
- " ],\n"
502
- " \"support_resources\": [\n"
503
- " {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number}\n"
504
- " ]\n"
505
- " }\n"
506
- " \"top_down_codes\": {\n"
507
- " \"symptoms_concerns\": [ {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number // 0..1 } ],\n"
508
- " \"daily_management\": [ {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number // 0..1 } ],\n"
509
- " \"barriers_constraints\": [ {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number // 0..1 } ],\n"
510
- " \"support_resources\": [ {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number // 0..1 } ]\n"
511
- " }\n"
512
- "}\n"
513
- )
514
-
515
  try:
516
- raw = await client.generate(prompt=user_prompt, system_prompt=system_prompt, temperature=0.2)
517
- parsed = json.loads(raw)
518
- parsed["evidence_catalog"] = evidence_catalog
519
- parsed["analysis_prompt_version"] = analysis_prompt_version
520
-
521
- for item in parsed.get("health_situations", []) or []:
522
- normalized = _normalize_confidence(item.get("confidence"))
523
- if normalized is not None:
524
- item["confidence"] = normalized
525
-
526
- care_experience = parsed.get("care_experience") or {}
527
- for key in ("positive", "mixed", "negative", "neutral"):
528
- box = care_experience.get(key)
529
- if isinstance(box, dict):
530
- normalized = _normalize_confidence(box.get("confidence"))
531
- if normalized is not None:
532
- box["confidence"] = normalized
533
-
534
- top_down_codes = parsed.get("top_down_codes") or {}
535
- for key in ("symptoms_concerns", "daily_management", "barriers_constraints", "support_resources"):
536
- items = top_down_codes.get(key) or []
537
- if not isinstance(items, list):
538
- continue
539
- for item in items:
540
- if not isinstance(item, dict):
541
- continue
542
- normalized = _normalize_confidence(item.get("confidence"))
543
- if normalized is not None:
544
- item["confidence"] = normalized
545
-
546
  await self.websocket_manager.send_to_conversation(conversation_id, {
547
  "type": "resource_agent_result",
548
  "conversation_id": conversation_id,
@@ -564,11 +587,6 @@ class ConversationService:
564
  "error": str(e),
565
  "timestamp": datetime.now().isoformat(),
566
  })
567
- finally:
568
- try:
569
- await client.close()
570
- except Exception:
571
- pass
572
 
573
  def _build_llm_parameters(self) -> Dict[str, Any]:
574
  """Prepare keyword arguments for LLM client creation."""
 
78
  return max(0.0, min(1.0, confidence))
79
 
80
 
81
def _clamp_item_confidences(items: Any) -> None:
    """Normalize the ``confidence`` field of every dict in *items*, in place.

    Model output is untrusted: anything that is not a list, and any list
    element that is not a dict, is skipped instead of raising.
    """
    if not isinstance(items, list):
        return
    for item in items:
        if not isinstance(item, dict):
            continue
        normalized = _normalize_confidence(item.get("confidence"))
        if normalized is not None:
            item["confidence"] = normalized


async def run_resource_agent_analysis(
    *,
    transcript: List[Dict[str, Any]],
    llm_backend: str,
    host: str,
    model: str,
    settings: AppSettings,
) -> Dict[str, Any]:
    """Run the resource agent analysis on an in-memory transcript and return parsed JSON.

    Shared by the live conversation flow and ad-hoc analysis endpoints.

    Args:
        transcript: Message dicts. Each is read for an integer ``index`` and a
            ``content`` string; messages whose ``index`` is not an int are
            left out of the evidence catalog.
        llm_backend: Backend identifier forwarded to ``create_llm_client``.
        host: LLM host forwarded to ``create_llm_client``.
        model: Model name forwarded to ``create_llm_client``.
        settings: Application settings; ``settings.llm`` supplies timeout,
            retry and optional API-key / site / app-name parameters.

    Returns:
        The model's JSON object with ``evidence_catalog`` and
        ``analysis_prompt_version`` added and every ``confidence`` value
        passed through ``_normalize_confidence``.

    Raises:
        json.JSONDecodeError: The model reply is not valid JSON.
        ValueError: The model reply is valid JSON but not a JSON object.
        Exception: Anything raised by the LLM client propagates unchanged.
    """
    schema_version = "7"
    analysis_prompt_version = "v2"

    # One catalog entry per sentence, keyed "m<message index>s<sentence index>",
    # so the model can cite evidence by id instead of quoting text.
    evidence_catalog: Dict[str, Dict[str, Any]] = {}
    for message in transcript:
        message_index = message.get("index")
        content = message.get("content", "") or ""
        if not isinstance(message_index, int):
            continue
        for sentence_index, sentence in enumerate(_split_sentences(content)):
            evidence_catalog[f"m{message_index}s{sentence_index}"] = {
                "message_index": message_index,
                "sentence_index": sentence_index,
                "text": sentence,
            }

    system_prompt = (
        "You are a clinical research 'resource agent'. You are given a transcript of a simulated "
        "health survey conversation between a surveyor and a patient. Your task is to extract "
        "post-hoc insights as strict JSON for a UI.\n\n"
        "Rules:\n"
        "- Use ONLY the provided transcript.\n"
        "- Output MUST be valid JSON only (no markdown, no backticks).\n"
        "- Evidence must be selected from the provided evidence catalog by evidence_id.\n"
        "- Do NOT invent quotes. Do NOT paraphrase evidence. Cite by evidence_id only.\n"
        "- For care experience: do not duplicate the same evidence_id across positive/negative/mixed/neutral.\n"
        " If a sentence supports both positive and negative interpretations, put it in care_experience.mixed.\n"
        "- confidence must be a number between 0 and 1.\n"
        "- For health_situations: include a short code label (1-3 words) in addition to the longer summary.\n"
        "- For top_down_codes categories: include a short code label (1-3 words) and cite evidence.\n"
        "- Prefer fewer, higher-confidence items.\n"
    )

    evidence_catalog_json = json.dumps(evidence_catalog, ensure_ascii=False)
    user_prompt = (
        "Evidence catalog (JSON object mapping evidence_id -> sentence):\n"
        f"{evidence_catalog_json}\n\n"
        "Return JSON matching this schema:\n"
        "{\n"
        f" \"schema_version\": \"{schema_version}\",\n"
        f" \"analysis_prompt_version\": \"{analysis_prompt_version}\",\n"
        " \"health_situations\": [\n"
        " {\n"
        " \"code\": string, // 1-3 word label\n"
        " \"summary\": string,\n"
        " \"evidence\": [ {\"evidence_id\": string} ],\n"
        " \"confidence\": number // 0..1\n"
        " }\n"
        " ],\n"
        " \"care_experience\": {\n"
        " \"positive\": {\n"
        " \"summary\": string,\n"
        " \"reasons\": [string],\n"
        " \"evidence\": [ {\"evidence_id\": string} ],\n"
        " \"confidence\": number // 0..1\n"
        " },\n"
        " \"mixed\": {\n"
        " \"summary\": string,\n"
        " \"reasons\": [string],\n"
        " \"evidence\": [ {\"evidence_id\": string} ],\n"
        " \"confidence\": number // 0..1\n"
        " },\n"
        " \"negative\": {\n"
        " \"summary\": string,\n"
        " \"reasons\": [string],\n"
        " \"evidence\": [ {\"evidence_id\": string} ],\n"
        " \"confidence\": number // 0..1\n"
        " },\n"
        " \"neutral\": {\n"
        " \"summary\": string,\n"
        " \"reasons\": [string],\n"
        " \"evidence\": [ {\"evidence_id\": string} ],\n"
        " \"confidence\": number // 0..1\n"
        " }\n"
        # The comma separating care_experience from top_down_codes was missing
        # in the schema example shown to the model.
        " },\n"
        " \"top_down_codes\": {\n"
        " \"symptoms_concerns\": [ {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number // 0..1 } ],\n"
        " \"daily_management\": [ {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number // 0..1 } ],\n"
        " \"barriers_constraints\": [ {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number // 0..1 } ],\n"
        " \"support_resources\": [ {\"code\": string, \"summary\": string, \"evidence\": [ {\"evidence_id\": string} ], \"confidence\": number // 0..1 } ]\n"
        " }\n"
        "}\n"
    )

    llm_params: Dict[str, Any] = {
        "timeout": settings.llm.timeout,
        "max_retries": settings.llm.max_retries,
        "retry_delay": settings.llm.retry_delay,
    }
    # Optional credentials/metadata are only forwarded when configured.
    if settings.llm.api_key:
        llm_params["api_key"] = settings.llm.api_key
    if settings.llm.site_url:
        llm_params["site_url"] = settings.llm.site_url
    if settings.llm.app_name:
        llm_params["app_name"] = settings.llm.app_name

    # Create the client immediately before the try/finally that closes it, so
    # no failure while building prompts can leave an open client behind.
    client = create_llm_client(
        llm_backend,
        host=host,
        model=model,
        **llm_params,
    )
    try:
        raw = await client.generate(prompt=user_prompt, system_prompt=system_prompt, temperature=0.2)
        parsed = json.loads(raw)
        if not isinstance(parsed, dict):
            # A list/string/number reply would otherwise fail below with an
            # unhelpful TypeError on item assignment.
            raise ValueError(
                f"Resource agent returned JSON of type {type(parsed).__name__}; expected an object"
            )

        # Always report the catalog and prompt version actually used,
        # regardless of what the model echoed back.
        parsed["evidence_catalog"] = evidence_catalog
        parsed["analysis_prompt_version"] = analysis_prompt_version

        _clamp_item_confidences(parsed.get("health_situations"))

        care_experience = parsed.get("care_experience")
        if isinstance(care_experience, dict):
            for key in ("positive", "mixed", "negative", "neutral"):
                # Each sentiment box is a single dict, not a list of items.
                _clamp_item_confidences([care_experience.get(key)])

        top_down_codes = parsed.get("top_down_codes")
        if isinstance(top_down_codes, dict):
            for key in ("symptoms_concerns", "daily_management", "barriers_constraints", "support_resources"):
                _clamp_item_confidences(top_down_codes.get(key))

        return parsed
    finally:
        try:
            await client.close()
        except Exception:
            # Deliberate best-effort cleanup: a failing close() must not mask
            # the analysis result or the original exception.
            pass
234
+
235
+
236
  class ConversationStatus(Enum):
237
  """Status of managed conversations."""
238
  STARTING = "starting"
 
558
  conv_info = self.active_conversations.get(conversation_id)
559
  if not conv_info:
560
  return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561
  try:
562
+ parsed = await run_resource_agent_analysis(
563
+ transcript=transcript,
564
+ llm_backend=conv_info.llm_backend,
565
+ host=conv_info.host,
566
+ model=conv_info.model,
567
+ settings=self.settings,
568
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
569
  await self.websocket_manager.send_to_conversation(conversation_id, {
570
  "type": "resource_agent_result",
571
  "conversation_id": conversation_id,
 
587
  "error": str(e),
588
  "timestamp": datetime.now().isoformat(),
589
  })
 
 
 
 
 
590
 
591
  def _build_llm_parameters(self) -> Dict[str, Any]:
592
  """Prepare keyword arguments for LLM client creation."""
backend/api/routes.py CHANGED
@@ -19,7 +19,7 @@ Example:
19
  }
20
  """
21
 
22
- from fastapi import APIRouter, HTTPException
23
  from pydantic import BaseModel, Field
24
  from typing import Any, Dict, List, Optional
25
  import logging
@@ -28,7 +28,10 @@ import json
28
  from datetime import datetime
29
 
30
  from fastapi.responses import Response
31
- from .conversation_service import get_conversation_service
 
 
 
32
  from backend.core.persona_system import PersonaSystem
33
 
34
  # Setup logging
@@ -96,6 +99,18 @@ class ExportRequest(BaseModel):
96
  resources: Dict[str, Any] = Field(default_factory=dict, description="Resource agent output + evidence_catalog")
97
 
98
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  # Initialize persona system
100
  persona_system = PersonaSystem()
101
 
@@ -334,6 +349,115 @@ def _extract_evidence_ids(evidence: Any) -> List[str]:
334
  return evidence_ids
335
 
336
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
  @router.post("/export/json")
338
  async def export_json(payload: ExportRequest) -> Response:
339
  exported_at = payload.exported_at or datetime.now().isoformat()
@@ -486,3 +610,72 @@ async def export_xlsx(payload: ExportRequest) -> Response:
486
  media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
487
  headers=headers,
488
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  """
21
 
22
+ from fastapi import APIRouter, HTTPException, UploadFile, File, Form
23
  from pydantic import BaseModel, Field
24
  from typing import Any, Dict, List, Optional
25
  import logging
 
28
  from datetime import datetime
29
 
30
  from fastapi.responses import Response
31
+ import re
32
+
33
+ from config.settings import get_settings
34
+ from .conversation_service import get_conversation_service, run_resource_agent_analysis
35
  from backend.core.persona_system import PersonaSystem
36
 
37
  # Setup logging
 
99
  resources: Dict[str, Any] = Field(default_factory=dict, description="Resource agent output + evidence_catalog")
100
 
101
 
102
class AnalyzeTextRequest(BaseModel):
    # Request body for the pasted-text analysis endpoint.
    # (Kept as comments: a class docstring would become the schema description.)

    # Required raw transcript text.
    text: str = Field(
        ...,
        description="Raw transcript text to analyze",
    )
    # Caller-chosen id for this run; optional.
    conversation_id: Optional[str] = Field(
        default=None,
        description="Optional client-generated id for this analysis run",
    )
    # Display label for the source; optional.
    source_name: Optional[str] = Field(
        default=None,
        description="Optional label for the uploaded/pasted source",
    )
106
+
107
+
108
class AnalyzeTextResponse(BaseModel):
    # Response body returned by the analysis endpoints.
    # (Kept as comments: a class docstring would become the schema description.)

    conversation_id: str  # id of this analysis run
    messages: List[ExportMessage]  # parsed transcript messages, in order
    resources: Dict[str, Any]  # resource agent output
112
+
113
+
114
  # Initialize persona system
115
  persona_system = PersonaSystem()
116
 
 
349
  return evidence_ids
350
 
351
 
352
+ def _parse_transcript_text(text: str, source_name: Optional[str]) -> List[Dict[str, Any]]:
353
+ normalized = (text or "").replace("\r\n", "\n").replace("\r", "\n").strip()
354
+ if not normalized:
355
+ return []
356
+
357
+ label = source_name or "Uploaded transcript"
358
+ lines = [line.rstrip() for line in normalized.split("\n")]
359
+ labeled = False
360
+ blocks: List[Dict[str, Any]] = []
361
+
362
+ current_role: Optional[str] = None
363
+ current_lines: List[str] = []
364
+
365
+ def flush():
366
+ nonlocal current_role, current_lines
367
+ content = "\n".join([l for l in current_lines]).strip()
368
+ if content:
369
+ role = current_role or "transcript"
370
+ persona = "Surveyor" if role == "surveyor" else ("Patient" if role == "patient" else label)
371
+ blocks.append({
372
+ "role": role,
373
+ "persona": persona,
374
+ "content": content,
375
+ })
376
+ current_role = None
377
+ current_lines = []
378
+
379
+ pattern = re.compile(r"^(surveyor|interviewer|patient|respondent)\s*:\s*(.*)$", re.IGNORECASE)
380
+
381
+ for line in lines:
382
+ stripped = line.strip()
383
+ if not stripped:
384
+ if current_lines:
385
+ current_lines.append("")
386
+ continue
387
+
388
+ match = pattern.match(stripped)
389
+ if match:
390
+ labeled = True
391
+ flush()
392
+ speaker = match.group(1).lower()
393
+ current_role = "surveyor" if speaker in ("surveyor", "interviewer") else "patient"
394
+ remainder = match.group(2).strip()
395
+ if remainder:
396
+ current_lines.append(remainder)
397
+ continue
398
+
399
+ if current_role is None:
400
+ current_role = "transcript"
401
+ current_lines.append(line)
402
+
403
+ flush()
404
+
405
+ if labeled:
406
+ return blocks
407
+
408
+ # If nothing was labeled, split by blank lines into paragraphs for better evidence traceability.
409
+ paragraphs = [p.strip() for p in re.split(r"\n\s*\n+", normalized) if p.strip()]
410
+ return [{
411
+ "role": "transcript",
412
+ "persona": label,
413
+ "content": p,
414
+ } for p in paragraphs] or [{
415
+ "role": "transcript",
416
+ "persona": label,
417
+ "content": normalized,
418
+ }]
419
+
420
+
421
async def _analyze_from_text(*, text: str, conversation_id: str, source_name: Optional[str]) -> AnalyzeTextResponse:
    """Parse raw text into messages, run the resource agent on them, and package the result.

    Args:
        text: Raw transcript text (pasted, or extracted from an upload).
        conversation_id: Id echoed back in the response.
        source_name: Optional label used as the persona of unlabeled blocks.

    Returns:
        The parsed messages together with the resource agent output.

    Raises:
        HTTPException: 400 when the text yields no messages; 502 when the
            analysis itself fails (LLM error or unparseable model output).
    """
    settings = get_settings()
    exported_at = datetime.now().isoformat()

    parsed_messages = _parse_transcript_text(text, source_name)
    if not parsed_messages:
        raise HTTPException(status_code=400, detail="No content to analyze")

    # The same parsed messages are shaped twice: once for the analysis
    # pipeline, once for the response. All share one timestamp because
    # uploaded text carries no per-message times.
    transcript: List[Dict[str, Any]] = [
        {
            "index": idx,
            "role": msg["role"],
            "persona": msg.get("persona"),
            "content": msg["content"],
            "timestamp": exported_at,
        }
        for idx, msg in enumerate(parsed_messages)
    ]
    ui_messages: List[ExportMessage] = [
        ExportMessage(
            role=msg["role"],
            persona=msg.get("persona"),
            time=exported_at,
            text=msg["content"],
        )
        for msg in parsed_messages
    ]

    try:
        resources = await run_resource_agent_analysis(
            transcript=transcript,
            llm_backend=settings.llm.backend,
            host=settings.llm.host,
            model=settings.llm.model,
            settings=settings,
        )
    except HTTPException:
        raise
    except Exception as exc:
        # Without this, an LLM outage or malformed model reply reaches the
        # client as a bare 500 with no detail to display.
        raise HTTPException(status_code=502, detail=f"Analysis failed: {exc}") from exc

    return AnalyzeTextResponse(
        conversation_id=conversation_id,
        messages=ui_messages,
        resources=resources,
    )
459
+
460
+
461
  @router.post("/export/json")
462
  async def export_json(payload: ExportRequest) -> Response:
463
  exported_at = payload.exported_at or datetime.now().isoformat()
 
610
  media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
611
  headers=headers,
612
  )
613
+
614
+
615
@router.post("/analyze/text")
async def analyze_text(payload: AnalyzeTextRequest) -> AnalyzeTextResponse:
    """Analyze pasted transcript text.

    Responds with 400 when ``text`` is missing or blank. When the caller
    supplies no ``conversation_id``, one is derived from the current time.
    """
    raw_text = payload.text
    has_text = isinstance(raw_text, str) and bool(raw_text.strip())
    if not has_text:
        raise HTTPException(status_code=400, detail="text is required")

    if payload.conversation_id:
        conversation_id = payload.conversation_id
    else:
        conversation_id = f"analysis_{int(datetime.now().timestamp())}"

    response = await _analyze_from_text(
        text=raw_text,
        conversation_id=conversation_id,
        source_name=payload.source_name,
    )
    return response
626
+
627
+
628
def _extract_pdf_text(data: bytes) -> str:
    """Return the text of every page of a PDF, pages separated by blank lines.

    Raises:
        HTTPException: 500 when ``pypdf`` cannot be imported; 400 when the
            bytes cannot be parsed as a PDF.
    """
    # Imported locally, like pypdf below, so this helper does not depend on a
    # module-level `import io` (not visible from this part of the file).
    import io

    try:
        from pypdf import PdfReader  # type: ignore
    except ImportError as exc:
        raise HTTPException(status_code=500, detail=f"pypdf not available: {exc}") from exc

    try:
        reader = PdfReader(io.BytesIO(data))
        # extract_text() may yield nothing for image-only pages; skip those.
        pages = [(page.extract_text() or "").strip() for page in reader.pages]
    except Exception as exc:
        raise HTTPException(status_code=400, detail=f"Failed to parse PDF: {exc}") from exc

    return "\n\n".join(page for page in pages if page).strip()


@router.post("/analyze/file")
async def analyze_file(
    file: UploadFile = File(...),
    conversation_id: Optional[str] = Form(default=None),
    source_name: Optional[str] = Form(default=None),
) -> AnalyzeTextResponse:
    """Analyze an uploaded transcript file (PDF or plain text).

    A file is treated as PDF when its name ends in ``.pdf`` or its content
    type is ``application/pdf``; anything else is decoded as UTF-8 text on a
    best-effort basis.

    Raises:
        HTTPException: 400 for an empty file, an unparseable PDF, or a file
            with no text; 500 when PDF support is unavailable.
    """
    data = await file.read()
    if not data:
        raise HTTPException(status_code=400, detail="Empty file")

    inferred_name = source_name or file.filename or "Uploaded file"
    cid = conversation_id or f"analysis_{int(datetime.now().timestamp())}"

    filename = (file.filename or "").lower()
    content_type = (file.content_type or "").lower()

    if filename.endswith(".pdf") or content_type == "application/pdf":
        text = _extract_pdf_text(data)
        if not text:
            raise HTTPException(status_code=400, detail="No extractable text found in PDF")
    else:
        # Best-effort: treat everything else as UTF-8 text. "utf-8-sig" drops
        # a leading byte-order mark, which str.strip() does not remove and
        # which would stop the first "Surveyor:"/"Patient:" label from being
        # recognized at the start of its line.
        text = data.decode("utf-8-sig", errors="replace").strip()
        if not text:
            raise HTTPException(status_code=400, detail="No text content found in file")

    return await _analyze_from_text(
        text=text,
        conversation_id=cid,
        source_name=inferred_name,
    )
docs/roadmap.md CHANGED
@@ -29,20 +29,26 @@ _Last updated: 2026-01-19_
29
 
30
  4. **Analysis on pasted/uploaded text**
31
  Add a panel to paste text or upload a file, run the same analysis pipeline, render results, and allow download.
 
32
 
33
- 5. **Human Surveyor chat mode**
 
 
 
 
 
34
  Add a panel where a human chats as the patient with the surveyor agent (text input), while keeping the same analysis pipeline at end-of-session.
35
 
36
- 6. **Persistent storage (HF Spaces `/data`)**
37
  Add a simple storage layer and persist runs (transcript + analysis) and user-created personas so they survive restarts/redeploys.
38
 
39
- 7. **Run history browser**
40
  List prior runs, allow selecting one to reload transcript + analysis in the UI.
41
 
42
- 8. **Configuration Panel (Expand Beyond MVP)**
43
  The UI already supports persona selection + per-role prompt additions (browser-local). Next steps: persona CRUD + validation, richer prompt/model settings, and server-side persistence.
44
 
45
- 9. **Basic Test Coverage**
46
  Add smoke tests (mocked LLM responses) to prevent regressions in conversation flow and analysis schema parsing.
47
 
48
  ## Longer-Term Ideas
 
29
 
30
  4. **Analysis on pasted/uploaded text**
31
  Add a panel to paste text or upload a file, run the same analysis pipeline, render results, and allow download.
32
+ ✅ Implemented: “Upload Text” tab supports paste, text-file upload, and best-effort PDF text extraction; exports work (Excel + JSON).
33
 
34
+ 5. **Modularization / Separation of Concerns (refactor)**
35
+ Before adding more major UI modes, refactor to keep the codebase maintainable:
36
+ - Split the growing frontend UI logic (currently concentrated in `frontend/pages/main_page.py`) into smaller, focused modules/components.
37
+ - Split API routers so `backend/api/routes.py` doesn’t become a catch-all (e.g., separate export + analysis routes).
38
+
39
+ 6. **Human ↔ Surveyor chat mode**
40
  Add a panel where a human chats as the patient with the surveyor agent (text input), while keeping the same analysis pipeline at end-of-session.
41
 
42
+ 7. **Persistent storage (HF Spaces `/data`)**
43
  Add a simple storage layer and persist runs (transcript + analysis) and user-created personas so they survive restarts/redeploys.
44
 
45
+ 8. **Run history browser**
46
  List prior runs, allow selecting one to reload transcript + analysis in the UI.
47
 
48
+ 9. **Configuration Panel (Expand Beyond MVP)**
49
  The UI already supports persona selection + per-role prompt additions (browser-local). Next steps: persona CRUD + validation, richer prompt/model settings, and server-side persistence.
50
 
51
+ 10. **Basic Test Coverage**
52
  Add smoke tests (mocked LLM responses) to prevent regressions in conversation flow and analysis schema parsing.
53
 
54
  ## Longer-Term Ideas
frontend/pages/main_page.py CHANGED
@@ -74,6 +74,13 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
74
  return fetch(url, Object.assign({}, options || {}, { headers }));
75
  }
76
 
 
 
 
 
 
 
 
77
  function PageNav({ active, onChange }) {
78
  const base = "px-4 py-2 rounded-lg text-sm font-semibold border transition-colors";
79
  const activeCls = "bg-slate-900 text-white border-slate-900";
@@ -81,6 +88,7 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
81
  return (
82
  <div className="flex gap-2">
83
  <button type="button" onClick={() => onChange('main')} className={`${base} ${active === 'main' ? activeCls : inactiveCls}`}>Conversation</button>
 
84
  <button type="button" onClick={() => onChange('config')} className={`${base} ${active === 'config' ? activeCls : inactiveCls}`}>Configuration</button>
85
  </div>
86
  );
@@ -158,6 +166,13 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
158
  const [connectionStatus, setConnectionStatus] = useState('disconnected');
159
  const [stats, setStats] = useState({ sent: 0, received: 0 });
160
  const [highlightedEvidence, setHighlightedEvidence] = useState(null); // { evidence_id, message_index, sentence }
 
 
 
 
 
 
 
161
 
162
  const wsRef = useRef(null);
163
  const conversationIdRef = useRef(null);
@@ -165,6 +180,11 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
165
  const stickToBottomRef = useRef(true);
166
  const clearHighlightTimeoutRef = useRef(null);
167
 
 
 
 
 
 
168
  useEffect(() => {
169
  if (!AUTH_ENABLED) return;
170
  const token = loadSessionToken();
@@ -278,16 +298,16 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
278
 
279
  const getEvidenceSnippet = (evidence) => {
280
  const evidenceId = evidence?.evidence_id;
281
- if (!evidenceId || !resources?.evidence_catalog) {
282
  return { label: 'Unknown', snippet: '' };
283
  }
284
- const entry = resources.evidence_catalog[evidenceId];
285
  if (!entry) {
286
  return { label: evidenceId, snippet: '' };
287
  }
288
 
289
  const idx = entry.message_index;
290
- const msg = messages[idx];
291
  const label = msg ? `${msg.role === 'surveyor' ? 'Surveyor' : 'Patient'} (${msg.persona})` : `Message #${idx}`;
292
  return { label, snippet: entry.text || '' };
293
  };
@@ -296,7 +316,7 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
296
  const evidenceId = typeof evidence === 'string' ? evidence : evidence?.evidence_id;
297
  if (!evidenceId) return;
298
 
299
- const entry = resources?.evidence_catalog?.[evidenceId];
300
  const messageIndex = entry?.message_index;
301
  if (typeof messageIndex !== 'number' || !Number.isFinite(messageIndex)) {
302
  console.warn('Unknown evidence_id:', evidenceId, entry);
@@ -368,21 +388,125 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
368
  }, 500);
369
  };
370
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
  const downloadExport = async (format) => {
372
- if (!resources || resourceAgentStatus !== 'complete') return;
373
  const conversationId = conversationIdRef.current || `react_conv_${Date.now()}`;
374
  const exportedAt = new Date().toISOString();
375
 
376
  const payload = {
377
  conversation_id: conversationId,
378
  exported_at: exportedAt,
379
- messages: (messages || []).map((m) => ({
380
  role: m.role,
381
  persona: m.persona,
382
  time: m.time,
383
  text: m.text
384
  })),
385
- resources
386
  };
387
 
388
  const endpoint = format === 'xlsx' ? '/api/export/xlsx' : '/api/export/json';
@@ -485,7 +609,7 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
485
  key={idx}
486
  type="button"
487
  onClick={() => jumpToEvidence(evidenceId)}
488
- disabled={!evidenceId || !resources?.evidence_catalog?.[evidenceId]}
489
  className="w-full text-left text-xs text-slate-600 hover:bg-slate-100 rounded px-1 py-0.5 disabled:opacity-50 disabled:hover:bg-transparent"
490
  title={evidenceId ? `Jump to ${evidenceId}` : 'Unknown evidence'}
491
  >
@@ -513,7 +637,7 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
513
  <div className="bg-white rounded-lg shadow-lg p-4 mb-6">
514
  <div className="flex items-center justify-between gap-4">
515
  <PageNav active={activePage} onChange={setActivePage} />
516
- {activePage === 'main' && resourceAgentStatus === 'complete' && resources && (
517
  <div className="flex items-center gap-2">
518
  <button
519
  type="button"
@@ -552,19 +676,56 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
552
  <div className="grid grid-cols-[2fr_1fr_2fr] gap-6 items-start">
553
  <div className="bg-white rounded-lg shadow-lg p-6">
554
  <div className="flex items-center gap-2 mb-4">
555
- <span className="text-2xl">💬</span>
556
- <h2 className="text-xl font-bold text-slate-800">Live Conversation</h2>
557
- {conversationActive && <span className="ml-auto text-green-600 font-medium animate-pulse">● LIVE</span>}
 
558
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
559
  <div ref={transcriptContainerRef} onScroll={onTranscriptScroll} className="space-y-3 h-96 overflow-y-auto bg-slate-50 p-4 rounded-lg">
560
- {messages.length === 0 && (
561
  <div className="text-center text-slate-400 py-20">
562
- {conversationActive
563
- ? '🔄 Waiting for the first messages...'
564
- : '👋 Click "Start" to begin. This panel streams conversation utterances in real time.'}
565
  </div>
566
  )}
567
- {messages.map((msg, idx) => (
568
  <div
569
  key={idx}
570
  id={`msg-${idx}`}
@@ -588,30 +749,30 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
588
  <div className="flex items-center gap-2 mb-4">
589
  <span className="text-2xl">📊</span>
590
  <h2 className="text-xl font-bold text-slate-800">Bottom-Up Findings</h2>
591
- {resourceAgentStatus === 'running' && (
592
  <span className="ml-auto text-purple-600 font-medium animate-pulse">● RUNNING</span>
593
  )}
594
- {((resources?.health_situations || []).length > 0) && (
595
  <span className="ml-auto bg-green-100 text-green-700 px-3 py-1 rounded-full text-sm font-medium">
596
- {(resources?.health_situations || []).length}
597
  </span>
598
  )}
599
  </div>
600
  <div className="space-y-2 max-h-[42rem] overflow-y-auto">
601
- {resourceAgentStatus !== 'complete' && (
602
  <p className="text-slate-400 text-center py-8 text-sm">
603
- {conversationActive
604
- ? 'Runs automatically when the conversation completes...'
605
- : 'Runs automatically when the conversation completes. Evidence-backed emergent themes (open coding).'}
606
  </p>
607
  )}
608
- {resourceAgentStatus === 'complete' && resources && (
609
  <>
610
- {(resources.health_situations || []).length === 0 ? (
611
  <p className="text-slate-400 text-center py-8 text-sm">No findings detected.</p>
612
  ) : (
613
  <div className="space-y-3">
614
- {(resources.health_situations || []).map((item, idx) => (
615
  <div key={idx} className="bg-slate-50 border border-slate-200 rounded-lg p-3">
616
  <div className="flex items-center gap-2">
617
  <div className="font-semibold text-slate-800">
@@ -641,7 +802,7 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
641
  key={evIdx}
642
  type="button"
643
  onClick={() => jumpToEvidence(evidenceId)}
644
- disabled={!evidenceId || !resources?.evidence_catalog?.[evidenceId]}
645
  className="w-full text-left text-xs text-slate-600 hover:bg-slate-100 rounded px-1 py-0.5 disabled:opacity-50 disabled:hover:bg-transparent"
646
  title={evidenceId ? `Jump to ${evidenceId}` : 'Unknown evidence'}
647
  >
@@ -664,25 +825,25 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
664
  <div className="flex items-center gap-2 mb-4">
665
  <span className="text-2xl">📚</span>
666
  <h2 className="text-xl font-bold text-slate-800">Top-Down Coding</h2>
667
- {resourceAgentStatus === 'running' && (
668
  <span className="ml-auto text-purple-600 font-medium animate-pulse">● RUNNING</span>
669
  )}
670
  </div>
671
 
672
- {resourceAgentStatus !== 'complete' && (
673
  <p className="text-slate-400 text-center py-8 text-sm">
674
- {conversationActive
675
- ? 'Runs automatically when the conversation completes...'
676
- : 'Runs automatically when the conversation completes. Rubric + a priori codebook (top-down coding).'}
677
  </p>
678
  )}
679
 
680
- {resourceAgentStatus === 'complete' && (
681
  <div className="grid grid-cols-2 gap-4">
682
  <div className="space-y-3">
683
  <div className="text-lg font-extrabold text-slate-900 mb-2">Care experience rubric</div>
684
  {(() => {
685
- const care = resources?.care_experience || {};
686
  const positive = care.positive || null;
687
  const mixed = care.mixed || null;
688
  const negative = care.negative || null;
@@ -702,7 +863,7 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
702
  <div className="space-y-3">
703
  <div className="text-lg font-extrabold text-slate-900 mb-2">Top-down codebook categories</div>
704
  {(() => {
705
- const td = resources?.top_down_codes || {};
706
  const order = [
707
  { key: 'symptoms_concerns', label: 'Symptoms/concerns', empty: 'No symptoms/concerns excerpts detected.' },
708
  { key: 'daily_management', label: 'Daily management', empty: 'No daily management excerpts detected.' },
@@ -751,7 +912,7 @@ def get_main_page_html(auth_enabled: bool = False) -> str:
751
  key={idx2}
752
  type="button"
753
  onClick={() => jumpToEvidence(evidenceId)}
754
- disabled={!evidenceId || !resources?.evidence_catalog?.[evidenceId]}
755
  className="w-full text-left text-xs text-slate-600 hover:bg-slate-100 rounded px-1 py-0.5 disabled:opacity-50 disabled:hover:bg-transparent"
756
  title={evidenceId ? `Jump to ${evidenceId}` : 'Unknown evidence'}
757
  >
 
74
  return fetch(url, Object.assign({}, options || {}, { headers }));
75
  }
76
 
77
+ async function authedFetchForm(url, formData) {
78
+ const token = loadSessionToken();
79
+ const headers = {};
80
+ if (token) headers['Authorization'] = `Bearer ${token}`;
81
+ return fetch(url, { method: 'POST', headers, body: formData });
82
+ }
83
+
84
  function PageNav({ active, onChange }) {
85
  const base = "px-4 py-2 rounded-lg text-sm font-semibold border transition-colors";
86
  const activeCls = "bg-slate-900 text-white border-slate-900";
 
88
  return (
89
  <div className="flex gap-2">
90
  <button type="button" onClick={() => onChange('main')} className={`${base} ${active === 'main' ? activeCls : inactiveCls}`}>Conversation</button>
91
+ <button type="button" onClick={() => onChange('analyze')} className={`${base} ${active === 'analyze' ? activeCls : inactiveCls}`}>Upload Text</button>
92
  <button type="button" onClick={() => onChange('config')} className={`${base} ${active === 'config' ? activeCls : inactiveCls}`}>Configuration</button>
93
  </div>
94
  );
 
166
  const [connectionStatus, setConnectionStatus] = useState('disconnected');
167
  const [stats, setStats] = useState({ sent: 0, received: 0 });
168
  const [highlightedEvidence, setHighlightedEvidence] = useState(null); // { evidence_id, message_index, sentence }
169
+ const [analysisText, setAnalysisText] = useState('');
170
+ const [analysisSourceName, setAnalysisSourceName] = useState(null);
171
+ const [analysisBusy, setAnalysisBusy] = useState(false);
172
+ const [analysisMessages, setAnalysisMessages] = useState([]);
173
+ const [analysisResources, setAnalysisResources] = useState(null);
174
+ const [analysisStatus, setAnalysisStatus] = useState('idle'); // idle|running|complete|error
175
+ const [analysisError, setAnalysisError] = useState(null);
176
 
177
  const wsRef = useRef(null);
178
  const conversationIdRef = useRef(null);
 
180
  const stickToBottomRef = useRef(true);
181
  const clearHighlightTimeoutRef = useRef(null);
182
 
183
+ const activeMessages = activePage === 'analyze' ? analysisMessages : messages;
184
+ const activeResources = activePage === 'analyze' ? analysisResources : resources;
185
+ const activeStatus = activePage === 'analyze' ? analysisStatus : resourceAgentStatus;
186
+ const activeError = activePage === 'analyze' ? analysisError : resourceAgentError;
187
+
188
  useEffect(() => {
189
  if (!AUTH_ENABLED) return;
190
  const token = loadSessionToken();
 
298
 
299
  const getEvidenceSnippet = (evidence) => {
300
  const evidenceId = evidence?.evidence_id;
301
+ if (!evidenceId || !activeResources?.evidence_catalog) {
302
  return { label: 'Unknown', snippet: '' };
303
  }
304
+ const entry = activeResources.evidence_catalog[evidenceId];
305
  if (!entry) {
306
  return { label: evidenceId, snippet: '' };
307
  }
308
 
309
  const idx = entry.message_index;
310
+ const msg = activeMessages[idx];
311
  const label = msg ? `${msg.role === 'surveyor' ? 'Surveyor' : 'Patient'} (${msg.persona})` : `Message #${idx}`;
312
  return { label, snippet: entry.text || '' };
313
  };
 
316
  const evidenceId = typeof evidence === 'string' ? evidence : evidence?.evidence_id;
317
  if (!evidenceId) return;
318
 
319
+ const entry = activeResources?.evidence_catalog?.[evidenceId];
320
  const messageIndex = entry?.message_index;
321
  if (typeof messageIndex !== 'number' || !Number.isFinite(messageIndex)) {
322
  console.warn('Unknown evidence_id:', evidenceId, entry);
 
388
  }, 500);
389
  };
390
 
391
+ const loadTextFile = (file) => {
392
+ if (!file) return;
393
+ const name = (file.name || '').toLowerCase();
394
+ if (name.endsWith('.pdf')) {
395
+ runFileAnalysis(file);
396
+ return;
397
+ }
398
+ const reader = new FileReader();
399
+ reader.onload = (e) => {
400
+ const content = (e && e.target && e.target.result) ? String(e.target.result) : '';
401
+ setAnalysisText(content);
402
+ setAnalysisSourceName(file.name || null);
403
+ };
404
+ reader.readAsText(file);
405
+ };
406
+
407
+ const runFileAnalysis = async (file) => {
408
+ if (AUTH_ENABLED && !authenticated) return;
409
+ if (!file) return;
410
+
411
+ setAnalysisBusy(true);
412
+ setConversationActive(false);
413
+ setInsights([]);
414
+ setRouting(null);
415
+ setAnalysisMessages([]);
416
+ setAnalysisResources(null);
417
+ setAnalysisStatus('running');
418
+ setAnalysisError(null);
419
+
420
+ const conversationId = `analysis_${Date.now()}`;
421
+ conversationIdRef.current = conversationId;
422
+
423
+ try {
424
+ const fd = new FormData();
425
+ fd.append('file', file);
426
+ fd.append('conversation_id', conversationId);
427
+ if (file.name) fd.append('source_name', file.name);
428
+
429
+ const res = await authedFetchForm('/api/analyze/file', fd);
430
+ if (!res.ok) {
431
+ const msg = await res.text().catch(() => '');
432
+ throw new Error(msg || `Analysis failed (${res.status})`);
433
+ }
434
+ const data = await res.json();
435
+ conversationIdRef.current = data.conversation_id || conversationId;
436
+ setAnalysisMessages(data.messages || []);
437
+ setAnalysisResources(data.resources || null);
438
+ setAnalysisStatus('complete');
439
+ setAnalysisError(null);
440
+ stickToBottomRef.current = false;
441
+ } catch (e) {
442
+ setAnalysisStatus('error');
443
+ setAnalysisError(e?.message || 'Analysis failed');
444
+ } finally {
445
+ setAnalysisBusy(false);
446
+ }
447
+ };
448
+
449
+ const runTextAnalysis = async () => {
450
+ if (AUTH_ENABLED && !authenticated) return;
451
+ const text = (analysisText || '').trim();
452
+ if (!text) return;
453
+
454
+ setAnalysisBusy(true);
455
+ setConversationActive(false);
456
+ setInsights([]);
457
+ setRouting(null);
458
+ setAnalysisMessages([]);
459
+ setAnalysisResources(null);
460
+ setAnalysisStatus('running');
461
+ setAnalysisError(null);
462
+
463
+ const conversationId = `analysis_${Date.now()}`;
464
+ conversationIdRef.current = conversationId;
465
+
466
+ try {
467
+ const res = await authedFetch('/api/analyze/text', {
468
+ method: 'POST',
469
+ headers: { 'Content-Type': 'application/json' },
470
+ body: JSON.stringify({
471
+ conversation_id: conversationId,
472
+ source_name: analysisSourceName || undefined,
473
+ text
474
+ })
475
+ });
476
+ if (!res.ok) {
477
+ const msg = await res.text().catch(() => '');
478
+ throw new Error(msg || `Analysis failed (${res.status})`);
479
+ }
480
+ const data = await res.json();
481
+ conversationIdRef.current = data.conversation_id || conversationId;
482
+ setAnalysisMessages(data.messages || []);
483
+ setAnalysisResources(data.resources || null);
484
+ setAnalysisStatus('complete');
485
+ setAnalysisError(null);
486
+ stickToBottomRef.current = false;
487
+ } catch (e) {
488
+ setAnalysisStatus('error');
489
+ setAnalysisError(e?.message || 'Analysis failed');
490
+ } finally {
491
+ setAnalysisBusy(false);
492
+ }
493
+ };
494
+
495
  const downloadExport = async (format) => {
496
+ if (!activeResources || activeStatus !== 'complete') return;
497
  const conversationId = conversationIdRef.current || `react_conv_${Date.now()}`;
498
  const exportedAt = new Date().toISOString();
499
 
500
  const payload = {
501
  conversation_id: conversationId,
502
  exported_at: exportedAt,
503
+ messages: (activeMessages || []).map((m) => ({
504
  role: m.role,
505
  persona: m.persona,
506
  time: m.time,
507
  text: m.text
508
  })),
509
+ resources: activeResources
510
  };
511
 
512
  const endpoint = format === 'xlsx' ? '/api/export/xlsx' : '/api/export/json';
 
609
  key={idx}
610
  type="button"
611
  onClick={() => jumpToEvidence(evidenceId)}
612
+ disabled={!evidenceId || !activeResources?.evidence_catalog?.[evidenceId]}
613
  className="w-full text-left text-xs text-slate-600 hover:bg-slate-100 rounded px-1 py-0.5 disabled:opacity-50 disabled:hover:bg-transparent"
614
  title={evidenceId ? `Jump to ${evidenceId}` : 'Unknown evidence'}
615
  >
 
637
  <div className="bg-white rounded-lg shadow-lg p-4 mb-6">
638
  <div className="flex items-center justify-between gap-4">
639
  <PageNav active={activePage} onChange={setActivePage} />
640
+ {(activePage === 'main' || activePage === 'analyze') && activeStatus === 'complete' && activeResources && (
641
  <div className="flex items-center gap-2">
642
  <button
643
  type="button"
 
676
  <div className="grid grid-cols-[2fr_1fr_2fr] gap-6 items-start">
677
  <div className="bg-white rounded-lg shadow-lg p-6">
678
  <div className="flex items-center gap-2 mb-4">
679
+ <span className="text-2xl">{activePage === 'analyze' ? '🧾' : '💬'}</span>
680
+ <h2 className="text-xl font-bold text-slate-800">{activePage === 'analyze' ? 'Analyze Text' : 'Live Conversation'}</h2>
681
+ {activePage === 'main' && conversationActive && <span className="ml-auto text-green-600 font-medium animate-pulse">● LIVE</span>}
682
+ {activePage === 'analyze' && analysisBusy && <span className="ml-auto text-purple-600 font-medium animate-pulse">● RUNNING</span>}
683
  </div>
684
+
685
+ {activePage === 'analyze' && (
686
+ <div className="mb-4 space-y-3">
687
+ <div className="flex items-center gap-3">
688
+ <input
689
+ type="file"
690
+ accept=".txt,.md,.csv,.json,.pdf"
691
+ onChange={(e) => loadTextFile(e.target.files && e.target.files[0])}
692
+ className="text-sm"
693
+ />
694
+ {analysisSourceName && (
695
+ <div className="text-xs text-slate-500">Loaded: {analysisSourceName}</div>
696
+ )}
697
+ </div>
698
+ <textarea
699
+ className="w-full border border-slate-300 rounded-lg px-3 py-2 text-sm bg-white h-40"
700
+ placeholder="Paste a transcript here (optionally with lines like 'Surveyor: ...' / 'Patient: ...')."
701
+ value={analysisText}
702
+ onChange={(e) => setAnalysisText(e.target.value)}
703
+ />
704
+ <div className="flex items-center gap-3">
705
+ <button
706
+ type="button"
707
+ onClick={runTextAnalysis}
708
+ disabled={analysisBusy || !analysisText.trim() || (AUTH_ENABLED && !authenticated)}
709
+ className="bg-purple-600 hover:bg-purple-700 disabled:bg-slate-300 text-white px-4 py-2 rounded-lg text-sm font-semibold transition-all shadow"
710
+ >
711
+ Run analysis
712
+ </button>
713
+ {analysisStatus === 'error' && analysisError && (
714
+ <div className="text-xs text-red-600">{analysisError}</div>
715
+ )}
716
+ </div>
717
+ </div>
718
+ )}
719
+
720
  <div ref={transcriptContainerRef} onScroll={onTranscriptScroll} className="space-y-3 h-96 overflow-y-auto bg-slate-50 p-4 rounded-lg">
721
+ {activeMessages.length === 0 && (
722
  <div className="text-center text-slate-400 py-20">
723
+ {activePage === 'main'
724
+ ? (conversationActive ? '🔄 Waiting for the first messages...' : '👋 Click "Start" to begin. This panel streams conversation utterances in real time.')
725
+ : 'Paste or upload text above, then click “Run analysis”.'}
726
  </div>
727
  )}
728
+ {activeMessages.map((msg, idx) => (
729
  <div
730
  key={idx}
731
  id={`msg-${idx}`}
 
749
  <div className="flex items-center gap-2 mb-4">
750
  <span className="text-2xl">📊</span>
751
  <h2 className="text-xl font-bold text-slate-800">Bottom-Up Findings</h2>
752
+ {activeStatus === 'running' && (
753
  <span className="ml-auto text-purple-600 font-medium animate-pulse">● RUNNING</span>
754
  )}
755
+ {((activeResources?.health_situations || []).length > 0) && (
756
  <span className="ml-auto bg-green-100 text-green-700 px-3 py-1 rounded-full text-sm font-medium">
757
+ {(activeResources?.health_situations || []).length}
758
  </span>
759
  )}
760
  </div>
761
  <div className="space-y-2 max-h-[42rem] overflow-y-auto">
762
+ {activeStatus !== 'complete' && (
763
  <p className="text-slate-400 text-center py-8 text-sm">
764
+ {activePage === 'main'
765
+ ? (conversationActive ? 'Runs automatically when the conversation completes...' : 'Runs automatically when the conversation completes. Evidence-backed emergent themes (open coding).')
766
+ : 'Runs when you click “Run analysis”.'}
767
  </p>
768
  )}
769
+ {activeStatus === 'complete' && activeResources && (
770
  <>
771
+ {(activeResources.health_situations || []).length === 0 ? (
772
  <p className="text-slate-400 text-center py-8 text-sm">No findings detected.</p>
773
  ) : (
774
  <div className="space-y-3">
775
+ {(activeResources.health_situations || []).map((item, idx) => (
776
  <div key={idx} className="bg-slate-50 border border-slate-200 rounded-lg p-3">
777
  <div className="flex items-center gap-2">
778
  <div className="font-semibold text-slate-800">
 
802
  key={evIdx}
803
  type="button"
804
  onClick={() => jumpToEvidence(evidenceId)}
805
+ disabled={!evidenceId || !activeResources?.evidence_catalog?.[evidenceId]}
806
  className="w-full text-left text-xs text-slate-600 hover:bg-slate-100 rounded px-1 py-0.5 disabled:opacity-50 disabled:hover:bg-transparent"
807
  title={evidenceId ? `Jump to ${evidenceId}` : 'Unknown evidence'}
808
  >
 
825
  <div className="flex items-center gap-2 mb-4">
826
  <span className="text-2xl">📚</span>
827
  <h2 className="text-xl font-bold text-slate-800">Top-Down Coding</h2>
828
+ {activeStatus === 'running' && (
829
  <span className="ml-auto text-purple-600 font-medium animate-pulse">● RUNNING</span>
830
  )}
831
  </div>
832
 
833
+ {activeStatus !== 'complete' && (
834
  <p className="text-slate-400 text-center py-8 text-sm">
835
+ {activePage === 'main'
836
+ ? (conversationActive ? 'Runs automatically when the conversation completes...' : 'Runs automatically when the conversation completes. Rubric + a priori codebook (top-down coding).')
837
+ : 'Runs when you click “Run analysis”.'}
838
  </p>
839
  )}
840
 
841
+ {activeStatus === 'complete' && activeResources && (
842
  <div className="grid grid-cols-2 gap-4">
843
  <div className="space-y-3">
844
  <div className="text-lg font-extrabold text-slate-900 mb-2">Care experience rubric</div>
845
  {(() => {
846
+ const care = activeResources?.care_experience || {};
847
  const positive = care.positive || null;
848
  const mixed = care.mixed || null;
849
  const negative = care.negative || null;
 
863
  <div className="space-y-3">
864
  <div className="text-lg font-extrabold text-slate-900 mb-2">Top-down codebook categories</div>
865
  {(() => {
866
+ const td = activeResources?.top_down_codes || {};
867
  const order = [
868
  { key: 'symptoms_concerns', label: 'Symptoms/concerns', empty: 'No symptoms/concerns excerpts detected.' },
869
  { key: 'daily_management', label: 'Daily management', empty: 'No daily management excerpts detected.' },
 
912
  key={idx2}
913
  type="button"
914
  onClick={() => jumpToEvidence(evidenceId)}
915
+ disabled={!evidenceId || !activeResources?.evidence_catalog?.[evidenceId]}
916
  className="w-full text-left text-xs text-slate-600 hover:bg-slate-100 rounded px-1 py-0.5 disabled:opacity-50 disabled:hover:bg-transparent"
917
  title={evidenceId ? `Jump to ${evidenceId}` : 'Unknown evidence'}
918
  >
requirements.txt CHANGED
@@ -44,3 +44,6 @@ pysbd>=0.3.4
44
 
45
  # Excel export (multi-sheet .xlsx)
46
  openpyxl>=3.1.2
 
 
 
 
44
 
45
  # Excel export (multi-sheet .xlsx)
46
  openpyxl>=3.1.2
47
+
48
+ # PDF text extraction (best-effort)
49
+ pypdf>=5.0.0