frabbani commited on
Commit
08c1d46
·
1 Parent(s): 7c9f1d6

Fix fact extraction - pass raw data for simple tools

Browse files
Files changed (4) hide show
  1. Dockerfile +1 -1
  2. agent.py +390 -0
  3. server.py +2 -56
  4. static/index.html +77 -355
Dockerfile CHANGED
@@ -14,8 +14,8 @@ RUN pip install --no-cache-dir -r requirements.txt
14
 
15
  # Copy application code
16
  COPY server.py .
 
17
  COPY agent_v2.py .
18
- COPY agent_v3.py .
19
  COPY report_generator.py .
20
  COPY tools.py .
21
  COPY init_db_hybrid.py .
 
14
 
15
  # Copy application code
16
  COPY server.py .
17
+ COPY agent.py .
18
  COPY agent_v2.py .
 
19
  COPY report_generator.py .
20
  COPY tools.py .
21
  COPY init_db_hybrid.py .
agent.py ADDED
@@ -0,0 +1,390 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ MedGemma Agent with Tool Calling
4
+
5
+ Simple agent loop that:
6
+ 1. Receives a question
7
+ 2. Decides which tools to call
8
+ 3. Executes tools and gathers results
9
+ 4. Synthesizes a final answer
10
+ """
11
+
12
+ import os
13
+ import json
14
+ import re
15
+ from typing import AsyncGenerator, Optional, Dict
16
+ import httpx
17
+
18
+ from tools import get_tools_description, execute_tool
19
+
20
+ LLAMA_SERVER_URL = os.getenv("LLAMA_SERVER_URL", "http://localhost:8081")
21
+ MAX_STEPS = 5 # Max tool calls per question
22
+
23
+ # Headers for LLM requests (ngrok requires this to skip browser warning)
24
+ LLM_HEADERS = {
25
+ "Content-Type": "application/json",
26
+ "ngrok-skip-browser-warning": "true"
27
+ }
28
+
29
+
30
def build_system_prompt(patient_id: str) -> str:
    """Build the system prompt with tool descriptions.

    Args:
        patient_id: Identifier interpolated into the prompt so the model's
            tool calls can target this patient's records.

    Returns:
        A single prompt string combining the tool catalog (from
        ``get_tools_description()``) with rules on when to call tools versus
        answering directly, plus the expected ``TOOL_CALL:`` / ``ANSWER:``
        response protocol that the agent loop parses.
    """
    # Tool catalog text comes from tools.py and is injected verbatim below.
    tools_desc = get_tools_description()

    # NOTE: the double braces {{ }} render as literal JSON braces in the
    # f-string so the model sees a concrete TOOL_CALL example.
    return f"""You are MedGemma, a helpful medical AI assistant with access to a patient's health records.

Patient ID: {patient_id}

{tools_desc}

HOW TO USE TOOLS:
When you need information, respond with a tool call in this format:
TOOL_CALL: {{"tool": "tool_name", "args": {{"param1": "value1"}}}}

WHEN TO USE TOOLS vs ANSWER DIRECTLY:
- USE TOOLS when user asks about THEIR specific data: "show MY blood pressure", "what are MY medications"
- ANSWER DIRECTLY for general health questions: "is walking good?", "what is diabetes?", "how does aspirin work?"
- You can combine both: get patient data THEN provide personalized advice

CHART TOOL GUIDELINES:
- Use get_vital_chart_data for VITALS: blood pressure, heart rate, weight, temperature, oxygen
- Use get_lab_chart_data for LABS: cholesterol, A1c, glucose, kidney function
- Use these chart tools when user asks to "show", "display", "graph", "trend", or "visualize"

EXAMPLES:
- "Show my blood pressure" → get_vital_chart_data with vital_type="blood_pressure"
- "Show my cholesterol" → get_lab_chart_data with lab_type="cholesterol"
- "How is my A1c trending?" → get_lab_chart_data with lab_type="a1c"
- "Is walking good for health?" → ANSWER directly (general knowledge)
- "Is walking good for MY heart given my conditions?" → get_conditions, then synthesize answer

GENERAL GUIDELINES:
1. Use get_recent_vitals or get_lab_results for TEXT summaries only
2. Use chart tools for any visual/trend/graph request
3. Be specific - include numbers, dates, and medication names
4. For general health questions, you can answer from medical knowledge
5. Remind users to consult their healthcare provider for medical decisions

When ready to give your final answer, start with "ANSWER:" followed by your response."""
69
+
70
+
71
def build_prompt(system: str, question: str, history: list) -> str:
    """Assemble the Gemma-style turn-delimited prompt.

    Starts with a user turn holding the system text and question, replays
    prior assistant turns and tool results from *history*, and ends with an
    open model turn for the next generation.
    """
    parts = [f"<start_of_turn>user\n{system}\n\nQuestion: {question}\n<end_of_turn>\n"]

    for turn in history:
        role = turn["role"]
        if role == "assistant":
            parts.append(f"<start_of_turn>model\n{turn['content']}\n<end_of_turn>\n")
        elif role == "tool_result":
            # Tool output is fed back as a user turn with a continuation nudge.
            parts.append(
                f"<start_of_turn>user\nTool result ({turn['tool']}):\n"
                f"{turn['content']}\n\nContinue or provide your ANSWER:\n<end_of_turn>\n"
            )

    parts.append("<start_of_turn>model\n")
    return "".join(parts)
88
+
89
+
90
def parse_tool_call(text: str) -> Optional[Dict]:
    """Extract a tool call from a model response, trying several formats.

    Recognized shapes, checked in order:
      1. ``TOOL_CALL: {...}``            (brace-balanced scan)
      2. ```` ```tool_call``/```tool`` fenced JSON
      3. ```` ```json`` fenced JSON containing "tool" and "args"
      4. Bare inline ``{"tool": ..., "args": {...}}``

    Returns the parsed dict, or None when nothing parses.
    """
    # Format 1: TOOL_CALL: {...} — walk the braces to find where the
    # JSON object ends, since the model may keep talking afterwards.
    marker = re.search(r'TOOL_CALL:\s*(\{.*)', text, re.IGNORECASE | re.DOTALL)
    if marker:
        candidate = marker.group(1)
        depth = 0
        stop = 0
        for pos, ch in enumerate(candidate):
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    stop = pos + 1
                    break
        if stop > 0:
            try:
                return json.loads(candidate[:stop])
            except json.JSONDecodeError:
                pass

    # Format 2: ```tool_call ... ``` or ```tool ... ``` fenced block.
    fenced = re.search(r'```(?:tool_call|tool)\s*\n?\s*(\{.*?\})\s*\n?```',
                       text, re.IGNORECASE | re.DOTALL)
    if fenced:
        try:
            return json.loads(fenced.group(1))
        except json.JSONDecodeError:
            pass

    # Format 3: ```json ... ``` blocks — scan newest-first in case the
    # model emitted earlier blocks while "thinking".
    json_blocks = re.findall(r'```json\s*\n?\s*(\{[^`]*\})\s*\n?```',
                             text, re.IGNORECASE | re.DOTALL)
    for block in json_blocks[::-1]:
        try:
            obj = json.loads(block)
        except json.JSONDecodeError:
            continue
        if "tool" in obj and "args" in obj:
            return obj

    # Format 4: a bare {"tool": "...", "args": {...}} anywhere in the text.
    bare_pattern = r'\{\s*"tool"\s*:\s*"([^"]+)"\s*,\s*"args"\s*:\s*(\{[^}]*\})\s*\}'
    for hit in re.finditer(bare_pattern, text):
        try:
            return json.loads(hit.group(0))
        except json.JSONDecodeError:
            continue

    return None
139
+
140
+
141
def extract_answer(text: str) -> str:
    """Return the text following the first recognized answer marker.

    Falls back to the stripped full text when no marker is present.
    """
    # Order matters: "ANSWER:" also matches inside "FINAL ANSWER:".
    for prefix in ("ANSWER:", "Answer:", "FINAL ANSWER:", "Final Answer:"):
        _, sep, tail = text.partition(prefix)
        if sep:
            return tail.strip()
    return text.strip()
149
+
150
+
151
def has_answer(text: str) -> bool:
    """Return True when *text* contains a final-answer marker."""
    for marker in ("ANSWER:", "Answer:", "FINAL ANSWER:", "Final Answer:"):
        if marker in text:
            return True
    return False
155
+
156
+
157
def filter_thinking(text: str) -> str:
    """Strip model 'thinking' noise, keeping only the actionable part."""
    # Drop explicit <think>...</think> blocks first.
    cleaned = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)

    # MedGemma sometimes narrates ("thought ...") before acting; in that
    # case keep only the TOOL_CALL: tail (preferred) or the ANSWER: tail.
    if cleaned.strip().lower().startswith('thought'):
        for pattern in (r'(TOOL_CALL:.*)', r'(ANSWER:.*)'):
            found = re.search(pattern, cleaned, re.IGNORECASE | re.DOTALL)
            if found:
                cleaned = found.group(1)
                break

    return cleaned.strip()
175
+
176
+
177
async def call_llm(prompt: str) -> str:
    """Request a full (non-streaming) completion from the llama server.

    Returns the stripped `content` field of the JSON response; raises
    httpx.HTTPStatusError on a non-2xx reply.
    """
    payload = {
        "prompt": prompt,
        "n_predict": 1024,
        "temperature": 0.7,
        "stop": ["<end_of_turn>", "</s>", "<|im_end|>"],
        "stream": False
    }
    async with httpx.AsyncClient(timeout=300.0) as http:
        resp = await http.post(
            f"{LLAMA_SERVER_URL}/completion",
            headers=LLM_HEADERS,
            json=payload
        )
        resp.raise_for_status()
        body = resp.json()
    return body.get("content", "").strip()
194
+
195
+
196
async def stream_llm(prompt: str) -> AsyncGenerator[str, None]:
    """Yield completion tokens from the llama server as they arrive.

    Reads the server-sent-event stream, skipping non-data lines and
    malformed JSON chunks, and stops at the [DONE] sentinel.
    """
    payload = {
        "prompt": prompt,
        "n_predict": 1024,
        "temperature": 0.7,
        "stop": ["<end_of_turn>", "</s>", "<|im_end|>"],
        "stream": True
    }
    async with httpx.AsyncClient(timeout=300.0) as http:
        async with http.stream(
            "POST",
            f"{LLAMA_SERVER_URL}/completion",
            headers=LLM_HEADERS,
            json=payload
        ) as resp:
            async for raw_line in resp.aiter_lines():
                if not raw_line.startswith("data: "):
                    continue
                chunk_str = raw_line[6:]
                if chunk_str.strip() == "[DONE]":
                    break
                try:
                    piece = json.loads(chunk_str).get("content", "")
                except json.JSONDecodeError:
                    continue
                if piece:
                    yield piece
223
+
224
+
225
async def run_agent(patient_id: str, question: str) -> AsyncGenerator[dict, None]:
    """
    Run the agent loop with streaming support.

    The loop streams LLM output, buffering while it looks like a tool call
    or "thinking" preamble; otherwise it streams tokens straight to the
    caller. Detected tool calls are executed and their results fed back
    into the prompt history for up to MAX_STEPS rounds.

    Yields events:
    - {"type": "status", "message": "..."}
    - {"type": "tool_call", "tool": "...", "args": {...}}
    - {"type": "tool_result", "tool": "...", "result": "..."}
    - {"type": "chart_data", "data": {...}}
    - {"type": "answer_start"}
    - {"type": "token", "content": "..."}
    - {"type": "answer_end"}
    - {"type": "error", "message": "..."}
    """

    system = build_system_prompt(patient_id)
    history = []

    yield {"type": "status", "message": "Analyzing your question..."}

    for step in range(MAX_STEPS):
        prompt = build_prompt(system, question, history)

        # Stream the response and detect tool calls vs answers.
        full_response = ""
        is_streaming_answer = False

        try:
            async for token in stream_llm(prompt):
                full_response += token

                # Check for tool call patterns anywhere in the response so far.
                has_tool_json = ('"tool"' in full_response and '"args"' in full_response)
                has_tool_marker = ("TOOL_CALL:" in full_response or
                                   "```tool" in full_response.lower() or
                                   "```json" in full_response.lower() or
                                   has_tool_json)

                # If we see tool patterns, keep buffering until the JSON
                # object's braces are balanced, then stop streaming.
                if has_tool_marker:
                    if full_response.count('{') > 0 and full_response.count('{') == full_response.count('}'):
                        break
                    continue  # Keep buffering

                # Check for PARTIAL tool markers - keep buffering.
                stripped = full_response.strip().upper()
                if stripped.startswith("TOOL") or stripped.startswith("`"):
                    continue  # Wait for more tokens

                # Check for thinking patterns - keep buffering (bounded so a
                # rambling model cannot buffer forever).
                thinking_patterns = ["thought", "thinking", "let me", "i need to", "i will", "step 1", "1."]
                has_thinking = any(p in full_response.lower()[:200] for p in thinking_patterns)

                if has_thinking:
                    if len(full_response) < 2000:
                        continue

                # No tool call or thinking patterns - stream as direct answer.
                if "ANSWER:" in full_response:
                    if not is_streaming_answer:
                        is_streaming_answer = True
                        yield {"type": "answer_start", "content": ""}
                        # Flush everything buffered after the ANSWER: marker.
                        answer_part = full_response.split("ANSWER:", 1)[1]
                        if answer_part.strip():
                            yield {"type": "token", "content": answer_part}
                    else:
                        yield {"type": "token", "content": token}
                else:
                    # Direct answer without ANSWER: prefix.
                    if not is_streaming_answer:
                        is_streaming_answer = True
                        yield {"type": "answer_start", "content": ""}
                        yield {"type": "token", "content": full_response}
                    else:
                        yield {"type": "token", "content": token}

        except Exception as e:
            yield {"type": "error", "message": f"LLM error: {str(e)}"}
            return

        # If we were streaming an answer, we're done.
        if is_streaming_answer:
            yield {"type": "answer_end", "content": ""}
            return

        # Handle tool call.
        full_response = filter_thinking(full_response)
        tool_call = parse_tool_call(full_response)

        if tool_call:
            tool_name = tool_call.get("tool", "")
            tool_args = tool_call.get("args", {})

            # The model rarely echoes the patient id; inject it ourselves.
            if "patient_id" not in tool_args:
                tool_args["patient_id"] = patient_id

            yield {"type": "tool_call", "tool": tool_name, "args": tool_args}

            # Execute tool.
            result = execute_tool(tool_name, tool_args)

            # For chart tools, short-circuit: emit the chart plus a canned
            # closing message instead of another LLM round-trip.
            if tool_name in ["get_vital_chart_data", "get_lab_chart_data", "compare_before_after_treatment"]:
                try:
                    parsed = json.loads(result)
                    if "chart_type" in parsed and "error" not in parsed:
                        yield {"type": "chart_data", "data": parsed}
                        chart_title = parsed.get("title", "chart")
                        if "summary" in parsed:
                            summary_text = "\n".join(parsed["summary"])
                            yield {"type": "answer_start", "content": ""}
                            yield {"type": "token", "content": f"Here's your {chart_title.lower()}.\n\n**Changes:** {summary_text}\n\nDiscuss these results with your healthcare provider."}
                            yield {"type": "answer_end", "content": ""}
                        else:
                            yield {"type": "answer_start", "content": ""}
                            yield {"type": "token", "content": f"Here's your {chart_title.lower()}. If you notice any concerning patterns, please discuss with your healthcare provider."}
                            yield {"type": "answer_end", "content": ""}
                        return
                except Exception:
                    # FIX: was a bare `except:`. A bare except inside a try
                    # that yields (this is an async generator) would also
                    # swallow GeneratorExit when the consumer closes the
                    # stream mid-chart, causing a RuntimeError. Best-effort
                    # fallthrough to the text path is preserved.
                    pass

            # Show tool result (truncated for display).
            display_result = result[:500] + "..." if len(result) > 500 else result
            yield {"type": "tool_result", "tool": tool_name, "result": display_result}

            # Add to history (tighter truncation to keep the prompt small).
            history_result = result[:300] + "\n... [truncated]" if len(result) > 300 else result
            history.append({"role": "assistant", "content": full_response})
            history.append({"role": "tool_result", "tool": tool_name, "content": history_result})

        else:
            # No tool call detected - treat response as answer.
            yield {"type": "answer_start", "content": ""}
            yield {"type": "token", "content": filter_thinking(full_response)}
            yield {"type": "answer_end", "content": ""}
            return

    # Max steps reached - stream final answer.
    yield {"type": "status", "message": "Generating final answer..."}

    prompt = build_prompt(system, question, history)
    prompt += "\nProvide your ANSWER now based on the information gathered:"

    try:
        yield {"type": "answer_start", "content": ""}
        async for token in stream_llm(prompt):
            yield {"type": "token", "content": token}
        yield {"type": "answer_end", "content": ""}
    except Exception as e:
        yield {"type": "error", "message": f"Failed to generate answer: {str(e)}"}
380
+
381
+
382
async def run_agent_simple(patient_id: str, question: str) -> str:
    """Simple interface - returns just the final answer as one string.

    FIX: the previous version only read `"answer"` events, but run_agent
    never emits that type — it streams the answer as `"token"` events
    between `"answer_start"`/`"answer_end"` — so this always returned "".
    Tokens are now accumulated; the legacy `"answer"` branch is kept for
    compatibility with agents that emit a single consolidated event.
    """
    answer = ""
    async for event in run_agent(patient_id, question):
        event_type = event["type"]
        if event_type == "token":
            answer += event.get("content", "")
        elif event_type == "answer":
            # Consolidated answer event (not emitted by run_agent today).
            answer = event["content"]
        elif event_type == "error":
            answer = f"Error: {event['message']}"
    return answer
server.py CHANGED
@@ -398,66 +398,12 @@ async def health_check():
398
  # Agent endpoint (v2 with discovery, planning, fact extraction)
399
  # ============================================================================
400
  from agent_v2 import run_agent_v2
401
- from agent_v3 import run_agent_v3, chat_with_agent_v3
402
-
403
- class AgenticChatRequest(BaseModel):
404
- patient_id: str
405
- message: str
406
- include_context: bool = True
407
- agentic_mode: bool = False # Enable enhanced reasoning trace
408
 
409
  @app.post("/api/agent/chat")
410
- async def agent_chat_endpoint(request: AgenticChatRequest):
411
- async def generate():
412
- try:
413
- if request.agentic_mode:
414
- # Use enhanced agent v3 with visible reasoning
415
- async for event in run_agent_v3(request.patient_id, request.message, stream_reasoning=True):
416
- yield f"data: {json.dumps(event)}\n\n"
417
- else:
418
- # Use standard agent v2
419
- async for event in run_agent_v2(request.patient_id, request.message):
420
- yield f"data: {json.dumps(event)}\n\n"
421
- except Exception as e:
422
- yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n"
423
- yield "data: [DONE]\n\n"
424
-
425
- return StreamingResponse(
426
- generate(),
427
- media_type="text/event-stream",
428
- headers={
429
- "Cache-Control": "no-cache",
430
- "Connection": "keep-alive",
431
- "X-Accel-Buffering": "no"
432
- }
433
- )
434
-
435
-
436
- # =============================================================================
437
- # AGENTIC WORKFLOW SHOWCASE ENDPOINT
438
- # =============================================================================
439
- @app.post("/api/agent/comprehensive")
440
- async def comprehensive_previsit_summary(request: ChatRequest):
441
- """
442
- Generate a comprehensive pre-visit summary using the enhanced agentic workflow.
443
-
444
- This endpoint showcases:
445
- 1. DISCOVER: Analyze available patient data
446
- 2. PLAN: Create multi-step execution plan
447
- 3. EXECUTE: Call multiple tools with self-correction
448
- 4. REFLECT: Verify completeness
449
- 5. SYNTHESIZE: Generate comprehensive summary
450
-
451
- The reasoning trace is streamed to show the agent's decision-making process.
452
- """
453
- # Force comprehensive query
454
- comprehensive_query = f"""Prepare a comprehensive pre-visit summary for my upcoming appointment.
455
- Include: all my medical conditions, current medications, recent vital signs with trends,
456
- any allergies, and recent lab results. {request.message}"""
457
-
458
  async def generate():
459
  try:
460
- async for event in run_agent_v3(request.patient_id, comprehensive_query, stream_reasoning=True):
461
  yield f"data: {json.dumps(event)}\n\n"
462
  except Exception as e:
463
  yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n"
 
398
  # Agent endpoint (v2 with discovery, planning, fact extraction)
399
  # ============================================================================
400
  from agent_v2 import run_agent_v2
 
 
 
 
 
 
 
401
 
402
  @app.post("/api/agent/chat")
403
+ async def agent_chat_endpoint(request: ChatRequest):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
  async def generate():
405
  try:
406
+ async for event in run_agent_v2(request.patient_id, request.message):
407
  yield f"data: {json.dumps(event)}\n\n"
408
  except Exception as e:
409
  yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n"
static/index.html CHANGED
@@ -286,151 +286,6 @@
286
  transition: all 0.2s;
287
  }
288
  .chip:hover { border-color: var(--primary); color: var(--primary); background: rgba(94, 114, 228, 0.1); }
289
-
290
- /* Agentic chip - special styling for comprehensive summary */
291
- .chip.agentic-chip {
292
- background: linear-gradient(135deg, rgba(94, 114, 228, 0.2), rgba(45, 206, 137, 0.2));
293
- border-color: var(--primary);
294
- color: var(--text-main);
295
- font-weight: 600;
296
- }
297
- .chip.agentic-chip:hover {
298
- background: linear-gradient(135deg, rgba(94, 114, 228, 0.3), rgba(45, 206, 137, 0.3));
299
- border-color: var(--success);
300
- }
301
-
302
- /* Reasoning Trace Panel */
303
- .reasoning-trace {
304
- background: linear-gradient(145deg, #1a1f2e, #171c29);
305
- border: 1px solid var(--border-color);
306
- border-left: 3px solid var(--primary);
307
- border-radius: 12px;
308
- margin: 10px 0;
309
- padding: 15px;
310
- font-size: 13px;
311
- }
312
- .reasoning-trace-header {
313
- display: flex;
314
- align-items: center;
315
- gap: 8px;
316
- color: var(--primary);
317
- font-weight: 600;
318
- margin-bottom: 12px;
319
- }
320
- .reasoning-step {
321
- display: flex;
322
- align-items: flex-start;
323
- gap: 10px;
324
- padding: 8px 0;
325
- border-bottom: 1px solid rgba(255,255,255,0.05);
326
- }
327
- .reasoning-step:last-child {
328
- border-bottom: none;
329
- }
330
- .reasoning-phase {
331
- font-size: 11px;
332
- font-weight: 700;
333
- text-transform: uppercase;
334
- padding: 3px 8px;
335
- border-radius: 4px;
336
- min-width: 70px;
337
- text-align: center;
338
- }
339
- .reasoning-phase.discover { background: rgba(17, 205, 239, 0.2); color: #11cdef; }
340
- .reasoning-phase.plan { background: rgba(94, 114, 228, 0.2); color: #5e72e4; }
341
- .reasoning-phase.execute { background: rgba(251, 99, 64, 0.2); color: #fb6340; }
342
- .reasoning-phase.reflect { background: rgba(251, 191, 36, 0.2); color: #fbbf24; }
343
- .reasoning-phase.synthesize { background: rgba(45, 206, 137, 0.2); color: #2dce89; }
344
- .reasoning-content {
345
- flex: 1;
346
- }
347
- .reasoning-action {
348
- color: var(--text-main);
349
- }
350
- .reasoning-result {
351
- color: var(--text-muted);
352
- font-size: 12px;
353
- margin-top: 4px;
354
- }
355
-
356
- /* Execution Plan Display */
357
- .execution-plan {
358
- background: var(--secondary);
359
- border-radius: 8px;
360
- padding: 12px;
361
- margin: 10px 0;
362
- }
363
- .plan-header {
364
- color: var(--primary);
365
- font-weight: 600;
366
- margin-bottom: 10px;
367
- display: flex;
368
- align-items: center;
369
- gap: 6px;
370
- }
371
- .plan-step {
372
- display: flex;
373
- align-items: center;
374
- gap: 8px;
375
- padding: 6px 0;
376
- font-size: 13px;
377
- }
378
- .plan-step-num {
379
- background: var(--primary);
380
- color: white;
381
- width: 20px;
382
- height: 20px;
383
- border-radius: 50%;
384
- display: flex;
385
- align-items: center;
386
- justify-content: center;
387
- font-size: 11px;
388
- font-weight: 700;
389
- }
390
- .plan-step-tool {
391
- color: var(--info);
392
- font-family: monospace;
393
- }
394
- .plan-step-reason {
395
- color: var(--text-muted);
396
- }
397
-
398
- /* Agentic Progress in Report Panel */
399
- .agentic-progress {
400
- text-align: left;
401
- padding: 10px;
402
- }
403
- .agentic-progress .progress-header {
404
- font-weight: 600;
405
- color: var(--primary);
406
- margin-bottom: 12px;
407
- display: flex;
408
- align-items: center;
409
- gap: 8px;
410
- }
411
- .reasoning-steps {
412
- max-height: 200px;
413
- overflow-y: auto;
414
- }
415
- .reasoning-step-mini {
416
- display: flex;
417
- align-items: center;
418
- gap: 8px;
419
- padding: 6px 0;
420
- font-size: 12px;
421
- color: var(--text-main);
422
- animation: fadeIn 0.3s ease;
423
- }
424
- .reasoning-step-mini .step-emoji {
425
- font-size: 14px;
426
- }
427
- .reasoning-step-mini .step-text {
428
- color: var(--text-muted);
429
- }
430
- @keyframes fadeIn {
431
- from { opacity: 0; transform: translateY(-5px); }
432
- to { opacity: 1; transform: translateY(0); }
433
- }
434
 
435
  /* Report Toggle Button - Inline & Compact */
436
  .btn-toggle-report {
@@ -1789,8 +1644,6 @@
1789
  // ==========================================
1790
  // CHAT LOGIC
1791
  // ==========================================
1792
- let currentReasoningCard = null; // Track reasoning card for updates
1793
-
1794
  async function sendMessage() {
1795
  const message = chatInput.value.trim();
1796
  if (!message || !patientId) return;
@@ -1804,85 +1657,6 @@
1804
  chatSend.disabled = false;
1805
  chatInput.focus();
1806
  }
1807
-
1808
- async function processAgentStream(response, isAgentic = false) {
1809
- const reader = response.body.getReader();
1810
- const decoder = new TextDecoder();
1811
-
1812
- while (true) {
1813
- const { done, value } = await reader.read();
1814
- if (done) break;
1815
-
1816
- const chunk = decoder.decode(value);
1817
- const lines = chunk.split('\n');
1818
-
1819
- for (const line of lines) {
1820
- const trimmedLine = line.trim();
1821
- if (!trimmedLine || !trimmedLine.startsWith('data: ')) continue;
1822
-
1823
- const dataStr = trimmedLine.slice(6).trim();
1824
- if (dataStr === '[DONE]') return;
1825
-
1826
- try {
1827
- const event = JSON.parse(dataStr);
1828
- handleAgentEvent(event, isAgentic);
1829
- } catch (e) {
1830
- console.log('Skipping non-JSON chunk:', dataStr);
1831
- }
1832
- }
1833
- }
1834
- }
1835
-
1836
- function handleAgentEvent(event, isAgentic) {
1837
- switch (event.type) {
1838
- // Standard agent events
1839
- case 'status':
1840
- break;
1841
- case 'discovery':
1842
- if (event.summary) {
1843
- addDiscoveryCard(event.summary, event.manifest);
1844
- }
1845
- break;
1846
- case 'plan':
1847
- if (event.tools && event.tools.length > 0) {
1848
- addPlanCard(event.tools);
1849
- }
1850
- break;
1851
- case 'tool_call':
1852
- break;
1853
- case 'tool_result':
1854
- if (event.facts && event.facts.trim()) {
1855
- addFactsCard(event.tool, event.facts, event.raw_preview);
1856
- } else if (event.result && event.result.trim()) {
1857
- addToolResult(event.tool, event.result);
1858
- }
1859
- break;
1860
- case 'chart_data':
1861
- renderChartWidget(event.data);
1862
- break;
1863
- case 'chart':
1864
- // Agent v3 chart event
1865
- if (event.data) {
1866
- renderChartWidget(event.data);
1867
- }
1868
- break;
1869
- case 'answer':
1870
- addAssistantMessage(event.content);
1871
- break;
1872
- case 'answer_start':
1873
- startStreamingAnswer();
1874
- break;
1875
- case 'token':
1876
- appendStreamingToken(event.content);
1877
- break;
1878
- case 'answer_end':
1879
- endStreamingAnswer();
1880
- break;
1881
- case 'error':
1882
- addSystemLog(event.message, 'error');
1883
- break;
1884
- }
1885
- }
1886
 
1887
  async function sendAgentMessage(message) {
1888
  try {
@@ -1892,7 +1666,73 @@
1892
  body: JSON.stringify({ patient_id: patientId, message })
1893
  });
1894
 
1895
- await processAgentStream(response, false);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1896
  } catch (error) {
1897
  addSystemLog('Network Error', 'error');
1898
  console.error(error);
@@ -2069,108 +1909,22 @@
2069
  if (!patientId) return;
2070
 
2071
  const btn = document.getElementById('btnGenerateReport');
2072
- const placeholder = document.getElementById('reportPlaceholder');
2073
-
2074
  btn.classList.add('loading');
2075
  btn.disabled = true;
2076
- btn.innerHTML = '🤖 Gathering Data...';
2077
-
2078
- // Show reasoning steps in placeholder area
2079
- placeholder.innerHTML = `
2080
- <div class="agentic-progress">
2081
- <div class="progress-header">🤖 AI Agent Working...</div>
2082
- <div class="reasoning-steps" id="reportReasoningSteps"></div>
2083
- </div>
2084
- `;
2085
- placeholder.style.display = 'block';
2086
- document.getElementById('reportPreview').style.display = 'none';
2087
-
2088
- // Open panel if not already open
2089
- const panel = document.getElementById('reportPanel');
2090
- if (!panel.classList.contains('open')) {
2091
- toggleReportPanel();
2092
- }
2093
 
2094
  try {
2095
- // Phase 1: Run agentic workflow to gather comprehensive data
2096
- const agentResponse = await fetch('/api/agent/comprehensive', {
2097
- method: 'POST',
2098
- headers: { 'Content-Type': 'application/json' },
2099
- body: JSON.stringify({ patient_id: patientId, message: '' })
2100
- });
2101
-
2102
- const reader = agentResponse.body.getReader();
2103
- const decoder = new TextDecoder();
2104
-
2105
- let agentFacts = [];
2106
- let agentCharts = [];
2107
-
2108
- // Process the agentic stream
2109
- while (true) {
2110
- const { done, value } = await reader.read();
2111
- if (done) break;
2112
-
2113
- const chunk = decoder.decode(value);
2114
- const lines = chunk.split('\n');
2115
-
2116
- for (const line of lines) {
2117
- const trimmedLine = line.trim();
2118
- if (!trimmedLine || !trimmedLine.startsWith('data: ')) continue;
2119
-
2120
- const dataStr = trimmedLine.slice(6).trim();
2121
- if (dataStr === '[DONE]') break;
2122
-
2123
- try {
2124
- const event = JSON.parse(dataStr);
2125
-
2126
- // Show reasoning steps
2127
- if (event.type === 'reasoning') {
2128
- addReportReasoningStep(event.phase, event.action, event.result);
2129
- }
2130
-
2131
- // Show execution plan
2132
- if (event.type === 'plan' && event.summary) {
2133
- addReportReasoningStep('plan', `Planning ${event.summary.length} tool calls`, null);
2134
- }
2135
-
2136
- // Collect chart data
2137
- if (event.type === 'chart' && event.data) {
2138
- agentCharts.push(event.data);
2139
- }
2140
-
2141
- // Collect token responses as facts
2142
- if (event.type === 'token') {
2143
- // Accumulate for final summary
2144
- }
2145
-
2146
- // Track completion
2147
- if (event.type === 'done') {
2148
- addReportReasoningStep('done', `✓ Gathered ${event.facts_collected} data points`, null);
2149
- }
2150
- } catch (e) {
2151
- // Skip non-JSON
2152
- }
2153
- }
2154
- }
2155
-
2156
- // Update button
2157
- btn.innerHTML = '📋 Creating Report...';
2158
- addReportReasoningStep('synthesize', 'Generating PDF report...', null);
2159
-
2160
- // Phase 2: Generate the actual report
2161
- const reportResponse = await fetch('/api/report/generate', {
2162
  method: 'POST',
2163
  headers: { 'Content-Type': 'application/json' },
2164
  body: JSON.stringify({
2165
  patient_id: patientId,
2166
  conversation: conversationHistory,
2167
  tool_results: collectedToolResults,
2168
- attachments: collectedAttachments,
2169
- comprehensive: true // Flag that we ran agentic workflow
2170
  })
2171
  });
2172
 
2173
- const data = await reportResponse.json();
2174
 
2175
  if (data.success) {
2176
  currentReport = data;
@@ -2179,55 +1933,23 @@
2179
  // Update toggle button
2180
  document.getElementById('btnToggleReport').classList.add('has-report');
2181
 
2182
- // Show success
2183
- addReportReasoningStep('done', '✓ Report ready!', null);
2184
-
2185
- // Brief delay then show preview
2186
- setTimeout(() => {
2187
- placeholder.style.display = 'none';
2188
- document.getElementById('reportPreview').style.display = 'block';
2189
- }, 1000);
2190
  } else {
2191
- throw new Error(data.error || 'Unknown error');
2192
  }
2193
  } catch (error) {
2194
  console.error('Report generation error:', error);
2195
- placeholder.innerHTML = `
2196
- <div class="icon">❌</div>
2197
- <p>Failed to generate report. Please try again.</p>
2198
- `;
2199
  } finally {
2200
  btn.classList.remove('loading');
2201
  btn.disabled = false;
2202
- btn.innerHTML = '📋 Generate Report';
2203
  }
2204
  }
2205
 
2206
- function addReportReasoningStep(phase, action, result) {
2207
- const container = document.getElementById('reportReasoningSteps');
2208
- if (!container) return;
2209
-
2210
- const phaseEmoji = {
2211
- 'discover': '🔍',
2212
- 'plan': '📋',
2213
- 'execute': '⚙️',
2214
- 'reflect': '🤔',
2215
- 'synthesize': '✨',
2216
- 'done': '✅'
2217
- };
2218
-
2219
- const step = document.createElement('div');
2220
- step.className = 'reasoning-step-mini';
2221
- step.innerHTML = `
2222
- <span class="step-emoji">${phaseEmoji[phase] || '•'}</span>
2223
- <span class="step-text">${escapeHtml(action)}</span>
2224
- `;
2225
- container.appendChild(step);
2226
-
2227
- // Auto-scroll
2228
- container.scrollTop = container.scrollHeight;
2229
- }
2230
-
2231
  function updateReportPreview(report) {
2232
  document.getElementById('reportPlaceholder').style.display = 'none';
2233
  const preview = document.getElementById('reportPreview');
 
286
  transition: all 0.2s;
287
  }
288
  .chip:hover { border-color: var(--primary); color: var(--primary); background: rgba(94, 114, 228, 0.1); }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
 
290
  /* Report Toggle Button - Inline & Compact */
291
  .btn-toggle-report {
 
1644
  // ==========================================
1645
  // CHAT LOGIC
1646
  // ==========================================
 
 
1647
  async function sendMessage() {
1648
  const message = chatInput.value.trim();
1649
  if (!message || !patientId) return;
 
1657
  chatSend.disabled = false;
1658
  chatInput.focus();
1659
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1660
 
1661
  async function sendAgentMessage(message) {
1662
  try {
 
1666
  body: JSON.stringify({ patient_id: patientId, message })
1667
  });
1668
 
1669
+ const reader = response.body.getReader();
1670
+ const decoder = new TextDecoder();
1671
+
1672
+ while (true) {
1673
+ const { done, value } = await reader.read();
1674
+ if (done) break;
1675
+
1676
+ const chunk = decoder.decode(value);
1677
+ const lines = chunk.split('\n');
1678
+
1679
+ for (const line of lines) {
1680
+ const trimmedLine = line.trim();
1681
+ if (!trimmedLine || !trimmedLine.startsWith('data: ')) continue;
1682
+
1683
+ const dataStr = trimmedLine.slice(6).trim();
1684
+ if (dataStr === '[DONE]') return;
1685
+
1686
+ try {
1687
+ const event = JSON.parse(dataStr);
1688
+ switch (event.type) {
1689
+ case 'status':
1690
+ // Skip status messages - we have the feedback card now
1691
+ break;
1692
+ case 'discovery':
1693
+ // Agent v2: Show what data is available
1694
+ if (event.summary) {
1695
+ addDiscoveryCard(event.summary, event.manifest);
1696
+ }
1697
+ break;
1698
+ case 'plan':
1699
+ // Agent v2: Show planned tools with reasons
1700
+ if (event.tools && event.tools.length > 0) {
1701
+ addPlanCard(event.tools);
1702
+ }
1703
+ break;
1704
+ case 'tool_call':
1705
+ // Skip tool call logs - handled by feedback card
1706
+ break;
1707
+ case 'tool_result':
1708
+ // Agent v2: Display extracted facts, fallback to raw result
1709
+ if (event.facts && event.facts.trim()) {
1710
+ addFactsCard(event.tool, event.facts, event.raw_preview);
1711
+ } else if (event.result && event.result.trim()) {
1712
+ addToolResult(event.tool, event.result);
1713
+ }
1714
+ break;
1715
+ case 'chart_data':
1716
+ renderChartWidget(event.data); break;
1717
+ case 'answer':
1718
+ addAssistantMessage(event.content); break;
1719
+ case 'answer_start':
1720
+ startStreamingAnswer();
1721
+ break;
1722
+ case 'token':
1723
+ appendStreamingToken(event.content);
1724
+ break;
1725
+ case 'answer_end':
1726
+ endStreamingAnswer();
1727
+ break;
1728
+ case 'error':
1729
+ addSystemLog(event.message, 'error'); break;
1730
+ }
1731
+ } catch (e) {
1732
+ console.log('Skipping non-JSON chunk:', dataStr);
1733
+ }
1734
+ }
1735
+ }
1736
  } catch (error) {
1737
  addSystemLog('Network Error', 'error');
1738
  console.error(error);
 
1909
  if (!patientId) return;
1910
 
1911
  const btn = document.getElementById('btnGenerateReport');
 
 
1912
  btn.classList.add('loading');
1913
  btn.disabled = true;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1914
 
1915
  try {
1916
+ const response = await fetch('/api/report/generate', {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1917
  method: 'POST',
1918
  headers: { 'Content-Type': 'application/json' },
1919
  body: JSON.stringify({
1920
  patient_id: patientId,
1921
  conversation: conversationHistory,
1922
  tool_results: collectedToolResults,
1923
+ attachments: collectedAttachments
 
1924
  })
1925
  });
1926
 
1927
+ const data = await response.json();
1928
 
1929
  if (data.success) {
1930
  currentReport = data;
 
1933
  // Update toggle button
1934
  document.getElementById('btnToggleReport').classList.add('has-report');
1935
 
1936
+ // Open panel if not already open
1937
+ const panel = document.getElementById('reportPanel');
1938
+ if (!panel.classList.contains('open')) {
1939
+ toggleReportPanel();
1940
+ }
 
 
 
1941
  } else {
1942
+ alert('Failed to generate report: ' + (data.error || 'Unknown error'));
1943
  }
1944
  } catch (error) {
1945
  console.error('Report generation error:', error);
1946
+ alert('Failed to generate report. Please try again.');
 
 
 
1947
  } finally {
1948
  btn.classList.remove('loading');
1949
  btn.disabled = false;
 
1950
  }
1951
  }
1952
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1953
  function updateReportPreview(report) {
1954
  document.getElementById('reportPlaceholder').style.display = 'none';
1955
  const preview = document.getElementById('reportPreview');