Spaces:

Fa987123
/

multimodal_previsit

Sleeping

File size: 21,211 Bytes

"""
MedGemma Agent using LangGraph
==============================
A proper graph-based workflow using LangGraph's StateGraph.

Workflow Graph:
    START -> discover -> skin_analysis -> plan -> execute -> reflect
                                           ^                    |
                                           |___(gaps found)_____|
                                                                |
                                                                v
                                                           synthesize -> END

Reuses all working logic from agent_v2.py:
- Dynamic tool registry with category filtering
- Condensed tool descriptions (token reduction)
- Semantic parameter normalization
- LLM-based error recovery
- Skin image analysis with Derm Foundation
"""

import os
import json
import re
from typing import Dict, List, Optional, Set, Any, Literal, AsyncGenerator
from typing_extensions import TypedDict, Annotated

# LangGraph imports
from langgraph.graph import StateGraph, START, END
from langgraph.checkpoint.memory import MemorySaver

# Reuse ALL working logic from agent_v2
from agent_v2 import (
    call_llm, stream_llm, filter_thinking,
    get_patient_manifest, plan_tools, execute_and_extract,
    reflect_on_facts, synthesize_answer,
    get_relevant_categories, get_filtered_tools_description,
    format_conversation_history,
    WorkflowPhase, AgentState,
    TOOLS
)
from tools import execute_tool, get_tools_description

LLAMA_SERVER_URL = os.environ.get('LLAMA_SERVER_URL', 'http://localhost:8080')


# =============================================================================
# State Definition
# =============================================================================

def _append_events(existing: List[Dict], new: List[Dict]) -> List[Dict]:
    """Reducer: append new events to existing list."""
    return existing + new

def _append_facts(existing: List[Dict], new: List[Dict]) -> List[Dict]:
    """Reducer: append new facts to existing list."""
    return existing + new

def _merge_tools(existing: Set[str], new: Set[str]) -> Set[str]:
    """Reducer: merge executed tools sets."""
    return existing | new


class GraphState(TypedDict):
    """State that flows through the LangGraph."""
    # Input (set once)
    patient_id: str
    question: str
    skin_image_data: Optional[str]
    conversation_history: List[Dict]  # Prior conversation turns
    
    # Discovery
    manifest: Dict
    
    # Planning
    planned_tools: List[Dict]
    
    # Execution  
    collected_facts: Annotated[List[Dict], _append_facts]
    executed_tools: Annotated[Set[str], _merge_tools]
    chart_data: Optional[Dict]
    
    # Skin analysis
    skin_analysis_result: Optional[Dict]
    skin_llm_prompt: Optional[str]
    
    # Reflection
    reflection_gaps: List[str]
    should_continue: bool
    
    # Control
    iteration: int
    max_iterations: int
    
    # Output - events collected by each node for streaming to UI
    events: Annotated[List[Dict], _append_events]
    final_answer: str


# =============================================================================
# Node Functions
# =============================================================================

async def discover_node(state: GraphState) -> Dict:
    """DISCOVER: Get patient data manifest."""
    print(f"[LANGGRAPH] discover_node")
    events = []
    
    events.append({"type": "status", "message": "Discovering available data..."})
    
    manifest = get_patient_manifest(state["patient_id"])
    
    # Build summary
    patient_info = manifest.get("patient_info", {})
    available = manifest.get("available_data", {})
    summary_parts = []
    if patient_info:
        summary_parts.append(f"Patient: {patient_info.get('name', 'Unknown')}, {patient_info.get('age', '?')}y, {patient_info.get('gender', '')}")
    data_counts = [f"{info['count']} {table}" for table, info in available.items()]
    if data_counts:
        summary_parts.append(f"Available: {', '.join(data_counts)}")
    
    events.append({"type": "discovery", "manifest": manifest, "summary": " | ".join(summary_parts)})
    
    return {
        "manifest": manifest,
        "events": events
    }


async def skin_analysis_node(state: GraphState) -> Dict:
    """SKIN ANALYSIS: Analyze uploaded skin image (if present)."""
    if not state.get("skin_image_data"):
        return {"events": []}
    
    print(f"[LANGGRAPH] skin_analysis_node - image: {len(state['skin_image_data'])} chars")
    events = []
    events.append({"type": "status", "message": "Analyzing skin image with Derm Foundation + SCIN Classifier..."})
    
    try:
        from tools import analyze_skin_image
        skin_result_str = analyze_skin_image(
            state["patient_id"], 
            state["skin_image_data"], 
            symptoms=state["question"]
        )
        skin_result = json.loads(skin_result_str)
        
        events.append({
            "type": "skin_analysis",
            "data": skin_result,
            "image_data": state["skin_image_data"]
        })
        
        # Extract facts from skin analysis
        facts = []
        if skin_result.get("status") == "success":
            facts_parts = []
            conditions = skin_result.get("conditions", [])
            if conditions:
                if isinstance(conditions[0], dict):
                    cond_strs = [f"{c.get('name', 'Unknown')} ({c.get('confidence', 0)}% - {c.get('likelihood', 'possible')})" for c in conditions[:3]]
                else:
                    cond_strs = [str(c) for c in conditions[:3]]
                facts_parts.append(f"Possible conditions: {', '.join(cond_strs)}")
            
            symptoms_img = skin_result.get("symptoms_from_image", [])
            if symptoms_img:
                if isinstance(symptoms_img[0], dict):
                    symp_strs = [s.get('name', '') for s in symptoms_img[:3] if s.get('name')]
                else:
                    symp_strs = [str(s) for s in symptoms_img[:3]]
                if symp_strs:
                    facts_parts.append(f"Detected symptoms: {', '.join(symp_strs)}")
            
            facts_str = ". ".join(facts_parts) if facts_parts else f"Skin image analyzed with {skin_result.get('model', 'Derm Foundation')}."
            facts_str += "\n" + skin_result.get("disclaimer", "")
            
            facts.append({"tool": "analyze_skin_image", "facts": facts_str})
        else:
            facts.append({"tool": "analyze_skin_image", "facts": f"Skin analysis error: {skin_result.get('error', 'Unknown')}"})
        
        return {
            "skin_analysis_result": skin_result,
            "skin_llm_prompt": skin_result.get("llm_synthesis_prompt", ""),
            "collected_facts": facts,
            "executed_tools": {"analyze_skin_image"},
            "events": events
        }
        
    except Exception as e:
        print(f"[LANGGRAPH] Skin analysis error: {e}")
        events.append({"type": "error", "message": f"Skin analysis error: {str(e)}"})
        return {"events": events}


async def plan_node(state: GraphState) -> Dict:
    """PLAN: Use LLM to identify which tools are needed."""
    iteration = state.get("iteration", 0) + 1
    print(f"[LANGGRAPH] plan_node - iteration {iteration}")
    events = []
    
    events.append({"type": "status", "message": f"Planning approach (iteration {iteration})..."})
    
    # Build an AgentState to reuse plan_tools from agent_v2
    agent_state = AgentState(
        patient_id=state["patient_id"],
        question=state["question"],
        conversation_history=state.get("conversation_history", []),
        manifest=state["manifest"],
        skin_image_data=state.get("skin_image_data"),
        executed_tools=state.get("executed_tools", set()),
        reflection_gaps=state.get("reflection_gaps", []),
        iteration=iteration,
        max_iterations=state.get("max_iterations", 3)
    )
    
    # If we have gaps from reflection, add context
    if state.get("reflection_gaps"):
        gap_context = f"\n\nPrevious iteration found these gaps: {', '.join(state['reflection_gaps'])}"
        agent_state.question = state["question"] + gap_context
    
    # Call the working plan_tools from agent_v2
    planned = await plan_tools(agent_state)
    
    # Remove already executed tools
    executed = state.get("executed_tools", set())
    planned = [t for t in planned if t.get("tool") not in executed]
    
    # Remove skin analysis if already done
    if state.get("skin_image_data"):
        planned = [t for t in planned if t.get("tool") != "analyze_skin_image"]
    
    relevant_categories = get_relevant_categories(state["question"], state["manifest"])
    
    events.append({
        "type": "plan",
        "tools": planned,
        "iteration": iteration,
        "tool_filtering": {
            "categories_used": sorted(relevant_categories),
            "total_tools": len(TOOLS)
        }
    })
    
    if not planned:
        print(f"[LANGGRAPH] No new tools to execute")
    
    return {
        "planned_tools": planned,
        "iteration": iteration,
        "reflection_gaps": [],  # Clear after use
        "events": events
    }


async def execute_node(state: GraphState) -> Dict:
    """EXECUTE: Run planned tools and extract facts."""
    print(f"[LANGGRAPH] execute_node - {len(state['planned_tools'])} tools")
    events = []
    new_facts = []
    new_executed = set()
    chart_data = None
    
    # Build AgentState for execute_and_extract
    agent_state = AgentState(
        patient_id=state["patient_id"],
        question=state["question"],
        manifest=state["manifest"],
        executed_tools=state.get("executed_tools", set()),
        iteration=state.get("iteration", 1),
        max_iterations=state.get("max_iterations", 3)
    )
    
    for tool_call in state["planned_tools"]:
        tool_name = tool_call.get("tool", "unknown")
        tool_args = tool_call.get("args", {})
        reason = tool_call.get("reason", "")
        
        if tool_name in state.get("executed_tools", set()):
            continue
        
        events.append({"type": "status", "message": f"Retrieving {tool_name}..."})
        events.append({"type": "tool_call", "tool": tool_name, "args": tool_args, "reason": reason})
        
        try:
            fact_result = await execute_and_extract(agent_state, tool_call)
            new_facts.append(fact_result)
            new_executed.add(tool_name)
            
            events.append({
                "type": "tool_result",
                "tool": tool_name,
                "facts": fact_result.get("facts", ""),
                "raw_preview": fact_result.get("raw_data", "")[:200]
            })
            
            # Check for chart data
            if agent_state.chart_data:
                chart_data = agent_state.chart_data
                events.append({"type": "chart_data", "data": chart_data})
                agent_state.chart_data = None
                
        except Exception as e:
            print(f"[LANGGRAPH] Tool error {tool_name}: {e}")
            events.append({"type": "tool_error", "tool": tool_name, "error": str(e)})
            new_executed.add(tool_name)
    
    return {
        "collected_facts": new_facts,
        "executed_tools": new_executed,
        "chart_data": chart_data,
        "events": events
    }


async def reflect_node(state: GraphState) -> Dict:
    """REFLECT: Evaluate if collected data is sufficient."""
    iteration = state.get("iteration", 1)
    max_iter = state.get("max_iterations", 3)
    print(f"[LANGGRAPH] reflect_node - iteration {iteration}/{max_iter}")
    events = []
    
    events.append({"type": "status", "message": "Reflecting on gathered information..."})
    
    # Build AgentState for reflect_on_facts
    agent_state = AgentState(
        patient_id=state["patient_id"],
        question=state["question"],
        collected_facts=state.get("collected_facts", []),
        executed_tools=state.get("executed_tools", set()),
        iteration=iteration,
        max_iterations=max_iter
    )
    
    reflection = await reflect_on_facts(agent_state)
    
    has_enough = reflection.get("has_enough_info", True)
    confidence = reflection.get("confidence", 0.8)
    gaps = reflection.get("gaps", [])
    
    events.append({
        "type": "reflection",
        "has_enough_info": has_enough,
        "confidence": confidence,
        "gaps": gaps,
        "reasoning": reflection.get("reasoning", ""),
        "iteration": iteration
    })
    
    should_continue = not has_enough and iteration < max_iter
    
    if has_enough:
        print(f"[LANGGRAPH] Reflection: Have enough info (confidence: {confidence})")
    else:
        print(f"[LANGGRAPH] Reflection: Need more info. Gaps: {gaps}")
    
    return {
        "reflection_gaps": gaps if should_continue else [],
        "should_continue": should_continue,
        "events": events
    }


async def synthesize_node(state: GraphState) -> Dict:
    """SYNTHESIZE: Mark ready for synthesis (actual streaming happens in runner)."""
    print(f"[LANGGRAPH] synthesize_node - marking ready")
    # Don't buffer tokens here - the runner will handle streaming directly
    return {
        "final_answer": "__READY_FOR_SYNTHESIS__",
        "events": [{"type": "status", "message": "Generating answer..."}]
    }


# =============================================================================
# Conditional Edge Functions
# =============================================================================

def should_execute(state: GraphState) -> Literal["execute", "synthesize"]:
    """After plan: execute tools or skip to synthesis."""
    if state.get("planned_tools"):
        return "execute"
    return "synthesize"


def should_loop_or_finish(state: GraphState) -> Literal["plan", "synthesize"]:
    """After reflect: loop back to plan or proceed to synthesis."""
    if state.get("should_continue", False):
        return "plan"
    return "synthesize"


# =============================================================================
# Build the Graph
# =============================================================================

def create_med_agent_graph():
    """
    Create and compile the MedGemma agent graph.
    
    Graph:
        START -> discover -> skin_analysis -> plan -> [execute or synthesize]
                                                ^           |
                                                |           v
                                                |        reflect -> [plan or synthesize]
                                                |___________|
    """
    graph = StateGraph(GraphState)
    
    # Add nodes
    graph.add_node("discover", discover_node)
    graph.add_node("skin_analysis", skin_analysis_node)
    graph.add_node("plan", plan_node)
    graph.add_node("execute", execute_node)
    graph.add_node("reflect", reflect_node)
    graph.add_node("synthesize", synthesize_node)
    
    # Edges: START -> discover -> skin_analysis -> plan
    graph.add_edge(START, "discover")
    graph.add_edge("discover", "skin_analysis")
    graph.add_edge("skin_analysis", "plan")
    
    # Conditional: plan -> execute (if tools) or synthesize (if none)
    graph.add_conditional_edges(
        "plan",
        should_execute,
        {"execute": "execute", "synthesize": "synthesize"}
    )
    
    # execute -> reflect
    graph.add_edge("execute", "reflect")
    
    # Conditional: reflect -> plan (loop back) or synthesize (done)
    graph.add_conditional_edges(
        "reflect",
        should_loop_or_finish,
        {"plan": "plan", "synthesize": "synthesize"}
    )
    
    # synthesize -> END
    graph.add_edge("synthesize", END)
    
    # Compile with checkpointer
    checkpointer = MemorySaver()
    compiled = graph.compile(checkpointer=checkpointer)
    
    print("[LANGGRAPH] Graph compiled successfully")
    return compiled


# Global graph instance (lazy init)
_GRAPH = None

def get_graph():
    """Get or create the compiled graph."""
    global _GRAPH
    if _GRAPH is None:
        _GRAPH = create_med_agent_graph()
    return _GRAPH


# =============================================================================
# Main Runner - Streams events to the UI
# =============================================================================

async def run_agent_langgraph(
    patient_id: str,
    question: str,
    skin_image_data: Optional[str] = None,
    conversation_history: Optional[List[Dict]] = None,
    thread_id: str = None
) -> AsyncGenerator[Dict, None]:
    """
    Run the LangGraph agent and yield SSE events for the UI.
    
    The graph handles discover→skin→plan→execute→reflect loop.
    Synthesis is streamed directly (not buffered) for real-time typing effect.
    """
    graph = get_graph()
    
    # Initial state
    initial_state = {
        "patient_id": patient_id,
        "question": question,
        "skin_image_data": skin_image_data,
        "conversation_history": conversation_history or [],
        "manifest": {},
        "planned_tools": [],
        "collected_facts": [],
        "executed_tools": set(),
        "chart_data": None,
        "skin_analysis_result": None,
        "skin_llm_prompt": None,
        "reflection_gaps": [],
        "should_continue": True,
        "iteration": 0,
        "max_iterations": 3,
        "events": [],
        "final_answer": ""
    }
    
    config = {"configurable": {"thread_id": thread_id or f"thread_{patient_id}_{id(question)}"}}
    
    final_state = None
    
    try:
        # Stream through graph - yields events from each node in real-time
        async for step in graph.astream(initial_state, config, stream_mode="updates"):
            for node_name, state_update in step.items():
                node_events = state_update.get("events", [])
                for event in node_events:
                    yield event
                
                # Capture state for synthesis
                if node_name == "synthesize":
                    final_state = state_update
        
        # Now get the full final state from the graph for synthesis
        # We need the accumulated state (collected_facts, manifest, etc.)
        graph_state = graph.get_state(config)
        full_state = graph_state.values if graph_state else initial_state
        
        # Stream synthesis tokens directly (not buffered!)
        yield {"type": "answer_start", "content": ""}
        
        agent_state = AgentState(
            patient_id=full_state.get("patient_id", patient_id),
            question=full_state.get("question", question),
            conversation_history=full_state.get("conversation_history", []),
            manifest=full_state.get("manifest", {}),
            collected_facts=full_state.get("collected_facts", []),
            skin_image_data=full_state.get("skin_image_data"),
            skin_analysis_result=full_state.get("skin_analysis_result"),
            skin_llm_prompt=full_state.get("skin_llm_prompt"),
            iteration=full_state.get("iteration", 1)
        )
        
        async for token in synthesize_answer(agent_state):
            yield {"type": "token", "content": token}
        
        yield {"type": "answer_end", "content": ""}
        yield {
            "type": "workflow_complete",
            "iterations": full_state.get("iteration", 1),
            "tools_executed": list(full_state.get("executed_tools", set())),
        }
                    
    except Exception as e:
        import traceback
        traceback.print_exc()
        yield {"type": "error", "message": f"Agent error: {str(e)}"}


# =============================================================================
# Simple Interface for Testing
# =============================================================================

async def run_agent_langgraph_simple(patient_id: str, question: str) -> str:
    """Simple interface - returns just the final answer."""
    answer = ""
    async for event in run_agent_langgraph(patient_id, question):
        if event["type"] == "token":
            answer += event["content"]
        elif event["type"] == "error":
            answer = f"Error: {event['message']}"
    return answer


# =============================================================================
# Visualization
# =============================================================================

def get_graph_visualization():
    """Get Mermaid diagram of the workflow graph."""
    graph = get_graph()
    try:
        return graph.get_graph().draw_mermaid()
    except Exception:
        return """
graph TD
    START --> discover
    discover --> skin_analysis
    skin_analysis --> plan
    plan -->|has tools| execute
    plan -->|no tools| synthesize
    execute --> reflect
    reflect -->|gaps found| plan
    reflect -->|enough info| synthesize
    synthesize --> END
"""


if __name__ == "__main__":
    print("MedGemma LangGraph Agent")
    print("=" * 50)
    print("\nWorkflow Graph (Mermaid):\n")
    print(get_graph_visualization())