Spaces:
Running
Running
feat: Implement pattern detection and integrate graph query engine with session insights and new tools.
Browse files- src/reachy_mini_conversation_app/graph_query_engine.py +307 -0
- src/reachy_mini_conversation_app/main.py +6 -3
- src/reachy_mini_conversation_app/memory_graph.py +87 -0
- src/reachy_mini_conversation_app/openai_realtime.py +24 -6
- src/reachy_mini_conversation_app/pattern_detector.py +424 -0
- src/reachy_mini_conversation_app/profiles/_reachy_mini_minder_locked_profile/tools.txt +1 -0
- src/reachy_mini_conversation_app/session_enrichment.py +33 -1
- src/reachy_mini_conversation_app/tools/check_medication.py +4 -7
- src/reachy_mini_conversation_app/tools/core_tools.py +1 -0
- src/reachy_mini_conversation_app/tools/query_health_history.py +133 -0
- tests/test_graph_query_engine.py +213 -0
- tests/test_pattern_detector.py +276 -0
src/reachy_mini_conversation_app/graph_query_engine.py
ADDED
|
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Dynamic Cypher query engine for natural language → Neo4j graph queries.
|
| 2 |
+
|
| 3 |
+
Translates natural language health questions into Cypher queries using an LLM,
|
| 4 |
+
executes them in a read-only Neo4j session, and returns human-readable answers.
|
| 5 |
+
|
| 6 |
+
Safety stack:
|
| 7 |
+
1. LLM system prompt restricts to read-only Cypher (MATCH/RETURN/WHERE only)
|
| 8 |
+
2. Regex validation rejects mutations before execution
|
| 9 |
+
3. Neo4j driver uses execute_read() for driver-level read-only enforcement
|
| 10 |
+
4. PII guard redacts patient info before LLM, hydrates after
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import re
|
| 16 |
+
import json
|
| 17 |
+
import logging
|
| 18 |
+
from typing import Any, Dict, List, Optional
|
| 19 |
+
|
| 20 |
+
from openai import AsyncOpenAI
|
| 21 |
+
|
| 22 |
+
from reachy_mini_conversation_app.config import config
|
| 23 |
+
|
| 24 |
+
logger = logging.getLogger(__name__)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# ---- Cypher Safety --------------------------------------------------------

# Mutation / procedure keywords that must never appear in a generated query.
# CALL is included deliberately: it blocks stored procedures (APOC etc.),
# which can mutate the graph even though the statement "looks" read-only.
_FORBIDDEN_KEYWORDS = re.compile(
    r"\b(CREATE|MERGE|DELETE|DETACH|SET|REMOVE|DROP|CALL|LOAD\s+CSV|FOREACH)\b",
    re.IGNORECASE,
)

# Allow-list of read-only clauses and functions a safe query may start with.
# NOTE(review): this pattern is not referenced by validate_cypher() in this
# module — it only checks _FORBIDDEN_KEYWORDS plus a leading-clause match.
# Either wire it in or remove it; confirm intent with the author.
_ALLOWED_PATTERN = re.compile(
    r"^\s*(MATCH|OPTIONAL\s+MATCH|WITH|WHERE|RETURN|ORDER\s+BY|LIMIT|SKIP|UNWIND|AS|AND|OR|NOT|IN|IS|NULL|COUNT|SUM|AVG|MIN|MAX|COLLECT|DISTINCT|CASE|WHEN|THEN|ELSE|END|EXISTS|SIZE|COALESCE|HEAD|LAST|RANGE|REDUCE|NONE|ANY|ALL|SINGLE|FILTER|EXTRACT|datetime|date|duration|toString|toInteger|toFloat|toLower|toUpper|trim|split|replace|substring|left|right|length|nodes|relationships|labels|type|id|properties|keys|startNode|endNode|point|distance)",
    re.IGNORECASE,
)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def validate_cypher(cypher: str) -> bool:
    """Validate that a Cypher query is read-only.

    Defence-in-depth layer 2 (after the LLM prompt, before the
    driver-level read-only transaction):

    - rejects empty/blank queries,
    - rejects statement chaining via ';' so a second, mutating
      statement cannot ride along with a valid first one,
    - rejects any mutation keyword (CREATE, MERGE, DELETE, ...),
    - requires the query to start with a read clause.

    Returns True if the query appears safe to execute, False otherwise.
    """
    if not cypher or not cypher.strip():
        return False

    stripped = cypher.strip()

    # Reject multi-statement input; a trailing ';' is harmless, but any
    # semicolon before the end means a second statement follows.
    if ";" in stripped.rstrip(";"):
        logger.warning("Cypher rejected — multiple statements: %s", cypher[:200])
        return False

    # Reject any mutation keywords anywhere in the query.
    if _FORBIDDEN_KEYWORDS.search(cypher):
        logger.warning("Cypher rejected — contains forbidden keyword: %s", cypher[:200])
        return False

    # Must start with MATCH, OPTIONAL MATCH, WITH, RETURN, or UNWIND.
    if not re.match(
        r"^\s*(MATCH|OPTIONAL\s+MATCH|WITH|RETURN|UNWIND)\b", stripped, re.IGNORECASE
    ):
        logger.warning(
            "Cypher rejected — does not start with MATCH/WITH/RETURN: %s", cypher[:200]
        )
        return False

    return True
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# ---- System prompt for Cypher generation -----------------------------------
|
| 67 |
+
|
| 68 |
+
_CYPHER_SYSTEM_PROMPT = """You are a Cypher query generator for a Neo4j health knowledge graph.
|
| 69 |
+
|
| 70 |
+
RULES — you MUST follow these strictly:
|
| 71 |
+
1. Generate ONLY read-only Cypher queries. Use MATCH, RETURN, WHERE, WITH, ORDER BY, LIMIT, SKIP, UNWIND.
|
| 72 |
+
2. NEVER use CREATE, MERGE, DELETE, SET, REMOVE, DROP, CALL, LOAD CSV, or FOREACH.
|
| 73 |
+
3. NEVER modify the graph in any way.
|
| 74 |
+
4. Return ONLY the Cypher query — no explanation, no markdown, no code fences.
|
| 75 |
+
5. Use parameterised values where possible (e.g., $patient_name).
|
| 76 |
+
6. Keep queries concise — prefer aggregation over returning raw data.
|
| 77 |
+
7. Use neutral, factual language in aliases (e.g., "count" not "caused_by_count").
|
| 78 |
+
8. When the user asks about "today", use `date()` for comparison.
|
| 79 |
+
9. For time ranges, use `datetime() - duration('P{n}D')` syntax.
|
| 80 |
+
|
| 81 |
+
{schema}
|
| 82 |
+
|
| 83 |
+
EXAMPLES:
|
| 84 |
+
Q: "How many headaches did I have this week?"
|
| 85 |
+
A: MATCH (p:Person {{name: $patient_name}})-[:EXPERIENCED]->(e:Event {{type: 'headache'}})
|
| 86 |
+
WHERE e.timestamp >= datetime() - duration('P7D')
|
| 87 |
+
RETURN count(e) AS headache_count
|
| 88 |
+
|
| 89 |
+
Q: "What medications am I taking?"
|
| 90 |
+
A: MATCH (p:Person {{name: $patient_name}})-[:TAKES]->(m:Medication)
|
| 91 |
+
RETURN m.name AS medication, m.dose AS dose, m.frequency AS frequency
|
| 92 |
+
|
| 93 |
+
Q: "When did I last see my doctor?"
|
| 94 |
+
A: MATCH (p:Person {{name: $patient_name}})-[:SEES]->(d:Person {{role: 'neurologist'}})
|
| 95 |
+
OPTIONAL MATCH (p)-[:EXPERIENCED]->(e:Event {{type: 'doctor_visit'}})
|
| 96 |
+
RETURN d.name AS doctor, e.timestamp AS last_visit
|
| 97 |
+
ORDER BY e.timestamp DESC LIMIT 1
|
| 98 |
+
"""
|
| 99 |
+
|
| 100 |
+
_ANSWER_SYSTEM_PROMPT = """You are a friendly health companion summarising data from a knowledge graph.
|
| 101 |
+
|
| 102 |
+
RULES:
|
| 103 |
+
1. Summarise the query results in plain, conversational English.
|
| 104 |
+
2. Use factual language — say "logged", "recorded", "showed up" — never "caused", "triggered".
|
| 105 |
+
3. If the results are empty, say so helpfully (e.g., "I don't have any records of that yet").
|
| 106 |
+
4. Keep it concise — this will be spoken aloud by a robot.
|
| 107 |
+
5. Never give medical advice. State facts only.
|
| 108 |
+
6. If the data is a count, state the number naturally.
|
| 109 |
+
7. Refer to the user by name if provided, otherwise say "you".
|
| 110 |
+
"""
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
class GraphQueryEngine:
    """LLM-powered Cypher query generation and execution.

    Pipeline: natural-language question → LLM-generated Cypher →
    regex validation → read-only execution → LLM-formatted spoken answer.

    Uses gpt-4.1-mini for Cypher generation and answer formatting.
    All queries execute in read-only Neo4j transactions.
    """

    def __init__(self, graph_memory: Any) -> None:
        """Initialise with a connected GraphMemory instance."""
        self._graph = graph_memory
        # Created lazily so constructing the engine needs no API key.
        self._client: Optional[AsyncOpenAI] = None
        # Cached schema text; cleared via invalidate_schema_cache().
        self._schema_cache: Optional[str] = None

    def _get_client(self) -> AsyncOpenAI:
        """Lazy-init the OpenAI client."""
        if self._client is None:
            self._client = AsyncOpenAI(api_key=config.OPENAI_API_KEY)
        return self._client

    def _get_schema(self) -> str:
        """Get the graph schema description, with caching."""
        if self._schema_cache is None:
            self._schema_cache = self._graph.get_schema_description()
        return self._schema_cache

    def invalidate_schema_cache(self) -> None:
        """Clear the schema cache (e.g., after enrichment adds new node types)."""
        self._schema_cache = None

    async def generate_cypher(
        self, question: str, patient_name: str = "Patient"
    ) -> str:
        """Generate a read-only Cypher query from a natural language question.

        Args:
            question: The user's natural language health question.
            patient_name: The patient name to parameterise in the query.
                Intentionally NOT sent to the LLM here — it is bound as
                $patient_name at execution time (see module PII notes).

        Returns:
            A Cypher query string.

        Raises:
            ValueError: If the generated query fails safety validation.
        """
        client = self._get_client()
        schema = self._get_schema()

        system_prompt = _CYPHER_SYSTEM_PROMPT.format(schema=schema)

        response = await client.chat.completions.create(
            model="gpt-4.1-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": question},
            ],
            temperature=0.0,
            max_tokens=500,
        )

        # message.content may be None (e.g., refusal / empty completion);
        # coerce to "" so validation rejects it cleanly instead of this
        # method raising AttributeError on .strip().
        cypher = (response.choices[0].message.content or "").strip()

        # Strip markdown code fences if the LLM wraps them despite instructions
        if cypher.startswith("```"):
            lines = cypher.split("\n")
            # Remove first and last line (```cypher and ```)
            cypher = "\n".join(
                line for line in lines if not line.strip().startswith("```")
            ).strip()

        if not validate_cypher(cypher):
            raise ValueError(
                f"Generated Cypher failed safety validation: {cypher[:200]}"
            )

        logger.info("Generated Cypher: %s", cypher)
        return cypher

    async def execute(
        self,
        cypher: str,
        patient_name: str = "Patient",
    ) -> List[Dict[str, Any]]:
        """Execute a validated Cypher query in a read-only transaction.

        Args:
            cypher: The Cypher query to execute.
            patient_name: The patient name to bind to $patient_name param.

        Returns:
            List of result records as dicts; empty list when the graph
            is unavailable.
        """
        if not self._graph or not self._graph.is_connected:
            logger.warning("Graph not connected, cannot execute query")
            return []

        params = {"patient_name": patient_name}
        # NOTE(review): execute_read is synchronous, so this blocks the
        # event loop for the query's duration — consider asyncio.to_thread
        # if queries grow slow.
        return self._graph.execute_read(cypher, params)

    async def format_answer(
        self,
        question: str,
        results: List[Dict[str, Any]],
        patient_name: str = "Patient",
    ) -> str:
        """Format query results into a natural language answer.

        Args:
            question: The original user question.
            results: The raw Cypher query results.
            patient_name: The patient's name for personalisation.

        Returns:
            A friendly, spoken-word summary of the results.
        """
        client = self._get_client()

        # Serialise results for the LLM
        if not results:
            results_text = "No results found."
        else:
            # Convert datetime objects etc. to strings for JSON serialisation
            serialisable = []
            for record in results:
                row = {}
                for k, v in record.items():
                    row[k] = (
                        str(v)
                        if not isinstance(v, (str, int, float, bool, type(None)))
                        else v
                    )
                serialisable.append(row)
            results_text = json.dumps(serialisable, indent=2, default=str)

        user_message = (
            f"User question: {question}\n"
            f"Patient name: {patient_name}\n"
            f"Query results:\n{results_text}"
        )

        response = await client.chat.completions.create(
            model="gpt-4.1-mini",
            messages=[
                {"role": "system", "content": _ANSWER_SYSTEM_PROMPT},
                {"role": "user", "content": user_message},
            ],
            temperature=0.3,
            max_tokens=300,
        )

        # Guard against a None completion, same as in generate_cypher.
        return (response.choices[0].message.content or "").strip()

    async def query(
        self,
        question: str,
        patient_name: str = "Patient",
    ) -> Dict[str, Any]:
        """End-to-end: question → Cypher → execute → natural language answer.

        Args:
            question: The user's natural language health question.
            patient_name: The patient's name.

        Returns:
            Dict with keys: answer (str), cypher (str), result_count (int),
            raw_results (list). On failure an "error" key is added and the
            answer is a user-safe apology.
        """
        try:
            cypher = await self.generate_cypher(question, patient_name)
            results = await self.execute(cypher, patient_name)
            answer = await self.format_answer(question, results, patient_name)

            return {
                "answer": answer,
                "cypher": cypher,
                "result_count": len(results),
                "raw_results": results,
            }
        except ValueError as e:
            # Safety validation rejected the generated Cypher.
            logger.warning("Query generation failed: %s", e)
            return {
                "answer": "I wasn't able to look that up in your health records right now. Could you try rephrasing?",
                "cypher": "",
                "result_count": 0,
                "raw_results": [],
                "error": str(e),
            }
        except Exception as e:
            # Boundary catch: LLM/network/driver errors become a spoken apology.
            logger.exception("Graph query failed: %s", e)
            return {
                "answer": "I had trouble accessing your health records. Let me try again later.",
                "cypher": "",
                "result_count": 0,
                "raw_results": [],
                "error": str(e),
            }
|
src/reachy_mini_conversation_app/main.py
CHANGED
|
@@ -138,6 +138,7 @@ def run(
|
|
| 138 |
logger.debug("Conversation log pruning skipped: %s", e)
|
| 139 |
|
| 140 |
# Initialize session enrichment pipeline (optional Neo4j connection)
|
|
|
|
| 141 |
try:
|
| 142 |
from reachy_mini_conversation_app.session_enrichment import (
|
| 143 |
init_session_enrichment,
|
|
@@ -145,15 +146,16 @@ def run(
|
|
| 145 |
from reachy_mini_conversation_app.memory_graph import GraphMemory
|
| 146 |
|
| 147 |
# Try to connect to Neo4j if available
|
| 148 |
-
|
| 149 |
-
if
|
| 150 |
-
init_session_enrichment(graph_memory=
|
| 151 |
enable_session_enrichment()
|
| 152 |
logger.info("Session enrichment enabled with Neo4j")
|
| 153 |
else:
|
| 154 |
init_session_enrichment(graph_memory=None)
|
| 155 |
enable_session_enrichment()
|
| 156 |
logger.info("Session enrichment enabled (no Neo4j)")
|
|
|
|
| 157 |
except Exception as e:
|
| 158 |
logger.warning("Session enrichment not available: %s", e)
|
| 159 |
|
|
@@ -165,6 +167,7 @@ def run(
|
|
| 165 |
head_wobbler=head_wobbler,
|
| 166 |
entry_state_manager=entry_state,
|
| 167 |
database=minder_db,
|
|
|
|
| 168 |
)
|
| 169 |
|
| 170 |
handler = create_handler(deps, instance_path=instance_path)
|
|
|
|
| 138 |
logger.debug("Conversation log pruning skipped: %s", e)
|
| 139 |
|
| 140 |
# Initialize session enrichment pipeline (optional Neo4j connection)
|
| 141 |
+
graph_memory = None
|
| 142 |
try:
|
| 143 |
from reachy_mini_conversation_app.session_enrichment import (
|
| 144 |
init_session_enrichment,
|
|
|
|
| 146 |
from reachy_mini_conversation_app.memory_graph import GraphMemory
|
| 147 |
|
| 148 |
# Try to connect to Neo4j if available
|
| 149 |
+
graph_memory = GraphMemory()
|
| 150 |
+
if graph_memory.connect():
|
| 151 |
+
init_session_enrichment(graph_memory=graph_memory)
|
| 152 |
enable_session_enrichment()
|
| 153 |
logger.info("Session enrichment enabled with Neo4j")
|
| 154 |
else:
|
| 155 |
init_session_enrichment(graph_memory=None)
|
| 156 |
enable_session_enrichment()
|
| 157 |
logger.info("Session enrichment enabled (no Neo4j)")
|
| 158 |
+
graph_memory = None # Clear ref — not connected
|
| 159 |
except Exception as e:
|
| 160 |
logger.warning("Session enrichment not available: %s", e)
|
| 161 |
|
|
|
|
| 167 |
head_wobbler=head_wobbler,
|
| 168 |
entry_state_manager=entry_state,
|
| 169 |
database=minder_db,
|
| 170 |
+
graph_memory=graph_memory,
|
| 171 |
)
|
| 172 |
|
| 173 |
handler = create_handler(deps, instance_path=instance_path)
|
src/reachy_mini_conversation_app/memory_graph.py
CHANGED
|
@@ -82,6 +82,11 @@ class GraphMemory:
|
|
| 82 |
self._driver.close()
|
| 83 |
self._driver = None
|
| 84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
def _execute(
|
| 86 |
self, query: str, parameters: Optional[Dict[str, Any]] = None
|
| 87 |
) -> List[Dict[str, Any]]:
|
|
@@ -94,6 +99,88 @@ class GraphMemory:
|
|
| 94 |
result = session.run(query, parameters or {})
|
| 95 |
return [dict(record) for record in result]
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
# -------------------------------------------------------------------------
|
| 98 |
# Node Creation
|
| 99 |
# -------------------------------------------------------------------------
|
|
|
|
| 82 |
self._driver.close()
|
| 83 |
self._driver = None
|
| 84 |
|
| 85 |
+
@property
def is_connected(self) -> bool:
    """Check if the driver is connected.

    True once a driver has been created and until close() sets
    self._driver back to None. This is a local None-check only —
    it does not ping the Neo4j server.
    """
    return self._driver is not None
|
| 89 |
+
|
| 90 |
def _execute(
|
| 91 |
self, query: str, parameters: Optional[Dict[str, Any]] = None
|
| 92 |
) -> List[Dict[str, Any]]:
|
|
|
|
| 99 |
result = session.run(query, parameters or {})
|
| 100 |
return [dict(record) for record in result]
|
| 101 |
|
| 102 |
+
def execute_read(
    self, query: str, parameters: Optional[Dict[str, Any]] = None
) -> List[Dict[str, Any]]:
    """Execute a Cypher query in a read-only transaction.

    This is the safe path for LLM-generated queries — the driver
    will reject any write operations even if they slip past validation.

    Args:
        query: Cypher text to run.
        parameters: Optional parameter map bound into the query.

    Returns:
        All result records eagerly materialised as plain dicts;
        empty list when Neo4j is not connected.
    """
    if not self._driver:
        logger.warning("Neo4j not connected. Read query skipped.")
        return []

    def _read_tx(tx: Any) -> List[Dict[str, Any]]:
        # Materialise inside the transaction function — result records
        # cannot be consumed after the session closes.
        result = tx.run(query, parameters or {})
        return [dict(record) for record in result]

    with self._driver.session() as session:
        # session.execute_read runs _read_tx in a read-access transaction,
        # giving driver-level enforcement on top of regex validation.
        return session.execute_read(_read_tx)
|
| 120 |
+
|
| 121 |
+
def get_schema_description(self) -> str:
    """Return a human-readable description of the graph schema.

    Attempts live introspection via `db.labels()` and
    `db.relationshipTypes()`; falls back to the hardcoded schema
    constants when Neo4j is not connected or introspection fails.
    """
    if self._driver:
        try:
            labels = [
                r["label"]
                for r in self._execute("CALL db.labels() YIELD label RETURN label")
            ]
            rel_types = [
                r["relationshipType"]
                for r in self._execute(
                    "CALL db.relationshipTypes() YIELD relationshipType RETURN relationshipType"
                )
            ]
            # Get property keys for each label
            prop_lines = []
            for label in labels:
                # Backtick-quote the label (escaping embedded backticks)
                # so labels containing spaces or punctuation cannot break
                # — or inject into — the interpolated Cypher.
                safe_label = str(label).replace("`", "``")
                props = self._execute(
                    f"MATCH (n:`{safe_label}`) WITH keys(n) AS ks UNWIND ks AS k "
                    "RETURN DISTINCT k ORDER BY k LIMIT 20"
                )
                keys = [r["k"] for r in props]
                prop_lines.append(
                    f" (:{label}) — properties: {', '.join(keys) if keys else 'none'}"
                )

            lines = [
                "## Neo4j Graph Schema",
                "",
                "### Node Labels",
                *prop_lines,
                "",
                "### Relationship Types",
                *[f" {rt}" for rt in rel_types],
            ]
            return "\n".join(lines)
        except Exception as e:
            logger.debug("Live schema introspection failed, using fallback: %s", e)

    # Fallback to hardcoded schema from class docstring
    return (
        "## Neo4j Graph Schema\n\n"
        "### Node Labels\n"
        " (:Person) — properties: name, role, updated_at\n"
        " (:Medication) — properties: name, dose, frequency, symptom_category, updated_at\n"
        " (:Symptom) — properties: type, severity, updated_at\n"
        " (:Event) — properties: type, timestamp, notes\n"
        " (:Entity) — properties: name\n\n"
        "### Relationship Types\n"
        " TAKES — (Person)-[:TAKES]->(Medication)\n"
        " EXPERIENCED — (Person)-[:EXPERIENCED]->(Event)\n"
        " TRIGGERED_BY — (Event)-[:TRIGGERED_BY]->(Symptom)\n"
        " HAS_CAREGIVER — (Person)-[:HAS_CAREGIVER]->(Person)\n"
        " MONITORS — (Person)-[:MONITORS]->(Person)\n"
        " SEES — (Person)-[:SEES]->(Person)\n"
        " MENTIONED_WITH — (Entity)-[:MENTIONED_WITH]->(Entity)"
    )
|
| 183 |
+
|
| 184 |
# -------------------------------------------------------------------------
|
| 185 |
# Node Creation
|
| 186 |
# -------------------------------------------------------------------------
|
src/reachy_mini_conversation_app/openai_realtime.py
CHANGED
|
@@ -141,20 +141,38 @@ class OpenaiRealtimeHandler(RealtimeHandler):
|
|
| 141 |
try:
|
| 142 |
from reachy_mini_conversation_app.session_enrichment import (
|
| 143 |
get_session_enrichment,
|
|
|
|
| 144 |
)
|
| 145 |
|
| 146 |
enrichment = get_session_enrichment()
|
|
|
|
|
|
|
|
|
|
| 147 |
if enrichment and enrichment._graph:
|
| 148 |
patient_name = profile.get("name") if profile else None
|
| 149 |
if patient_name:
|
| 150 |
ctx = enrichment._graph.format_context_for_prompt(patient_name)
|
| 151 |
if ctx:
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
except Exception as e:
|
| 159 |
logger.debug("Graph context injection skipped: %s", e)
|
| 160 |
|
|
|
|
| 141 |
try:
|
| 142 |
from reachy_mini_conversation_app.session_enrichment import (
|
| 143 |
get_session_enrichment,
|
| 144 |
+
get_latest_insights,
|
| 145 |
)
|
| 146 |
|
| 147 |
enrichment = get_session_enrichment()
|
| 148 |
+
graph_parts = []
|
| 149 |
+
|
| 150 |
+
# Patient context from Neo4j
|
| 151 |
if enrichment and enrichment._graph:
|
| 152 |
patient_name = profile.get("name") if profile else None
|
| 153 |
if patient_name:
|
| 154 |
ctx = enrichment._graph.format_context_for_prompt(patient_name)
|
| 155 |
if ctx:
|
| 156 |
+
graph_parts.append(ctx)
|
| 157 |
+
|
| 158 |
+
# Pattern insights from last session's analysis
|
| 159 |
+
insights = get_latest_insights()
|
| 160 |
+
if insights:
|
| 161 |
+
from reachy_mini_conversation_app.pattern_detector import (
|
| 162 |
+
format_insights_for_prompt,
|
| 163 |
+
)
|
| 164 |
+
|
| 165 |
+
insights_text = format_insights_for_prompt(insights)
|
| 166 |
+
if insights_text:
|
| 167 |
+
graph_parts.append(insights_text)
|
| 168 |
+
|
| 169 |
+
if graph_parts:
|
| 170 |
+
state.graph_context = "\n\n".join(graph_parts)
|
| 171 |
+
logger.info(
|
| 172 |
+
"Injected graph context (%d chars, %d insights)",
|
| 173 |
+
len(state.graph_context),
|
| 174 |
+
len(insights),
|
| 175 |
+
)
|
| 176 |
except Exception as e:
|
| 177 |
logger.debug("Graph context injection skipped: %s", e)
|
| 178 |
|
src/reachy_mini_conversation_app/pattern_detector.py
ADDED
|
@@ -0,0 +1,424 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pattern detection across the Neo4j health knowledge graph.
|
| 2 |
+
|
| 3 |
+
Analyses the graph for temporal correlations, frequency changes, and
|
| 4 |
+
medication-symptom co-occurrence patterns. All findings use neutral,
|
| 5 |
+
observational language — never causal claims.
|
| 6 |
+
|
| 7 |
+
Safety constraints:
|
| 8 |
+
- Uses "co-occurred", "appeared together", "trend" — never "caused" or "triggered"
|
| 9 |
+
- All queries are read-only (use execute_read)
|
| 10 |
+
- Results are scored by confidence (sample size × effect strength)
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import logging
|
| 16 |
+
from dataclasses import dataclass, field
|
| 17 |
+
from typing import Any, Dict, List, Optional
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@dataclass
class Insight:
    """A detected health pattern or trend.

    Carries neutral, observational wording only — summaries are meant to
    be spoken aloud, so no causal claims (see module docstring).
    """

    pattern_type: str  # "correlation", "frequency_change", "temporal", "adherence"
    summary: str  # Human-readable one-liner (spoken aloud)
    detail: str  # Longer explanation
    confidence: float  # 0.0-1.0 based on sample size and effect strength
    entities: List[str] = field(default_factory=list)  # Entity names involved
    period_days: int = 30  # Analysis window

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain dict (e.g., for JSON persistence)."""
        return {
            "pattern_type": self.pattern_type,
            "summary": self.summary,
            "detail": self.detail,
            "confidence": self.confidence,
            "entities": self.entities,
            "period_days": self.period_days,
        }
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class PatternDetector:
    """Analyse the Neo4j knowledge graph for health patterns.

    All analysis queries are pre-written Cypher (not LLM-generated) —
    pattern templates are safe to hardcode since they are read-only.

    Each detector returns a list of ``Insight`` objects; ``run_analysis``
    runs every detector and keeps the most confident results.
    """

    # Minimum event count required for a pattern to be considered meaningful
    MIN_SAMPLE_SIZE = 3

    def __init__(self, graph_memory: Any) -> None:
        """Initialise with a connected GraphMemory instance (may be None)."""
        self._graph = graph_memory

    def _query(
        self, cypher: str, params: Optional[Dict[str, Any]] = None
    ) -> List[Dict[str, Any]]:
        """Execute a read-only query. Returns empty list if graph unavailable."""
        if not self._graph or not self._graph.is_connected:
            return []
        try:
            return self._graph.execute_read(cypher, params or {})
        except Exception as e:
            logger.warning("Pattern query failed: %s", e)
            return []

    # ------------------------------------------------------------------
    # Pattern detectors
    # ------------------------------------------------------------------

    def detect_medication_symptom_correlation(
        self, patient_name: str, days: int = 30
    ) -> "List[Insight]":
        """Find medications and symptoms that co-occur within a time window.

        Looks for medications taken on the same day as symptom events.

        Args:
            patient_name: Patient node name in the graph.
            days: Analysis window in days.

        Returns:
            Up to 10 correlation insights, strongest co-occurrence first.
        """
        query = """
        MATCH (p:Person {name: $patient_name})-[:TAKES]->(m:Medication)
        MATCH (p)-[:EXPERIENCED]->(e:Event)
        WHERE e.timestamp >= datetime() - duration('P' + toString($days) + 'D')
          AND e.type IN ['headache', 'migraine_episode', 'confusion', 'fatigue',
                         'mood_change', 'dizziness', 'vision_change']
        WITH m.name AS medication, e.type AS symptom,
             count(e) AS co_occurrence_count,
             collect(DISTINCT date(e.timestamp)) AS dates
        WHERE co_occurrence_count >= $min_sample
        RETURN medication, symptom, co_occurrence_count,
               size(dates) AS distinct_days
        ORDER BY co_occurrence_count DESC
        LIMIT 10
        """
        results = self._query(
            query,
            {
                "patient_name": patient_name,
                "days": days,
                "min_sample": self.MIN_SAMPLE_SIZE,
            },
        )

        # Guard against days <= 0: the confidence ratio divides by the window.
        window = max(days, 1)

        insights: List[Insight] = []
        for r in results:
            med = r["medication"]
            sym = r["symptom"]
            count = r["co_occurrence_count"]
            d_days = r["distinct_days"]

            # Confidence: more days and more events = higher confidence
            confidence = min(1.0, (d_days / window) * (count / (window * 0.5)))

            insights.append(
                Insight(
                    pattern_type="correlation",
                    summary=f"{sym.replace('_', ' ').title()} appeared on {d_days} of the days you took {med} in the last {days} days.",
                    detail=f"{count} {sym.replace('_', ' ')} events co-occurred with {med} across {d_days} distinct days.",
                    confidence=round(confidence, 2),
                    entities=[med, sym],
                    period_days=days,
                )
            )

        return insights

    def detect_frequency_changes(
        self, patient_name: str, event_type: str = "headache", days: int = 30
    ) -> "List[Insight]":
        """Detect if event frequency increased or decreased vs the prior period.

        Compares the last N days to the N days before that. Only changes of
        at least 25% with enough total samples are reported.

        Args:
            patient_name: Patient node name in the graph.
            event_type: Event type to count (e.g. "headache").
            days: Size of each comparison window in days.
        """
        query = """
        MATCH (p:Person {name: $patient_name})-[:EXPERIENCED]->(e:Event {type: $event_type})
        WITH e,
             CASE WHEN e.timestamp >= datetime() - duration('P' + toString($days) + 'D')
                  THEN 'recent'
                  ELSE CASE WHEN e.timestamp >= datetime() - duration('P' + toString($days * 2) + 'D')
                            THEN 'prior'
                            ELSE 'older'
                       END
             END AS period
        WHERE period IN ['recent', 'prior']
        RETURN period, count(e) AS event_count
        ORDER BY period
        """
        results = self._query(
            query,
            {
                "patient_name": patient_name,
                "event_type": event_type,
                "days": days,
            },
        )

        if len(results) < 2:
            return []

        counts = {r["period"]: r["event_count"] for r in results}
        recent = counts.get("recent", 0)
        prior = counts.get("prior", 0)

        if recent == prior or (recent + prior) < self.MIN_SAMPLE_SIZE:
            return []

        if prior == 0:
            # No baseline to compare against — report only if the new
            # activity is itself large enough to be meaningful.
            if recent >= self.MIN_SAMPLE_SIZE:
                return [
                    Insight(
                        pattern_type="frequency_change",
                        summary=f"You've logged {recent} {event_type.replace('_', ' ')} events in the last {days} days — this is new.",
                        detail=f"{recent} events in last {days} days vs 0 in the prior {days} days.",
                        confidence=0.6,
                        entities=[event_type],
                        period_days=days,
                    )
                ]
            return []

        pct_change = ((recent - prior) / prior) * 100
        direction = "increased" if pct_change > 0 else "decreased"
        abs_pct = abs(int(pct_change))

        # Only report meaningful changes (>25%)
        if abs_pct < 25:
            return []

        # Guard against days <= 0: the confidence ratio divides by the window.
        window = max(days, 1)
        confidence = min(1.0, (recent + prior) / (window * 0.5) * (abs_pct / 100))

        friendly_type = event_type.replace("_", " ")
        return [
            Insight(
                pattern_type="frequency_change",
                summary=f"Your {friendly_type} frequency has {direction} by about {abs_pct}% compared to the previous {days} days.",
                detail=f"{recent} events in the last {days} days vs {prior} in the prior {days} days ({direction} {abs_pct}%).",
                confidence=round(min(confidence, 1.0), 2),
                entities=[event_type],
                period_days=days,
            )
        ]

    def detect_missed_medication_impact(
        self, patient_name: str, days: int = 30
    ) -> "List[Insight]":
        """Correlate missed medications with symptom severity the following day.

        Reports only medication/symptom pairs where more than 30% of symptom
        events were preceded (within 24h) by a missed dose.
        """
        query = """
        MATCH (p:Person {name: $patient_name})-[:TAKES]->(m:Medication)
        MATCH (p)-[:EXPERIENCED]->(e:Event)
        WHERE e.timestamp >= datetime() - duration('P' + toString($days) + 'D')
          AND e.type IN ['headache', 'migraine_episode', 'confusion', 'fatigue']
        OPTIONAL MATCH (p)-[:EXPERIENCED]->(missed:Event {type: 'medication_missed'})
        WHERE missed.timestamp >= e.timestamp - duration('P1D')
          AND missed.timestamp <= e.timestamp
        WITH m.name AS medication, e.type AS symptom,
             count(DISTINCT e) AS total_events,
             count(DISTINCT missed) AS preceded_by_miss
        WHERE total_events >= $min_sample AND preceded_by_miss > 0
        RETURN medication, symptom, total_events, preceded_by_miss,
               toFloat(preceded_by_miss) / toFloat(total_events) AS miss_ratio
        ORDER BY miss_ratio DESC
        LIMIT 5
        """
        results = self._query(
            query,
            {
                "patient_name": patient_name,
                "days": days,
                "min_sample": self.MIN_SAMPLE_SIZE,
            },
        )

        insights: List[Insight] = []
        for r in results:
            ratio = r["miss_ratio"]
            if ratio < 0.3:  # Only report if >30% correlation
                continue

            med = r["medication"]
            sym = r["symptom"].replace("_", " ")
            pct = int(ratio * 100)

            insights.append(
                Insight(
                    pattern_type="adherence",
                    summary=f"About {pct}% of your {sym} events were preceded by missing {med} the day before.",
                    detail=f"{r['preceded_by_miss']} of {r['total_events']} {sym} events came after a missed {med} dose.",
                    confidence=round(min(ratio, 1.0), 2),
                    entities=[med, r["symptom"]],
                    period_days=days,
                )
            )

        return insights

    def detect_temporal_patterns(
        self, patient_name: str, days: int = 30
    ) -> "List[Insight]":
        """Find time-of-day and day-of-week clustering of symptoms.

        Reports a symptom only when more than half of its events cluster in
        a single time-of-day bucket (early_morning/morning/afternoon/evening).
        """
        query = """
        MATCH (p:Person {name: $patient_name})-[:EXPERIENCED]->(e:Event)
        WHERE e.timestamp >= datetime() - duration('P' + toString($days) + 'D')
          AND e.type IN ['headache', 'migraine_episode', 'confusion', 'fatigue',
                         'mood_change', 'dizziness']
        WITH e.type AS symptom,
             CASE
                 WHEN e.timestamp.hour < 6 THEN 'early_morning'
                 WHEN e.timestamp.hour < 12 THEN 'morning'
                 WHEN e.timestamp.hour < 18 THEN 'afternoon'
                 ELSE 'evening'
             END AS time_of_day,
             e.timestamp.dayOfWeek AS day_of_week,
             count(e) AS event_count
        RETURN symptom, time_of_day, day_of_week, event_count
        ORDER BY event_count DESC
        LIMIT 20
        """
        results = self._query(
            query,
            {
                "patient_name": patient_name,
                "days": days,
            },
        )

        if not results:
            return []

        # Group by symptom and find dominant time pattern
        symptom_times: Dict[str, Dict[str, int]] = {}
        for r in results:
            sym = r["symptom"]
            tod = r["time_of_day"]
            count = r["event_count"]
            if sym not in symptom_times:
                symptom_times[sym] = {}
            symptom_times[sym][tod] = symptom_times[sym].get(tod, 0) + count

        insights: List[Insight] = []
        for sym, time_counts in symptom_times.items():
            total = sum(time_counts.values())
            if total < self.MIN_SAMPLE_SIZE:
                continue

            # Find dominant time period
            dominant_time = max(time_counts, key=time_counts.get)
            dominant_count = time_counts[dominant_time]
            ratio = dominant_count / total

            # Only report if >50% of events cluster in one time period
            if ratio < 0.5:
                continue

            pct = int(ratio * 100)
            friendly_sym = sym.replace("_", " ")
            friendly_time = dominant_time.replace("_", " ")

            insights.append(
                Insight(
                    pattern_type="temporal",
                    summary=f"About {pct}% of your {friendly_sym} events tend to happen in the {friendly_time}.",
                    detail=f"{dominant_count} of {total} {friendly_sym} events occurred during {friendly_time} hours over the last {days} days.",
                    confidence=round(min(ratio, 1.0), 2),
                    entities=[sym, dominant_time],
                    period_days=days,
                )
            )

        return insights

    # ------------------------------------------------------------------
    # Run all detectors
    # ------------------------------------------------------------------

    def run_analysis(self, patient_name: str, days: int = 30) -> "List[Insight]":
        """Run all pattern detectors and return sorted insights.

        Args:
            patient_name: The patient's name for graph queries.
            days: Analysis window in days.

        Returns:
            List of Insight objects sorted by confidence (highest first),
            capped at the 5 most confident. Empty if the graph is unavailable.
        """
        if not self._graph or not self._graph.is_connected:
            logger.debug("Graph not available for pattern detection")
            return []

        all_insights: List[Insight] = []

        # Run each detector; one failure must not abort the others.
        for detector_name, detector_fn in [
            (
                "medication_symptom_correlation",
                self.detect_medication_symptom_correlation,
            ),
            (
                "frequency_changes_headache",
                lambda p, d: self.detect_frequency_changes(p, "headache", d),
            ),
            (
                "frequency_changes_migraine",
                lambda p, d: self.detect_frequency_changes(p, "migraine_episode", d),
            ),
            (
                "frequency_changes_confusion",
                lambda p, d: self.detect_frequency_changes(p, "confusion", d),
            ),
            ("missed_medication_impact", self.detect_missed_medication_impact),
            ("temporal_patterns", self.detect_temporal_patterns),
        ]:
            try:
                insights = detector_fn(patient_name, days)
                all_insights.extend(insights)
                if insights:
                    logger.info(
                        "Pattern detector '%s' found %d insights",
                        detector_name,
                        len(insights),
                    )
            except Exception as e:
                logger.warning("Pattern detector '%s' failed: %s", detector_name, e)

        # Sort by confidence descending, keep top insights
        all_insights.sort(key=lambda i: i.confidence, reverse=True)

        # Cap at 5 most confident insights
        top_insights = all_insights[:5]

        if top_insights:
            logger.info(
                "Pattern detection complete: %d insights (top confidence: %.2f)",
                len(top_insights),
                top_insights[0].confidence,
            )

        return top_insights
| 400 |
+
|
| 401 |
+
def format_insights_for_prompt(insights: "List[Insight]") -> str:
    """Format pattern insights for injection into the system prompt.

    Returns a block suitable for appending to graph_context in SessionState,
    or the empty string when there are no insights to surface.
    """
    if not insights:
        return ""

    header = [
        "",
        "## Recent Health Insights",
        "The following patterns were detected in the patient's health data.",
        "Mention them naturally if relevant to the conversation — don't force them.",
        "Use observational language only (e.g., 'I noticed', 'it looks like').",
        "",
    ]
    numbered = [
        f"{idx}. **{item.pattern_type.replace('_', ' ').title()}** "
        f"(confidence: {item.confidence:.0%}): {item.summary}"
        for idx, item in enumerate(insights, start=1)
    ]
    return "\n".join(header + numbered)
|
src/reachy_mini_conversation_app/profiles/_reachy_mini_minder_locked_profile/tools.txt
CHANGED
|
@@ -7,6 +7,7 @@ log_entry
|
|
| 7 |
entry_control
|
| 8 |
get_recent_entries
|
| 9 |
check_medication
|
|
|
|
| 10 |
|
| 11 |
# Onboarding & setup tools (unchanged)
|
| 12 |
get_current_datetime
|
|
|
|
| 7 |
entry_control
|
| 8 |
get_recent_entries
|
| 9 |
check_medication
|
| 10 |
+
query_health_history
|
| 11 |
|
| 12 |
# Onboarding & setup tools (unchanged)
|
| 13 |
get_current_datetime
|
src/reachy_mini_conversation_app/session_enrichment.py
CHANGED
|
@@ -290,6 +290,7 @@ class SessionEnrichment:
|
|
| 290 |
# -------------------------------------------------------------------------
|
| 291 |
|
| 292 |
_enrichment_instance: Optional[SessionEnrichment] = None
|
|
|
|
| 293 |
|
| 294 |
|
| 295 |
def init_session_enrichment(graph_memory: Optional[Any] = None) -> SessionEnrichment:
|
|
@@ -312,6 +313,14 @@ def get_session_enrichment() -> Optional[SessionEnrichment]:
|
|
| 312 |
return _enrichment_instance
|
| 313 |
|
| 314 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
async def on_session_end(
|
| 316 |
session_id: str,
|
| 317 |
turns: List[Dict[str, Any]],
|
|
@@ -319,6 +328,9 @@ async def on_session_end(
|
|
| 319 |
) -> Dict[str, int]:
|
| 320 |
"""Hook called when a conversation session ends.
|
| 321 |
|
|
|
|
|
|
|
|
|
|
| 322 |
Args:
|
| 323 |
session_id: The session that ended.
|
| 324 |
turns: All turns from that session.
|
|
@@ -327,12 +339,32 @@ async def on_session_end(
|
|
| 327 |
Returns:
|
| 328 |
Enrichment result counts.
|
| 329 |
"""
|
|
|
|
|
|
|
| 330 |
if not _enrichment_instance:
|
| 331 |
logger.warning("Session enrichment not initialized")
|
| 332 |
return {}
|
| 333 |
|
| 334 |
-
|
| 335 |
session_id=session_id,
|
| 336 |
turns=turns,
|
| 337 |
patient_name=patient_name,
|
| 338 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
# -------------------------------------------------------------------------
|
| 291 |
|
| 292 |
_enrichment_instance: Optional[SessionEnrichment] = None
|
| 293 |
+
_latest_insights: List[Any] = [] # Cached pattern insights
|
| 294 |
|
| 295 |
|
| 296 |
def init_session_enrichment(graph_memory: Optional[Any] = None) -> SessionEnrichment:
|
|
|
|
| 313 |
return _enrichment_instance
|
| 314 |
|
| 315 |
|
| 316 |
+
def get_latest_insights() -> List[Any]:
    """Return the insights cached by the most recent pattern-detection run.

    The module-level cache is populated at session end (see
    ``on_session_end``); an empty list means no run has produced insights yet.
    """
    return _latest_insights
|
| 322 |
+
|
| 323 |
+
|
| 324 |
async def on_session_end(
|
| 325 |
session_id: str,
|
| 326 |
turns: List[Dict[str, Any]],
|
|
|
|
| 328 |
) -> Dict[str, int]:
|
| 329 |
"""Hook called when a conversation session ends.
|
| 330 |
|
| 331 |
+
After enrichment, runs pattern detection if the graph is available
|
| 332 |
+
and caches insights for the next session's system prompt.
|
| 333 |
+
|
| 334 |
Args:
|
| 335 |
session_id: The session that ended.
|
| 336 |
turns: All turns from that session.
|
|
|
|
| 339 |
Returns:
|
| 340 |
Enrichment result counts.
|
| 341 |
"""
|
| 342 |
+
global _latest_insights
|
| 343 |
+
|
| 344 |
if not _enrichment_instance:
|
| 345 |
logger.warning("Session enrichment not initialized")
|
| 346 |
return {}
|
| 347 |
|
| 348 |
+
counts = await _enrichment_instance.enrich_session(
|
| 349 |
session_id=session_id,
|
| 350 |
turns=turns,
|
| 351 |
patient_name=patient_name,
|
| 352 |
)
|
| 353 |
+
|
| 354 |
+
# Run pattern detection after enrichment
|
| 355 |
+
if _enrichment_instance._graph and patient_name:
|
| 356 |
+
try:
|
| 357 |
+
from reachy_mini_conversation_app.pattern_detector import PatternDetector
|
| 358 |
+
|
| 359 |
+
detector = PatternDetector(_enrichment_instance._graph)
|
| 360 |
+
_latest_insights = detector.run_analysis(patient_name, days=30)
|
| 361 |
+
if _latest_insights:
|
| 362 |
+
logger.info(
|
| 363 |
+
"Pattern detection found %d insights after session %s",
|
| 364 |
+
len(_latest_insights),
|
| 365 |
+
session_id[:8],
|
| 366 |
+
)
|
| 367 |
+
except Exception as e:
|
| 368 |
+
logger.warning("Pattern detection failed: %s", e)
|
| 369 |
+
|
| 370 |
+
return counts
|
src/reachy_mini_conversation_app/tools/check_medication.py
CHANGED
|
@@ -112,7 +112,10 @@ class CheckMedicationTool(Tool):
|
|
| 112 |
) -> Dict[str, Any]:
|
| 113 |
"""Query Neo4j for medication events."""
|
| 114 |
try:
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
# Get patient name from profile
|
| 118 |
patient_name = "Patient" # Default
|
|
@@ -120,17 +123,11 @@ class CheckMedicationTool(Tool):
|
|
| 120 |
profile = deps.database.get_or_create_profile()
|
| 121 |
patient_name = profile.get("name", "Patient")
|
| 122 |
|
| 123 |
-
graph = GraphMemory()
|
| 124 |
-
if not graph.connect():
|
| 125 |
-
logger.debug("Neo4j not available for medication check")
|
| 126 |
-
return {"logged": False}
|
| 127 |
-
|
| 128 |
result = graph.check_medication_today(
|
| 129 |
patient_name=patient_name,
|
| 130 |
medication_name=medication_name,
|
| 131 |
time_of_day=time_of_day,
|
| 132 |
)
|
| 133 |
-
graph.close()
|
| 134 |
|
| 135 |
if result.get("logged"):
|
| 136 |
return self._format_neo4j_result(result, medication_name, time_of_day)
|
|
|
|
| 112 |
) -> Dict[str, Any]:
|
| 113 |
"""Query Neo4j for medication events."""
|
| 114 |
try:
|
| 115 |
+
graph = deps.graph_memory
|
| 116 |
+
if not graph or not graph.is_connected:
|
| 117 |
+
logger.debug("Neo4j not available for medication check")
|
| 118 |
+
return {"logged": False}
|
| 119 |
|
| 120 |
# Get patient name from profile
|
| 121 |
patient_name = "Patient" # Default
|
|
|
|
| 123 |
profile = deps.database.get_or_create_profile()
|
| 124 |
patient_name = profile.get("name", "Patient")
|
| 125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
result = graph.check_medication_today(
|
| 127 |
patient_name=patient_name,
|
| 128 |
medication_name=medication_name,
|
| 129 |
time_of_day=time_of_day,
|
| 130 |
)
|
|
|
|
| 131 |
|
| 132 |
if result.get("logged"):
|
| 133 |
return self._format_neo4j_result(result, medication_name, time_of_day)
|
src/reachy_mini_conversation_app/tools/core_tools.py
CHANGED
|
@@ -61,6 +61,7 @@ class ToolDependencies:
|
|
| 61 |
# Mini-Minder deps
|
| 62 |
entry_state_manager: Any | None = None # EntryStateManager
|
| 63 |
database: Any | None = None # MiniMinderDB
|
|
|
|
| 64 |
|
| 65 |
|
| 66 |
# Tool base class
|
|
|
|
| 61 |
# Mini-Minder deps
|
| 62 |
entry_state_manager: Any | None = None # EntryStateManager
|
| 63 |
database: Any | None = None # MiniMinderDB
|
| 64 |
+
graph_memory: Any | None = None # GraphMemory (Neo4j)
|
| 65 |
|
| 66 |
|
| 67 |
# Tool base class
|
src/reachy_mini_conversation_app/tools/query_health_history.py
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Query health history tool.
|
| 2 |
+
|
| 3 |
+
Allows users to ask natural language questions about their health history
|
| 4 |
+
via voice. Uses GraphQueryEngine for LLM-generated Cypher queries against
|
| 5 |
+
the Neo4j knowledge graph.
|
| 6 |
+
|
| 7 |
+
Examples:
|
| 8 |
+
- "How many headaches did I have this week?"
|
| 9 |
+
- "What medications am I taking?"
|
| 10 |
+
- "When did I last see my doctor?"
|
| 11 |
+
- "Show me patterns in my symptoms"
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
import logging
|
| 17 |
+
from typing import Any, Dict, Optional
|
| 18 |
+
|
| 19 |
+
from reachy_mini_conversation_app.tools.core_tools import Tool, ToolDependencies
|
| 20 |
+
from reachy_mini_conversation_app.stream_api import emit_ui_component
|
| 21 |
+
|
| 22 |
+
logger = logging.getLogger(__name__)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class QueryHealthHistoryTool(Tool):
    """Answer natural language questions about health history via Neo4j graph."""

    name = "query_health_history"
    description = (
        "Answer questions about the user's health history, medications, symptoms, "
        "and patterns using the knowledge graph. Use when the user asks things like "
        "'How many headaches did I have this week?', 'When did I last see Dr Patel?', "
        "'What medications am I taking?', 'Show me patterns in my symptoms', "
        "'Have my migraines been getting worse?', etc."
    )
    parameters_schema = {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "The health history question to answer",
            },
        },
        "required": ["question"],
    }

    async def __call__(
        self,
        deps: ToolDependencies,
        question: str = "",
    ) -> Dict[str, Any]:
        """Query the Neo4j knowledge graph with a natural language question.

        Args:
            deps: Shared tool dependencies (graph memory, database, ...).
            question: The user's natural-language health question.

        Returns dict with:
            - answer: str (natural language, suitable for spoken response)
            - result_count: int (absent on the early "unavailable" returns)
            - source: str ("graph" or "unavailable")
        """
        if not question:
            return {
                "answer": "I didn't catch a question. Could you ask me something specific about your health history?",
                "source": "unavailable",
            }

        # Check graph availability
        graph = deps.graph_memory
        if not graph or not graph.is_connected:
            return {
                "answer": (
                    "I don't have access to your health history graph right now. "
                    "I can still help with things I remember from our conversations."
                ),
                "source": "unavailable",
            }

        # Get patient name for query parameterisation
        patient_name = "Patient"
        if deps.database:
            profile = deps.database.get_or_create_profile()
            patient_name = profile.get("display_name") or profile.get("name", "Patient")

        # PII guard: redact before sending to LLM. Keep a reference to the
        # guard so hydration below reuses the exact instance that produced
        # the mapping (a fresh guard might not recognise the placeholders).
        redacted_question = question
        pii_mapping: Dict[str, str] = {}
        guard: Optional[Any] = None
        try:
            from reachy_mini_conversation_app.pii_guard import get_pii_guard

            guard = get_pii_guard()
            if guard:
                redacted_question, pii_mapping = guard.redact(question)
                if pii_mapping:
                    logger.debug(
                        "PII redacted from question: %s", list(pii_mapping.keys())
                    )
        except Exception as e:
            logger.debug("PII guard not available: %s", e)

        # Run the query engine
        from reachy_mini_conversation_app.graph_query_engine import GraphQueryEngine

        engine = GraphQueryEngine(graph)
        result = await engine.query(redacted_question, patient_name)

        # PII guard: hydrate the answer using the same guard that redacted it
        answer = result.get("answer", "I couldn't find that information.")
        if pii_mapping and guard:
            try:
                answer = guard.hydrate(answer, pii_mapping)
            except Exception:
                pass  # Answer is still useful without hydration

        # Emit a GenUI component if we got interesting results
        result_count = result.get("result_count", 0)
        if result_count > 0:
            try:
                emit_ui_component(
                    "InsightCard",
                    {
                        "title": "Health Query",
                        "summary": answer,
                        "detail": f"Based on {result_count} record(s) in your health graph.",
                        "source": "graph_query",
                    },
                )
            except Exception as e:
                logger.debug("InsightCard emission failed: %s", e)

        return {
            "answer": answer,
            "result_count": result_count,
            "source": "graph",
        }
|
tests/test_graph_query_engine.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for the GraphQueryEngine — Cypher validation and safety guardrails."""
|
| 2 |
+
|
| 3 |
+
import pytest
|
| 4 |
+
from unittest.mock import AsyncMock, MagicMock, patch
|
| 5 |
+
|
| 6 |
+
from reachy_mini_conversation_app.graph_query_engine import (
|
| 7 |
+
validate_cypher,
|
| 8 |
+
GraphQueryEngine,
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class TestCypherValidation:
|
| 13 |
+
"""Test the Cypher safety validation function."""
|
| 14 |
+
|
| 15 |
+
def test_rejects_create(self):
|
| 16 |
+
assert validate_cypher("CREATE (n:Person {name: 'test'})") is False
|
| 17 |
+
|
| 18 |
+
def test_rejects_merge(self):
|
| 19 |
+
assert validate_cypher("MERGE (n:Person {name: 'test'})") is False
|
| 20 |
+
|
| 21 |
+
def test_rejects_delete(self):
|
| 22 |
+
assert validate_cypher("MATCH (n) DELETE n") is False
|
| 23 |
+
|
| 24 |
+
def test_rejects_detach_delete(self):
|
| 25 |
+
assert validate_cypher("MATCH (n) DETACH DELETE n") is False
|
| 26 |
+
|
| 27 |
+
def test_rejects_set(self):
|
| 28 |
+
assert validate_cypher("MATCH (n) SET n.name = 'test'") is False
|
| 29 |
+
|
| 30 |
+
def test_rejects_remove(self):
|
| 31 |
+
assert validate_cypher("MATCH (n) REMOVE n.name") is False
|
| 32 |
+
|
| 33 |
+
def test_rejects_drop(self):
|
| 34 |
+
assert validate_cypher("DROP INDEX ON :Person(name)") is False
|
| 35 |
+
|
| 36 |
+
def test_rejects_call(self):
|
| 37 |
+
assert validate_cypher("CALL db.labels()") is False
|
| 38 |
+
|
| 39 |
+
def test_rejects_load_csv(self):
|
| 40 |
+
assert validate_cypher("LOAD CSV FROM 'file:///data.csv' AS line") is False
|
| 41 |
+
|
| 42 |
+
def test_rejects_foreach(self):
|
| 43 |
+
assert (
|
| 44 |
+
validate_cypher("MATCH (n) FOREACH (x IN [1,2] | SET n.val = x)") is False
|
| 45 |
+
)
|
| 46 |
+
|
| 47 |
+
def test_rejects_empty_string(self):
|
| 48 |
+
assert validate_cypher("") is False
|
| 49 |
+
|
| 50 |
+
def test_rejects_whitespace_only(self):
|
| 51 |
+
assert validate_cypher(" ") is False
|
| 52 |
+
|
| 53 |
+
def test_allows_simple_match(self):
|
| 54 |
+
assert validate_cypher("MATCH (n) RETURN n") is True
|
| 55 |
+
|
| 56 |
+
def test_allows_match_with_where(self):
|
| 57 |
+
assert (
|
| 58 |
+
validate_cypher(
|
| 59 |
+
"MATCH (p:Person {name: $patient_name})-[:TAKES]->(m:Medication) "
|
| 60 |
+
"WHERE m.dose IS NOT NULL RETURN m.name, m.dose"
|
| 61 |
+
)
|
| 62 |
+
is True
|
| 63 |
+
)
|
| 64 |
+
|
| 65 |
+
def test_allows_optional_match(self):
|
| 66 |
+
assert (
|
| 67 |
+
validate_cypher(
|
| 68 |
+
"OPTIONAL MATCH (p:Person)-[:EXPERIENCED]->(e:Event) RETURN count(e)"
|
| 69 |
+
)
|
| 70 |
+
is True
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
def test_allows_with_clause(self):
|
| 74 |
+
assert (
|
| 75 |
+
validate_cypher(
|
| 76 |
+
"MATCH (p:Person {name: $patient_name}) "
|
| 77 |
+
"WITH p "
|
| 78 |
+
"MATCH (p)-[:TAKES]->(m:Medication) "
|
| 79 |
+
"RETURN m.name"
|
| 80 |
+
)
|
| 81 |
+
is True
|
| 82 |
+
)
|
| 83 |
+
|
| 84 |
+
def test_allows_aggregation(self):
|
| 85 |
+
assert (
|
| 86 |
+
validate_cypher(
|
| 87 |
+
"MATCH (p:Person)-[:EXPERIENCED]->(e:Event {type: 'headache'}) "
|
| 88 |
+
"RETURN count(e) AS headache_count, avg(e.severity) AS avg_severity"
|
| 89 |
+
)
|
| 90 |
+
is True
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
+
def test_allows_order_by_limit(self):
|
| 94 |
+
assert (
|
| 95 |
+
validate_cypher(
|
| 96 |
+
"MATCH (p:Person)-[:EXPERIENCED]->(e:Event) "
|
| 97 |
+
"RETURN e.type, e.timestamp "
|
| 98 |
+
"ORDER BY e.timestamp DESC LIMIT 10"
|
| 99 |
+
)
|
| 100 |
+
is True
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
def test_allows_unwind(self):
|
| 104 |
+
assert (
|
| 105 |
+
validate_cypher(
|
| 106 |
+
"UNWIND $names AS name " "MATCH (p:Person {name: name}) RETURN p"
|
| 107 |
+
)
|
| 108 |
+
is True
|
| 109 |
+
)
|
| 110 |
+
|
| 111 |
+
def test_rejects_create_case_insensitive(self):
    """Mutations should be caught regardless of case."""
    for query in (
        "match (n) Create (m:Test)",
        "MATCH (n) create (m:Test)",
        "match (n) CREATE (m:Test)",
    ):
        assert validate_cypher(query) is False
|
| 116 |
+
|
| 117 |
+
def test_rejects_query_not_starting_with_match(self):
    """Queries must start with MATCH, WITH, RETURN, or UNWIND."""
    allowed = validate_cypher("RETURN 1")
    disallowed = validate_cypher("EXPLAIN MATCH (n) RETURN n")
    assert allowed is True
    assert disallowed is False
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
class TestGraphQueryEngineSchemaDescription:
    """Test schema description from GraphMemory."""

    def test_fallback_schema_when_not_connected(self):
        """Schema description should return a fallback when not connected."""
        graph = MagicMock()
        graph.is_connected = False
        graph.get_schema_description.return_value = (
            "## Neo4j Graph Schema\n\n### Node Labels\n (:Person)"
        )

        schema = GraphQueryEngine(graph)._get_schema()

        assert "Neo4j Graph Schema" in schema

    def test_schema_caching(self):
        """Schema should only be fetched once."""
        graph = MagicMock()
        graph.get_schema_description.return_value = "cached schema"
        engine = GraphQueryEngine(graph)

        engine._get_schema()
        engine._get_schema()

        # The second call must be served from the cache, not the graph.
        graph.get_schema_description.assert_called_once()

    def test_cache_invalidation(self):
        """After invalidation, schema should be re-fetched."""
        graph = MagicMock()
        graph.get_schema_description.return_value = "schema v1"
        engine = GraphQueryEngine(graph)

        assert engine._get_schema() == "schema v1"

        # Change the backing schema, invalidate, and expect a fresh read.
        graph.get_schema_description.return_value = "schema v2"
        engine.invalidate_schema_cache()
        assert engine._get_schema() == "schema v2"
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
@pytest.mark.asyncio
class TestGraphQueryEngineExecution:
    """Test query execution path (mocked)."""

    async def test_execute_uses_read_session(self):
        """Ensure execute() calls execute_read, not _execute."""
        graph = MagicMock()
        graph.is_connected = True
        graph.execute_read.return_value = [{"count": 3}]

        engine = GraphQueryEngine(graph)
        rows = await engine.execute(
            "MATCH (n:Person)-[:EXPERIENCED]->(e:Event) RETURN count(e) AS count",
            patient_name="Elena",
        )

        graph.execute_read.assert_called_once()
        assert rows == [{"count": 3}]

    async def test_execute_returns_empty_when_disconnected(self):
        """Ensure execute() returns empty list when graph is not connected."""
        graph = MagicMock()
        graph.is_connected = False

        rows = await GraphQueryEngine(graph).execute("MATCH (n) RETURN n")

        assert rows == []
        graph.execute_read.assert_not_called()

    async def test_query_handles_generation_error_gracefully(self):
        """Full query() should return a friendly error message on failure."""
        graph = MagicMock()
        graph.is_connected = True
        graph.get_schema_description.return_value = "test schema"
        engine = GraphQueryEngine(graph)

        # Make Cypher generation fail inside the mocked OpenAI client.
        with patch.object(engine, "_get_client") as client:
            client.return_value.chat.completions.create = AsyncMock(
                side_effect=Exception("API error")
            )
            result = await engine.query("How many headaches?", "Elena")

        assert "error" in result
        assert "answer" in result
        assert result["result_count"] == 0
|
tests/test_pattern_detector.py
ADDED
|
@@ -0,0 +1,276 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for the PatternDetector and Insight formatting."""
|
| 2 |
+
|
| 3 |
+
import pytest
|
| 4 |
+
from unittest.mock import MagicMock
|
| 5 |
+
|
| 6 |
+
from reachy_mini_conversation_app.pattern_detector import (
|
| 7 |
+
PatternDetector,
|
| 8 |
+
Insight,
|
| 9 |
+
format_insights_for_prompt,
|
| 10 |
+
)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class TestInsight:
    """Test the Insight dataclass."""

    def test_to_dict(self):
        insight = Insight(
            pattern_type="correlation",
            summary="Test summary",
            detail="Test detail",
            confidence=0.75,
            entities=["med1", "headache"],
            period_days=30,
        )

        serialized = insight.to_dict()

        assert serialized["pattern_type"] == "correlation"
        assert serialized["confidence"] == 0.75
        assert "med1" in serialized["entities"]

    def test_default_entities_and_period(self):
        # Only the required fields: defaults should fill in the rest.
        insight = Insight(pattern_type="test", summary="s", detail="d", confidence=0.5)

        assert insight.entities == []
        assert insight.period_days == 30
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class TestFormatInsightsForPrompt:
    """Test formatting insights for system prompt injection."""

    def test_empty_insights_returns_empty(self):
        assert format_insights_for_prompt([]) == ""

    def test_single_insight_format(self):
        rendered = format_insights_for_prompt(
            [
                Insight(
                    pattern_type="correlation",
                    summary="Headache appeared on 5 days.",
                    detail="detail",
                    confidence=0.8,
                )
            ]
        )

        assert "Recent Health Insights" in rendered
        assert "Correlation" in rendered
        assert "80%" in rendered
        assert "Headache appeared on 5 days." in rendered

    def test_multiple_insights_numbered(self):
        first = Insight(
            pattern_type="correlation", summary="s1", detail="d1", confidence=0.9
        )
        second = Insight(
            pattern_type="frequency_change", summary="s2", detail="d2", confidence=0.7
        )

        rendered = format_insights_for_prompt([first, second])

        assert "1." in rendered
        assert "2." in rendered

    def test_observational_language_guidance(self):
        """Prompt should instruct the model to use observational language."""
        rendered = format_insights_for_prompt(
            [Insight(pattern_type="test", summary="s", detail="d", confidence=0.5)]
        )
        assert "observational" in rendered.lower() or "I noticed" in rendered
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
class TestPatternDetectorRunAnalysis:
    """Test the run_analysis orchestration."""

    @staticmethod
    def _connected_graph(read_results):
        """Build a connected GraphMemory mock whose execute_read yields *read_results* in order."""
        graph = MagicMock()
        graph.is_connected = True
        graph.execute_read.side_effect = read_results
        return graph

    def test_returns_empty_when_disconnected(self):
        graph = MagicMock()
        graph.is_connected = False

        assert PatternDetector(graph).run_analysis("Elena", days=30) == []

    def test_returns_empty_when_no_graph(self):
        assert PatternDetector(None).run_analysis("Elena", days=30) == []

    def test_continues_on_individual_detector_failure(self):
        """If one detector fails, the others should still run."""
        graph = self._connected_graph(
            [
                Exception("Neo4j error"),  # medication_symptom_correlation
                [],  # frequency_changes headache
                [],  # frequency_changes migraine
                [],  # frequency_changes confusion
                [],  # missed_medication_impact
                [],  # temporal_patterns
            ]
        )

        # Should not raise, just log warnings.
        insights = PatternDetector(graph).run_analysis("Elena", days=30)
        assert isinstance(insights, list)

    def test_sorts_by_confidence_descending(self):
        """Insights should be sorted by confidence (highest first)."""
        correlations = [
            {
                "medication": "Med A",
                "symptom": "headache",
                "co_occurrence_count": 10,
                "distinct_days": 8,
            },
            {
                "medication": "Med B",
                "symptom": "fatigue",
                "co_occurrence_count": 3,
                "distinct_days": 3,
            },
        ]
        # First read feeds medication_symptom_correlation; the rest are empty.
        graph = self._connected_graph([correlations, [], [], [], [], []])

        insights = PatternDetector(graph).run_analysis("Elena", days=30)

        if len(insights) >= 2:
            assert insights[0].confidence >= insights[1].confidence

    def test_caps_at_five_insights(self):
        """Should return at most 5 insights."""
        correlations = [
            {
                "medication": f"Med{i}",
                "symptom": f"sym{i}",
                "co_occurrence_count": 5,
                "distinct_days": 5,
            }
            for i in range(10)
        ]
        graph = self._connected_graph([correlations, [], [], [], [], []])

        insights = PatternDetector(graph).run_analysis("Elena", days=30)
        assert len(insights) <= 5
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
class TestPatternDetectorInsightLanguage:
    """Verify insights never use causal language."""

    def test_correlation_summary_is_neutral(self):
        graph = MagicMock()
        graph.is_connected = True
        graph.execute_read.side_effect = [
            [
                {
                    "medication": "Topiramate",
                    "symptom": "headache",
                    "co_occurrence_count": 5,
                    "distinct_days": 5,
                }
            ],
            [],
            [],
            [],
            [],
            [],
        ]

        insights = PatternDetector(graph).run_analysis("Elena", days=30)

        # Correlation is not causation: summaries must stay observational.
        for insight in insights:
            text = insight.summary.lower()
            for banned in ("caused", "triggered", "because"):
                assert banned not in text, f"Causal language in: {insight.summary}"
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
class TestPatternDetectorFrequencyChanges:
    """Test the frequency change detector."""

    @staticmethod
    def _graph_with_counts(rows):
        """Connected GraphMemory mock whose execute_read always returns *rows*."""
        graph = MagicMock()
        graph.is_connected = True
        graph.execute_read.return_value = rows
        return graph

    def test_detects_increase(self):
        graph = self._graph_with_counts(
            [
                {"period": "prior", "event_count": 2},
                {"period": "recent", "event_count": 6},
            ]
        )

        insights = PatternDetector(graph).detect_frequency_changes(
            "Elena", "headache", days=30
        )

        assert len(insights) == 1
        assert "increased" in insights[0].summary.lower()

    def test_detects_decrease(self):
        graph = self._graph_with_counts(
            [
                {"period": "prior", "event_count": 10},
                {"period": "recent", "event_count": 3},
            ]
        )

        insights = PatternDetector(graph).detect_frequency_changes(
            "Elena", "headache", days=30
        )

        assert len(insights) == 1
        assert "decreased" in insights[0].summary.lower()

    def test_ignores_small_changes(self):
        """Changes under 25% should not generate insights."""
        graph = self._graph_with_counts(
            [
                {"period": "prior", "event_count": 10},
                {"period": "recent", "event_count": 11},
            ]
        )

        insights = PatternDetector(graph).detect_frequency_changes(
            "Elena", "headache", days=30
        )
        assert len(insights) == 0

    def test_handles_insufficient_data(self):
        """Should return nothing with fewer than MIN_SAMPLE_SIZE events."""
        graph = self._graph_with_counts([{"period": "recent", "event_count": 1}])

        insights = PatternDetector(graph).detect_frequency_changes(
            "Elena", "headache", days=30
        )
        assert len(insights) == 0
|