Spaces:

moazeldegwy
/

mealgraph

Sleeping

App Files Files Community

moazeldegwy committed 25 days ago

Commit

c504fac

2 Parent(s): bcd961e 34b037e

Merge Phase 2 into Phase 3 base

Browse files

Files changed (23) hide show

.env.example +23 -0
.gitignore +60 -0
Makefile +26 -0
agents.py +569 -436
config.py +113 -6
logging_setup.py +77 -0
nutritionmas.py +66 -39
pyproject.toml +52 -0
requirements.txt +31 -0
schemas.py +215 -0
tests/__init__.py +0 -0
tests/conftest.py +99 -0
tests/test_api_pool.py +51 -0
tests/test_quantities_finder.py +90 -0
tests/test_schemas.py +135 -0
tests/test_settings.py +53 -0
tests/test_smoke.py +64 -0
tests/test_typed_agents.py +184 -0
tests/test_validation_agent.py +201 -0
tools.py +236 -211
utils.py +335 -258
validation.py +327 -0
workflow.py +86 -72

.env.example ADDED Viewed

	@@ -0,0 +1,23 @@

+# Comma-separated list of Gemini API keys. The system rotates through them
+# and respects per-key RPM/RPD limits when NUTRITION_MAS_ENABLE_RATE_LIMITING=true.
+NUTRITION_MAS_GEMINI_API_KEYS=key_one,key_two
+# Optional infra paths.
+# When set, every agent/tool I/O is dumped to LOG_DIR/<subdir>/<timestamp>.json
+NUTRITION_MAS_LOG_DIR=
+# When set, LangGraph checkpoints are persisted to disk (instead of memory).
+NUTRITION_MAS_PERSISTENCE_DIR=
+# Debug switches (default: off)
+NUTRITION_MAS_DEBUG_MODE=false
+NUTRITION_MAS_DEBUG_LEVEL=full        # 'full' | 'output'
+# JSON-encoded dict, see config.Settings for shape. Defaults to all/all.
+# NUTRITION_MAS_DEBUG_SCOPES={"agents": ["CoachAgent"], "tools": ["all"]}
+# Rate limiting (default: on)
+NUTRITION_MAS_ENABLE_RATE_LIMITING=true
+# Optional LangSmith tracing (Phase 6 will wire this up properly)
+# LANGCHAIN_TRACING_V2=true
+# LANGCHAIN_API_KEY=
+# LANGCHAIN_PROJECT=Nutrition-MAS

.gitignore ADDED Viewed

	@@ -0,0 +1,60 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# Virtual environments
+.venv/
+venv/
+env/
+ENV/
+.env
+# IDE
+.vscode/
+.idea/
+.claude/
+*.swp
+*.swo
+*~
+.DS_Store
+# Project
+logs/
+checkpoints/
+data/cache/
+*.sqlite
+*.sqlite3
+*.db
+.cache/
+# Pytest / coverage
+.pytest_cache/
+.coverage
+htmlcov/
+.mypy_cache/
+.ruff_cache/
+# Notebook
+.ipynb_checkpoints/
+# LangSmith / tracing
+.langsmith/

Makefile ADDED Viewed

	@@ -0,0 +1,26 @@

+.PHONY: install dev test test-cov lint format clean run
+install:
+	pip install -r requirements.txt
+dev:
+	pip install -e ".[dev]"
+test:
+	pytest -ra -q
+test-cov:
+	pytest -ra --cov=. --cov-report=term-missing --cov-report=html
+lint:
+	ruff check .
+format:
+	ruff format .
+clean:
+	rm -rf .pytest_cache .ruff_cache .mypy_cache htmlcov .coverage
+	find . -type d -name __pycache__ -exec rm -rf {} +
+run:
+	python -c "import nutritionmas; print('Module imports OK')"

agents.py CHANGED Viewed

@@ -1,489 +1,622 @@
-from typing import Dict, Any
-from utils import extract_and_parse_json, set_nested, update_memory_partition, save_to_json, should_debug
-from tools import ComputationTool, WebSearchTool, QuantitiesFinder
-from datetime import datetime
 import json
-import config
 class CoachAgent:
     def __init__(self, llm_instance):
         self.llm = llm_instance
     def handle_task(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        memory_str = json.dumps(state["memory"], indent=2)
         response_steps = state.get("response_steps", [])
-        response_steps_str = json.dumps(response_steps, indent=2) if response_steps else "None"
-        truncated_history = []
         for msg in state["conversation_history"]:
             if msg["role"] == "assistant" and len(msg["content"]) > 200:
-                truncated_content = msg["content"][:200] + "... (full response in memory)"
-                truncated_history.append({"role": "assistant", "content": truncated_content})
             else:
                 truncated_history.append(msg)
-        history_str = "\n".join([f"{msg['role']}: {msg['content']}" for msg in truncated_history])
-        observation = f"""User query: {state['user_question']}
-        Memory State: {memory_str}
-        Current Response Steps: {response_steps_str}
-        Previous Tool Result: {state.get('agent_result', 'None')}
-        Conversation history: {history_str}"""
-        prompt = f"""
-        You are the Coach Agent (central orchestrator) of a nutrition MAS.
-        Current State: {observation}
-        Primary responsibilities:
-        - Translate user intent to a concrete workflow of response_steps (use the shared response_step schema).
-        - Enforce system rules (MedicalAssessment must be completed before Planner.
-        - Decide and perform actions: call_agent, call_tool, ask_user, write_memory, compose_response.
-        Inputs:
-        - observation (string)
-        - memory partitions: user_profile, medical_history, flags_and_assessments, plans
-        - response_steps (may be None or list)
-        Behavior rules (mandatory):
-        1. If response_steps is None or empty, generate a response_steps list with explicit ordered steps (max 6 steps). Each step must include id, actor, prerequisites, and status "pending".
-          - Typical personal-workflow (if user asks for personalized plan):
-            1) Validate required user data (height, weight, age, sex, activity_level, allergies, goal). If missing -> ask_user.
-            2) Update memory (if user provided new data). [action: write_memory]
-            3) Call MedicalAssessmentAgent with task to assess user.
-            4) Wait for assessment to be completed and stored into memory.
-            5) Call PlannerAgent with relevent task.
-        2. When calling any agent, set the called step status to "in_progress" and include `prerequisites` satisfied by your observation.
-        3. Only call PlannerAgent if memory.flags_and_assessments exists and contains "assessment_status":"assessment_complete". If not, call MedicalAssessmentAgent.
-        4. When new user personal data is detected in user input, add steps to:
-          - propose memory update (write_memory)
-          - call MedicalAssessmentAgent if needed
-          - re-plan if needed
-        5. For any "write_memory" action, provide the full partition contents in params.data (not diffs). The Coach is responsible to merge and store.
-        Action outputs: respond with a JSON object:
-        {{
-          "observation": "...",
-          "thought": "...",
-          "response_steps": [ ... ],
-          "action": "call_agent | call_tool | ask_user | write_memory | compose_response",
-          "params": {{ ... }}
-        }}
-        Examples:
-        - call_agent params: {{"agent_name":"MedicalAssessmentAgent", "task":"task description"}}
-        - compose_response params:{{"text":"Complete response in markdown"}}
-        Rules:
-        - When composing the response, extract and include relevant information from the memory state (e.g., calorie target, plan details, dietary restrictions) in markdown format for readability.
-        - Always include a "trace" field in composed responses summarizing which agents/tools were called for and which sources were used.
-        - For high-risk profiles (e.g., requires_professional_consultation: true); in such cases append a bold warning at the end of the diet plan response advising professional consultation before implementation.
-        """
-        if should_debug('agents', 'CoachAgent'):
-            print(f"\n--- Coach Agent Turn {state['num_turns'] + 1} ---")
-        if should_debug('agents', 'CoachAgent') and config.DEBUG_LEVEL == 'full':
-            print(f"Raw LLM input:\n{prompt}")
-        response = self.llm(prompt)[0]
-        if should_debug('agents', 'CoachAgent'):
-            print(f"Coach Raw Response:\n{response}")
-        parsed = extract_and_parse_json(response)
-        # Add high-level print for user mode
-        if not config.DEBUG_MODE:
-            action = parsed.get("action")
-            params = parsed.get("params", {})
-            print_str = "\n🏋️‍♂️Coach Agent: "
-            if action == "call_agent":
-                print_str += f"Calling {params.get('agent_name')} with task '{params.get('task')}'"
-            elif action == "call_tool":
-                print_str += f"Using {params.get('tool_name')} with task '{params.get('task')}'"
-            elif action == "ask_user":
-                print_str += f"Asking user: {params.get('prompt')}"
-            elif action == "write_memory":
-                print_str += f"Writing to memory partition '{params.get('partition')}'"
-            elif action == "compose_response":
-                print_str += "Composing final response"
-            print(print_str)
-        current_action = {
-            "action": parsed.get("action"),
-            "params": parsed.get("params", {})
-        }
-        response_steps = parsed.get("response_steps", state.get("response_steps", []))
-        log_data = {
-            "prompt": prompt,
-            "output":response,
-            "parsed": parsed,
-            "timestamp": datetime.now().isoformat()
-        }
-        save_to_json(log_data, f'coach_agent_{datetime.now().isoformat()}.json', subdirectory='CoachAgent')
         return {
             **state,
             "current_action": current_action,
-            "response_steps": response_steps,
             "num_turns": state["num_turns"] + 1,
-            "agent_result": None
         }
 class MedicalAssessmentAgent:
-    def __init__(self, llm_instance, computation_tool: ComputationTool, web_search_tool: WebSearchTool):
         self.llm = llm_instance
         self.computation_tool = computation_tool
         self.web_search_tool = web_search_tool
     def handle_task(self, task: str, memory: Dict[str, Any]) -> str:
-        print(f"\n👨🏻‍⚕️ MEDICAL ASSESSMENT AGENT STARTED")
-        # Build relevant memory context
         relevant_memory = {
             "user_profile": memory.get("user_profile", {}),
             "medical_history": memory.get("medical_history", {}),
         }
-        memory_str = json.dumps(relevant_memory, indent=2)
-        tool_results = []
-        assessment_plan = []
-        max_iterations = 15
-        iteration = 0
-        while iteration < max_iterations:
-            tool_results_str = "\n".join([f"Tool Result {i+1}: {result}" for i, result in enumerate(tool_results)])
-            prompt = f"""
-            You are the Medical Assessment Agent. Your job: produce an evidence-based assessment and the set of clinical flags and calculations needed by the Planner and Validation agents.
-            Task: {task}
-            Current Memory: {memory_str}
-            Current Assessment Plan: {assessment_plan}
-            Previous Tool Results: {tool_results_str}
-            Available tools: ComputationTool, WebSearchTool
-            Mandatory behavior (do not skip):
-            1. Critical data check: confirm presence of age, sex, height, weight, activity_level, allergies, medications. If any critical field is missing -> action: ask_user (return which fields).
-            2. Use ComputationTool for all numeric calculations (BMI, BMR, TDEE, calorie targets, macro targets). Provide computation inputs with inside the task description.
-            3. Use WebSearchTool to fetch authoritative guidelines where relevant (WHO, USDA, clinical guidelines). Always capture the source(s) used with timestamped citations.
-            4. Produce a compact assessment_plan (3-6 steps max) that lists each computational/search step, its status, and result.
-            - When generating the assessment_plan (if empty or None), follow this exact sequence (assuming critical data is present; if not, prepend a step for ask_user):
-              1. Call ComputationTool to calculate BMI, BMR, TDEE, and a single daily_target_calories (integer) based on the user's goal, all in one tool call.
-              2. Call ComputationTool to calculate macro_targets (protein_g, fat_g, carbohydrates_g as single integers) optimized for the user's goal given the daily_target_calories.
-              3. Call WebSearchTool to find dietary guidelines related to the user based on their profile and medical history to manage conditions.
-              4-6. Additional steps if needed (e.g., synthesis, further searches/computations for specific risks).
-            5. Return a `assessment_complete` containing:
-              - assessment_summary
-              - calculations: {{BMI, BMR, TDEE, daily_target_calories, macro_targets}}
-                - daily_target_calories: a single integer value (e.g., 2750)
-                - macro_targets: {{"protein_g": int, "fat_g": int, "carbohydrates_g": int}} (single integer values for each, no ranges)
-              - flags_to_set: [e.g., "high_ldl", "diabetes_risk"]
-              - recommendations: clinical dietary constraints or urgent issues (e.g., "refer to PCP for suspected iron deficiency")
-              - requires_professional_consultation: True/False (True if the case is medically sensitive)
-              - trace: a single paragraph summarizing which agents/tools were called and key steps.
-            6. If any calculation or guideline retrieval fails due to tool error:
-              - fallback to best-known guideline values only if necessary (mark "data_confidence": 0.xx).
-              - set "requires_tool_retry": true in the response.
-            Response JSON must contain:
-            - medical_reasoning: detailed rationale
-            - observation: missing/available info
-            - risk_assessment_priorities: ordered list of 1-4 priorities
-            - assessment_plan: list of response_step objects (schema above)
-            - action: either {{"type":"call_tool","tool_name":"ComputationTool" or "WebSearchTool","tool_task": "<task string>"}} or {{"type":"assessment_complete",...}}
-            """
-            if should_debug('agents', 'MedicalAssessmentAgent'):
-                print(f"\n--- Medical Assessment Agent Iteration {iteration + 1} ---")
-            if should_debug('agents', 'MedicalAssessmentAgent') and config.DEBUG_LEVEL == 'full':
-                print(f"Raw LLM input:\n{prompt}")
-            response = self.llm(prompt)[0]
-            if should_debug('agents', 'MedicalAssessmentAgent'):
-                print(f"Medical Assessment Raw Response:\n{response}")
-            parsed = extract_and_parse_json(response)
-            # Add high-level print for user mode
-            if not config.DEBUG_MODE:
-                action_type = parsed.get("action", {}).get("type")
-                if action_type == "call_tool":
-                    tool_name = parsed["action"].get("tool_name")
-                    tool_task = parsed["action"].get("tool_task")
-                    print(f"👨🏻‍⚕️ Medical Assessment Agent: Using {tool_name} for '{tool_task}'")
-                elif action_type == "ask_user":
-                    fields = parsed["action"].get("fields", [])
-                    print(f"👨🏻‍⚕️ Medical Assessment Agent: Asking user for missing fields: {', '.join(fields)}")
-                elif action_type == "assessment_complete":
-                    print("👨🏻‍⚕️ Medical Assessment Agent: Completing assessment")
-            if "assessment_plan" in parsed:
-                assessment_plan = parsed["assessment_plan"]
-            action = parsed.get("action", {})
-            action_type = action.get("type")
-            if action_type == "call_tool":
-                tool_name = action.get("tool_name")
-                tool_task = action.get("tool_task")
-                if tool_name == "ComputationTool":
-                    if tool_task:
-                        result = self.computation_tool.handle_task(tool_task)
-                    else:
-                        result = "Missing 'tool_task' for ComputationTool"
-                elif tool_name == "WebSearchTool":
-                    if tool_task:
-                        result = self.web_search_tool.handle_task(tool_task)
-                    else:
-                        result = "Missing 'tool_task' for WebSearchTool"
-                else:
-                    result = f"Unknown tool: {tool_name}"
-                tool_results.append(f"{tool_name}: {result}")
-            elif action_type == "ask_user":
-                fields = action.get("fields", [])
-                result = f"Missing critical fields: {', '.join(fields)}. Please provide the following information to continue the assessment."
-                print(f"👨🏻‍⚕️ MEDICAL ASSESSMENT AGENT: User query needed - {result}")
-                return result
-            elif action_type == "assessment_complete":
-                assessment_summary = action.get("assessment_summary")
-                flags_to_set = action.get("flags_to_set", [])
-                recommendations = action.get("recommendations", [])
-                requires_professional_consultation = action.get("requires_professional_consultation", False)
-                calculations = action.get("calculations", {})  # Now a dict as per new prompt
-                evidence_sources = action.get("evidence_sources", [])
-                trace = action.get("trace", "")
-                if action.get("requires_tool_retry", False):
-                    result = "Assessment requires tool retry due to failures. Please re-run with fixed tools."
-                    print(f"👨🏻‍⚕️ MEDICAL ASSESSMENT AGENT: Tool retry needed - {result}")
-                    return result  # Return early without updating memory
-                # Update memory using update_memory_partition
-                update_memory_partition(memory, "flags_and_assessments", {
-                    "assessment_summary": assessment_summary,
-                    "flags": flags_to_set,
-                    "recommendations": recommendations,
-                    "requires_professional_consultation": requires_professional_consultation,
-                    "calculations": calculations,
-                    "evidence_sources": evidence_sources,
-                    "trace": trace,
-                    "assessment_timestamp": datetime.now().isoformat()  # Retained timestamp
-                })
-                # Log the assessment (updated to include new fields)
-                log_data = {
-                    "task": task,
-                    "memory_input": relevant_memory,
-                    "tool_results": tool_results,
-                    "assessment_summary": assessment_summary,
-                    "flags_set": flags_to_set,
-                    "recommendations": recommendations,
-                    "requires_professional_consultation": requires_professional_consultation,
-                    "evidence_sources": evidence_sources,
-                    "trace": trace,
-                    "timestamp": datetime.now().isoformat()
-                }
-                save_to_json(log_data, f'medical_assessment_{datetime.now().isoformat()}.json', subdirectory='MedicalAssessment')
-                result = assessment_summary
-                print(f"👨🏻‍⚕️ MEDICAL ASSESSMENT AGENT COMPLETED: {result}")
-                return result
             else:
-                print(f"Unknown action type: {parsed}")
                 break
-            iteration += 1
-        # Fallback if max iterations reached
-        result = f"Medical assessment stopped after {max_iterations} iterations"
-        print(f"👨🏻‍⚕️ MEDICAL ASSESSMENT AGENT Stopped (MAX ITERATIONS)")
-        return result
 class PlannerAgent:
-    def __init__(self, llm_instance, computation_tool: ComputationTool, web_search_tool: WebSearchTool, quantities_finder: QuantitiesFinder):
         self.llm = llm_instance
         self.computation_tool = computation_tool
         self.web_search_tool = web_search_tool
         self.quantities_finder = quantities_finder
     def handle_task(self, task: str, memory: Dict[str, Any]) -> str:
-        print(f"\n📋 PLANNER AGENT STARTED")
         relevant_memory = {
             "user_profile": memory.get("user_profile", {}),
             "flags_and_assessments": memory.get("flags_and_assessments", {}),
         }
-        memory_str = json.dumps(relevant_memory, indent=2)
-        tool_results = []
-        planning_steps = []
-        max_iterations = 15
-        iteration = 0
-        while iteration < max_iterations:
-            tool_results_str = "\n".join([f"Tool Result {i+1}: {res}" for i, res in enumerate(tool_results)]) if tool_results else "None"
-            planning_steps_str = json.dumps(planning_steps, indent=2) if planning_steps else "None"
-            plan_status = relevant_memory.get("flags_and_assessments", {}).get("assessment_status", "none")
-            prompt = f"""
-You are the Planner Agent. Create personalized meal plans constrained by the medical assessment.
-Task: {task}
-Current Memory: {memory_str}
-Current Planning Steps: {planning_steps_str}
-Previous Tool Results: {tool_results_str}
-Available Tools: WebSearchTool, QuantitiesFinder
-Mandatory behavior & rules:
-1. Precondition: Do NOT start planing unless user medical assessment exists in memory (flags_and_assessments is not empty). If missing, return action: {{"type":"provide_plan", "final_plan":{{"Can't draft plan as flags_and_assessments is empty, please use MedicalAssessmentAgent"}}}}
-2. Batch behavior:
-   - Always group related items when using tools. Example: fetch nutrition facts for all foods in one WebSearchTool call instead of multiple calls.
-3. For each food in the draft:
-   - Use WebSearchTool to fetch nutrition facts for a standard serving size (or 100g cooked) (e.g., "Find nutrition facts (calories, protein, fat, carbohydrates) for the following items,...").
-   - If WebSearchTool fails for >2 items, stop retrying and use your internal knowledge.
-4. Acceptable tolerances:
-   - Calories: within ±3% of daily_target_calories
-   - Macronutrients: within ±5% of each macro target
-5. Exclude all items listed in allergies and avoid disliked foods unless necessary for balance, in which case propose alternatives.
-6. Flexible Planning: If task requests a multi-day plan (e.g., 7 days), fall back to a shorter balanced plan (1–2 unique days) and instruct user to repeat/rotate.
-7. QuantitiesFinder Format: When calling 'QuantitiesFinder', the 'tool_task' MUST be a JSON STRING. This string is the serialized version of an object containing "foods" and "targets".
-    - "foods": A list of dictionaries. Each dictionary must have:
-      - name, calories, protein, fat, carbohydrates (per 100g)
-      - estimated_g: Your "best guess" for a realistic quantity (e.g., 150g). The solver will be penalized for deviating from this, so it will try to stay close.
-    - "targets": A dictionary containing: calories, protein, fat, carbohydrates.
-    - Example: "tool_task": "{{\"foods\": [...], \"targets\": {{...}}}}"
-Planning Steps Handling:
-- If Current Planning Steps is empty or 'None', you MUST adopt the following fixed 6-step plan as your primary workflow.
-[
-{{"id": 1, "description": "Analyze requirements, "Draft a realistic diet plan. For each food, assign a realistic 'estimated_g' (e.g., 150g chicken)."", "status": "pending"}},
-{{"id": 1, "description": "Analyze drafted plan, determine a list of all ingredients in the darafted plan, and batch-gather their nutritional facts (calories, protein, fat, carbohydrates) using WebSearchTool.", "status": "pending"}},
-{{"id": 3, "description": "Call 'QuantitiesFinder' (PuLP solver) with all nutritional data, targets, and bounds to calculate precise quantities.", "status": "pending"}},
-{{"id": 4, "description": "Update the drafted plan with the precise quantities returned by the QuantitiesFinder.", "status": "pending"}},
-{{"id": 4, "description": "Provide the final plan 'provide_plan'", "status": "pending"}}
-]
-- If Current Planning Steps is provided... You may remain in a step for multiple iterations if necessary to meet all targets, as outlined in the Iterative Correction Loop rule.
-Return JSON:
-- observation, thought
-- planning_steps (full list of response_step objects)
-- action: one of {{
-    "type":"call_tool","tool_name":...,"tool_task":...,
-    "type":"draft_plan","drafted_plan":{{...}},
-    "type":"provide_plan","final_plan":{{...}}
-}}
-Notes:
-- Keep each plan realistic and culturally appropriate (regional foods if provided).
-- Trace: at the end of the plan, summarize which agents/tools were called.
-- Always include the full updated planning_steps in your response JSON to persist across iterations.
-"""
-            if should_debug('agents', 'PlannerAgent'):
-                print(f"\n--- Planner Agent Iteration {iteration + 1} ---")
-            if should_debug('agents', 'PlannerAgent') and config.DEBUG_LEVEL == 'full':
-                print(f"Raw LLM input:\n{prompt}")
-            response = self.llm(prompt)[0]
-            if should_debug('agents', 'PlannerAgent'):
-                print(f"Planner Raw Response:\n{response}")
-            parsed = extract_and_parse_json(response)
-            # Add high-level print for user mode
-            if not config.DEBUG_MODE:
-                action_type = parsed.get("action", {}).get("type")
-                print_str = "📋 Planner Agent: "
-                if action_type == "call_tool":
-                    tool_name = parsed["action"].get("tool_name")
-                    tool_task = parsed["action"].get("tool_task")
-                    print_str += f"Using {tool_name} for '{tool_task}'"
-                elif action_type == "draft_plan":
-                    print_str += "Drafting plan"
-                elif action_type == "provide_plan":
-                    print_str += "Finalizing plan"
-                print(print_str)
-            planning_steps = parsed.get("planning_steps", planning_steps)
-            action = parsed.get("action", {})
-            action_type = action.get("type")
-            if action_type == "call_tool":
-                tool_name = action.get("tool_name")
-                tool_task = action.get("tool_task")
-                if tool_name and tool_task:
-                    print(f"Calling {tool_name} with task: {tool_task}")
-                    if tool_name == "ComputationTool":
-                        result = self.computation_tool.handle_task(tool_task)
-                    elif tool_name == "WebSearchTool":
-                        result = self.web_search_tool.handle_task(tool_task)
-                    elif tool_name == "QuantitiesFinder":
-                        result = self.quantities_finder.handle_task(tool_task)
-                    else:
-                        result = f"Unknown tool: {tool_name}"
-                    tool_results.append(f"{tool_name}: {result}")
-                else:
-                    print("Missing tool_name or tool_task")
-            elif action_type == "draft_plan":
-                drafted_plan = action.get("drafted_plan")
-                if drafted_plan:
-                    if "plans" not in memory:
-                        memory["plans"] = {}
-                    memory["plans"]["drafted_plan"] = drafted_plan
-                    result = "Plan drafted and stored in memory"
-                    tool_results.append(result)
                 else:
-                    result = "Drafted plan not provided"
-                    tool_results.append(result)
-            elif action_type == "provide_plan":
-                final_plan = action.get("final_plan")
-                if "error" in final_plan:
-                    print(f"\n📋 PLANNER AGENT ERROR: {final_plan}")
-                    return json.dumps(final_plan)
-                else:
-                    final_plan = final_plan or memory["plans"].get("drafted_plan")
-                    if final_plan:
-                        memory["plans"]["current_plan"] = final_plan
-                        memory["plans"]["plan_timestamp"] = datetime.now().isoformat()
-                        if "drafted_plan" in memory["plans"]:
-                            del memory["plans"]["drafted_plan"]
-                        result = "Planning completed with validated plan"
-                        tool_results.append(result)
-                        log_data = {
-                            "task": task,
-                            "memory_input": relevant_memory,
-                            "tool_results": tool_results,
-                            "final_response": parsed,
-                            "timestamp": datetime.now().isoformat()
-                        }
-                        save_to_json(log_data, f'planner_agent_{datetime.now().isoformat()}.json', subdirectory='PlannerAgent')
-                        print(f"\n📋 PLANNER AGENT COMPLETED: {result}")
-                        return json.dumps(final_plan) if isinstance(final_plan, dict) else final_plan
-                    else:
-                        result = "Cannot finalize: missing plan"
-                        tool_results.append(result)
             else:
-                print(f"Unknown action type: {action_type}")
                 break
-            iteration += 1
-            memory_str = json.dumps({
-                "user_profile": memory.get("user_profile", {}),
-                "flags_and_assessments": memory.get("flags_and_assessments", {}),
-                "plans": memory.get("plans", {})
-            }, indent=2)
-        result = f"Planning stopped after {max_iterations} iterations with {len(tool_results)} actions"
-        print(f"📋 PLANNER AGENT Stopped (MAX ITERATIONS)")
-        return result

+"""Agent implementations.
+Phase 1: every agent's per-turn output is now a Pydantic model from
+``schemas``. The prompts are split so the static system rules sit in a
+module-level constant (eligible for Gemini's implicit prompt cache) and only
+the dynamic state changes per call.
+The action-dispatch loops are still *inside* the agent classes — Phase 2 will
+break them into LangGraph subgraphs with parallel tool nodes and the
+ValidationAgent critic loop.
+"""
+from __future__ import annotations
 import json
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+from config import get_settings
+from logging_setup import get_logger
+from schemas import (
+    CoachDecision,
+    MedicalAssessmentDecision,
+    MedicalAssessmentResult,
+    PlannerDecision,
+)
+from tools import ComputationTool, QuantitiesFinder, WebSearchTool
+from utils import save_to_json, should_debug, update_memory_partition
+_coach_logger = get_logger("agents.coach")
+_medical_logger = get_logger("agents.medical")
+_planner_logger = get_logger("agents.planner")
+# ---------------------------------------------------------------------------
+# Coach
+# ---------------------------------------------------------------------------
+_COACH_SYSTEM_PROMPT = """\
+You are the Coach Agent (central orchestrator) of a nutrition Multi-Agent System.
+Primary responsibilities:
+- Translate user intent into a concrete workflow of response_steps.
+- Enforce system rules (MedicalAssessment must complete before Planner runs).
+- Decide and perform exactly one action per turn: call_agent, call_tool,
+  ask_user, write_memory, or compose_response.
+Inputs each turn:
+- observation (string built from user query + memory + history)
+- memory partitions: user_profile, medical_history, flags_and_assessments, plans
+- response_steps (list, may be empty on the first turn)
+Behaviour rules (mandatory):
+1. If response_steps is empty, generate ordered steps (max 7). Each step
+   must include id, actor, prerequisites, and status "pending".
+   Typical personal-workflow (when the user asks for a personalised plan):
+     1) Validate required user data (height, weight, age, sex, activity_level,
+        allergies, goal). If missing -> ask_user.
+     2) Update memory if the user provided new data [action: write_memory].
+     3) Call MedicalAssessmentAgent with a task to assess the user.
+     4) Wait for assessment to be completed and stored in memory.
+     5) Call PlannerAgent with the relevant task.
+     6) Call ValidationAgent to grade the plan.
+     7) If validation verdict == "revise", re-call PlannerAgent with the
+        validation issues prepended to the task; otherwise compose_response.
+2. When calling any agent, set the called step status to "in_progress" and
+   include prerequisites satisfied by your observation.
+3. Only call PlannerAgent if memory.flags_and_assessments contains an
+   "assessment_status" of "assessment_complete". If missing, call
+   MedicalAssessmentAgent first.
+4. After every PlannerAgent run, you MUST call ValidationAgent before
+   composing the response. Inspect memory.flags_and_assessments.last_validation:
+     * verdict == "pass": proceed to compose_response.
+     * verdict == "revise": call PlannerAgent again with task =
+       "Revise the plan to address: " + each issue.description joined by "; ".
+       Cap revisions at 2; on the third attempt, compose_response with the
+       best plan available and append the unresolved issues as warnings.
+     * verdict == "reject": compose_response with a clear refusal explaining
+       the violation; do NOT show the plan. Append a HITL escalation chip
+       (text marker the UI will render).
+5. When new personal data appears in user input, add steps to: propose memory
+   update (write_memory), call MedicalAssessmentAgent if needed, re-plan if
+   needed.
+6. For any write_memory action, provide the full partition contents in
+   params.data (not diffs). The Coach is responsible for merging and storing.
+Output JSON shape (enforced by schema):
+{
+  "observation": "...",
+  "thought": "...",
+  "response_steps": [ ... ],
+  "action": "call_agent | call_tool | ask_user | write_memory | compose_response",
+  "params": { ... }
+}
+Required params per action:
+- call_agent:       {"agent_name": "...", "task": "..."}
+- call_tool:        {"tool_name": "...", "task": "..."}
+- ask_user:         {"prompt": "..."}
+- write_memory:     {"partition": "...", "data": {...}}
+- compose_response: {"text": "...markdown..."}
+Composition rules:
+- When composing the response, extract relevant information from memory state
+  (calorie target, plan details, dietary restrictions, citations) in markdown.
+- Always include a "trace" line summarising which agents/tools contributed.
+- For high-risk profiles (requires_professional_consultation == true), append
+  a bold warning advising professional consultation before implementation.
+"""
 class CoachAgent:
+    """Central orchestrator: asks the LLM for one typed decision per turn.
+    ``handle_task`` renders the current workflow state into a prompt, requests
+    a ``CoachDecision`` and returns a new state dict that records the chosen
+    action. This class only records the action; it does not execute it.
+    """
+    # Assistant messages longer than this many characters are abbreviated
+    # before being replayed into the prompt.
+    HISTORY_PREVIEW_CHARS = 200
     def __init__(self, llm_instance):
+        """Keep a reference to the LLM client (only ``call_typed`` is used here)."""
         self.llm = llm_instance
     def handle_task(self, state: Dict[str, Any]) -> Dict[str, Any]:
+        """Run one Coach turn and return the updated workflow state.
+        Args:
+            state: Workflow state. Reads ``memory``, ``conversation_history``,
+                ``user_question`` and ``num_turns`` (required) plus the optional
+                ``response_steps`` and ``agent_result``.
+        Returns:
+            A copy of ``state`` with ``current_action`` set, ``response_steps``
+            refreshed (the previous steps are kept when the decision carries
+            none), ``num_turns`` incremented and ``agent_result`` cleared. If
+            the LLM output cannot be parsed, the state from
+            ``_fallback_state`` is returned instead.
+        """
+        settings = get_settings()
+        debug_coach = should_debug("agents", "CoachAgent")
+        memory_str = json.dumps(state["memory"], indent=2, default=str)
         response_steps = state.get("response_steps", [])
+        response_steps_str = (
+            json.dumps(response_steps, indent=2, default=str) if response_steps else "None"
+        )
+        history_str = "\n".join(
+            f"{m['role']}: {m['content']}"
+            for m in self._truncate_history(state["conversation_history"])
+        )
+        observation = (
+            f"User query: {state['user_question']}\n"
+            f"Memory State: {memory_str}\n"
+            f"Current Response Steps: {response_steps_str}\n"
+            f"Previous Tool Result: {state.get('agent_result', 'None')}\n"
+            f"Conversation history: {history_str}"
+        )
+        prompt = f"{_COACH_SYSTEM_PROMPT}\n\n--- Current State ---\n{observation}"
+        if debug_coach:
+            _coach_logger.debug("--- Coach Agent Turn %d ---", state["num_turns"] + 1)
+            if settings.debug_level == "full":
+                _coach_logger.debug("Raw LLM input:\n%s", prompt)
+        decision = self.llm.call_typed(prompt, CoachDecision)
+        if decision is None:
+            return self._fallback_state(state, "Coach decision could not be parsed.")
+        if debug_coach:
+            _coach_logger.debug("Coach decision:\n%s", decision.model_dump_json(indent=2))
+        if not settings.debug_mode:
+            self._log_user_mode_action(decision)
+        current_action = {"action": decision.action, "params": decision.params}
+        # An empty step list in the decision means "keep the steps we already had".
+        new_steps = [s.model_dump() for s in decision.response_steps] or response_steps
+        # One timestamp for both the payload and the file name so they always agree.
+        logged_at = datetime.now().isoformat()
+        save_to_json(
+            {
+                "prompt": prompt,
+                "decision": decision.model_dump(),
+                "timestamp": logged_at,
+            },
+            f"coach_agent_{logged_at}.json",
+            subdirectory="CoachAgent",
+        )
         return {
             **state,
             "current_action": current_action,
+            "response_steps": new_steps,
             "num_turns": state["num_turns"] + 1,
+            "agent_result": None,
         }
+    @classmethod
+    def _truncate_history(cls, history: List[Dict[str, str]]) -> List[Dict[str, str]]:
+        """Return ``history`` with long assistant messages cut to a short preview.
+        Non-assistant messages and messages whose content is not a string are
+        passed through unchanged.
+        """
+        limit = cls.HISTORY_PREVIEW_CHARS
+        truncated: List[Dict[str, str]] = []
+        for msg in history:
+            content = msg["content"]
+            if msg["role"] == "assistant" and isinstance(content, str) and len(content) > limit:
+                truncated.append(
+                    {"role": "assistant", "content": content[:limit] + "... (full response in memory)"}
+                )
+            else:
+                truncated.append(msg)
+        return truncated
+    @staticmethod
+    def _log_user_mode_action(decision: CoachDecision) -> None:
+        """Emit the one-line, user-facing status message for ``decision``."""
+        params = decision.params or {}
+        action = decision.action
+        if action == "call_agent":
+            msg = f"Calling {params.get('agent_name')} with task '{params.get('task')}'"
+        elif action == "call_tool":
+            msg = f"Using {params.get('tool_name')} with task '{params.get('task')}'"
+        elif action == "ask_user":
+            msg = f"Asking user: {params.get('prompt')}"
+        elif action == "write_memory":
+            msg = f"Writing to memory partition '{params.get('partition')}'"
+        elif action == "compose_response":
+            msg = "Composing final response"
+        else:
+            msg = f"Unknown action: {action}"
+        _coach_logger.info("\n🏋️‍♂️Coach Agent: %s", msg)
+    @staticmethod
+    def _fallback_state(state: Dict[str, Any], message: str) -> Dict[str, Any]:
+        """Log ``message`` and return a state whose action is an apology response.
+        The action is tagged with ``_parse_error`` so it can be told apart from a
+        genuine ``compose_response`` decision.
+        """
+        _coach_logger.error(message)
+        return {
+            **state,
+            "current_action": {
+                "action": "compose_response",
+                "params": {"text": f"Sorry — I hit an internal error while planning. ({message})"},
+                "_parse_error": True,
+            },
+            "num_turns": state["num_turns"] + 1,
+            "agent_result": None,
+        }
+# ---------------------------------------------------------------------------
+# Medical Assessment
+# ---------------------------------------------------------------------------
+_MEDICAL_SYSTEM_PROMPT = """\
+You are the Medical Assessment Agent. Produce an evidence-based assessment and
+the clinical flags / calculations the Planner and Validation agents need.
+Available tools: ComputationTool, WebSearchTool.
+Mandatory behaviour (do not skip):
+1. Critical data check: confirm presence of age, sex, height, weight,
+   activity_level, allergies, medications. If any critical field is missing,
+   set action_type="ask_user" and list the missing names in ``fields``.
+2. Use ComputationTool for ALL numeric calculations (BMI, BMR, TDEE, calorie
+   targets, macro targets). Pass numeric inputs in tool_task.
+3. Use WebSearchTool to fetch authoritative guidelines (WHO, USDA, ADA,
+   EFSA). Capture source URLs with timestamps.
+4. Produce a compact assessment_plan (3-6 steps). Default sequence:
+   a) ComputationTool: BMI, BMR, TDEE, daily_target_calories (single int).
+   b) ComputationTool: macro_targets (protein_g, fat_g, carbohydrates_g - all
+      single ints, no ranges) optimised for the user's goal.
+   c) WebSearchTool: dietary guidelines for the user's conditions.
+   d-f) Optional follow-ups for specific risks.
+5. When complete, set action_type="assessment_complete" and populate
+   ``result`` (a MedicalAssessmentResult) with:
+     - assessment_summary
+     - calculations: { BMI, BMR, TDEE, daily_target_calories,
+                       macro_targets: { protein_g, fat_g, carbohydrates_g } }
+     - flags_to_set (e.g. ["high_ldl", "diabetes_risk"])
+     - recommendations (clinical dietary constraints / urgent issues)
+     - requires_professional_consultation (True for medically sensitive cases)
+     - evidence_sources (list of URLs)
+     - trace (one paragraph summarising agent/tool usage)
+6. If any tool call fails, fall back to best-known values, set
+   data_confidence below 1.0, and mark requires_tool_retry=true.
+Output JSON shape (enforced by schema):
+{
+  "medical_reasoning": "...",
+  "observation": "...",
+  "risk_assessment_priorities": [...],
+  "assessment_plan": [...],
+  "action_type": "call_tool" | "ask_user" | "assessment_complete",
+  "tool_name": "ComputationTool" | "WebSearchTool" | null,
+  "tool_task": "..." | null,
+  "fields": [...],            // only when ask_user
+  "result": { ... }           // only when assessment_complete
+}
+"""
 class MedicalAssessmentAgent:
+    """Iteratively builds a medical assessment by letting the LLM call tools.
+    Each loop iteration asks the LLM for a ``MedicalAssessmentDecision`` and then
+    runs a tool, returns a question for the user, or finalises the assessment.
+    """
+    # Upper bound on LLM round-trips per ``handle_task`` call.
+    MAX_ITERATIONS = 15
+    def __init__(
+        self,
+        llm_instance,
+        computation_tool: ComputationTool,
+        web_search_tool: WebSearchTool,
+    ):
+        """Store the LLM client and the two tools this agent may dispatch to."""
         self.llm = llm_instance
         self.computation_tool = computation_tool
         self.web_search_tool = web_search_tool
     def handle_task(self, task: str, memory: Dict[str, Any]) -> str:
+        """Run the assessment loop for ``task`` and return a status/summary string.
+        Args:
+            task: Natural-language task description inserted into the prompt.
+            memory: Shared memory dict. ``user_profile`` and ``medical_history``
+                are read; ``flags_and_assessments`` is written on completion.
+        Returns:
+            The assessment summary on success, a prompt listing missing fields
+            when the LLM asks for user input, or a failure/stop message.
+        """
+        _medical_logger.info("\n👨🏻‍⚕️ MEDICAL ASSESSMENT AGENT STARTED")
+        settings = get_settings()
+        debug_medical = should_debug("agents", "MedicalAssessmentAgent")
         relevant_memory = {
             "user_profile": memory.get("user_profile", {}),
             "medical_history": memory.get("medical_history", {}),
         }
+        memory_str = json.dumps(relevant_memory, indent=2, default=str)
+        tool_results: List[str] = []
+        assessment_plan: List[dict] = []
+        for iteration in range(self.MAX_ITERATIONS):
+            tool_results_str = (
+                "\n".join(f"Tool Result {i+1}: {r}" for i, r in enumerate(tool_results)) or "None"
+            )
+            assessment_plan_str = (
+                json.dumps(assessment_plan, indent=2, default=str) if assessment_plan else "None"
+            )
+            prompt = (
+                f"{_MEDICAL_SYSTEM_PROMPT}\n\n--- Task & State ---\n"
+                f"Task: {task}\n"
+                f"Current Memory: {memory_str}\n"
+                f"Current Assessment Plan: {assessment_plan_str}\n"
+                f"Previous Tool Results: {tool_results_str}\n"
+            )
+            if debug_medical:
+                _medical_logger.debug("--- Medical Assessment Iteration %d ---", iteration + 1)
+                if settings.debug_level == "full":
+                    _medical_logger.debug("Raw LLM input:\n%s", prompt)
+            decision = self.llm.call_typed(prompt, MedicalAssessmentDecision)
+            if decision is None:
+                _medical_logger.error("Medical decision parse failed at iteration %d", iteration + 1)
+                return "Medical assessment failed: could not parse LLM decision."
+            if debug_medical:
+                _medical_logger.debug("Medical decision:\n%s", decision.model_dump_json(indent=2))
+            # Keep the previous plan when the LLM does not echo one back.
+            if decision.assessment_plan:
+                assessment_plan = [s.model_dump() for s in decision.assessment_plan]
+            if not settings.debug_mode:
+                self._log_user_mode_action(decision)
+            if decision.action_type == "call_tool":
+                tool_results.append(f"{decision.tool_name}: {self._dispatch_tool(decision)}")
+            elif decision.action_type == "ask_user":
+                fields = decision.fields or []
+                msg = (
+                    f"Missing critical fields: {', '.join(fields)}. "
+                    "Please provide the following information to continue the assessment."
+                )
+                _medical_logger.info("👨🏻‍⚕️ MEDICAL ASSESSMENT AGENT: User query needed - %s", msg)
+                return msg
+            elif decision.action_type == "assessment_complete":
+                return self._finalize(task, decision, memory, relevant_memory, tool_results)
             else:
+                # Report the real cause instead of falling through to the
+                # "MAX ITERATIONS" message below, which would be misleading.
+                _medical_logger.error("Unknown action_type: %s", decision.action_type)
+                return f"Medical assessment failed: unknown action_type {decision.action_type!r}."
+        _medical_logger.warning("👨🏻‍⚕️ MEDICAL ASSESSMENT AGENT Stopped (MAX ITERATIONS)")
+        return f"Medical assessment stopped after {self.MAX_ITERATIONS} iterations"
+    # ------------------------------------------------------------------
+    def _dispatch_tool(self, decision: MedicalAssessmentDecision) -> str:
+        """Run the tool named in ``decision`` and return its result.
+        Returns a descriptive message instead of calling anything when the tool
+        name or task is missing or the tool name is not recognised.
+        """
+        tool_name = decision.tool_name
+        tool_task = decision.tool_task
+        if not tool_name:
+            return "Missing 'tool_name' for call_tool action"
+        if not tool_task:
+            return f"Missing 'tool_task' for {tool_name}"
+        if tool_name == "ComputationTool":
+            return self.computation_tool.handle_task(tool_task)
+        if tool_name == "WebSearchTool":
+            return self.web_search_tool.handle_task(tool_task)
+        return f"Unknown tool: {tool_name}"
+    @staticmethod
+    def _log_user_mode_action(decision: MedicalAssessmentDecision) -> None:
+        """Emit the one-line, user-facing status message for ``decision``."""
+        if decision.action_type == "call_tool":
+            _medical_logger.info(
+                "👨🏻‍⚕️ Medical Assessment Agent: Using %s for '%s'",
+                decision.tool_name,
+                decision.tool_task,
+            )
+        elif decision.action_type == "ask_user":
+            _medical_logger.info(
+                "👨🏻‍⚕️ Medical Assessment Agent: Asking user for missing fields: %s",
+                ", ".join(decision.fields or []),
+            )
+        elif decision.action_type == "assessment_complete":
+            _medical_logger.info("👨🏻‍⚕️ Medical Assessment Agent: Completing assessment")
+    def _finalize(
+        self,
+        task: str,
+        decision: MedicalAssessmentDecision,
+        memory: Dict[str, Any],
+        relevant_memory: Dict[str, Any],
+        tool_results: List[str],
+    ) -> str:
+        """Store a completed assessment in memory, dump it to JSON, return its summary.
+        Nothing is stored when the decision has no ``result`` payload or when
+        the result is flagged ``requires_tool_retry``; a message describing the
+        problem is returned instead.
+        """
+        result: Optional[MedicalAssessmentResult] = decision.result
+        if result is None:
+            _medical_logger.error("assessment_complete decision missing result payload")
+            return "Medical assessment failed: completion payload missing."
+        if result.requires_tool_retry:
+            msg = "Assessment requires tool retry due to tool failures."
+            _medical_logger.warning("👨🏻‍⚕️ MEDICAL ASSESSMENT AGENT: Tool retry needed - %s", msg)
+            return msg
+        # One timestamp shared by memory, the JSON payload and the file name.
+        completed_at = datetime.now().isoformat()
+        update_memory_partition(
+            memory,
+            "flags_and_assessments",
+            {
+                "assessment_summary": result.assessment_summary,
+                "flags": result.flags_to_set,
+                "recommendations": result.recommendations,
+                "requires_professional_consultation": result.requires_professional_consultation,
+                "calculations": result.calculations.model_dump(),
+                "evidence_sources": result.evidence_sources,
+                "data_confidence": result.data_confidence,
+                "trace": result.trace,
+                "assessment_status": "assessment_complete",
+                "assessment_timestamp": completed_at,
+            },
+        )
+        save_to_json(
+            {
+                "task": task,
+                "memory_input": relevant_memory,
+                "tool_results": tool_results,
+                "result": result.model_dump(),
+                "timestamp": completed_at,
+            },
+            f"medical_assessment_{completed_at}.json",
+            subdirectory="MedicalAssessment",
+        )
+        _medical_logger.info("👨🏻‍⚕️ MEDICAL ASSESSMENT AGENT COMPLETED: %s", result.assessment_summary)
+        return result.assessment_summary
+# ---------------------------------------------------------------------------
+# Planner
+# ---------------------------------------------------------------------------
+_PLANNER_SYSTEM_PROMPT = """\
+You are the Planner Agent. Create personalised meal plans constrained by the
+medical assessment.
+Available tools: WebSearchTool, QuantitiesFinder, ComputationTool.
+Mandatory behaviour & rules:
+1. Precondition: do NOT plan unless flags_and_assessments has an
+   "assessment_status" of "assessment_complete". If missing, return
+   action_type="provide_plan" with final_plan={"error": "..."} explaining the
+   blocker and suggesting MedicalAssessmentAgent.
+2. Batch tool calls: fetch nutrition facts for ALL foods in one WebSearchTool
+   call rather than one call per item.
+3. For each food in the draft, look up per-100g nutrition (calories, protein,
+   fat, carbohydrates). If WebSearchTool fails for >2 items, fall back to
+   internal knowledge.
+4. Tolerances: calories +/- 3%, each macro +/- 5% of target.
+5. Exclude allergens and disliked foods. Propose alternatives if necessary
+   for balance.
+6. Multi-day requests: emit a 1-2 day plan and instruct the user to rotate.
+7. QuantitiesFinder format: tool_task MUST be a JSON STRING containing
+   {"foods": [...], "targets": {...}}. Each food needs name, calories,
+   protein, fat, carbohydrates (per 100g) and estimated_g (your best guess).
+Planning Steps Handling:
+- If Current Planning Steps is empty/None, adopt this fixed 5-step plan:
+  1. Draft a realistic plan; assign a realistic estimated_g per food.
+  2. Batch-gather nutrition facts via WebSearchTool.
+  3. Call QuantitiesFinder with foods + targets to compute precise grams.
+  4. Update the draft with the solver's quantities.
+  5. Provide the final plan via action_type="provide_plan".
+- If steps are provided, you may iterate within a step until targets are met.
+Output JSON shape (enforced by schema):
+{
+  "observation": "...",
+  "thought": "...",
+  "planning_steps": [...],
+  "action_type": "call_tool" | "draft_plan" | "provide_plan",
+  "tool_name": "WebSearchTool" | "QuantitiesFinder" | "ComputationTool" | null,
+  "tool_task": "..." | null,
+  "drafted_plan": { ... } | null,
+  "final_plan": { ... } | null
+}
+Notes:
+- Keep plans realistic and culturally appropriate (regional foods if provided).
+- Include a "trace" line in the final plan summarising agents/tools used.
+- Always echo the full updated planning_steps so they persist across turns.
+"""
 class PlannerAgent:
+    """Iteratively drafts and finalises a meal plan by letting the LLM call tools.
+    Each loop iteration asks the LLM for a ``PlannerDecision`` and then runs a
+    tool, stores a draft in ``memory["plans"]``, or finalises the plan.
+    """
+    # Upper bound on LLM round-trips per ``handle_task`` call.
+    MAX_ITERATIONS = 15
+    def __init__(
+        self,
+        llm_instance,
+        computation_tool: ComputationTool,
+        web_search_tool: WebSearchTool,
+        quantities_finder: QuantitiesFinder,
+    ):
+        """Store the LLM client and the three tools this agent may dispatch to."""
         self.llm = llm_instance
         self.computation_tool = computation_tool
         self.web_search_tool = web_search_tool
         self.quantities_finder = quantities_finder
     def handle_task(self, task: str, memory: Dict[str, Any]) -> str:
+        """Run the planning loop for ``task`` and return the plan or a status string.
+        Args:
+            task: Natural-language task description inserted into the prompt.
+            memory: Shared memory dict. ``user_profile``, ``flags_and_assessments``
+                and ``plans`` are read; ``plans`` is mutated in place.
+        Returns:
+            The final plan serialised as JSON (or ``str(plan)`` when it is not
+            a dict), the JSON-serialised error dict when the LLM reports one,
+            or a failure/stop message.
+        """
+        _planner_logger.info("\n📋 PLANNER AGENT STARTED")
+        settings = get_settings()
+        debug_planner = should_debug("agents", "PlannerAgent")
         relevant_memory = {
             "user_profile": memory.get("user_profile", {}),
             "flags_and_assessments": memory.get("flags_and_assessments", {}),
         }
+        tool_results: List[str] = []
+        planning_steps: List[dict] = []
+        for iteration in range(self.MAX_ITERATIONS):
+            # Rebuilt every iteration because draft_plan mutates memory["plans"].
+            memory_str = json.dumps(
+                {
+                    "user_profile": memory.get("user_profile", {}),
+                    "flags_and_assessments": memory.get("flags_and_assessments", {}),
+                    "plans": memory.get("plans", {}),
+                },
+                indent=2,
+                default=str,
+            )
+            tool_results_str = (
+                "\n".join(f"Tool Result {i+1}: {r}" for i, r in enumerate(tool_results)) or "None"
+            )
+            planning_steps_str = (
+                json.dumps(planning_steps, indent=2, default=str) if planning_steps else "None"
+            )
+            prompt = (
+                f"{_PLANNER_SYSTEM_PROMPT}\n\n--- Task & State ---\n"
+                f"Task: {task}\n"
+                f"Current Memory: {memory_str}\n"
+                f"Current Planning Steps: {planning_steps_str}\n"
+                f"Previous Tool Results: {tool_results_str}\n"
+            )
+            if debug_planner:
+                _planner_logger.debug("--- Planner Iteration %d ---", iteration + 1)
+                if settings.debug_level == "full":
+                    _planner_logger.debug("Raw LLM input:\n%s", prompt)
+            decision = self.llm.call_typed(prompt, PlannerDecision)
+            if decision is None:
+                _planner_logger.error("Planner decision parse failed at iteration %d", iteration + 1)
+                return "Planner failed: could not parse LLM decision."
+            if debug_planner:
+                _planner_logger.debug("Planner decision:\n%s", decision.model_dump_json(indent=2))
+            # Keep the previous steps when the LLM does not echo them back.
+            if decision.planning_steps:
+                planning_steps = [s.model_dump() for s in decision.planning_steps]
+            if not settings.debug_mode:
+                self._log_user_mode_action(decision)
+            if decision.action_type == "call_tool":
+                tool_results.append(f"{decision.tool_name}: {self._dispatch_tool(decision)}")
+            elif decision.action_type == "draft_plan":
+                if decision.drafted_plan:
+                    memory.setdefault("plans", {})["drafted_plan"] = decision.drafted_plan
+                    tool_results.append("Plan drafted and stored in memory")
                 else:
+                    tool_results.append("Drafted plan not provided")
+            elif decision.action_type == "provide_plan":
+                # Fall back to the stored draft when no final plan is supplied.
+                final = decision.final_plan or memory.get("plans", {}).get("drafted_plan")
+                # Error escape hatch (e.g. precondition not met)
+                if isinstance(final, dict) and "error" in final:
+                    _planner_logger.error("📋 PLANNER AGENT ERROR: %s", final)
+                    return json.dumps(final, default=str)
+                if not final:
+                    tool_results.append("Cannot finalize: missing plan")
+                    continue  # let the loop try another iteration
+                # One timestamp shared by memory, the JSON payload and the file name.
+                finished_at = datetime.now().isoformat()
+                plans = memory.setdefault("plans", {})
+                plans["current_plan"] = final
+                plans["plan_timestamp"] = finished_at
+                plans.pop("drafted_plan", None)
+                save_to_json(
+                    {
+                        "task": task,
+                        "memory_input": relevant_memory,
+                        "tool_results": tool_results,
+                        "final_response": decision.model_dump(),
+                        "timestamp": finished_at,
+                    },
+                    f"planner_agent_{finished_at}.json",
+                    subdirectory="PlannerAgent",
+                )
+                _planner_logger.info("\n📋 PLANNER AGENT COMPLETED")
+                # default=str matches the other dumps in this file and keeps a
+                # non-JSON value inside the plan from raising here.
+                return json.dumps(final, default=str) if isinstance(final, dict) else str(final)
             else:
+                # Report the real cause instead of falling through to the
+                # "MAX ITERATIONS" message below, which would be misleading.
+                _planner_logger.error("Unknown action_type: %s", decision.action_type)
+                return f"Planner failed: unknown action_type {decision.action_type!r}."
+        _planner_logger.warning("📋 PLANNER AGENT Stopped (MAX ITERATIONS)")
+        return (
+            f"Planning stopped after {self.MAX_ITERATIONS} iterations "
+            f"with {len(tool_results)} actions"
+        )
+    # ------------------------------------------------------------------
+    def _dispatch_tool(self, decision: PlannerDecision) -> str:
+        """Run the tool named in ``decision`` and return its result.
+        Returns a descriptive message instead of calling anything when the tool
+        name or task is missing or the tool name is not recognised.
+        """
+        tool_name = decision.tool_name
+        tool_task = decision.tool_task
+        if not tool_name or not tool_task:
+            return "Missing tool_name or tool_task"
+        if tool_name == "ComputationTool":
+            return self.computation_tool.handle_task(tool_task)
+        if tool_name == "WebSearchTool":
+            return self.web_search_tool.handle_task(tool_task)
+        if tool_name == "QuantitiesFinder":
+            return self.quantities_finder.handle_task(tool_task)
+        return f"Unknown tool: {tool_name}"
+    @staticmethod
+    def _log_user_mode_action(decision: PlannerDecision) -> None:
+        """Emit the one-line, user-facing status message for ``decision``."""
+        if decision.action_type == "call_tool":
+            _planner_logger.info(
+                "📋 Planner Agent: Using %s for '%s'",
+                decision.tool_name,
+                decision.tool_task,
+            )
+        elif decision.action_type == "draft_plan":
+            _planner_logger.info("📋 Planner Agent: Drafting plan")
+        elif decision.action_type == "provide_plan":
+            _planner_logger.info("📋 Planner Agent: Finalizing plan")

config.py CHANGED Viewed

@@ -1,6 +1,113 @@
-# Nutrition MAS Configuration
-LOG_DIR = None
-PERSISTENCE_DIR = None
-DEBUG_MODE = False
-DEBUG_LEVEL = 'full'  # 'full' or 'output'
-DEBUG_SCOPES = {'agents': ['all'], 'tools': ['all']}

+"""Nutrition MAS configuration.
+This module exposes a Pydantic-Settings ``Settings`` singleton plus a small
+backward-compatibility shim so existing code can still read ``config.DEBUG_MODE``,
+``config.LOG_DIR``, etc. New code should import :func:`get_settings` directly::
+    from config import get_settings
+    settings = get_settings()
+    if settings.debug_mode:
+        ...
+Mutation must go through :func:`set_settings` (the legacy ``config.X = y`` write
+pattern would otherwise silently shadow the Pydantic value).
+"""
+from __future__ import annotations
+import json
+from typing import Annotated, Any, Dict, List, Optional
+from pydantic import Field, field_validator
+from pydantic_settings import BaseSettings, NoDecode, SettingsConfigDict
+class Settings(BaseSettings):
+    """Process-wide configuration.
+    Values are loaded from (in order of precedence): direct ``set_settings``
+    calls, environment variables prefixed with ``NUTRITION_MAS_``, the ``.env``
+    file in the project root, and the defaults declared here.
+    """
+    model_config = SettingsConfigDict(
+        env_prefix="NUTRITION_MAS_",
+        env_file=".env",
+        env_file_encoding="utf-8",
+        extra="ignore",
+        case_sensitive=False,
+        validate_assignment=True,
+    )
+    # --- Logging / persistence -------------------------------------------------
+    log_dir: Optional[str] = None
+    persistence_dir: Optional[str] = None
+    # --- Debug switches --------------------------------------------------------
+    debug_mode: bool = False
+    debug_level: str = "full"  # 'full' or 'output'
+    debug_scopes: Dict[str, List[str]] = Field(
+        default_factory=lambda: {"agents": ["all"], "tools": ["all"]}
+    )
+    # --- LLM / rate limiting ---------------------------------------------------
+    enable_rate_limiting: bool = True
+    # NoDecode: pydantic-settings would otherwise JSON-decode this list field and
+    # reject the comma-separated form (``key_one,key_two``) shown in .env.example.
+    # Parsing is done by ``decode_gemini_api_keys`` below instead.
+    gemini_api_keys: Annotated[List[str], NoDecode] = Field(default_factory=list)
+    @field_validator("gemini_api_keys", mode="before")
+    @classmethod
+    def decode_gemini_api_keys(cls, value: Any) -> Any:
+        """Turn a raw string into a list of API keys.
+        Accepts a comma-separated string (``"a,b"``) or a JSON array string
+        (``'["a", "b"]'``). Blank entries are dropped. Non-string values are
+        passed through for normal validation.
+        """
+        if not isinstance(value, str):
+            return value
+        text = value.strip()
+        if text.startswith("["):
+            return json.loads(text)
+        return [key.strip() for key in text.split(",") if key.strip()]
+# Singleton holder. Instantiated lazily so tests can set env vars before first read.
+_settings: Optional[Settings] = None
+def get_settings() -> Settings:
+    """Return the shared ``Settings`` object, building it the first time it is requested."""
+    global _settings
+    cached = _settings
+    if cached is None:
+        cached = _settings = Settings()
+    return cached
+def reset_settings() -> None:
+    """Forget the cached ``Settings`` so the next ``get_settings`` call builds a new one.
+    Meant for tests that change the environment between cases.
+    """
+    global _settings
+    _settings = None
+def set_settings(**updates: Any) -> Settings:
+    """Update fields on the singleton ``Settings``.
+    Accepts both legacy upper-case names (``DEBUG_MODE``) and Pydantic field
+    names (``debug_mode``). Every name is resolved and checked against the
+    declared fields before any value is assigned, so an unknown name leaves
+    the settings untouched.
+    Returns:
+        The updated settings instance.
+    Raises:
+        AttributeError: If a key does not correspond to a declared field.
+    """
+    s = get_settings()
+    # Check declared fields rather than ``hasattr``: the latter is also true for
+    # methods such as ``model_dump``, which are not assignable settings.
+    known_fields = type(s).model_fields
+    resolved: Dict[str, Any] = {}
+    for raw_key, value in updates.items():
+        attr = _LEGACY_ATTR_MAP.get(raw_key, raw_key.lower())
+        if attr not in known_fields:
+            raise AttributeError(f"Settings has no attribute {attr!r}")
+        resolved[attr] = value
+    for attr, value in resolved.items():
+        setattr(s, attr, value)
+    return s
+# --- Legacy attribute proxy ----------------------------------------------------
+# Existing code does ``import config`` then reads ``config.DEBUG_MODE`` etc.
+# PEP 562 ``__getattr__`` lets us forward those reads to the singleton.
+_LEGACY_ATTR_MAP: Dict[str, str] = {
+    "DEBUG_MODE": "debug_mode",
+    "DEBUG_LEVEL": "debug_level",
+    "DEBUG_SCOPES": "debug_scopes",
+    "LOG_DIR": "log_dir",
+    "PERSISTENCE_DIR": "persistence_dir",
+    "ENABLE_RATE_LIMITING": "enable_rate_limiting",
+}
+def __getattr__(name: str) -> Any:  # noqa: D401 — module-level dunder
+    """PEP 562 hook: serve legacy CONST-style names from the Settings singleton."""
+    field_name = _LEGACY_ATTR_MAP.get(name)
+    if field_name is None:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+    return getattr(get_settings(), field_name)
+__all__ = ["Settings", "get_settings", "reset_settings", "set_settings"]

logging_setup.py ADDED Viewed

	@@ -0,0 +1,77 @@

+"""Centralised logging for Nutrition MAS.
+Agents and tools used to ``print`` directly to stdout. That worked in a notebook
+but coupled the agentic system to the I/O layer. This module provides a single
+``get_logger`` entrypoint so:
+* user-mode emoji status lines flow through ``logger.info`` (visible by default),
+* debug-mode raw LLM dumps flow through ``logger.debug`` (hidden unless
+  ``settings.debug_mode`` is True),
+* later phases can attach extra handlers (SSE event stream for the API, JSON
+  file handler for trace persistence, etc.) without touching agent code.
+Idempotent: calling :func:`configure_logging` more than once is a no-op unless
+``force=True``.
+"""
+from __future__ import annotations
+import logging
+import sys
+from config import get_settings
+_BASE = "nutrition_mas"
+_CONFIGURED = False
+def configure_logging(*, force: bool = False) -> None:
+    """Install the stdout handler on the ``nutrition_mas`` logger.
+    The level is DEBUG when ``settings.debug_mode`` is true and INFO otherwise.
+    After the first successful call this is a no-op unless ``force`` is true,
+    in which case the logger's handler list is replaced again.
+    """
+    global _CONFIGURED
+    if not force and _CONFIGURED:
+        return
+    level = logging.DEBUG if get_settings().debug_mode else logging.INFO
+    stdout_handler = logging.StreamHandler(sys.stdout)
+    stdout_handler.setFormatter(logging.Formatter("%(message)s"))
+    stdout_handler.setLevel(level)
+    base_logger = logging.getLogger(_BASE)
+    base_logger.setLevel(level)
+    # Do not bubble records up to the real root logger.
+    base_logger.propagate = False
+    base_logger.handlers = [stdout_handler]
+    _CONFIGURED = True
+def get_logger(name: str) -> logging.Logger:
+    """Return the logger ``nutrition_mas.<name>``, configuring logging on first use.
+    Conventional names: ``agents.coach``, ``agents.medical``, ``tools.computation``,
+    ``utils.api_pool``.
+    """
+    if not _CONFIGURED:
+        configure_logging()
+    qualified_name = f"{_BASE}.{name}"
+    return logging.getLogger(qualified_name)
+def refresh_level() -> None:
+    """Apply the current ``settings.debug_mode`` to the logger and all its handlers.
+    Call this after toggling debug mode at runtime.
+    """
+    level = logging.DEBUG if get_settings().debug_mode else logging.INFO
+    base_logger = logging.getLogger(_BASE)
+    base_logger.setLevel(level)
+    for attached in base_logger.handlers:
+        attached.setLevel(level)
+__all__ = ["configure_logging", "get_logger", "refresh_level"]

nutritionmas.py CHANGED Viewed

@@ -1,45 +1,52 @@
 import os
-import config
-from state import NutritionState, initialize_empty_memory
-from utils import create_llm, save_to_json, APIPoolManager
-from tools import ComputationTool, WebSearchTool, QuantitiesFinder
 from agents import CoachAgent, MedicalAssessmentAgent, PlannerAgent
 from workflow import setup_workflow as setup_workflow_workflow
-from datetime import datetime
-import random
-import json
-from typing import Optional, Dict, Any, List
-from IPython.display import display, Markdown
-def debug(level: str = 'full', scopes: Optional[Dict[str, List[str]]] = None):
-    """
-    Enable debug mode with specified level and scopes.
     Args:
         level: 'full' (default) to show inputs and outputs, or 'output' to show only outputs.
-        scopes: Optional dict like {'agents': ['all'], 'tools': ['ComputationTool']}.
                 If None, defaults to all agents and tools.
     """
-    config.DEBUG_MODE = True
-    config.DEBUG_LEVEL = level
     if scopes is None:
-        config.DEBUG_SCOPES = {'agents': ['all'], 'tools': ['all']}
-    else:
-        config.DEBUG_SCOPES = scopes
-def logging(log_dir=None, persistence_dir=None):
-    """
-    Set the directories for logging and persistence.
-    If log_dir is provided, logging will be enabled to that directory.
-    If persistence_dir is provided, file-based persistence will be used for checkpoints.
-    If not provided, logging is disabled, and in-memory persistence is used.
     """
     if log_dir is not None:
-        config.LOG_DIR = log_dir
-        os.makedirs(config.LOG_DIR, exist_ok=True)
     if persistence_dir is not None:
-        config.PERSISTENCE_DIR = persistence_dir
-        os.makedirs(config.PERSISTENCE_DIR, exist_ok=True)
 # Default model configurations (without API keys, as they will be provided by the user)
 DEFAULT_MODEL_CONFIGS = {
@@ -71,6 +78,13 @@ DEFAULT_MODEL_CONFIGS = {
         "thinking_budget": 600,
         "params": {"max_tokens": 5120, "temperature": 0.3}
     },
     "user_simulator": {
         "type": "gemini",
         "model_name": "gemini-2.5-flash",
@@ -115,25 +129,32 @@ def create_llm_instances(api_keys: list[str], model_overrides: Optional[Dict[str
         rate_limits = None
     manager = APIPoolManager(api_keys, rate_limits)
-    print(f"APIPoolManager initialized with {'rate limiting enabled' if enable_rate_limiting else 'rate limiting disabled'} and {len(api_keys)} API keys.")
-    model_configs = {}
     for key in DEFAULT_MODEL_CONFIGS:
-        config = DEFAULT_MODEL_CONFIGS[key].copy()
         if model_overrides and key in model_overrides:
             override = model_overrides[key]
             if "model_name" in override:
-                config["model_name"] = override["model_name"]
             if "params" in override:
-                config["params"].update(override["params"])
-        model_configs[key] = config
     LLM_INSTANCES = {
         "main": create_llm(model_configs["main"], manager),
         "agents_llm": create_llm(model_configs["agents_llm"], manager),
         "tools_llm": create_llm(model_configs["tools_llm"], manager),
         "planner_agent": create_llm(model_configs["planner_agent"], manager),
-        "user_simulator": create_llm(model_configs["user_simulator"], manager)
     }
 def initialize_tools():
@@ -156,10 +177,16 @@ def initialize_agents():
     MAIN_LLM = LLM_INSTANCES["main"]
     AGENTS_LLM = LLM_INSTANCES["agents_llm"]
     PLANNER_LLM = LLM_INSTANCES["planner_agent"]
     AGENTS = {
         "CoachAgent": CoachAgent(MAIN_LLM),
-        "MedicalAssessmentAgent": MedicalAssessmentAgent(AGENTS_LLM, TOOLS["ComputationTool"], TOOLS["WebSearchTool"]),
-        "PlannerAgent": PlannerAgent(PLANNER_LLM, TOOLS["ComputationTool"], TOOLS["WebSearchTool"], TOOLS["QuantitiesFinder"])
     }
 def setup_workflow():

+import json
 import os
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+from IPython.display import Markdown, display
 from agents import CoachAgent, MedicalAssessmentAgent, PlannerAgent
+from config import set_settings
+from logging_setup import get_logger, refresh_level
+from state import initialize_empty_memory
+from tools import ComputationTool, QuantitiesFinder, WebSearchTool
+from utils import APIPoolManager, create_llm
+from validation import ValidationAgent
 from workflow import setup_workflow as setup_workflow_workflow
+_logger = get_logger("nutritionmas")
+def debug(level: str = "full", scopes: Optional[Dict[str, List[str]]] = None) -> None:
+    """Enable debug mode with the given level and scopes.
     Args:
         level: 'full' (default) to show inputs and outputs, or 'output' to show only outputs.
+        scopes: Optional dict like ``{'agents': ['all'], 'tools': ['ComputationTool']}``.
                 If None, defaults to all agents and tools.
     """
     if scopes is None:
+        scopes = {"agents": ["all"], "tools": ["all"]}
+    set_settings(debug_mode=True, debug_level=level, debug_scopes=scopes)
+    refresh_level()
+def logging(log_dir: Optional[str] = None, persistence_dir: Optional[str] = None) -> None:  # noqa: A001 - public name kept for backwards compat
+    """Set directories for log files and LangGraph checkpoint persistence.
+    If ``log_dir`` is provided, agent/tool I/O is dumped there as JSON.
+    If ``persistence_dir`` is provided, LangGraph checkpoints are persisted to disk.
+    If neither is set, logging is disabled and persistence is in-memory.
     """
+    updates: Dict[str, Any] = {}
     if log_dir is not None:
+        os.makedirs(log_dir, exist_ok=True)
+        updates["log_dir"] = log_dir
     if persistence_dir is not None:
+        os.makedirs(persistence_dir, exist_ok=True)
+        updates["persistence_dir"] = persistence_dir
+    if updates:
+        set_settings(**updates)
 # Default model configurations (without API keys, as they will be provided by the user)
 DEFAULT_MODEL_CONFIGS = {
         "thinking_budget": 600,
         "params": {"max_tokens": 5120, "temperature": 0.3}
     },
+    "validation_agent": {
+        "type": "gemini",
+        "model_name": "gemini-2.5-flash",
+        "structured_output": True,
+        "thinking_budget": 300,
+        "params": {"max_tokens": 3072, "temperature": 0.2}
+    },
     "user_simulator": {
         "type": "gemini",
         "model_name": "gemini-2.5-flash",
         rate_limits = None
     manager = APIPoolManager(api_keys, rate_limits)
+    _logger.info(
+        "APIPoolManager initialized with %s and %d API keys.",
+        "rate limiting enabled" if enable_rate_limiting else "rate limiting disabled",
+        len(api_keys),
+    )
+    # Note: previously this loop used a local variable named ``config`` which
+    # shadowed the imported ``config`` module — now ``cfg`` to avoid the trap.
+    model_configs: Dict[str, Dict[str, Any]] = {}
     for key in DEFAULT_MODEL_CONFIGS:
+        cfg = DEFAULT_MODEL_CONFIGS[key].copy()
         if model_overrides and key in model_overrides:
             override = model_overrides[key]
             if "model_name" in override:
+                cfg["model_name"] = override["model_name"]
             if "params" in override:
+                cfg["params"] = {**cfg.get("params", {}), **override["params"]}
+        model_configs[key] = cfg
     LLM_INSTANCES = {
         "main": create_llm(model_configs["main"], manager),
         "agents_llm": create_llm(model_configs["agents_llm"], manager),
         "tools_llm": create_llm(model_configs["tools_llm"], manager),
         "planner_agent": create_llm(model_configs["planner_agent"], manager),
+        "validation_agent": create_llm(model_configs["validation_agent"], manager),
+        "user_simulator": create_llm(model_configs["user_simulator"], manager),
     }
 def initialize_tools():
     MAIN_LLM = LLM_INSTANCES["main"]
     AGENTS_LLM = LLM_INSTANCES["agents_llm"]
     PLANNER_LLM = LLM_INSTANCES["planner_agent"]
+    VALIDATION_LLM = LLM_INSTANCES["validation_agent"]
     AGENTS = {
         "CoachAgent": CoachAgent(MAIN_LLM),
+        "MedicalAssessmentAgent": MedicalAssessmentAgent(
+            AGENTS_LLM, TOOLS["ComputationTool"], TOOLS["WebSearchTool"]
+        ),
+        "PlannerAgent": PlannerAgent(
+            PLANNER_LLM, TOOLS["ComputationTool"], TOOLS["WebSearchTool"], TOOLS["QuantitiesFinder"]
+        ),
+        "ValidationAgent": ValidationAgent(VALIDATION_LLM),
     }
 def setup_workflow():

pyproject.toml ADDED Viewed

	@@ -0,0 +1,52 @@

+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "nutrition-mas"
+version = "0.2.0"
+description = "Multi-agent system for personalised nutrition planning, built on LangGraph + Gemini."
+readme = "README.md"
+requires-python = ">=3.10"
+license = { text = "MIT" }
+authors = [{ name = "Moaz Eldegwy", email = "moazeldegwy@gmail.com" }]
+dependencies = [
+    "langgraph>=0.2.50,<0.3",
+    "langchain-core>=0.3.20,<0.4",
+    "google-genai>=0.3.0",
+    "pydantic>=2.9,<3",
+    "pydantic-settings>=2.6,<3",
+    "pulp>=2.9,<3",
+    "ddgs>=6.3,<7",
+    "json-repair>=0.30",
+    "python-dotenv>=1.0,<2",
+    "ipython>=8.0",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.0",
+    "pytest-asyncio>=0.24",
+    "pytest-cov>=5.0",
+    "ruff>=0.7",
+]
+[tool.setuptools]
+# Flat layout: every top-level module imported at runtime must be listed here,
+# otherwise it is left out of the built distribution. ``schemas`` and
+# ``validation`` are added in this commit (``nutritionmas`` imports ``validation``).
+py-modules = ["agents", "config", "nutritionmas", "state", "tools", "utils", "workflow", "logging_setup", "schemas", "validation"]
+[tool.ruff]
+line-length = 110
+target-version = "py310"
+[tool.ruff.lint]
+select = ["E", "F", "I", "B", "UP", "SIM"]
+ignore = ["E501"]
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+addopts = "-ra --strict-markers"
+markers = [
+    "integration: tests that hit a real LLM (skipped by default)",
+    "slow: long-running tests",
+]

requirements.txt ADDED Viewed

	@@ -0,0 +1,31 @@

+# Core agent framework
+langgraph>=0.2.50,<0.3
+langchain-core>=0.3.20,<0.4
+# LLM provider (Gemini)
+google-genai>=0.3.0
+# Schemas / settings
+pydantic>=2.9,<3
+pydantic-settings>=2.6,<3
+# Optimization (meal-quantities solver)
+pulp>=2.9,<3
+# Web search fallback
+ddgs>=6.3,<7
+# JSON repair fallback (kept until Phase 1 makes it a measured fallback)
+json-repair>=0.30
+# Env loading
+python-dotenv>=1.0,<2
+# Markdown rendering for notebook display (kept for backwards compat)
+ipython>=8.0
+# Tests / dev
+pytest>=8.0
+pytest-asyncio>=0.24
+pytest-cov>=5.0
+ruff>=0.7

schemas.py ADDED Viewed

	@@ -0,0 +1,215 @@

+"""Pydantic models for agent inputs and outputs.
+This module is the contract between the LLM, the orchestration layer, and the
+test suite. Every agent's decision now passes through one of these models — so:
+* Gemini's ``response_schema`` (constrained decoding) returns guaranteed-shape
+  JSON; we no longer rely on regex / ``json_repair`` for the high-stakes path.
+* Tests can construct decisions directly without hand-crafted JSON strings.
+* Phase 2 can split agent loops into LangGraph nodes that pass typed objects
+  between them.
+Where Gemini's schema support is fussy (e.g. discriminated unions with
+``$ref``), we keep the outer envelope strict and leave per-action ``params``
+as a free dict — the agent dispatcher validates it at use time.
+"""
+from __future__ import annotations
+from typing import Any, Dict, List, Literal, Optional
+from pydantic import BaseModel, Field
+# ---------------------------------------------------------------------------
+# Shared / leaf types
+# ---------------------------------------------------------------------------
+StepStatus = Literal["pending", "in_progress", "completed", "skipped", "failed"]
+class ResponseStep(BaseModel):
+    """A single step in the Coach's response plan."""
+    id: int
+    actor: str = Field(
+        description="Who executes this step. Examples: 'CoachAgent', 'MedicalAssessmentAgent', "
+        "'PlannerAgent', 'ValidationAgent', 'user'.",
+    )
+    description: str
+    # default_factory gives each instance its own empty list.
+    # NOTE(review): ``id`` is an int but prerequisites are strings — confirm
+    # whether these hold step ids or free-text conditions.
+    prerequisites: List[str] = Field(default_factory=list)
+    # Steps start as "pending" unless a status is given explicitly.
+    status: StepStatus = "pending"
+class MacroTargets(BaseModel):
+    """Daily macronutrient targets in grams (single integer values)."""
+    protein_g: int = Field(ge=0)
+    fat_g: int = Field(ge=0)
+    carbohydrates_g: int = Field(ge=0)
+class Calculations(BaseModel):
+    """Derived anthropometric + nutritional values from the assessment."""
+    # All numeric fields carry ge=0, so negative values fail validation.
+    BMI: float = Field(ge=0)
+    BMR: float = Field(ge=0)
+    TDEE: float = Field(ge=0)
+    # Integer-typed, unlike the float-typed BMI/BMR/TDEE above.
+    daily_target_calories: int = Field(ge=0)
+    # Required nested model; there is no default.
+    macro_targets: MacroTargets
+# ---------------------------------------------------------------------------
+# Coach Agent
+# ---------------------------------------------------------------------------
+CoachActionType = Literal[
+    "call_agent",
+    "call_tool",
+    "ask_user",
+    "write_memory",
+    "compose_response",
+]
+class CoachDecision(BaseModel):
+    """Single turn of the Coach orchestrator.
+    Outer shape is strict; ``params`` is left as a dict because Gemini's
+    schema layer struggles with deeply discriminated unions. The dispatcher
+    in :mod:`workflow` validates ``params`` against the action type.
+    """
+    observation: str
+    thought: str
+    response_steps: List[ResponseStep] = Field(default_factory=list)
+    # Restricted to the CoachActionType literals; any other value fails validation.
+    action: CoachActionType
+    # Free-form dict: the per-action keys listed in the description below are
+    # documentation only — this model does not check them.
+    params: Dict[str, Any] = Field(
+        default_factory=dict,
+        description=(
+            "Action-specific parameters. Required keys per action: "
+            "call_agent={agent_name, task}, call_tool={tool_name, task}, "
+            "ask_user={prompt}, write_memory={partition, data}, "
+            "compose_response={text}."
+        ),
+    )
+# ---------------------------------------------------------------------------
+# Medical Assessment Agent
+# ---------------------------------------------------------------------------
+MedicalActionType = Literal["call_tool", "ask_user", "assessment_complete"]
+class MedicalAssessmentResult(BaseModel):
+    """Final payload stored in ``memory.flags_and_assessments``."""
+    # Only ``assessment_summary`` and ``calculations`` are required; every
+    # other field falls back to the default shown.
+    assessment_summary: str
+    flags_to_set: List[str] = Field(default_factory=list)
+    recommendations: List[str] = Field(default_factory=list)
+    requires_professional_consultation: bool = False
+    calculations: Calculations
+    evidence_sources: List[str] = Field(default_factory=list)
+    trace: str = ""
+    requires_tool_retry: bool = False
+    # Constrained to the closed interval [0.0, 1.0]; defaults to 1.0.
+    data_confidence: float = Field(default=1.0, ge=0.0, le=1.0)
+class MedicalAssessmentDecision(BaseModel):
+    """Per-iteration output of the Medical Assessment Agent loop."""
+    medical_reasoning: str
+    observation: str
+    risk_assessment_priorities: List[str] = Field(default_factory=list)
+    assessment_plan: List[ResponseStep] = Field(default_factory=list)
+    action_type: MedicalActionType
+    # action-specific fields (kept flat — see CoachDecision rationale)
+    # All of them are optional here: nothing in this model enforces that the
+    # fields matching ``action_type`` are actually populated.
+    tool_name: Optional[str] = None
+    tool_task: Optional[str] = None
+    fields: List[str] = Field(default_factory=list)  # for ask_user
+    result: Optional[MedicalAssessmentResult] = None  # for assessment_complete
+# ---------------------------------------------------------------------------
+# Planner Agent
+# ---------------------------------------------------------------------------
+PlannerActionType = Literal["call_tool", "draft_plan", "provide_plan"]
+class FoodItem(BaseModel):
+    """A single ingredient on the plan, post-solver."""
+    name: str
+    grams: float = Field(ge=0)
+    calories: float = Field(ge=0)
+    protein_g: float = Field(ge=0)
+    fat_g: float = Field(ge=0)
+    carbohydrates_g: float = Field(ge=0)
+    meal_group: Optional[str] = None
+class FinalPlan(BaseModel):
+    """The shape stored in ``memory.plans.current_plan``."""
+    days: List[List[FoodItem]] = Field(
+        description="One inner list per day. Most plans return a single day.",
+    )
+    daily_totals: Dict[str, float] = Field(default_factory=dict)
+    notes: str = ""
+    sources: List[str] = Field(default_factory=list)
+    trace: str = ""
+class PlannerDecision(BaseModel):
+    """Per-iteration output of the Planner Agent loop."""
+    observation: str
+    thought: str
+    planning_steps: List[ResponseStep] = Field(default_factory=list)
+    action_type: PlannerActionType
+    # The fields below are all optional and default to None; the model does not
+    # tie them to ``action_type``.
+    tool_name: Optional[str] = None
+    tool_task: Optional[str] = None
+    # Both plans are untyped dicts, so their contents are not validated against
+    # FinalPlan / FoodItem by this model.
+    drafted_plan: Optional[Dict[str, Any]] = None  # free shape pre-solver
+    final_plan: Optional[Dict[str, Any]] = None  # free shape until validation lands in Phase 2
+# ---------------------------------------------------------------------------
+# Validation Agent (lands in Phase 2; defined here so Phase 1 schemas are
+# the single source of truth)
+# ---------------------------------------------------------------------------
+ValidationVerdict = Literal["pass", "revise", "reject"]
+ValidationSeverity = Literal["low", "medium", "high"]
+# One problem reported against a plan: a stable code, a human-readable
+# description, and a severity that defaults to "medium".
+class ValidationIssue(BaseModel):
+    code: str = Field(description="Stable error code, e.g. 'allergy_violation'.")
+    description: str
+    severity: ValidationSeverity = "medium"
+# Outcome of a validation pass. Only ``verdict`` is required; a bare
+# ``ValidationDecision(verdict="pass")`` has no issues, empty notes and
+# ``requires_human_review`` False.
+class ValidationDecision(BaseModel):
+    verdict: ValidationVerdict
+    issues: List[ValidationIssue] = Field(default_factory=list)
+    notes: str = ""
+    requires_human_review: bool = False
+__all__ = [
+    "Calculations",
+    "CoachActionType",
+    "CoachDecision",
+    "FinalPlan",
+    "FoodItem",
+    "MacroTargets",
+    "MedicalActionType",
+    "MedicalAssessmentDecision",
+    "MedicalAssessmentResult",
+    "PlannerActionType",
+    "PlannerDecision",
+    "ResponseStep",
+    "StepStatus",
+    "ValidationDecision",
+    "ValidationIssue",
+    "ValidationSeverity",
+    "ValidationVerdict",
+]

tests/__init__.py ADDED Viewed

File without changes

tests/conftest.py ADDED Viewed

	@@ -0,0 +1,99 @@

+"""Shared pytest fixtures.
+The mock LLM here is the workhorse for offline tests — it lets us run agents
+end-to-end without paying for Gemini calls. Canned responses can be raw JSON
+strings, dicts, or Pydantic model instances; ``call_typed`` validates strings
+and dicts against the schema the agent requests.
+"""
+from __future__ import annotations
+import json
+from typing import Any, Dict, List
+import pytest
+from config import reset_settings, set_settings
+from utils import APIPoolManager, LLM
+class MockLLM(LLM):
+    """LLM stub that returns canned responses in order.
+    Tests construct it with a list of either:
+    * a JSON-string (passed through unchanged on the untyped __call__ path),
+    * a dict (JSON-serialised by __call__; validated against the requested
+      schema on call_typed),
+    * a Pydantic ``BaseModel`` instance (returned as-is from call_typed; its
+      ``.model_dump_json()`` is used for __call__).
+    Each call pops the next item. Out-of-script calls raise so missing
+    fixtures are noisy rather than silent.
+    """
+    def __init__(self, responses: List[Any]) -> None:
+        # Copy the script so popping never mutates the caller's list.
+        self._responses: List[Any] = list(responses)
+        # Every prompt seen, typed or untyped (both paths go through _next).
+        self.calls: List[str] = []
+        # (prompt, response_model) pairs for the typed path only.
+        self.typed_calls: List[tuple[str, type]] = []
+    def _next(self, prompt: str) -> Any:
+        # Record the prompt first so the error below reports the right call number.
+        self.calls.append(prompt)
+        if not self._responses:
+            raise AssertionError(
+                f"MockLLM ran out of canned responses (call #{len(self.calls)}). "
+                f"Last prompt:\n{prompt[:300]}"
+            )
+        return self._responses.pop(0)
+    def __call__(self, prompt: str, **_: Any) -> List[str]:
+        # Untyped path: every branch returns a one-element list of text.
+        item = self._next(prompt)
+        if hasattr(item, "model_dump_json"):
+            return [item.model_dump_json()]
+        if isinstance(item, dict):
+            return [json.dumps(item)]
+        return [str(item)]
+    def call_typed(self, prompt: str, response_model: type, **_: Any):
+        # Typed path: returns a ``response_model`` instance, or None when the
+        # canned item is of the wrong model type or fails validation.
+        from pydantic import BaseModel
+        self.typed_calls.append((prompt, response_model))
+        item = self._next(prompt)
+        if isinstance(item, BaseModel):
+            return item if isinstance(item, response_model) else None
+        if isinstance(item, dict):
+            try:
+                return response_model.model_validate(item)
+            except Exception:
+                return None
+        if isinstance(item, str):
+            try:
+                return response_model.model_validate_json(item)
+            except Exception:
+                return None
+        # Any other canned type is treated as "no typed result".
+        return None
+    def format_prompt(self, messages: List[Dict[str, str]]) -> str:
+        # One "role: content" line per message, joined with newlines.
+        return "\n".join(f"{m['role']}: {m['content']}" for m in messages)
+@pytest.fixture
+def mock_llm_factory():
+    """Factory to build a MockLLM from a list of canned responses."""
+    # Returns the class itself; tests call it with their own response list.
+    return MockLLM
+@pytest.fixture(autouse=True)
+def fresh_settings():
+    """Reset the Settings singleton before/after each test for isolation."""
+    reset_settings()
+    # Pin debug mode off and both directories to None after the reset.
+    # NOTE(review): presumably this keeps environment-provided values out of
+    # tests — confirm against config.Settings.
+    set_settings(debug_mode=False, log_dir=None, persistence_dir=None)
+    yield
+    # Teardown: discard whatever the test changed.
+    reset_settings()
+@pytest.fixture
+def api_pool_no_limits():
+    """An APIPoolManager with rate limiting disabled — for unit tests that
+    don't care about throttling."""
+    # Two placeholder keys; rate_limits=None is what disables throttling here.
+    return APIPoolManager(["test-key-1", "test-key-2"], rate_limits=None)

tests/test_api_pool.py ADDED Viewed

	@@ -0,0 +1,51 @@

+"""Unit tests for the APIPoolManager rate limiter."""
+from __future__ import annotations
+import time
+import pytest
+from utils import APIPoolManager
+def test_round_robin_no_limits() -> None:
+    pool = APIPoolManager(["k1", "k2", "k3"], rate_limits=None)
+    seen = [pool.get_next_key("any-model") for _ in range(6)]
+    # Six draws from a three-key pool with no limits: every key must show up.
+    # Only membership is asserted; per-key counts and ordering are not checked.
+    assert set(seen) == {"k1", "k2", "k3"}
+def test_rpm_spacing_enforced() -> None:
+    """With RPM=60 we expect a ~1s spacing between consecutive uses of the
+    same key. Two-key pool should let us avoid the wait."""
+    # rate_limits maps model name -> (RPM, RPD), per the docstring and the
+    # RPD test that follows in this file.
+    pool = APIPoolManager(["k1", "k2"], rate_limits={"m": (60, 1000)})
+    k_a = pool.get_next_key("m")
+    # Mark the first key as used "now" so it is still inside its spacing window.
+    pool.record_usage(k_a, "m", time.time())
+    k_b = pool.get_next_key("m")
+    assert k_a != k_b, "Round-robin should pick the other key when one is hot"
+def test_rpd_exhaustion_drops_key() -> None:
+    """A key that hits its daily limit must be removed from active pool."""
+    pool = APIPoolManager(["k1", "k2"], rate_limits={"m": (60, 2)})
+    for _ in range(2):
+        k = pool.get_next_key("m")
+        pool.record_usage(k, "m")
+    # By now both keys may have hit their RPD=2. Next call should still work
+    # if at least one key has capacity, else raise RuntimeError.
+    # The test branches on the pool's state rather than assuming which keys
+    # the two draws above landed on.
+    keys_left = list(pool.active_keys)
+    if not keys_left:
+        with pytest.raises(RuntimeError):
+            pool.get_next_key("m")
+    else:
+        # Drain the remaining one too.
+        for _ in range(2):
+            try:
+                k = pool.get_next_key("m")
+                pool.record_usage(k, "m")
+            except RuntimeError:
+                # Pool ran dry before the loop finished; the final assert still applies.
+                break
+        assert not pool.active_keys, "Both keys should be exhausted now"

tests/test_quantities_finder.py ADDED Viewed

	@@ -0,0 +1,90 @@

+"""Smoke tests for the PuLP-backed QuantitiesFinder.
+Pure deterministic tool — no LLM, no network. Should be the fastest test in
+the suite and the one we trust most.
+"""
+from __future__ import annotations
+import json
+from tools import QuantitiesFinder
+def test_basic_two_food_balance() -> None:
+    """Two foods, simple targets — solver must find quantities within a few
+    percent of the target."""
+    qf = QuantitiesFinder()
+    payload = {
+        "foods": [
+            {
+                "name": "chicken_breast",
+                "calories": 165,
+                "protein": 31,
+                "fat": 3.6,
+                "carbohydrates": 0,
+                "estimated_g": 200,
+            },
+            {
+                "name": "rice_cooked",
+                "calories": 130,
+                "protein": 2.7,
+                "fat": 0.3,
+                "carbohydrates": 28,
+                "estimated_g": 200,
+            },
+        ],
+        "targets": {"calories": 700, "protein": 65, "fat": 8, "carbohydrates": 60},
+    }
+    result = json.loads(qf.handle_task(json.dumps(payload)))
+    assert "quantities" in result and "achieved" in result, f"Bad shape: {result}"
+    achieved = result["achieved"]
+    # The solver minimises weighted deviation across all 4 nutrients. With only
+    # two foods (chicken and rice) it cannot hit every target tightly — it will
+    # nail fat/carbs (constrained by rice) and trade off calories/protein.
+    # We assert it lands within 20% of every target, which is the realistic
+    # feasibility envelope for a 2-food problem.
+    for nut, target in [("calories", 700), ("protein", 65), ("fat", 8), ("carbohydrates", 60)]:
+        deviation = abs(achieved[nut] - target) / target
+        assert deviation < 0.20, f"{nut} achieved={achieved[nut]} target={target} dev={deviation:.2%}"
+def test_invalid_payload_returns_error() -> None:
+    qf = QuantitiesFinder()
+    bad = {"foods": [{"name": "x"}], "targets": {}}  # missing required keys
+    # The tool is expected to answer with a JSON document carrying an "error"
+    # key rather than raising; json.loads would fail on a non-JSON reply.
+    result = json.loads(qf.handle_task(json.dumps(bad)))
+    assert "error" in result, f"Expected an error key, got {result}"
+def test_min_max_bounds_respected() -> None:
+    qf = QuantitiesFinder()
+    payload = {
+        "foods": [
+            {
+                "name": "egg",
+                "calories": 155,
+                "protein": 13,
+                "fat": 11,
+                "carbohydrates": 1.1,
+                "estimated_g": 100,
+                "min_g": 50,
+                "max_g": 120,
+            },
+            {
+                "name": "oats",
+                "calories": 389,
+                "protein": 17,
+                "fat": 7,
+                "carbohydrates": 66,
+                "estimated_g": 80,
+                "min_g": 30,
+                "max_g": 150,
+            },
+        ],
+        "targets": {"calories": 500, "protein": 25, "fat": 15, "carbohydrates": 50},
+    }
+    result = json.loads(qf.handle_task(json.dumps(payload)))
+    qty = result["quantities"]
+    assert 50 <= qty["egg"] <= 120
+    assert 30 <= qty["oats"] <= 150

tests/test_schemas.py ADDED Viewed

	@@ -0,0 +1,135 @@

+"""Validate the Pydantic schemas that anchor every agent decision in Phase 1."""
+from __future__ import annotations
+import pytest
+from pydantic import ValidationError
+from schemas import (
+    Calculations,
+    CoachDecision,
+    FinalPlan,
+    FoodItem,
+    MacroTargets,
+    MedicalAssessmentDecision,
+    MedicalAssessmentResult,
+    PlannerDecision,
+    ResponseStep,
+    ValidationDecision,
+)
+# ---- Coach -----------------------------------------------------------------
+def test_coach_decision_call_agent() -> None:
+    d = CoachDecision(
+        observation="user wants a plan",
+        thought="need assessment first",
+        response_steps=[
+            ResponseStep(id=1, actor="MedicalAssessmentAgent", description="assess"),
+        ],
+        action="call_agent",
+        params={"agent_name": "MedicalAssessmentAgent", "task": "assess user"},
+    )
+    assert d.action == "call_agent"
+    assert d.params["agent_name"] == "MedicalAssessmentAgent"
+def test_coach_decision_invalid_action_rejected() -> None:
+    with pytest.raises(ValidationError):
+        CoachDecision(
+            observation="x",
+            thought="x",
+            response_steps=[],
+            action="not_a_real_action",  # type: ignore[arg-type]
+            params={},
+        )
+# ---- Medical assessment ----------------------------------------------------
+def test_medical_assessment_complete_round_trip() -> None:
+    payload = {
+        "medical_reasoning": "BMI within normal range; protein target raised for muscle gain",
+        "observation": "all fields present",
+        "risk_assessment_priorities": ["maintain micronutrient adequacy"],
+        "assessment_plan": [],
+        "action_type": "assessment_complete",
+        "result": {
+            "assessment_summary": "healthy male, hypertrophy goal",
+            "flags_to_set": [],
+            "recommendations": ["maintain hydration"],
+            "requires_professional_consultation": False,
+            "calculations": {
+                "BMI": 23.4,
+                "BMR": 1750,
+                "TDEE": 2700,
+                "daily_target_calories": 2900,
+                "macro_targets": {"protein_g": 180, "fat_g": 70, "carbohydrates_g": 360},
+            },
+        },
+    }
+    decision = MedicalAssessmentDecision.model_validate(payload)
+    assert decision.action_type == "assessment_complete"
+    assert isinstance(decision.result, MedicalAssessmentResult)
+    assert decision.result.calculations.macro_targets.protein_g == 180
+def test_calculations_negative_values_rejected() -> None:
+    with pytest.raises(ValidationError):
+        Calculations(
+            BMI=-1,  # negative not allowed
+            BMR=1700,
+            TDEE=2500,
+            daily_target_calories=2200,
+            macro_targets=MacroTargets(protein_g=120, fat_g=60, carbohydrates_g=250),
+        )
+# ---- Planner ---------------------------------------------------------------
+def test_planner_provide_plan_with_dict_final_plan() -> None:
+    decision = PlannerDecision(
+        observation="all data ready",
+        thought="returning final plan",
+        planning_steps=[],
+        action_type="provide_plan",
+        final_plan={"days": [{"breakfast": "oats"}], "trace": "Coach->Planner"},
+    )
+    assert decision.action_type == "provide_plan"
+    assert decision.final_plan is not None
+def test_food_item_strict_grams_non_negative() -> None:
+    with pytest.raises(ValidationError):
+        FoodItem(
+            name="oats",
+            grams=-10,
+            calories=389,
+            protein_g=17,
+            fat_g=7,
+            carbohydrates_g=66,
+        )
+def test_final_plan_minimal() -> None:
+    plan = FinalPlan(
+        days=[
+            [
+                FoodItem(
+                    name="oats",
+                    grams=80,
+                    calories=311,
+                    protein_g=14,
+                    fat_g=5,
+                    carbohydrates_g=53,
+                )
+            ]
+        ],
+        daily_totals={"calories": 311},
+    )
+    assert plan.days[0][0].name == "oats"
+# ---- Validation (Phase 2 schemas, declared in Phase 1) ---------------------
+def test_validation_decision_default_pass() -> None:
+    v = ValidationDecision(verdict="pass")
+    assert v.verdict == "pass"
+    assert v.issues == []

tests/test_settings.py ADDED Viewed

	@@ -0,0 +1,53 @@

+"""Verify the new Pydantic ``Settings`` and the legacy ``config.X`` proxy."""
+from __future__ import annotations
+import config
+from config import get_settings, reset_settings, set_settings
+def test_defaults() -> None:
+    s = get_settings()
+    assert s.debug_mode is False
+    assert s.debug_level == "full"
+    assert s.enable_rate_limiting is True
+    assert s.log_dir is None
+    assert s.debug_scopes == {"agents": ["all"], "tools": ["all"]}
+def test_set_settings_pydantic_names() -> None:
+    set_settings(debug_mode=True, log_dir="/tmp/x")
+    s = get_settings()
+    assert s.debug_mode is True
+    assert s.log_dir == "/tmp/x"
+def test_set_settings_legacy_names() -> None:
+    set_settings(DEBUG_MODE=True, LOG_DIR="/tmp/y", DEBUG_LEVEL="output")
+    s = get_settings()
+    assert s.debug_mode is True
+    assert s.log_dir == "/tmp/y"
+    assert s.debug_level == "output"
+def test_pep562_legacy_reads() -> None:
+    """Existing code that does ``config.DEBUG_MODE`` still works."""
+    set_settings(debug_mode=True, debug_scopes={"agents": ["CoachAgent"], "tools": ["all"]})
+    assert config.DEBUG_MODE is True
+    assert config.DEBUG_SCOPES == {"agents": ["CoachAgent"], "tools": ["all"]}
+def test_unknown_attr_raises() -> None:
+    # Reading an attribute the config module does not define must raise
+    # AttributeError, and the message must name the missing attribute.
+    try:
+        _ = config.NOT_A_THING  # type: ignore[attr-defined]
+    except AttributeError as e:
+        assert "NOT_A_THING" in str(e)
+    else:
+        # No exception at all is a failure too.
+        raise AssertionError("Expected AttributeError")
+def test_reset_settings_round_trip() -> None:
+    """An override made with ``set_settings`` is discarded by ``reset_settings``."""
+    set_settings(debug_mode=True)
+    overridden = get_settings().debug_mode
+    assert overridden is True
+    reset_settings()
+    restored = get_settings().debug_mode
+    assert restored is False  # back to default

tests/test_smoke.py ADDED Viewed

	@@ -0,0 +1,64 @@

+"""Top-level smoke tests: every module must import cleanly outside Colab."""
+from __future__ import annotations
+def test_imports_work_outside_colab() -> None:
+    """Import every top-level project module in a plain Python process.
+    The Phase 0 cleanup removed ``from google.colab import userdata``;
+    the test passes when none of the imports below raises."""
+    # Imports are function-local so a failure is reported against this test,
+    # not as a collection error for the whole file.
+    import agents  # noqa: F401
+    import config  # noqa: F401
+    import logging_setup  # noqa: F401
+    import nutritionmas  # noqa: F401
+    import state  # noqa: F401
+    import tools  # noqa: F401
+    import utils  # noqa: F401
+    import workflow  # noqa: F401
+def test_only_one_geminillm_class_in_utils() -> None:
+    """Phase 0 deleted the duplicate ``GeminiLLM`` definition. Make sure it
+    doesn't sneak back.
+    The check counts ``class GeminiLLM`` statements in the source of ``utils``.
+    Inspecting the imported module's namespace cannot do this: a second
+    definition rebinds the same name, so the namespace only ever holds one
+    ``GeminiLLM`` and a namespace-based count would pass with a duplicate.
+    """
+    import ast
+    import inspect
+    import utils
+    module_tree = ast.parse(inspect.getsource(utils))
+    definitions = [
+        node
+        for node in ast.walk(module_tree)
+        if isinstance(node, ast.ClassDef) and node.name == "GeminiLLM"
+    ]
+    assert len(definitions) == 1
+def test_initialize_empty_memory_shape() -> None:
+    """Fresh memory has exactly four named partitions, each equal to ``{}``."""
+    from state import initialize_empty_memory
+    memory = initialize_empty_memory()
+    expected_partitions = {"user_profile", "medical_history", "flags_and_assessments", "plans"}
+    assert set(memory.keys()) == expected_partitions
+    for partition in memory.values():
+        assert partition == {}
+def test_default_model_configs_present() -> None:
+    """Model topology is a contract the rest of the system depends on.
+    Phase 2 adds 'validation_agent' (Gemini Flash; cheap critic loop)."""
+    from nutritionmas import DEFAULT_MODEL_CONFIGS
+    expected_roles = {
+        "main",
+        "agents_llm",
+        "tools_llm",
+        "planner_agent",
+        "validation_agent",
+        "user_simulator",
+    }
+    configured_roles = set(DEFAULT_MODEL_CONFIGS.keys())
+    assert configured_roles == expected_roles
+def test_create_llm_instances_requires_keys() -> None:
+    """An empty key list makes ``create_llm_instances`` raise ``ValueError``."""
+    import pytest
+    from nutritionmas import create_llm_instances
+    no_keys: list = []
+    with pytest.raises(ValueError, match="At least one API key"):
+        create_llm_instances(no_keys)

tests/test_typed_agents.py ADDED Viewed

	@@ -0,0 +1,184 @@

+"""End-to-end-ish tests of the typed agent path with MockLLM.
+These don't hit Gemini; they verify that an agent which received a typed
+``CoachDecision`` / ``MedicalAssessmentDecision`` / ``PlannerDecision`` from
+its LLM produces the expected state mutations.
+"""
+from __future__ import annotations
+from typing import Any, Dict
+import pytest
+from agents import CoachAgent, MedicalAssessmentAgent, PlannerAgent
+from schemas import (
+    Calculations,
+    CoachDecision,
+    MacroTargets,
+    MedicalAssessmentDecision,
+    MedicalAssessmentResult,
+    PlannerDecision,
+)
+from state import initialize_empty_memory
+# ---- Coach -----------------------------------------------------------------
+def test_coach_emits_call_agent_action(mock_llm_factory) -> None:
+    """A canned ``call_agent`` decision becomes the coach's current action."""
+    routing_params = {"agent_name": "MedicalAssessmentAgent", "task": "assess"}
+    decision = CoachDecision(
+        observation="needs assessment",
+        thought="route to medical",
+        response_steps=[],
+        action="call_agent",
+        params=routing_params,
+    )
+    agent = CoachAgent(mock_llm_factory([decision]))
+    question = "make me a plan"
+    initial_state: Dict[str, Any] = {
+        "memory": initialize_empty_memory(),
+        "user_question": question,
+        "conversation_history": [{"role": "user", "content": question}],
+        "current_action": None,
+        "agent_result": None,
+        "num_turns": 0,
+        "max_turns": 10,
+        "previous_actions": [],
+        "response_steps": [],
+    }
+    result = agent.handle_task(initial_state)
+    chosen = result["current_action"]
+    assert chosen["action"] == "call_agent"
+    assert chosen["params"]["agent_name"] == "MedicalAssessmentAgent"
+    assert result["num_turns"] == 1
+def test_coach_falls_back_when_decision_unparseable(mock_llm_factory) -> None:
+    """Malformed LLM output yields a ``compose_response`` action flagged ``_parse_error``."""
+    garbage_reply = "{not even close to JSON"
+    agent = CoachAgent(mock_llm_factory([garbage_reply]))
+    initial_state: Dict[str, Any] = {
+        "memory": initialize_empty_memory(),
+        "user_question": "anything",
+        "conversation_history": [],
+        "current_action": None,
+        "agent_result": None,
+        "num_turns": 0,
+        "max_turns": 10,
+        "previous_actions": [],
+        "response_steps": [],
+    }
+    result = agent.handle_task(initial_state)
+    # Coach injects a compose_response with _parse_error so the workflow can short-circuit
+    fallback = result["current_action"]
+    assert fallback["action"] == "compose_response"
+    assert fallback.get("_parse_error") is True
+# ---- Medical ---------------------------------------------------------------
+def test_medical_assessment_complete_writes_memory(mock_llm_factory) -> None:
+    """A single assessment_complete decision should land in memory partition."""
+    macro_targets = MacroTargets(protein_g=150, fat_g=70, carbohydrates_g=300)
+    computed = Calculations(
+        BMI=22.0,
+        BMR=1600,
+        TDEE=2400,
+        daily_target_calories=2400,
+        macro_targets=macro_targets,
+    )
+    assessment = MedicalAssessmentResult(
+        assessment_summary="healthy adult",
+        flags_to_set=["maintenance"],
+        recommendations=["balanced diet"],
+        requires_professional_consultation=False,
+        calculations=computed,
+        evidence_sources=["who.int"],
+        trace="Medical agent ran one iteration",
+    )
+    decision = MedicalAssessmentDecision(
+        medical_reasoning="single-shot",
+        observation="all data present",
+        risk_assessment_priorities=["maintenance"],
+        assessment_plan=[],
+        action_type="assessment_complete",
+        result=assessment,
+    )
+    # Need a stub for the tools (won't be called in single-iteration assessment_complete)
+    class _StubTool:
+        def handle_task(self, _: str) -> str:
+            return ""
+    agent = MedicalAssessmentAgent(mock_llm_factory([decision]), _StubTool(), _StubTool())
+    profile = {
+        "age": 30,
+        "sex": "male",
+        "height": 180,
+        "weight": 75,
+        "activity_level": "moderate",
+        "allergies": [],
+        "medications": [],
+    }
+    memory = initialize_empty_memory()
+    memory["user_profile"] = profile
+    summary = agent.handle_task("assess this user", memory)
+    assert summary == "healthy adult"
+    stored = memory["flags_and_assessments"]
+    assert stored["assessment_status"] == "assessment_complete"
+    assert stored["calculations"]["macro_targets"]["protein_g"] == 150
+def test_medical_ask_user_returns_field_list(mock_llm_factory) -> None:
+    """An ``ask_user`` decision returns output that mentions each requested field."""
+    requested_fields = ["weight", "height"]
+    decision = MedicalAssessmentDecision(
+        medical_reasoning="missing weight + height",
+        observation="incomplete",
+        risk_assessment_priorities=[],
+        assessment_plan=[],
+        action_type="ask_user",
+        fields=requested_fields,
+    )
+    # Tool stand-in whose handle_task returns an empty string.
+    class _StubTool:
+        def handle_task(self, _: str) -> str:
+            return ""
+    agent = MedicalAssessmentAgent(mock_llm_factory([decision]), _StubTool(), _StubTool())
+    reply = agent.handle_task("assess", initialize_empty_memory())
+    assert "weight" in reply
+    assert "height" in reply
+# ---- Planner ---------------------------------------------------------------
+def test_planner_provide_plan_stores_to_memory(mock_llm_factory) -> None:
+    """A ``provide_plan`` decision is written to ``memory['plans']`` with a timestamp."""
+    plan_payload = {
+        "days": [{"breakfast": "oats", "lunch": "chicken+rice"}],
+        "trace": "Planner one-shot",
+    }
+    decision = PlannerDecision(
+        observation="ready",
+        thought="finalising",
+        planning_steps=[],
+        action_type="provide_plan",
+        final_plan=plan_payload,
+    )
+    # Tool stand-in whose handle_task returns an empty string.
+    class _StubTool:
+        def handle_task(self, _: str) -> str:
+            return ""
+    agent = PlannerAgent(mock_llm_factory([decision]), _StubTool(), _StubTool(), _StubTool())
+    memory = initialize_empty_memory()
+    memory["flags_and_assessments"] = {"assessment_status": "assessment_complete"}
+    reply = agent.handle_task("make me a one-day plan", memory)
+    assert "trace" in reply
+    plans = memory["plans"]
+    assert plans["current_plan"]["days"][0]["breakfast"] == "oats"
+    assert "plan_timestamp" in plans
+def test_planner_error_payload_short_circuits(mock_llm_factory) -> None:
+    """A ``final_plan`` carrying only ``error`` is surfaced in the planner's reply."""
+    error_payload = {"error": "flags_and_assessments empty; run MedicalAssessmentAgent first"}
+    decision = PlannerDecision(
+        observation="missing assessment",
+        thought="precondition violated",
+        planning_steps=[],
+        action_type="provide_plan",
+        final_plan=error_payload,
+    )
+    # Tool stand-in whose handle_task returns an empty string.
+    class _StubTool:
+        def handle_task(self, _: str) -> str:
+            return ""
+    agent = PlannerAgent(mock_llm_factory([decision]), _StubTool(), _StubTool(), _StubTool())
+    reply = agent.handle_task("make a plan", initialize_empty_memory())
+    for expected_fragment in ("error", "MedicalAssessmentAgent"):
+        assert expected_fragment in reply

tests/test_validation_agent.py ADDED Viewed

	@@ -0,0 +1,201 @@

+"""Tests for the ValidationAgent critic loop.
+Most of the value lives in the deterministic checks — they are pure code,
+require no LLM, and can be exercised cheaply across edge cases.
+"""
+from __future__ import annotations
+from typing import Any, Dict
+from schemas import ValidationDecision
+from state import initialize_empty_memory
+from validation import ValidationAgent
+# A minimal deterministic-only stub LLM for the cases that should never need
+# the LLM layer (allergy violation -> verdict "reject" short-circuits LLM).
+class _NeverCalledLLM:
+    """Stub LLM whose ``call_typed`` fails the test as soon as it is invoked."""
+    def call_typed(self, *args: Any, **kwargs: Any):
+        message = "LLM should not be called when deterministic check rejects."
+        raise AssertionError(message)
+# ---------------------------------------------------------------------------
+def _build_memory(
+    *,
+    allergies=None,
+    dislikes="",
+    target_calories=2000,
+    macros=(150, 70, 200),
+) -> Dict[str, Any]:
+    """Return fresh memory holding a test profile and a completed assessment.
+    ``macros`` is read by index as (protein_g, fat_g, carbohydrates_g).
+    """
+    profile = {
+        "name": "Test",
+        "country": "Egypt",
+        "allergies": allergies if allergies else [],
+        "food_dislikes": dislikes,
+    }
+    macro_targets = {
+        "protein_g": macros[0],
+        "fat_g": macros[1],
+        "carbohydrates_g": macros[2],
+    }
+    calculations = {
+        "BMI": 22,
+        "BMR": 1600,
+        "TDEE": 2000,
+        "daily_target_calories": target_calories,
+        "macro_targets": macro_targets,
+    }
+    memory = initialize_empty_memory()
+    memory["user_profile"] = profile
+    memory["flags_and_assessments"] = {
+        "assessment_status": "assessment_complete",
+        "calculations": calculations,
+        "flags": [],
+        "recommendations": [],
+        "requires_professional_consultation": False,
+    }
+    return memory
+def _set_plan(memory: Dict[str, Any], plan: Dict[str, Any]) -> None:
+    """Store ``plan`` as ``current_plan`` in the ``plans`` partition, creating it if absent."""
+    plans = memory.setdefault("plans", {})
+    plans["current_plan"] = plan
+# ---- deterministic-only paths ----------------------------------------------
+def test_passes_when_plan_within_tolerances(mock_llm_factory) -> None:
+    """A plan whose item values sum to the targets yields a clean ``pass`` that is recorded in memory."""
+    memory = _build_memory(target_calories=2000, macros=(150, 70, 200))
+    chicken = {
+        "name": "chicken_breast",
+        "calories": 1000,
+        "protein_g": 100,
+        "fat_g": 30,
+        "carbohydrates_g": 100,
+    }
+    rice = {
+        "name": "rice",
+        "calories": 1000,
+        "protein_g": 50,
+        "fat_g": 40,
+        "carbohydrates_g": 100,
+    }
+    _set_plan(memory, {"days": [chicken, rice]})
+    # Pre-supply a "no-issues" LLM verdict so the LLM layer is happy.
+    canned_verdict = ValidationDecision(verdict="pass", issues=[])
+    agent = ValidationAgent(mock_llm_factory([canned_verdict]))
+    raw = agent.handle_task("validate plan", memory)
+    decision = ValidationDecision.model_validate_json(raw)
+    assert decision.verdict == "pass"
+    assert decision.issues == []
+    recorded = memory["flags_and_assessments"]["last_validation"]
+    assert recorded["verdict"] == "pass"
+def test_allergy_violation_rejected_without_llm() -> None:
+    """A plan item naming an allergen is rejected; the stub LLM raises if consulted."""
+    memory = _build_memory(allergies=["peanut"])
+    offending_item = {
+        "name": "peanut butter sandwich",
+        "calories": 400,
+        "protein_g": 15,
+        "fat_g": 20,
+        "carbohydrates_g": 40,
+    }
+    _set_plan(memory, {"days": [offending_item]})
+    agent = ValidationAgent(_NeverCalledLLM())
+    raw = agent.handle_task("validate", memory)
+    decision = ValidationDecision.model_validate_json(raw)
+    assert decision.verdict == "reject"
+    issue_codes = [issue.code for issue in decision.issues]
+    assert "allergy_violation" in issue_codes
+def test_calorie_deviation_triggers_revise(mock_llm_factory) -> None:
+    """A plan far below the calorie target is sent back with a ``calorie_deviation`` issue."""
+    memory = _build_memory(target_calories=2000)
+    undersized_item = {
+        "name": "tiny salad",
+        "calories": 800,  # way under 2000 target -> 60% deviation
+        "protein_g": 30,
+        "fat_g": 20,
+        "carbohydrates_g": 60,
+    }
+    _set_plan(memory, {"days": [undersized_item]})
+    canned_verdict = ValidationDecision(verdict="pass", issues=[])
+    agent = ValidationAgent(mock_llm_factory([canned_verdict]))
+    raw = agent.handle_task("validate", memory)
+    decision = ValidationDecision.model_validate_json(raw)
+    assert decision.verdict == "revise"
+    issue_codes = [issue.code for issue in decision.issues]
+    assert "calorie_deviation" in issue_codes
+def test_disliked_food_only_low_severity_still_passes(mock_llm_factory) -> None:
+    """A disliked food is reported as a low-severity issue while the verdict stays ``pass``."""
+    memory = _build_memory(dislikes="okra", target_calories=2000)
+    disliked_item = {
+        "name": "okra stew",
+        "calories": 2000,
+        "protein_g": 150,
+        "fat_g": 70,
+        "carbohydrates_g": 200,
+    }
+    _set_plan(memory, {"days": [disliked_item]})
+    canned_verdict = ValidationDecision(verdict="pass", issues=[])
+    agent = ValidationAgent(mock_llm_factory([canned_verdict]))
+    raw = agent.handle_task("validate", memory)
+    decision = ValidationDecision.model_validate_json(raw)
+    # Low-severity issues alone don't escalate the verdict.
+    assert decision.verdict == "pass"
+    low_dislikes = [
+        issue
+        for issue in decision.issues
+        if issue.code == "disliked_food" and issue.severity == "low"
+    ]
+    assert low_dislikes
+def test_missing_plan_rejected() -> None:
+    """Validating memory with no ``current_plan`` is rejected; the stub LLM raises if consulted."""
+    # Intentionally no current_plan set
+    memory = _build_memory()
+    agent = ValidationAgent(_NeverCalledLLM())
+    raw = agent.handle_task("validate", memory)
+    decision = ValidationDecision.model_validate_json(raw)
+    assert decision.verdict == "reject"
+    issue_codes = [issue.code for issue in decision.issues]
+    assert "missing_plan" in issue_codes
+def test_requires_human_review_propagates(mock_llm_factory) -> None:
+    """The assessment's professional-consultation flag surfaces as ``requires_human_review``."""
+    memory = _build_memory()
+    memory["flags_and_assessments"]["requires_professional_consultation"] = True
+    on_target_item = {
+        "name": "balanced meal",
+        "calories": 2000,
+        "protein_g": 150,
+        "fat_g": 70,
+        "carbohydrates_g": 200,
+    }
+    _set_plan(memory, {"days": [on_target_item]})
+    canned_verdict = ValidationDecision(verdict="pass", issues=[])
+    agent = ValidationAgent(mock_llm_factory([canned_verdict]))
+    raw = agent.handle_task("validate", memory)
+    decision = ValidationDecision.model_validate_json(raw)
+    # Even on a clean pass, HITL flag must propagate from the assessment.
+    assert decision.requires_human_review is True

tools.py CHANGED Viewed

@@ -1,29 +1,75 @@
 import re
 import subprocess
 import tempfile
-import time
-from time import sleep
-import os
 from datetime import datetime
-from utils import save_to_json, should_debug
 from ddgs import DDGS
-import config
-import json
-from pulp import *
 class QuantitiesFinder:
-    def __init__(self):
         pass
     @staticmethod
-    def _round(v):
         if v is None:
             return 0.0
         return round(float(v), 2)
     @staticmethod
-    def _round_structure(obj):
         if isinstance(obj, dict):
             return {k: QuantitiesFinder._round_structure(v) for k, v in obj.items()}
         if isinstance(obj, list):
@@ -33,303 +79,281 @@ class QuantitiesFinder:
         return obj
     def handle_task(self, task: str) -> str:
-        print(f"\n📊 ENHANCED QUANTITIES FINDER (V3) TOOL STARTED")
-        # --- Define Weights ---
-        W_NUTRITION = 1.0  # Priority 1: Hitting daily totals
-        W_ESTIMATE_DEFAULT = 0.1  # Priority 2: Default "soft" estimate penalty
         try:
             data = json.loads(task)
             foods = data["foods"]
             targets = data["targets"]
-            # --- 1. VALIDATION ---
             required_nutrients = ["calories", "protein", "fat", "carbohydrates"]
             for food in foods:
-                if not all(
-                    key in food
-                    for key in ["name"] + required_nutrients + ["estimated_g"]
-                ):
                     raise ValueError(
                         "Each food must have name, calories, protein, fat, carbohydrates, and estimated_g."
                     )
             if not all(key in targets for key in required_nutrients):
-                raise ValueError(
-                    "Targets must include calories, protein, fat, carbohydrates."
-                )
             prob = LpProblem("Nutrient_Optimization", LpMinimize)
-            # --- 2. VARIABLES (Unchanged from V2) ---
             g = {}
             for food in foods:
-                food_name = food["name"]
-                min_bound = food.get("min_g", 0)
-                max_bound = food.get("max_g")
-                g[food_name] = LpVariable(
-                    f"g_{food_name}", lowBound=min_bound, upBound=max_bound
-                )
-            # --- 3. NUTRITION DEVIATIONS (Unchanged) ---
-            nutrients = required_nutrients
-            totals = {}
-            for nut in nutrients:
-                totals[nut] = lpSum(
-                    (g[food["name"]] / 100) * food[nut] for food in foods
                 )
-            d_pos = {nut: LpVariable(f"d_pos_{nut}", lowBound=0) for nut in nutrients}
-            d_neg = {nut: LpVariable(f"d_neg_{nut}", lowBound=0) for nut in nutrients}
-            for nut in nutrients:
                 prob += totals[nut] - targets[nut] <= d_pos[nut]
                 prob += targets[nut] - totals[nut] <= d_neg[nut]
-            # --- 3.5 MEAL-LEVEL CONSTRAINTS (Unchanged from V2) ---
-            meal_constraints = data.get("meal_constraints", [])
-            if meal_constraints:
-                print("Applying meal-level constraints...")
-                for constraint in meal_constraints:
-                    group_name = constraint.get("group_name")
-                    if not group_name:
-                        continue
-                    group_foods = [
-                        f for f in foods if f.get("meal_group") == group_name
-                    ]
-                    if not group_foods:
-                        print(f"Warning: No foods found for meal_group '{group_name}'")
-                        continue
-                    for nut in nutrients:
-                        max_val = constraint.get(f"max_{nut}")
-                        if max_val is not None:
-                            meal_total = lpSum(
-                                (g[f["name"]] / 100) * f[nut] for f in group_foods
-                            )
-                            prob += (
-                                meal_total <= max_val,
-                                f"Meal_{group_name}_max_{nut}",
-                            )
-                            print(f"  -> Constraint: {group_name} max {nut} <= {max_val}")
-                        min_val = constraint.get(f"min_{nut}")
-                        if min_val is not None:
-                            meal_total = lpSum(
-                                (g[f["name"]] / 100) * f[nut] for f in group_foods
-                            )
-                            prob += (
-                                meal_total >= min_val,
-                                f"Meal_{group_name}_min_{nut}",
-                            )
-                            print(f"  -> Constraint: {group_name} min {nut} >= {min_val}")
-            # --- 4. ESTIMATE DEVIATIONS (ENHANCED) ---
-            # This section now reads a per-item 'estimate_weight'
-            dev_est_pos = {
-                food["name"]: LpVariable(f"dev_est_pos_{food['name']}", lowBound=0)
-                for food in foods
-            }
-            dev_est_neg = {
-                food["name"]: LpVariable(f"dev_est_neg_{food['name']}", lowBound=0)
-                for food in foods
-            }
             for food in foods:
-                food_name = food["name"]
-                estimate = food["estimated_g"]
-                prob += g[food_name] - estimate <= dev_est_pos[food_name]
-                prob += estimate - g[food_name] <= dev_est_neg[food_name]
-            # --- 5. OBJECTIVE FUNCTION (ENHANCED) ---
-            # Goal 1: (Unchanged)
             nutrition_objective = lpSum(
-                (d_pos[nut] + d_neg[nut]) / max(targets[nut], 1) for nut in nutrients
             )
-            # Goal 2: (ENHANCED)
-            # Now uses the per-item 'estimate_weight' if provided,
-            # otherwise, it falls back to the default.
             estimate_objective = lpSum(
-                (
-                    f.get("estimate_weight", W_ESTIMATE_DEFAULT)
-                    * (dev_est_pos[f["name"]] + dev_est_neg[f["name"]])
-                )
                 / max(f["estimated_g"], 1)
                 for f in foods
                 if f["estimated_g"] > 0
             )
-            # Combined objective
             prob += (W_NUTRITION * nutrition_objective) + estimate_objective
-            # --- 6. SOLVE & RETURN (Unchanged) ---
             prob.solve(PULP_CBC_CMD(msg=0))
             if LpStatus[prob.status] != "Optimal":
                 raise ValueError(
                     "No optimal solution found (problem may be infeasible). Check your targets and constraints."
                 )
             quantities = {name: value(g[name]) for name in g}
-            achieved = {nut: value(totals[nut]) for nut in nutrients}
-            result = {"quantities": quantities, "achieved": achieved}
-            result = QuantitiesFinder._round_structure(result)
-            print(f"Solution Status: {LpStatus[prob.status]}")
-            print(f"Quantities (g): {json.dumps(result['quantities'], indent=2)}")
-            print(
-                f"Achieved Nutrition (around): {json.dumps(result['achieved'], indent=2)}"
             )
-            print(
-                f"Target Nutrition: {json.dumps(QuantitiesFinder._round_structure(targets), indent=2)}"
             )
-            print(f"\n📊 QUANTITIES FINDER COMPLETED")
             return json.dumps(result)
-        except Exception as e:
-            error_result = {"error": str(e)}
-            print(f"QuantitiesFinder Error: {str(e)}")
-            return json.dumps(error_result)
 class ComputationTool:
     def __init__(self, llm_instance):
         self.llm = llm_instance
     def handle_task(self, task_description: str) -> str:
-        print(f"\n🤖 COMPUTATION TOOL STARTED")
-        instruction = "You are a Python coding assistant. Generate only the Python code required to perform the given task. Do not forget to print the result. Do not add explanations."
         prompt = f"{instruction}\n\nTask: {task_description}\n\nCode:"
-        if should_debug('tools', 'ComputationTool') and config.DEBUG_LEVEL == 'full':
-            print(f"Computation Tool Prompt:\n{prompt}")
         code_response = self.llm(prompt)[0]
-        if should_debug('tools', 'ComputationTool'):
-            print(f"Computation Tool Response:\n{code_response}")
-        # Try to extract code from markdown blocks first, then use raw response
-        code_match = re.search(r"```python\n(.*?)\n```", code_response, re.DOTALL)
-        if not code_match:
-            code_match = re.search(r"```\n(.*?)\n```", code_response, re.DOTALL)
-        if code_match:
-            code_to_execute = code_match.group(1).strip()
-            # print(f"Extracted code from markdown blocks")
-        else:
-            code_to_execute = code_response.strip()
-            # print(f"Using raw response as code")
         execution_result = execute_python_code_raw(code_to_execute)
-        log_data = {
-            "instruction": instruction,
-            "input": task_description,
-            "output": code_to_execute,
-            "execution_result": execution_result,
-            "timestamp": datetime.now().isoformat()
-        }
-        save_to_json(log_data, f'computation_tool_{datetime.now().isoformat()}.json', subdirectory='ComputationTool')
-        print(f"🤖 COMPUTATION COMPLETED\n{execution_result}")
         return execution_result
 class WebSearchTool:
     def __init__(self, llm_instance):
         self.llm = llm_instance
     def handle_task(self, research_task: str) -> str:
-        print(f"\n🌐 WEB SEARCH TOOL STARTED")
         try:
             task_data = json.loads(research_task)
-            if isinstance(task_data, dict) and 'queries' in task_data and isinstance(task_data['queries'], list):
-                print("JSON query list detected. Converting to single text task.")
-                research_question = " ".join(task_data['queries'])
             else:
-                print("Single question mode detected (non-query JSON). Generating queries.")
                 research_question = research_task
         except (json.JSONDecodeError, TypeError):
-            print("Single question mode detected (plain text). Generating queries.")
             research_question = research_task
-        query_instruction = "Formulate concise search queries for DuckDuckGo based on the given question. Output only the queries, one per line."
         query_prompt = f"{query_instruction}\n\nQuestion: {research_question}\n\nQueries:"
-        if should_debug('tools', 'WebSearchTool') and config.DEBUG_LEVEL == 'full':
-            print(f"Web Search Query Prompt:\n{query_prompt}")
         search_queries_text = self.llm(query_prompt)[0]
-        if should_debug('tools', 'WebSearchTool'):
-            print(f"Web Search Query Response:\n{search_queries_text}")
-        search_queries = [q.strip() for q in search_queries_text.split('\n') if q.strip()] or [research_question]
-        if should_debug('tools', 'WebSearchTool'):
-            print(f"Parsed queries: {search_queries}")
         all_raw_results = []
-        for i, query in enumerate(search_queries):
             raw_results = search_web_raw(query, num_results=10)
-            print(f"Search results:\n{raw_results[:200]}...")
             all_raw_results.append(f"Results for '{query}':\n{raw_results}")
             sleep(1)
         raw_search_output = "\n\n".join(all_raw_results)
-        synthesis_instruction = f"""Synthesize a concise answer to:
-        Question: {research_question}
-        Based on:
-        ---
-        {raw_search_output}
-        ---
-        """
-        if should_debug('tools', 'WebSearchTool') and config.DEBUG_LEVEL == 'full':
-            print(f"Web Search Synthesis Instruction:\n{synthesis_instruction}")
         synthesized_answer = self.llm(synthesis_instruction)[0]
-        if should_debug('tools', 'WebSearchTool'):
-            print(f"Web Search Synthesis Response:\n{synthesized_answer}")
         timestamp = datetime.now().isoformat()
-        save_to_json({
-            "instruction": query_instruction,
-            "input": research_question,
-            "output": search_queries_text,
-            "timestamp": timestamp
-        }, f'web_search_tool_queries_{timestamp}.json', subdirectory='WebSearchTool')
-        save_to_json({
-            "instruction": synthesis_instruction,
-            "input": raw_search_output,
-            "output": synthesized_answer,
-            "timestamp": timestamp
-        }, f'web_search_tool_synthesis_{timestamp}.json', subdirectory='WebSearchTool')
-        print(f"\n🌐 WEB SEARCH TOOL Result:\n{synthesized_answer}\n")
         return synthesized_answer
 def execute_python_code_raw(code_string: str) -> str:
-    if should_debug('tools', 'ComputationTool') and config.DEBUG_LEVEL == 'full':
-            print(f"🐍 Executing Code (raw):\n{code_string}")
     try:
         with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as tmp_script:
             tmp_script.write(code_string)
             script_path = tmp_script.name
-        process = subprocess.run(["python", script_path], capture_output=True, text=True, timeout=30)
-        os.remove(script_path)
         if process.returncode == 0:
             return f"Output:\n{process.stdout if process.stdout else 'Code executed successfully.'}"
-        else:
-            return f"Error:\n{process.stderr}"
-    except Exception as e:
         return f"Execution Exception: {str(e)}"
     finally:
-        if os.path.exists(script_path):
             os.remove(script_path)
 def search_web_raw(query: str, num_results: int = 3) -> str:
-    print(f"🌐 Searching Web (raw) for: {query}")
     max_retries = 3
     for attempt in range(max_retries):
         try:
@@ -337,12 +361,13 @@ def search_web_raw(query: str, num_results: int = 3) -> str:
                 results = list(ddgs.text(query, max_results=num_results, timelimit="m"))
             if not results:
                 return "No search results found."
-            return "\n".join([f"Title: {r.get('title')}\nURL: {r.get('href')}\nSnippet: {r.get('body')}" for r in results])
-        except Exception as e:
             if attempt < max_retries - 1:
                 sleep(1)
                 continue
             return f"Search Exception after {max_retries} attempts: {str(e)}"

+"""Tools layer.
+Phase 1 cleanup notes:
+* Replaced ``print`` with namespaced loggers so user-mode emoji output is
+  filterable and the API/UI in Phase 7 can subscribe to it as events.
+* Reads debug settings (e.g. ``settings.debug_level``) via
+  :func:`config.get_settings` instead of the legacy module-level ``config``
+  globals.
+The :class:`ComputationTool` still shells out to ``subprocess.run(['python', ...])``
+to execute LLM-generated code. **This is a known security issue**; it is to be
+fixed in Phase 4 by switching to either deterministic formula functions or a
+``RestrictedPython`` sandbox.
+"""
+from __future__ import annotations
+import json
+import os
 import re
 import subprocess
 import tempfile
 from datetime import datetime
+from time import sleep
+from typing import Any
 from ddgs import DDGS
+from pulp import (
+    LpMinimize,
+    LpProblem,
+    LpStatus,
+    LpVariable,
+    PULP_CBC_CMD,
+    lpSum,
+    value,
+)
+from config import get_settings
+from logging_setup import get_logger
+from utils import save_to_json, should_debug
+_qf_logger = get_logger("tools.quantities_finder")
+_comp_logger = get_logger("tools.computation")
+_web_logger = get_logger("tools.web_search")
+# ---------------------------------------------------------------------------
+# QuantitiesFinder (PuLP LP solver)
+# ---------------------------------------------------------------------------
 class QuantitiesFinder:
+    """Linear-program solver that turns an LLM-drafted plan into precise grams.
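+    ``handle_task`` takes a JSON string with the schema below. Keys in square
+    brackets are optional, and nutrient values are per 100 g of each food: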
+        {
+            "foods": [{name, calories, protein, fat, carbohydrates,
+                       estimated_g, [min_g, max_g, meal_group, estimate_weight]}, ...],
+            "targets": {calories, protein, fat, carbohydrates},
+            "meal_constraints": [{group_name, [max_<nut>], [min_<nut>]}, ...]   # optional
+        }
+    """
+    def __init__(self) -> None:
         pass
     @staticmethod
+    def _round(v: Any) -> float:
         if v is None:
             return 0.0
         return round(float(v), 2)
     @staticmethod
+    def _round_structure(obj: Any) -> Any:
         if isinstance(obj, dict):
             return {k: QuantitiesFinder._round_structure(v) for k, v in obj.items()}
         if isinstance(obj, list):
         return obj
     def handle_task(self, task: str) -> str:
+        _qf_logger.info("\n📊 ENHANCED QUANTITIES FINDER (V3) TOOL STARTED")
+        # Priority 1: hit daily totals; Priority 2: stay close to per-item estimates.
+        W_NUTRITION = 1.0
+        W_ESTIMATE_DEFAULT = 0.1
         try:
             data = json.loads(task)
             foods = data["foods"]
             targets = data["targets"]
+            # 1. Validation
             required_nutrients = ["calories", "protein", "fat", "carbohydrates"]
             for food in foods:
+                if not all(key in food for key in ["name"] + required_nutrients + ["estimated_g"]):
                     raise ValueError(
                         "Each food must have name, calories, protein, fat, carbohydrates, and estimated_g."
                     )
             if not all(key in targets for key in required_nutrients):
+                raise ValueError("Targets must include calories, protein, fat, carbohydrates.")
             prob = LpProblem("Nutrient_Optimization", LpMinimize)
+            # 2. Variables
             g = {}
             for food in foods:
+                g[food["name"]] = LpVariable(
+                    f"g_{food['name']}",
+                    lowBound=food.get("min_g", 0),
+                    upBound=food.get("max_g"),
                 )
+            # 3. Nutrition deviations
+            totals = {
+                nut: lpSum((g[f["name"]] / 100) * f[nut] for f in foods) for nut in required_nutrients
+            }
+            d_pos = {nut: LpVariable(f"d_pos_{nut}", lowBound=0) for nut in required_nutrients}
+            d_neg = {nut: LpVariable(f"d_neg_{nut}", lowBound=0) for nut in required_nutrients}
+            for nut in required_nutrients:
                 prob += totals[nut] - targets[nut] <= d_pos[nut]
                 prob += targets[nut] - totals[nut] <= d_neg[nut]
+            # 3.5 Optional meal-level constraints
+            for constraint in data.get("meal_constraints", []) or []:
+                group_name = constraint.get("group_name")
+                if not group_name:
+                    continue
+                group_foods = [f for f in foods if f.get("meal_group") == group_name]
+                if not group_foods:
+                    _qf_logger.warning("No foods found for meal_group '%s'", group_name)
+                    continue
+                for nut in required_nutrients:
+                    meal_total = lpSum((g[f["name"]] / 100) * f[nut] for f in group_foods)
+                    if (max_val := constraint.get(f"max_{nut}")) is not None:
+                        prob += (meal_total <= max_val, f"Meal_{group_name}_max_{nut}")
+                        _qf_logger.debug("Constraint: %s max %s <= %s", group_name, nut, max_val)
+                    if (min_val := constraint.get(f"min_{nut}")) is not None:
+                        prob += (meal_total >= min_val, f"Meal_{group_name}_min_{nut}")
+                        _qf_logger.debug("Constraint: %s min %s >= %s", group_name, nut, min_val)
+            # 4. Estimate deviations (per-item soft anchor)
+            dev_est_pos = {f["name"]: LpVariable(f"dev_est_pos_{f['name']}", lowBound=0) for f in foods}
+            dev_est_neg = {f["name"]: LpVariable(f"dev_est_neg_{f['name']}", lowBound=0) for f in foods}
             for food in foods:
+                name = food["name"]
+                est = food["estimated_g"]
+                prob += g[name] - est <= dev_est_pos[name]
+                prob += est - g[name] <= dev_est_neg[name]
+            # 5. Objective
             nutrition_objective = lpSum(
+                (d_pos[nut] + d_neg[nut]) / max(targets[nut], 1) for nut in required_nutrients
             )
             estimate_objective = lpSum(
+                f.get("estimate_weight", W_ESTIMATE_DEFAULT)
+                * (dev_est_pos[f["name"]] + dev_est_neg[f["name"]])
                 / max(f["estimated_g"], 1)
                 for f in foods
                 if f["estimated_g"] > 0
             )
             prob += (W_NUTRITION * nutrition_objective) + estimate_objective
+            # 6. Solve
             prob.solve(PULP_CBC_CMD(msg=0))
             if LpStatus[prob.status] != "Optimal":
                 raise ValueError(
                     "No optimal solution found (problem may be infeasible). Check your targets and constraints."
                 )
             quantities = {name: value(g[name]) for name in g}
+            achieved = {nut: value(totals[nut]) for nut in required_nutrients}
+            result = QuantitiesFinder._round_structure({"quantities": quantities, "achieved": achieved})
+            _qf_logger.info("Solution Status: %s", LpStatus[prob.status])
+            _qf_logger.info("Quantities (g): %s", json.dumps(result["quantities"], indent=2))
+            _qf_logger.info(
+                "Achieved Nutrition (around): %s",
+                json.dumps(result["achieved"], indent=2),
             )
+            _qf_logger.info(
+                "Target Nutrition: %s",
+                json.dumps(QuantitiesFinder._round_structure(targets), indent=2),
             )
+            _qf_logger.info("\n📊 QUANTITIES FINDER COMPLETED")
             return json.dumps(result)
+        except Exception as e:  # noqa: BLE001
+            _qf_logger.error("QuantitiesFinder Error: %s", str(e))
+            return json.dumps({"error": str(e)})
+# ---------------------------------------------------------------------------
+# ComputationTool (LLM-generated Python; ⚠ replace in Phase 4)
+# ---------------------------------------------------------------------------
 class ComputationTool:
     def __init__(self, llm_instance):
         self.llm = llm_instance
     def handle_task(self, task_description: str) -> str:
+        _comp_logger.info("\n🤖 COMPUTATION TOOL STARTED")
+        settings = get_settings()
+        instruction = (
+            "You are a Python coding assistant. Generate only the Python code required "
+            "to perform the given task. Do not forget to print the result. Do not add explanations."
+        )
         prompt = f"{instruction}\n\nTask: {task_description}\n\nCode:"
+        if should_debug("tools", "ComputationTool") and settings.debug_level == "full":
+            _comp_logger.debug("Computation Tool Prompt:\n%s", prompt)
         code_response = self.llm(prompt)[0]
+        if should_debug("tools", "ComputationTool"):
+            _comp_logger.debug("Computation Tool Response:\n%s", code_response)
+        match = re.search(r"```python\n(.*?)\n```", code_response, re.DOTALL)
+        if not match:
+            match = re.search(r"```\n(.*?)\n```", code_response, re.DOTALL)
+        code_to_execute = match.group(1).strip() if match else code_response.strip()
         execution_result = execute_python_code_raw(code_to_execute)
+        save_to_json(
+            {
+                "instruction": instruction,
+                "input": task_description,
+                "output": code_to_execute,
+                "execution_result": execution_result,
+                "timestamp": datetime.now().isoformat(),
+            },
+            f"computation_tool_{datetime.now().isoformat()}.json",
+            subdirectory="ComputationTool",
+        )
+        _comp_logger.info("🤖 COMPUTATION COMPLETED\n%s", execution_result)
         return execution_result
+# ---------------------------------------------------------------------------
+# WebSearchTool (DuckDuckGo + LLM synthesis)
+# ---------------------------------------------------------------------------
 class WebSearchTool:
     def __init__(self, llm_instance):
         self.llm = llm_instance
     def handle_task(self, research_task: str) -> str:
+        _web_logger.info("\n🌐 WEB SEARCH TOOL STARTED")
+        settings = get_settings()
         try:
             task_data = json.loads(research_task)
+            if (
+                isinstance(task_data, dict)
+                and "queries" in task_data
+                and isinstance(task_data["queries"], list)
+            ):
+                _web_logger.info("JSON query list detected. Converting to single text task.")
+                research_question = " ".join(task_data["queries"])
             else:
+                _web_logger.info("Single question mode (non-query JSON). Generating queries.")
                 research_question = research_task
         except (json.JSONDecodeError, TypeError):
+            _web_logger.info("Single question mode (plain text). Generating queries.")
             research_question = research_task
+        query_instruction = (
+            "Formulate concise search queries for DuckDuckGo based on the given question. "
+            "Output only the queries, one per line."
+        )
         query_prompt = f"{query_instruction}\n\nQuestion: {research_question}\n\nQueries:"
+        if should_debug("tools", "WebSearchTool") and settings.debug_level == "full":
+            _web_logger.debug("Web Search Query Prompt:\n%s", query_prompt)
         search_queries_text = self.llm(query_prompt)[0]
+        if should_debug("tools", "WebSearchTool"):
+            _web_logger.debug("Web Search Query Response:\n%s", search_queries_text)
+        search_queries = [q.strip() for q in search_queries_text.split("\n") if q.strip()] or [
+            research_question
+        ]
+        if should_debug("tools", "WebSearchTool"):
+            _web_logger.debug("Parsed queries: %s", search_queries)
         all_raw_results = []
+        for query in search_queries:
             raw_results = search_web_raw(query, num_results=10)
+            _web_logger.info("Search results: %s...", raw_results[:200])
             all_raw_results.append(f"Results for '{query}':\n{raw_results}")
             sleep(1)
         raw_search_output = "\n\n".join(all_raw_results)
+        synthesis_instruction = (
+            f"Synthesize a concise answer to:\n"
+            f"Question: {research_question}\n"
+            f"Based on:\n---\n{raw_search_output}\n---\n"
+        )
+        if should_debug("tools", "WebSearchTool") and settings.debug_level == "full":
+            _web_logger.debug("Web Search Synthesis Instruction:\n%s", synthesis_instruction)
         synthesized_answer = self.llm(synthesis_instruction)[0]
+        if should_debug("tools", "WebSearchTool"):
+            _web_logger.debug("Web Search Synthesis Response:\n%s", synthesized_answer)
         timestamp = datetime.now().isoformat()
+        save_to_json(
+            {
+                "instruction": query_instruction,
+                "input": research_question,
+                "output": search_queries_text,
+                "timestamp": timestamp,
+            },
+            f"web_search_tool_queries_{timestamp}.json",
+            subdirectory="WebSearchTool",
+        )
+        save_to_json(
+            {
+                "instruction": synthesis_instruction,
+                "input": raw_search_output,
+                "output": synthesized_answer,
+                "timestamp": timestamp,
+            },
+            f"web_search_tool_synthesis_{timestamp}.json",
+            subdirectory="WebSearchTool",
+        )
+        _web_logger.info("\n🌐 WEB SEARCH TOOL Result:\n%s\n", synthesized_answer)
         return synthesized_answer
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
 def execute_python_code_raw(code_string: str) -> str:
+    """⚠ Phase 4 will replace this with a sandbox or deterministic functions."""
+    settings = get_settings()
+    if should_debug("tools", "ComputationTool") and settings.debug_level == "full":
+        _comp_logger.debug("🐍 Executing Code (raw):\n%s", code_string)
+    script_path = ""
     try:
         with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as tmp_script:
             tmp_script.write(code_string)
             script_path = tmp_script.name
+        process = subprocess.run(
+            ["python", script_path],
+            capture_output=True,
+            text=True,
+            timeout=30,
+        )
         if process.returncode == 0:
             return f"Output:\n{process.stdout if process.stdout else 'Code executed successfully.'}"
+        return f"Error:\n{process.stderr}"
+    except Exception as e:  # noqa: BLE001
         return f"Execution Exception: {str(e)}"
     finally:
+        if script_path and os.path.exists(script_path):
             os.remove(script_path)
 def search_web_raw(query: str, num_results: int = 3) -> str:
+    _web_logger.info("🌐 Searching Web (raw) for: %s", query)
     max_retries = 3
     for attempt in range(max_retries):
         try:
                 results = list(ddgs.text(query, max_results=num_results, timelimit="m"))
             if not results:
                 return "No search results found."
+            return "\n".join(
+                f"Title: {r.get('title')}\nURL: {r.get('href')}\nSnippet: {r.get('body')}"
+                for r in results
+            )
+        except Exception as e:  # noqa: BLE001
             if attempt < max_retries - 1:
                 sleep(1)
                 continue
             return f"Search Exception after {max_retries} attempts: {str(e)}"
+    return "Search Exception: exhausted retries"

utils.py CHANGED Viewed

@@ -1,139 +1,134 @@
-import os
 import json
-import re
-import config
-from typing import TypedDict, List, Optional, Dict, Any, Tuple
 import pickle
-from langgraph.checkpoint.base import BaseCheckpointSaver
-from google import genai
-from google.genai import types
-from datetime import datetime, date
 import time
-from google.colab import userdata
-from json_repair import repair_json
 from collections import deque
 from threading import Lock
-# LANGSMITH SETUP FOR DEBUGGING
-# os.environ["LANGCHAIN_TRACING_V2"] = "true"
-# os.environ["LANGCHAIN_API_KEY"] = userdata.get("LANGCHAIN_API_KEY")
-# os.environ["LANGCHAIN_PROJECT"] = "Nutrition-MAS-v1"
-def should_debug(scope: str, name: str) -> bool:
-    if not config.DEBUG_MODE:
-        return False
-    if scope not in config.DEBUG_SCOPES:
-        return False
-    scopes_list = config.DEBUG_SCOPES[scope]
-    return 'all' in scopes_list or name in scopes_list
-def save_to_json(data: Dict[str, Any], filename: str, subdirectory: str = None):
-    if config.LOG_DIR is None:
-        # print("Logging is disabled. Skipping save_to_json.")
-        return
-    if subdirectory:
-        log_dir = os.path.join(config.LOG_DIR, subdirectory)
-    else:
-        log_dir = config.LOG_DIR
-    os.makedirs(log_dir, exist_ok=True)
-    filepath = os.path.join(log_dir, filename)
-    with open(filepath, 'w') as f:
-        json.dump(data, f, indent=2)
-class LLM:
-    def __call__(self, prompt: str, **kwargs) -> list[str]:
-        pass
-    def format_prompt(self, messages: List[Dict[str, str]]) -> str:
-        pass
-class GeminiLLM(LLM):
-    def __init__(self, model_name: str, structured_output: bool = False, thinking_budget: int = 300, manager=None, **kwargs):
-        self.model_name = model_name
-        self.structured_output = structured_output
-        self.thinking_budget = thinking_budget
-        self.kwargs = kwargs
-        self.manager = manager
-    def __call__(self, prompt: str, **kwargs) -> list[str]:
-        if self.manager is None:
-            raise ValueError("APIPoolManager must be provided for rate limiting.")
-        merged_kwargs = {**self.kwargs, **kwargs}
-        # Get next available API key
-        api_key = self.manager.get_next_key(self.model_name)
-        try:
-            client = genai.Client(api_key=api_key)
-            contents = [
-                types.Content(
-                    role="user",
-                    parts=[types.Part.from_text(text=prompt)],
-                )
-            ]
-            if self.structured_output:
-                generate_content_config = types.GenerateContentConfig(
-                    thinking_config=types.ThinkingConfig(
-                        thinking_budget=self.thinking_budget,
-                    ),
-                    response_mime_type="application/json",
-                    max_output_tokens=merged_kwargs.get("max_tokens", 5120),
-                    temperature=merged_kwargs.get("temperature", 0.3),
-                )
-            else:
-                generate_content_config = types.GenerateContentConfig(
-                    thinking_config=types.ThinkingConfig(
-                        thinking_budget=self.thinking_budget,
-                    ),
-                    response_mime_type="text/plain",
-                    max_output_tokens=merged_kwargs.get("max_tokens", 5120),
-                    temperature=merged_kwargs.get("temperature", 0.3),
-                )
-            response_text = ""
-            start_time = time.time()
-            for chunk in client.models.generate_content_stream(
-                model=self.model_name,
-                contents=contents,
-                config=generate_content_config,
-            ):
-                if chunk.text:
-                    response_text += chunk.text
-            # Record usage only on successful completion
-            completion_time = time.time()
-            if self.manager.rate_limits is not None:
-                self.manager.record_usage(api_key, self.model_name, completion_time)
-            if config.DEBUG_MODE:
-                print(f"LLM call completed for {self.model_name} using key {api_key[-4:]} in {completion_time - start_time:.2f}s")
-            return [response_text.strip()]
-        except Exception as e:
-            # Do not record usage on error to avoid inflating limits for failed calls
-            # print(f"LLM call failed for {self.model_name} using key {api_key[-4:]}: {str(e)}")
-            return [f"Error: LLM call failed - {str(e)}"]
-    def format_prompt(self, messages: List[Dict[str, str]]) -> str:
-        prompt = ""
-        for msg in messages:
-            if msg["role"] == "system":
-                prompt += f"System: {msg['content']}\n"
-            elif msg["role"] == "user":
-                prompt += f"User: {msg['content']}\n"
-            elif msg["role"] == "assistant":
-                prompt += f"Assistant: {msg['content']}\n"
-        prompt += "Assistant:"
-        return prompt
-# In utils.py, update the GeminiLLM class as follows:
 class GeminiLLM(LLM):
-    def __init__(self, model_name: str, structured_output: bool = False, thinking_budget: int = 300, manager=None, **kwargs):
         self.model_name = model_name
         self.structured_output = structured_output
         self.thinking_budget = thinking_budget
@@ -141,79 +136,167 @@ class GeminiLLM(LLM):
         self.manager = manager
         self.is_gemma = "gemma" in model_name.lower()
         if self.is_gemma:
             self.structured_output = False
             self.thinking_budget = None
-        # No self.client or self.api_key; created dynamically
-    def __call__(self, prompt: str, **kwargs) -> list[str]:
         if self.manager is None:
             raise ValueError("APIPoolManager must be provided for rate limiting.")
         merged_kwargs = {**self.kwargs, **kwargs}
-        # Get next available API key
         api_key = self.manager.get_next_key(self.model_name)
         try:
             client = genai.Client(api_key=api_key)
-            contents = [
-                types.Content(
-                    role="user",
-                    parts=[types.Part.from_text(text=prompt)],
-                )
-            ]
-            if self.is_gemma:
-                generate_content_config = types.GenerateContentConfig(
-                    response_mime_type="text/plain",
-                    max_output_tokens=merged_kwargs.get("max_tokens", 5120),
-                    temperature=merged_kwargs.get("temperature", 0.3),
                 )
             else:
-                if self.structured_output:
-                    generate_content_config = types.GenerateContentConfig(
-                        thinking_config=types.ThinkingConfig(
-                            thinking_budget=self.thinking_budget,
-                        ),
-                        response_mime_type="application/json",
-                        max_output_tokens=merged_kwargs.get("max_tokens", 5120),
-                        temperature=merged_kwargs.get("temperature", 0.3),
-                    )
-                else:
-                    generate_content_config = types.GenerateContentConfig(
-                        thinking_config=types.ThinkingConfig(
-                            thinking_budget=self.thinking_budget,
-                        ),
-                        response_mime_type="text/plain",
-                        max_output_tokens=merged_kwargs.get("max_tokens", 5120),
-                        temperature=merged_kwargs.get("temperature", 0.3),
-                    )
-            response_text = ""
-            start_time = time.time()
-            for chunk in client.models.generate_content_stream(
-                model=self.model_name,
-                contents=contents,
-                config=generate_content_config,
-            ):
-                if chunk.text:
-                    response_text += chunk.text
-            # Record usage only on successful completion
             completion_time = time.time()
             if self.manager.rate_limits is not None:
                 self.manager.record_usage(api_key, self.model_name, completion_time)
-            if config.DEBUG_MODE:
-                print(f"LLM call completed for {self.model_name} using key {api_key[-4:]} in {completion_time - start_time:.2f}s")
-            return [response_text.strip()]
-        except Exception as e:
-            # Do not record usage on error to avoid inflating limits for failed calls
-            # print(f"LLM call failed for {self.model_name} using key {api_key[-4:]}: {str(e)}")
-            return [f"Error: LLM call failed - {str(e)}"]
     def format_prompt(self, messages: List[Dict[str, str]]) -> str:
         prompt = ""
@@ -228,12 +311,19 @@ class GeminiLLM(LLM):
         return prompt
 class APIPoolManager:
-    def __init__(self, api_keys: List[str], rate_limits: Optional[Dict[str, Tuple[int, int]]] = None):
-        """
-        rate_limits: { model_name: (RPM, RPD) }
-        usage: { api_key: { model: { "timestamps": deque(maxlen=rpm), "daily_requests": int, "last_day": date } } }
-        """
         self.api_keys = list(api_keys)
         self.active_keys = list(api_keys)
         self.rate_limits = rate_limits
@@ -244,16 +334,15 @@ class APIPoolManager:
         if rate_limits is not None:
             for key in api_keys:
                 self.usage[key] = {}
-                for model, (rpm, rpd) in rate_limits.items():
                     self.usage[key][model] = {
                         "timestamps": deque(maxlen=max(1, rpm)),
                         "daily_requests": 0,
-                        "last_day": date.today()
                     }
-        else:
-            self.usage = {}
-    def _refresh_daily(self, key: str, model: str):
         usage = self.usage[key][model]
         today = date.today()
         if usage["last_day"] < today:
@@ -266,25 +355,18 @@ class APIPoolManager:
         self._refresh_daily(key, model)
         _, rpd = self.rate_limits[model]
         if self.usage[key][model]["daily_requests"] >= rpd:
-            # drop this api key
             if key in self.active_keys:
                 self.active_keys.remove(key)
             return False
         return True
     def _key_wait_info(self, key: str, model: str) -> Tuple[float, float]:
-        """
-        Return tuple (wait_slot_seconds, wait_spacing_seconds)
-        - wait_slot_seconds: time until an RPM slot frees because deque is full (0 if slot available)
-        - wait_spacing_seconds: time until spacing interval satisfied relative to last timestamp (0 if spacing ok)
-        """
         if self.rate_limits is None:
             return 0.0, 0.0
         rpm, _ = self.rate_limits[model]
         usage = self.usage[key][model]
         now = time.time()
-        # Clean old timestamps > 60s
         timestamps = usage["timestamps"]
         while timestamps and now - timestamps[0] > 60:
             timestamps.popleft()
@@ -295,7 +377,7 @@ class APIPoolManager:
             wait_slot = max(0.0, 60.0 - (now - oldest))
         wait_spacing = 0.0
-        if len(timestamps) > 0:
             time_since_last = now - timestamps[-1]
             min_interval = 60.0 / rpm if rpm > 0 else 0.0
             wait_spacing = max(0.0, min_interval - time_since_last)
@@ -303,9 +385,6 @@ class APIPoolManager:
         return wait_slot, wait_spacing
     def can_use_now(self, key: str, model: str) -> bool:
-        """
-        True if key is active, RPD ok, and both slot and spacing waits are zero.
-        """
         if key not in self.active_keys:
             return False
         if not self._key_is_rpd_ok(key, model):
@@ -313,30 +392,22 @@ class APIPoolManager:
         wait_slot, wait_spacing = self._key_wait_info(key, model)
         return wait_slot <= 0.0 and wait_spacing <= 0.0
     def get_next_key(self, model: str, max_sleep_once: bool = True) -> str:
-        """
-        Choose an API key that can be used immediately for the given model.
-        If none available now, compute minimum sleep needed across all keys, sleep once,
-        then re-evaluate. Loop until a key is found or no keys left.
-        """
         with self.lock:
             if not self.active_keys:
                 raise RuntimeError("No available API keys left due to rate limits.")
-            # Quick pass: try to find an immediately-available key starting from current_index
             n = len(self.active_keys)
             for i in range(n):
                 idx = (self.current_index + i) % n
                 key = self.active_keys[idx]
                 if self.can_use_now(key, model):
-                    # advance pointer fairly to next key for next call
                     self.current_index = (idx + 1) % max(1, len(self.active_keys))
                     return key
-            # If we reach here: no key is available *right now*
-            # compute minimal wait across active keys
-            min_wait = None
-            for key in list(self.active_keys):  # list() to be safe if removal happens
                 if not self._key_is_rpd_ok(key, model):
                     continue
                 wait_slot, wait_spacing = self._key_wait_info(key, model)
@@ -345,139 +416,145 @@ class APIPoolManager:
                     min_wait = wait
             if min_wait is None:
-                # No keys left after RPD filtering
                 raise RuntimeError("No available API keys left (RPD exhausted).")
         if min_wait and min_wait > 0:
-            if max_sleep_once:
-                time.sleep(min_wait)
-            else:
-                time.sleep(min_wait)
         return self.get_next_key(model, max_sleep_once=True)
-    def record_usage(self, key: str, model: str, timestamp: Optional[float] = None):
-        """
-        Call this after you receive the response to record actual usage/time.
-        timestamp default is now (time of completion).
-        """
         if self.rate_limits is None:
             return
         t = timestamp or time.time()
         with self.lock:
             if key not in self.active_keys:
-                # safety - if key was removed in-between, ignore or re-add depending on policy
                 return
             self._refresh_daily(key, model)
             self.usage[key][model]["timestamps"].append(t)
             self.usage[key][model]["daily_requests"] += 1
-            # Remove if daily limit reached
             _, rpd = self.rate_limits[model]
             if self.usage[key][model]["daily_requests"] >= rpd:
                 if key in self.active_keys:
                     self.active_keys.remove(key)
-def create_llm(config: dict, manager) -> LLM:
     if config["type"] == "gemini":
-        structured_output = config.get("structured_output", False)
-        thinking_budget = config.get("thinking_budget", 300)
-        llm = GeminiLLM(
             model_name=config["model_name"],
-            structured_output=structured_output,
-            thinking_budget=thinking_budget,
             manager=manager,
-            **config.get("params", {})
         )
-        return llm
-    else:
-        raise ValueError(f"Unknown LLM type: {config['type']}")
 def extract_and_parse_json(text: str) -> Dict[str, Any]:
-    """Enhanced JSON extraction and parsing with multiple fallback strategies"""
     try:
         return json.loads(text.strip())
-    except:
         pass
-    json_match = re.search(r'```json\s*(.*?)\s*```', text, re.DOTALL)
-    if json_match:
         try:
-            return json.loads(json_match.group(1))
-        except:
             pass
-    json_match = re.search(r'\{.*\}', text, re.DOTALL)
-    if json_match:
         try:
-            repaired_json = repair_json(json_match.group(0))
-            return json.loads(repaired_json)
-        except:
             pass
     try:
-        repaired_json = repair_json(text)
-        return json.loads(repaired_json)
     except Exception as e:
-        print(f"All JSON parsing strategies failed: {str(e)}")
         return {
             "thought": f"JSON parsing failed: {str(e)}",
             "action": "compose_response",
-            "params": {
-                "text": f"I encountered an error processing your request. Original response: {text[:200]}..."
-            },
             "_parse_error": True,
-            "_original_text": text
         }
-def set_nested(d: Dict[str, Any], key: str, value: Any):
-    keys = key.split('.')
     for k in keys[:-1]:
         d = d.setdefault(k, {})
     d[keys[-1]] = value
-def get_memory_summary(memory: Dict[str, Any], partitions: List[str] = None) -> str:
-    """Get a formatted summary of specific memory partitions"""
     if partitions is None:
         partitions = ["user_profile", "medical_history", "flags_and_assessments", "plans"]
-    summary = {}
     for partition in partitions:
-        if partition in memory and memory[partition]:
-            summary[partition] = memory[partition]
-        else:
-            summary[partition] = "empty"
-    return json.dumps(summary, indent=2)
 def update_memory_partition(memory: Dict[str, Any], partition: str, data: Any) -> None:
-    """Safely update a memory partition with new data"""
     if partition not in memory:
         memory[partition] = {}
     if isinstance(data, dict) and isinstance(memory[partition], dict):
         memory[partition].update(data)
     else:
         memory[partition] = data
-    if config.DEBUG_MODE:
-        print(f"Updated memory partition '{partition}' with new data")
 class FileCheckpointSaver(BaseCheckpointSaver):
-    def __init__(self, directory: str):
         self.directory = directory
         os.makedirs(directory, exist_ok=True)
     def put(self, config: Dict[str, Any], checkpoint: Dict[str, Any]) -> None:
-        """Save checkpoint to file"""
         thread_id = config.get("configurable", {}).get("thread_id", "default")
         filepath = os.path.join(self.directory, f"checkpoint_{thread_id}.pkl")
-        with open(filepath, 'wb') as f:
             pickle.dump(checkpoint, f)
     def get(self, config: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-        """Load checkpoint from file"""
         thread_id = config.get("configurable", {}).get("thread_id", "default")
         filepath = os.path.join(self.directory, f"checkpoint_{thread_id}.pkl")
         if os.path.exists(filepath):
-            with open(filepath, 'rb') as f:
                 return pickle.load(f)
         return None

+"""Utilities: LLM wrapper, API-key pool with rate limiting, JSON helpers, and a
+LangGraph file checkpointer.
+Phase 0 cleanup notes:
+* Removed the duplicate ``GeminiLLM`` definition (the second class silently
+  shadowed the first; both remained import-visible).
+* Dropped ``from google.colab import userdata`` so the module imports cleanly
+  outside Colab. API keys come in via ``create_llm_instances`` or env.
+* Replaced ``print(...)`` calls with module loggers under ``nutrition_mas.*``.
+* Routed all reads of ``config.X`` through :func:`config.get_settings`.
+Larger refactors (Pydantic-typed agent IO, native Gemini ``response_schema``,
+async ``acall``) land in Phase 1.
+"""
+from __future__ import annotations
 import json
+import os
 import pickle
+import re
 import time
 from collections import deque
+from dataclasses import dataclass, field
+from datetime import date, datetime
 from threading import Lock
+from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar
+from google import genai
+from google.genai import types
+from json_repair import repair_json
+from langgraph.checkpoint.base import BaseCheckpointSaver
+from pydantic import BaseModel, ValidationError
+from config import get_settings
+from logging_setup import get_logger
+_logger = get_logger("utils")
+_llm_logger = get_logger("llm.gemini")
+_pool_logger = get_logger("utils.api_pool")
+T = TypeVar("T", bound=BaseModel)
+# --- Phase 1 fallback metrics --------------------------------------------------
@dataclass
class ParseMetrics:
    """Process-wide tally of how structured LLM output was obtained.

    Every call to :meth:`record` bumps one of the three totals below and the
    matching per-model counter in ``by_model``.
    """

    # Parses where ``response.parsed`` worked on the first try.
    native_parses: int = 0
    # Parses that had to go through ``extract_and_parse_json``.
    fallback_parses: int = 0
    # Outputs that failed Pydantic validation altogether.
    schema_failures: int = 0
    # Per-model breakdown: ``{model: {"native": n, "fallback": n, "failure": n}}``.
    by_model: Dict[str, Dict[str, int]] = field(default_factory=dict)

    def record(self, model: str, kind: str) -> None:
        """Count one parse outcome of ``kind`` for ``model``.

        ``kind`` is expected to be ``"native"``, ``"fallback"`` or
        ``"failure"``. Any other value leaves the totals alone and only adds a
        counter under that name to the per-model breakdown.
        """
        total_attr = {
            "native": "native_parses",
            "fallback": "fallback_parses",
            "failure": "schema_failures",
        }.get(kind)
        if total_attr is not None:
            setattr(self, total_attr, getattr(self, total_attr) + 1)

        per_model = self.by_model.setdefault(
            model, {"native": 0, "fallback": 0, "failure": 0}
        )
        per_model[kind] = per_model.get(kind, 0) + 1
+_parse_metrics = ParseMetrics()
def get_parse_metrics() -> ParseMetrics:
    """Expose the module-wide :class:`ParseMetrics` singleton.

    The live object is returned, not a copy, so callers should treat it as
    read-only.
    """
    metrics = _parse_metrics
    return metrics
+# --- Debug-scope helper --------------------------------------------------------
def should_debug(scope: str, name: str) -> bool:
    """Tell whether debug output is enabled for ``name`` within ``scope``.

    Debugging is on only when ``settings.debug_mode`` is truthy, ``scope`` is a
    key of ``settings.debug_scopes``, and that scope's list contains either
    ``"all"`` or ``name``.
    """
    settings = get_settings()
    if settings.debug_mode and scope in settings.debug_scopes:
        enabled_names = settings.debug_scopes[scope]
        return "all" in enabled_names or name in enabled_names
    return False
+# --- Filesystem logging --------------------------------------------------------
def save_to_json(data: Dict[str, Any], filename: str, subdirectory: Optional[str] = None) -> None:
    """Write ``data`` as indented JSON below ``settings.log_dir``.

    Does nothing when ``settings.log_dir`` is ``None``. Otherwise the target
    directory (``log_dir`` or ``log_dir/subdirectory``) is created on demand
    and the file is written as UTF-8. Values that ``json`` cannot serialise are
    stringified via ``default=str``.
    """
    settings = get_settings()
    if settings.log_dir is None:
        return

    if subdirectory:
        target_dir = os.path.join(settings.log_dir, subdirectory)
    else:
        target_dir = settings.log_dir
    os.makedirs(target_dir, exist_ok=True)

    # ISO timestamps put ``:`` into filenames, which Windows rejects.
    filepath = os.path.join(target_dir, filename.replace(":", "-"))
    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, default=str)
+# --- LLM abstractions ----------------------------------------------------------
class LLM:
    """Base interface for LLM backends.

    Implementations are callables that take a prompt and return a list holding
    one response string, and they provide ``format_prompt`` to turn a list of
    message dicts into a prompt string. Both methods raise
    ``NotImplementedError`` here.
    """

    def format_prompt(self, messages: List[Dict[str, str]]) -> str:  # pragma: no cover - interface
        """Render ``messages`` as a single prompt string (subclass hook)."""
        raise NotImplementedError

    def __call__(self, prompt: str, **kwargs: Any) -> list[str]:  # pragma: no cover - interface
        """Run the model on ``prompt`` (subclass hook)."""
        raise NotImplementedError
 class GeminiLLM(LLM):
+    """Synchronous Gemini wrapper with API-key pooling.
+    Phase 1 will add an ``acall`` async path and replace the JSON-in-text
+    contract with native ``response_schema`` Pydantic models.
+    """
+    def __init__(
+        self,
+        model_name: str,
+        structured_output: bool = False,
+        thinking_budget: int = 300,
+        manager: Optional["APIPoolManager"] = None,
+        **kwargs: Any,
+    ) -> None:
         self.model_name = model_name
         self.structured_output = structured_output
         self.thinking_budget = thinking_budget
         self.manager = manager
         self.is_gemma = "gemma" in model_name.lower()
         if self.is_gemma:
+            # Gemma family doesn't support thinking_config or JSON response schema.
             self.structured_output = False
             self.thinking_budget = None
+    def __call__(self, prompt: str, **kwargs: Any) -> list[str]:
+        """Untyped streaming call. Returns ``[response_text]``.
+        Backwards-compat path used by code that still parses JSON-from-text.
+        Prefer :meth:`call_typed` when a Pydantic schema is available.
+        """
+        text, _ = self._invoke(prompt, response_schema=None, **kwargs)
+        return [text]
+    def call_typed(
+        self,
+        prompt: str,
+        response_model: Type[T],
+        **kwargs: Any,
+    ) -> Optional[T]:
+        """Call Gemini with constrained-decoded JSON matching ``response_model``.
+        Returns a parsed instance of ``response_model``, or ``None`` if every
+        parse strategy failed (in which case the parse-metrics ``schema_failures``
+        counter is incremented so the eval harness can spot it).
+        """
+        text, parsed = self._invoke(prompt, response_schema=response_model, **kwargs)
+        # Strategy 1: SDK already parsed it for us via response_schema.
+        if isinstance(parsed, response_model):
+            _parse_metrics.record(self.model_name, "native")
+            return parsed
+        # Strategy 2: SDK gave us a dict; try to validate it.
+        if isinstance(parsed, dict):
+            try:
+                instance = response_model.model_validate(parsed)
+                _parse_metrics.record(self.model_name, "native")
+                return instance
+            except ValidationError as e:
+                _llm_logger.debug("response.parsed dict failed Pydantic validation: %s", e)
+        # Strategy 3: regex / json_repair fallback on the raw text.
+        try:
+            data = extract_and_parse_json(text)
+            instance = response_model.model_validate(data)
+            _parse_metrics.record(self.model_name, "fallback")
+            _llm_logger.warning(
+                "Used JSON-repair fallback for %s on model %s — fix the prompt or schema",
+                response_model.__name__,
+                self.model_name,
+            )
+            return instance
+        except (ValidationError, Exception) as e:  # noqa: BLE001
+            _parse_metrics.record(self.model_name, "failure")
+            _llm_logger.error(
+                "Failed to parse %s from %s response: %s",
+                response_model.__name__,
+                self.model_name,
+                str(e),
+            )
+            return None
+    def _invoke(
+        self,
+        prompt: str,
+        response_schema: Optional[Type[BaseModel]] = None,
+        **kwargs: Any,
+    ) -> Tuple[str, Any]:
+        """Single Gemini round-trip. Returns ``(text, response.parsed)``.
+        ``parsed`` is whatever the SDK populated on ``response.parsed`` —
+        usually a Pydantic instance when ``response_schema`` is supplied, ``None``
+        otherwise.
+        """
         if self.manager is None:
             raise ValueError("APIPoolManager must be provided for rate limiting.")
         merged_kwargs = {**self.kwargs, **kwargs}
         api_key = self.manager.get_next_key(self.model_name)
         try:
             client = genai.Client(api_key=api_key)
+            contents = [types.Content(role="user", parts=[types.Part.from_text(text=prompt)])]
+            generate_content_config = self._build_config(merged_kwargs, response_schema=response_schema)
+            start_time = time.time()
+            # Non-streaming when we want response.parsed (the streaming API
+            # doesn't populate it). Streaming for free-text plain calls.
+            if response_schema is not None:
+                response = client.models.generate_content(
+                    model=self.model_name,
+                    contents=contents,
+                    config=generate_content_config,
                 )
+                response_text = response.text or ""
+                parsed = getattr(response, "parsed", None)
             else:
+                response_text = ""
+                parsed = None
+                for chunk in client.models.generate_content_stream(
+                    model=self.model_name,
+                    contents=contents,
+                    config=generate_content_config,
+                ):
+                    if chunk.text:
+                        response_text += chunk.text
             completion_time = time.time()
             if self.manager.rate_limits is not None:
                 self.manager.record_usage(api_key, self.model_name, completion_time)
+            _llm_logger.debug(
+                "LLM call completed for %s using key …%s in %.2fs (schema=%s)",
+                self.model_name,
+                api_key[-4:],
+                completion_time - start_time,
+                response_schema.__name__ if response_schema else "none",
+            )
+            return response_text.strip(), parsed
+        except Exception as e:  # noqa: BLE001 — narrow this in Phase 4 (per-error retries)
+            _llm_logger.warning(
+                "LLM call failed for %s using key …%s: %s",
+                self.model_name,
+                api_key[-4:],
+                str(e),
+            )
+            return f"Error: LLM call failed - {str(e)}", None
+    def _build_config(
+        self,
+        merged_kwargs: Dict[str, Any],
+        response_schema: Optional[Type[BaseModel]] = None,
+    ) -> types.GenerateContentConfig:
+        max_tokens = merged_kwargs.get("max_tokens", 5120)
+        temperature = merged_kwargs.get("temperature", 0.3)
+        if self.is_gemma:
+            # Gemma can't do thinking_config or response_schema.
+            return types.GenerateContentConfig(
+                response_mime_type="text/plain",
+                max_output_tokens=max_tokens,
+                temperature=temperature,
+            )
+        thinking_cfg = types.ThinkingConfig(thinking_budget=self.thinking_budget)
+        if response_schema is not None:
+            return types.GenerateContentConfig(
+                thinking_config=thinking_cfg,
+                response_mime_type="application/json",
+                response_schema=response_schema,
+                max_output_tokens=max_tokens,
+                temperature=temperature,
+            )
+        mime = "application/json" if self.structured_output else "text/plain"
+        return types.GenerateContentConfig(
+            thinking_config=thinking_cfg,
+            response_mime_type=mime,
+            max_output_tokens=max_tokens,
+            temperature=temperature,
+        )
     def format_prompt(self, messages: List[Dict[str, str]]) -> str:
         prompt = ""
         return prompt
+# --- API key pool with optional rate limiting ----------------------------------
 class APIPoolManager:
+    """Round-robin Gemini API keys with per-key RPM/RPD enforcement.
+    ``rate_limits`` is ``{model_name: (rpm, rpd)}``. When ``None``, the pool
+    just rotates keys without any throttling.
+    """
+    def __init__(
+        self,
+        api_keys: List[str],
+        rate_limits: Optional[Dict[str, Tuple[int, int]]] = None,
+    ) -> None:
         self.api_keys = list(api_keys)
         self.active_keys = list(api_keys)
         self.rate_limits = rate_limits
         if rate_limits is not None:
             for key in api_keys:
                 self.usage[key] = {}
+                for model, (rpm, _rpd) in rate_limits.items():
                     self.usage[key][model] = {
                         "timestamps": deque(maxlen=max(1, rpm)),
                         "daily_requests": 0,
+                        "last_day": date.today(),
                     }
+    # --- internal helpers ------------------------------------------------------
+    def _refresh_daily(self, key: str, model: str) -> None:
         usage = self.usage[key][model]
         today = date.today()
         if usage["last_day"] < today:
         self._refresh_daily(key, model)
         _, rpd = self.rate_limits[model]
         if self.usage[key][model]["daily_requests"] >= rpd:
             if key in self.active_keys:
                 self.active_keys.remove(key)
             return False
         return True
     def _key_wait_info(self, key: str, model: str) -> Tuple[float, float]:
         if self.rate_limits is None:
             return 0.0, 0.0
         rpm, _ = self.rate_limits[model]
         usage = self.usage[key][model]
         now = time.time()
         timestamps = usage["timestamps"]
         while timestamps and now - timestamps[0] > 60:
             timestamps.popleft()
             wait_slot = max(0.0, 60.0 - (now - oldest))
         wait_spacing = 0.0
+        if timestamps:
             time_since_last = now - timestamps[-1]
             min_interval = 60.0 / rpm if rpm > 0 else 0.0
             wait_spacing = max(0.0, min_interval - time_since_last)
         return wait_slot, wait_spacing
     def can_use_now(self, key: str, model: str) -> bool:
         if key not in self.active_keys:
             return False
         if not self._key_is_rpd_ok(key, model):
         wait_slot, wait_spacing = self._key_wait_info(key, model)
         return wait_slot <= 0.0 and wait_spacing <= 0.0
+    # --- public API ------------------------------------------------------------
     def get_next_key(self, model: str, max_sleep_once: bool = True) -> str:
         with self.lock:
             if not self.active_keys:
                 raise RuntimeError("No available API keys left due to rate limits.")
             n = len(self.active_keys)
             for i in range(n):
                 idx = (self.current_index + i) % n
                 key = self.active_keys[idx]
                 if self.can_use_now(key, model):
                     self.current_index = (idx + 1) % max(1, len(self.active_keys))
                     return key
+            min_wait: Optional[float] = None
+            for key in list(self.active_keys):
                 if not self._key_is_rpd_ok(key, model):
                     continue
                 wait_slot, wait_spacing = self._key_wait_info(key, model)
                     min_wait = wait
             if min_wait is None:
                 raise RuntimeError("No available API keys left (RPD exhausted).")
         if min_wait and min_wait > 0:
+            _pool_logger.debug("Waiting %.2fs for next API slot", min_wait)
+            time.sleep(min_wait)
         return self.get_next_key(model, max_sleep_once=True)
+    def record_usage(self, key: str, model: str, timestamp: Optional[float] = None) -> None:
         if self.rate_limits is None:
             return
         t = timestamp or time.time()
         with self.lock:
             if key not in self.active_keys:
                 return
             self._refresh_daily(key, model)
             self.usage[key][model]["timestamps"].append(t)
             self.usage[key][model]["daily_requests"] += 1
             _, rpd = self.rate_limits[model]
             if self.usage[key][model]["daily_requests"] >= rpd:
                 if key in self.active_keys:
                     self.active_keys.remove(key)
+# --- Factory -------------------------------------------------------------------
def create_llm(config: dict, manager: APIPoolManager) -> LLM:
    """Build the LLM described by ``config``.

    Only ``config["type"] == "gemini"`` is supported; any other type raises
    ``ValueError``. ``model_name`` is required. ``structured_output`` defaults
    to ``False`` and ``thinking_budget`` to 300. Entries of the optional
    ``params`` dict are passed to ``GeminiLLM`` as extra keyword arguments.
    """
    if config["type"] != "gemini":
        raise ValueError(f"Unknown LLM type: {config['type']}")

    extra_params = config.get("params", {})
    return GeminiLLM(
        model_name=config["model_name"],
        structured_output=config.get("structured_output", False),
        thinking_budget=config.get("thinking_budget", 300),
        manager=manager,
        **extra_params,
    )
+# --- JSON helpers --------------------------------------------------------------
 def extract_and_parse_json(text: str) -> Dict[str, Any]:
+    """Best-effort JSON extraction with a chain of fallbacks.
+    Phase 1 makes this a measured *fallback* path only — agents will use
+    Gemini's native ``response_schema`` for guaranteed structure. Until then,
+    this remains the primary parser.
+    """
     try:
         return json.loads(text.strip())
+    except Exception:
         pass
+    fenced = re.search(r"```json\s*(.*?)\s*```", text, re.DOTALL)
+    if fenced:
         try:
+            return json.loads(fenced.group(1))
+        except Exception:
             pass
+    braces = re.search(r"\{.*\}", text, re.DOTALL)
+    if braces:
         try:
+            return json.loads(repair_json(braces.group(0)))
+        except Exception:
             pass
     try:
+        return json.loads(repair_json(text))
     except Exception as e:
+        _logger.warning("All JSON parsing strategies failed: %s", str(e))
         return {
             "thought": f"JSON parsing failed: {str(e)}",
             "action": "compose_response",
+            "params": {"text": f"I encountered an error processing your request. Original response: {text[:200]}..."},
             "_parse_error": True,
+            "_original_text": text,
         }
def set_nested(d: Dict[str, Any], key: str, value: Any) -> None:
    """Store ``value`` in ``d`` under the dotted path ``key``.

    Every path segment except the last is looked up with ``setdefault``, so
    missing intermediate dicts are created in place. The last segment is
    assigned directly.
    """
    *parents, leaf = key.split(".")
    node = d
    for segment in parents:
        node = node.setdefault(segment, {})
    node[leaf] = value
def get_memory_summary(memory: Dict[str, Any], partitions: Optional[List[str]] = None) -> str:
    """Render the requested memory partitions as an indented JSON string.

    ``partitions`` defaults to ``user_profile``, ``medical_history``,
    ``flags_and_assessments`` and ``plans``. A partition that is missing from
    ``memory`` or holds a falsy value is reported as the string ``"empty"``.
    Values that ``json`` cannot serialise are stringified via ``default=str``.
    """
    if partitions is None:
        partitions = ["user_profile", "medical_history", "flags_and_assessments", "plans"]
    summary: Dict[str, Any] = {name: memory.get(name) or "empty" for name in partitions}
    return json.dumps(summary, indent=2, default=str)
 def update_memory_partition(memory: Dict[str, Any], partition: str, data: Any) -> None:
+    """Merge ``data`` into ``memory[partition]`` (or assign when types disagree)."""
     if partition not in memory:
         memory[partition] = {}
     if isinstance(data, dict) and isinstance(memory[partition], dict):
         memory[partition].update(data)
     else:
         memory[partition] = data
+    _logger.debug("Updated memory partition %r with new data", partition)
+# --- Checkpointer --------------------------------------------------------------
 class FileCheckpointSaver(BaseCheckpointSaver):
+    """Pickle LangGraph checkpoints to ``directory/checkpoint_<thread_id>.pkl``."""
+    def __init__(self, directory: str) -> None:
         self.directory = directory
         os.makedirs(directory, exist_ok=True)
     def put(self, config: Dict[str, Any], checkpoint: Dict[str, Any]) -> None:
         thread_id = config.get("configurable", {}).get("thread_id", "default")
         filepath = os.path.join(self.directory, f"checkpoint_{thread_id}.pkl")
+        with open(filepath, "wb") as f:
             pickle.dump(checkpoint, f)
     def get(self, config: Dict[str, Any]) -> Optional[Dict[str, Any]]:
         thread_id = config.get("configurable", {}).get("thread_id", "default")
         filepath = os.path.join(self.directory, f"checkpoint_{thread_id}.pkl")
         if os.path.exists(filepath):
+            with open(filepath, "rb") as f:
                 return pickle.load(f)
         return None
# Public API of this module. ParseMetrics and get_parse_metrics are listed so
# that star-imports and API tooling can see the parse-metrics accessor too.
__all__ = [
    "APIPoolManager",
    "FileCheckpointSaver",
    "GeminiLLM",
    "LLM",
    "ParseMetrics",
    "create_llm",
    "extract_and_parse_json",
    "get_memory_summary",
    "get_parse_metrics",
    "save_to_json",
    "set_nested",
    "should_debug",
    "update_memory_partition",
]

validation.py ADDED Viewed

	@@ -0,0 +1,327 @@

+"""ValidationAgent — the critic in the generator-critic loop.
+Why this exists
+----------------
+The original README promised a ``ValidationAgent`` but it was never
+implemented; the system shipped plans straight from the Planner to the user.
+Modern multi-agent literature (Anthropic's research-system writeup, every
+LangGraph reflection-pattern tutorial) is unanimous that a separate critic
+node materially raises output quality on tasks with hard constraints.
+Design
+------
+We combine two layers:
+1. **Deterministic checks** (no LLM, no cost, instant):
+   * allergy violations,
+   * calorie deviation > 3 % of daily target,
+   * each macro deviation > 5 % of its target,
+   * disliked foods present (advisory),
+   * professional-consultation flag set without disclaimer.
+2. **LLM-graded checks** (one Gemini round-trip, structured output):
+   * medical-flag respect (e.g., diabetes user should avoid high-GL meals),
+   * citation presence for clinical recommendations,
+   * cultural appropriateness against user's country/cuisine preference.
+Verdict semantics
+-----------------
+* ``pass``   — Coach proceeds to ``compose_response``.
+* ``revise`` — Issues are bundled into the next Planner task; Coach loops back
+               to ``call_agent('PlannerAgent', task=...)``. Capped at 2
+               revisions (enforced by Coach prompt) to avoid infinite loops.
+* ``reject`` — Hard stop with ``severity='high'``. Coach must compose a
+               warning + HITL escalation chip (Phase 4 wires up the chip).
+"""
+from __future__ import annotations
+import json
+from datetime import datetime
+from typing import Any, Dict, List, Optional, Tuple
+from logging_setup import get_logger
+from schemas import ValidationDecision, ValidationIssue
+from utils import save_to_json
+_logger = get_logger("agents.validation")
+# Tolerances are module-level constants so tests/configs can override them.
+CALORIE_TOLERANCE = 0.03  # +/- 3 %
+MACRO_TOLERANCE = 0.05  # +/- 5 %
+_VALIDATION_SYSTEM_PROMPT = """\
+You are the Validation Agent. You receive a meal plan and the medical
+assessment context. Your job is to grade the plan, NOT redesign it.
+Mandatory checks (in addition to the deterministic ones already supplied):
+1. Medical-flag respect: for each flag in flags_and_assessments.flags
+   (e.g., "diabetes_risk", "high_ldl"), confirm the plan does not contain
+   foods that contraindicate the flag. Cite which food fails which flag.
+2. Evidence: clinical recommendations in flags_and_assessments.recommendations
+   must be reflected in the plan or notes. Mention any unaddressed item.
+3. Cultural appropriateness: if user_profile.country is set, confirm at
+   least 60 % of foods are commonly available / culturally familiar there.
+   Otherwise emit a low-severity issue suggesting substitutions.
+Output JSON shape (enforced by schema):
+{
+  "verdict": "pass" | "revise" | "reject",
+  "issues": [
+    {"code": "...", "description": "...",
+     "severity": "low" | "medium" | "high"}
+  ],
+  "notes": "...",
+  "requires_human_review": false
+}
+Rules:
+- Mark requires_human_review=true if any issue has severity="high" OR if
+  flags_and_assessments.requires_professional_consultation is true.
+- Use verdict="reject" only for hard safety violations (allergy made it
+  through, food explicitly contraindicated by medication).
+- Use verdict="revise" for fixable problems (over-budget calories, missing
+  guideline citation, monotonous menu).
+- Use verdict="pass" only when issues is empty OR all issues are severity="low".
+"""
+class ValidationAgent:
+    """Generator-critic gate for the Planner's output."""
+    def __init__(self, llm_instance):
+        self.llm = llm_instance
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+    def handle_task(self, task: str, memory: Dict[str, Any]) -> str:
+        """Validate the current plan in ``memory.plans.current_plan``.
+        Deterministic checks run first; the LLM review only runs when none of
+        them raised a ``high``-severity issue. Issues from both layers are
+        merged, a verdict is derived from the merged list, and the decision is
+        persisted through ``_save_and_return``.
+        Args:
+            task: Task text from the caller; passed through to ``_save_and_return``.
+            memory: Shared memory dict. ``plans`` and ``flags_and_assessments``
+                are read; a partition that is missing or ``None`` is treated
+                as empty.
+        Returns:
+            The ``ValidationDecision`` serialised as a JSON string so the
+            Coach can read structured fields back out (``verdict``, ``issues``).
+        """
+        _logger.info("\n🛡️ VALIDATION AGENT STARTED")
+        # ``or {}`` covers a partition that exists but holds None, matching the
+        # defensive reads in ``_deterministic_checks``; ``.get(key, {})`` alone
+        # would hand back the None and fail on the chained ``.get``.
+        plan = (memory.get("plans") or {}).get("current_plan")
+        if plan is None:
+            _logger.warning("No current_plan in memory; nothing to validate.")
+            verdict = ValidationDecision(
+                verdict="reject",
+                issues=[
+                    ValidationIssue(
+                        code="missing_plan",
+                        description="No current_plan in memory; Planner did not finalise.",
+                        severity="high",
+                    )
+                ],
+                notes="Validator received no plan. Re-run PlannerAgent.",
+                requires_human_review=False,
+            )
+            return self._save_and_return(task, memory, verdict)
+        # 1. Deterministic checks
+        det_issues = self._deterministic_checks(plan, memory)
+        # 2. LLM-graded checks (only if deterministic ones don't already reject)
+        llm_decision: Optional[ValidationDecision] = None
+        hard_block = any(i.severity == "high" for i in det_issues)
+        if not hard_block:
+            llm_decision = self._llm_review(plan, memory, det_issues)
+        # 3. Merge
+        all_issues = list(det_issues)
+        notes_parts: List[str] = []
+        requires_hr = False
+        if llm_decision is not None:
+            all_issues.extend(llm_decision.issues)
+            if llm_decision.notes:
+                notes_parts.append(llm_decision.notes)
+            requires_hr |= llm_decision.requires_human_review
+        # Force human review when the medical assessment said so.
+        flags = memory.get("flags_and_assessments") or {}
+        if flags.get("requires_professional_consultation"):
+            requires_hr = True
+        # The verdict is recomputed from the merged issues; the verdict field
+        # of the LLM's own decision is not consulted.
+        verdict = self._compute_verdict(all_issues)
+        decision = ValidationDecision(
+            verdict=verdict,
+            issues=all_issues,
+            notes=" | ".join(notes_parts) if notes_parts else "",
+            requires_human_review=requires_hr,
+        )
+        _logger.info("🛡️ Validation verdict: %s (%d issue(s))", verdict, len(all_issues))
+        return self._save_and_return(task, memory, decision)
+    # ------------------------------------------------------------------
+    # Deterministic layer
+    # ------------------------------------------------------------------
+    @staticmethod
+    def _deterministic_checks(plan: Dict[str, Any], memory: Dict[str, Any]) -> List[ValidationIssue]:
+        """Run the rule-based checks that need no LLM call.
+        Issues are emitted in this order: allergen name matches (``high``),
+        disliked-food name matches (``low``), total-calorie deviation beyond
+        ``CALORIE_TOLERANCE`` (``medium``) and per-macro deviation beyond
+        ``MACRO_TOLERANCE`` (``medium``). Name matching is a case-insensitive
+        substring test.
+        Args:
+            plan: The plan to inspect; walked by ``_extract_foods_and_totals``.
+            memory: Shared memory dict. Reads ``user_profile.allergies``,
+                ``user_profile.food_dislikes`` and
+                ``flags_and_assessments.calculations``.
+        Returns:
+            The issues found; an empty list when nothing was flagged.
+        """
+        def _terms(raw: Any) -> List[str]:
+            # The profile is not strictly typed: accept a comma-separated
+            # string or an iterable of entries for either field. Iterating a
+            # bare string would yield single characters, and a one-letter
+            # "allergen" matches almost every food name.
+            if not raw:
+                return []
+            if isinstance(raw, str):
+                raw = raw.split(",")
+            cleaned = {str(item).strip().lower() for item in raw if item}
+            cleaned.discard("")
+            # Sorted so the order of emitted issues is stable between runs.
+            return sorted(cleaned)
+        issues: List[ValidationIssue] = []
+        user_profile = memory.get("user_profile", {}) or {}
+        allergies = _terms(user_profile.get("allergies"))
+        dislikes = _terms(user_profile.get("food_dislikes"))
+        flags = memory.get("flags_and_assessments", {}) or {}
+        calc = flags.get("calculations", {}) or {}
+        target_calories = calc.get("daily_target_calories")
+        macro_targets = calc.get("macro_targets") or {}
+        # Walk plan, accumulating foods and totals.
+        foods, totals = ValidationAgent._extract_foods_and_totals(plan)
+        # Lower-case every name once; ``str()`` tolerates a non-string name.
+        names = [str(food.get("name") or "").lower() for food in foods]
+        # 1. Allergy violations (severity high — never let these through)
+        for name in names:
+            for allergen in allergies:
+                if allergen in name:
+                    issues.append(
+                        ValidationIssue(
+                            code="allergy_violation",
+                            description=f"Food '{name}' matches allergen '{allergen}'.",
+                            severity="high",
+                        )
+                    )
+        # 2. Disliked foods (advisory)
+        for name in names:
+            for d in dislikes:
+                if d in name:
+                    issues.append(
+                        ValidationIssue(
+                            code="disliked_food",
+                            description=f"Food '{name}' matches user dislike '{d}'.",
+                            severity="low",
+                        )
+                    )
+        # 3. Calorie tolerance (skipped when either side is missing or zero)
+        if target_calories and totals.get("calories"):
+            dev = abs(totals["calories"] - target_calories) / target_calories
+            if dev > CALORIE_TOLERANCE:
+                issues.append(
+                    ValidationIssue(
+                        code="calorie_deviation",
+                        description=(
+                            f"Plan total {totals['calories']:.0f} kcal vs target "
+                            f"{target_calories} kcal ({dev*100:.1f}% deviation)."
+                        ),
+                        severity="medium",
+                    )
+                )
+        # 4. Macro tolerances; maps the target key to the key used in ``totals``.
+        macro_map = {"protein_g": "protein", "fat_g": "fat", "carbohydrates_g": "carbohydrates"}
+        for tgt_key, plan_key in macro_map.items():
+            target = macro_targets.get(tgt_key)
+            actual = totals.get(plan_key)
+            if target and actual:
+                dev = abs(actual - target) / target
+                if dev > MACRO_TOLERANCE:
+                    issues.append(
+                        ValidationIssue(
+                            code=f"{plan_key}_deviation",
+                            description=(
+                                f"{plan_key} total {actual:.0f}g vs target {target}g "
+                                f"({dev*100:.1f}% deviation)."
+                            ),
+                            severity="medium",
+                        )
+                    )
+        return issues
+    @staticmethod
+    def _extract_foods_and_totals(
+        plan: Dict[str, Any],
+    ) -> Tuple[List[Dict[str, Any]], Dict[str, float]]:
+        """Best-effort: support both 'days' shape and a flat dict-of-foods.
+        We tolerate the LLM's free-form ``drafted_plan`` shape too, since the
+        Planner's final_plan isn't yet strictly typed. A dict counts as a food
+        when it has a ``name`` key and one of ``calories`` / ``calories_g`` /
+        ``kcal``; food dicts are not searched further. Values that are
+        missing, ``None``, empty or not convertible to ``float`` count as 0
+        instead of aborting the walk.
+        Args:
+            plan: The plan structure (nested dicts/lists) to walk.
+        Returns:
+            ``(foods, totals)``: the food dicts in walk order, and summed
+            ``calories`` / ``protein`` / ``fat`` / ``carbohydrates``. When
+            ``plan["daily_totals"]`` is a dict, its usable values replace the
+            summed ones.
+        """
+        foods: List[Dict[str, Any]] = []
+        totals: Dict[str, float] = {"calories": 0.0, "protein": 0.0, "fat": 0.0, "carbohydrates": 0.0}
+        def _num(node: Dict[str, Any], *keys: str) -> float:
+            # First key holding a usable number wins; None, "" and free text
+            # such as "n/a" fall through to the next alias, then to 0.0.
+            for key in keys:
+                value = node.get(key)
+                if value is None or value == "":
+                    continue
+                try:
+                    return float(value)
+                except (TypeError, ValueError):
+                    continue
+            return 0.0
+        def _walk(node: Any) -> None:
+            if isinstance(node, list):
+                for item in node:
+                    _walk(item)
+            elif isinstance(node, dict):
+                if "name" in node and any(k in node for k in ("calories", "calories_g", "kcal")):
+                    foods.append(node)
+                    # ``calories_g`` qualifies a node as a food above, so it
+                    # must also be readable here.
+                    totals["calories"] += _num(node, "calories", "kcal", "calories_g")
+                    totals["protein"] += _num(node, "protein_g", "protein")
+                    totals["fat"] += _num(node, "fat_g", "fat")
+                    totals["carbohydrates"] += _num(node, "carbohydrates_g", "carbohydrates")
+                else:
+                    for v in node.values():
+                        _walk(v)
+        _walk(plan)
+        # Plans may also surface daily_totals directly — prefer those when present.
+        declared = plan.get("daily_totals") if isinstance(plan, dict) else None
+        if isinstance(declared, dict):
+            for k in ("calories", "protein", "fat", "carbohydrates"):
+                if k in declared:
+                    try:
+                        totals[k] = float(declared[k])
+                    except (TypeError, ValueError):
+                        # Unusable declared value: keep the summed figure.
+                        pass
+        return foods, totals
+    # ------------------------------------------------------------------
+    # LLM layer
+    # ------------------------------------------------------------------
+    def _llm_review(
+        self,
+        plan: Dict[str, Any],
+        memory: Dict[str, Any],
+        deterministic_issues: List[ValidationIssue],
+    ) -> Optional[ValidationDecision]:
+        """Ask the LLM for issues beyond the deterministic findings.
+        The prompt is the validation system prompt followed by the plan, the
+        user profile, the ``flags_and_assessments`` partition and a bullet
+        list of the deterministic issues, each section separated by a blank
+        line.
+        Returns:
+            Whatever ``self.llm.call_typed`` produced for
+            ``ValidationDecision``; ``None`` (after a warning) when it
+            returned no decision.
+        """
+        def _as_json(payload: Any) -> str:
+            # ``default=str`` stringifies anything json cannot encode natively.
+            return json.dumps(payload, indent=2, default=str)
+        finding_lines = [
+            f"- [{issue.severity}] {issue.code}: {issue.description}" for issue in deterministic_issues
+        ]
+        findings_text = "\n".join(finding_lines) or "None"
+        sections = [
+            f"{_VALIDATION_SYSTEM_PROMPT}",
+            "--- Plan ---\n" + _as_json(plan),
+            "--- User profile ---\n" + _as_json(memory.get("user_profile", {})),
+            "--- Medical assessment ---\n" + _as_json(memory.get("flags_and_assessments", {})),
+            "--- Deterministic findings already raised ---\n" + findings_text,
+            "Add only NEW issues. Do not repeat the deterministic ones.",
+        ]
+        prompt = "\n\n".join(sections)
+        decision = self.llm.call_typed(prompt, ValidationDecision)
+        if decision is not None:
+            return decision
+        _logger.warning("Validator LLM call returned no parseable decision; skipping LLM layer.")
+        return None
+    # ------------------------------------------------------------------
+    @staticmethod
+    def _compute_verdict(issues: List[ValidationIssue]) -> str:
+        """Map the worst severity among ``issues`` to a verdict.
+        ``"reject"`` when any issue is ``high``, otherwise ``"revise"`` when
+        any is ``medium``, otherwise ``"pass"`` (including an empty list).
+        """
+        seen = {issue.severity for issue in issues}
+        # Ordered worst-first so the most severe match decides.
+        for severity, outcome in (("high", "reject"), ("medium", "revise")):
+            if severity in seen:
+                return outcome
+        return "pass"
+    @staticmethod
+    def _save_and_return(task: str, memory: Dict[str, Any], decision: ValidationDecision) -> str:
+        """Record ``decision`` in memory and in the JSON log, then serialise it.
+        Args:
+            task: Task text stored alongside the decision in the log payload.
+            memory: Shared memory dict; ``flags_and_assessments.last_validation``
+                and ``.last_validation_at`` are overwritten in place.
+            decision: The decision to persist.
+        Returns:
+            ``decision.model_dump_json()``.
+        """
+        # One timestamp for the memory entry, the log payload and the file
+        # name, so the three records can be matched up exactly.
+        stamp = datetime.now().isoformat()
+        # Persist to memory so the Coach can inspect the verdict next turn.
+        # ``setdefault`` alone would keep a partition that is present but None.
+        flags = memory.get("flags_and_assessments")
+        if flags is None:
+            flags = {}
+            memory["flags_and_assessments"] = flags
+        flags["last_validation"] = decision.model_dump()
+        flags["last_validation_at"] = stamp
+        # NOTE(review): the ISO timestamp puts ':' in the file name, which
+        # Windows rejects — confirm whether ``save_to_json`` sanitises it.
+        save_to_json(
+            {
+                "task": task,
+                # Dumped again so the log payload does not alias the memory entry.
+                "decision": decision.model_dump(),
+                "timestamp": stamp,
+            },
+            f"validation_agent_{stamp}.json",
+            subdirectory="ValidationAgent",
+        )
+        return decision.model_dump_json()

workflow.py CHANGED Viewed

@@ -1,121 +1,135 @@
-from langgraph.graph import StateGraph, END
 from langgraph.checkpoint.memory import MemorySaver
 from state import NutritionState
-from utils import extract_and_parse_json, set_nested, FileCheckpointSaver
-from datetime import datetime
-import json
-import config
 def should_continue(state: NutritionState) -> str:
-    if state["current_action"] and state["current_action"]["action"] in ["compose_response", "ask_user"]:
         return "end"
     if state["num_turns"] >= state["max_turns"]:
         return "end"
     return "execute_action"
 def coach_node(state: NutritionState, coach_agent) -> NutritionState:
     return coach_agent.handle_task(state)
-def execute_action_node(state: NutritionState, agents, tools) -> NutritionState:
-    action = state["current_action"]
-    if not action or not action.get("action"):
         return state
-    if config.DEBUG_MODE:
-        print(f"Executing Action: {action['action']}")
-    # Add more specific high-level print for user mode
-    if not config.DEBUG_MODE:
-        if action["action"] == "call_agent":
-            agent_name = action["params"]["agent_name"]
-            task = action["params"]["task"]
-        elif action["action"] == "call_tool":
-            tool_name = action["params"]["tool_name"]
-            task = action["params"]["task"]
-        elif action["action"] == "ask_user":
-            print(f"❓Asking user: {action['params']['prompt']}")
-        elif action["action"] == "write_memory":
-            print(f"Writing to memory partition: {action['params']['partition']}")
-    # Handle JSON parsing errors
     if action.get("_parse_error"):
         error_message = "I encountered an error processing the request. Let me try a different approach."
         state["conversation_history"].append({"role": "assistant", "content": error_message})
         return {**state, "agent_result": error_message}
-    # Initialize previous_actions if not present
-    if 'previous_actions' not in state:
-        state['previous_actions'] = []
     try:
-        if action["action"] == "call_agent":
-            agent_name = action["params"]["agent_name"]
-            task = action["params"]["task"]
             agent_result = agents[agent_name].handle_task(task, state["memory"])
-            # Set success message instead of full result
-            success_message = f"{agent_name} task completed and stored in the memory successfully" if agent_result else f"{agent_name} task failed"
-            action_description = f"Called agent {agent_name} with task: {task}"
-            state['previous_actions'].append(action_description)
             return {**state, "agent_result": success_message}
-        elif action["action"] == "call_tool":
-            tool_name = action["params"]["tool_name"]
-            task = action["params"]["task"]
-            tool_result = tools[tool_name].handle_task(task) if tool_name in tools else f"Unknown tool: {tool_name}"
-            action_description = f"Called tool {tool_name} with task: {task}"
-            state['previous_actions'].append(action_description)
             return {**state, "agent_result": tool_result}
-        elif action["action"] == "write_memory":
-            partition = action["params"]["partition"]
-            data = action["params"]["data"]
             updated_data = {**data, "last_updated": datetime.now().isoformat()}
             set_nested(state["memory"], partition, updated_data)
-            action_description = f"Wrote to memory partition: {partition}"
-            state['previous_actions'].append(action_description)
             return {**state, "agent_result": "Memory updated successfully"}
-        elif action["action"] == "compose_response":
-            response_text = action["params"].get("text") or action["params"].get("response")
             if not response_text:
                 raise ValueError("Missing 'text' or 'response' in params for compose_response")
             state["conversation_history"].append({"role": "assistant", "content": response_text})
-            action_description = "Composed response to user"
-            state['previous_actions'].append(action_description)
             return {**state, "agent_result": response_text}
-        elif action["action"] == "ask_user":
-            prompt_text = action["params"]["prompt"]
             state["conversation_history"].append({"role": "assistant", "content": prompt_text})
-            action_description = f"Asked user: {prompt_text}"
-            state['previous_actions'].append(action_description)
             return {**state, "agent_result": "User prompted for input"}
-        else:
-            action_description = f"Executed {action['action']} with params: {action.get('params', {})}"
-            state['previous_actions'].append(action_description)
-            return {**state, "agent_result": f"Unknown action: {action['action']}"}
-    except Exception as e:
-        error_result = f"Error executing {action['action']}: {str(e)}"
-        action_description = f"Attempted {action['action']} with params: {action.get('params', {})}"
-        state['previous_actions'].append(action_description)
-        return {**state, "agent_result": error_result}
-def setup_workflow(coach_agent, agents, tools, persistence_dir=None):
     workflow = StateGraph(NutritionState)
     workflow.add_node("coach", lambda state: coach_node(state, coach_agent))
     workflow.add_node("execute_action", lambda state: execute_action_node(state, agents, tools))
     workflow.set_entry_point("coach")
     workflow.add_edge("coach", "execute_action")
-    workflow.add_conditional_edges("execute_action", should_continue, {"execute_action": "coach", "end": END})
     if persistence_dir:
         checkpointer = FileCheckpointSaver(persistence_dir)
-        print(f"MAS workflow compiled with file-based persistence at {persistence_dir}.")
     else:
         checkpointer = MemorySaver()
-        print("MAS workflow compiled with in-memory persistence.")
-    app = workflow.compile(checkpointer=checkpointer)
-    return app

+"""LangGraph wiring for the Coach <-> action loop.
+Phase 1 keeps the same two-node graph (``coach`` -> ``execute_action`` -> loop)
+so the public contract is unchanged. Phase 2 will explode this into subgraphs
+with parallel branches and a Validator critic loop.
+"""
+from __future__ import annotations
+from datetime import datetime
+from typing import Any, Dict
 from langgraph.checkpoint.memory import MemorySaver
+from langgraph.graph import END, StateGraph
+from config import get_settings
+from logging_setup import get_logger
 from state import NutritionState
+from utils import FileCheckpointSaver, set_nested
+_logger = get_logger("workflow")
 def should_continue(state: NutritionState) -> str:
+    """Edge predicate: stop on terminal action or when we hit max_turns.
+    Returns ``"end"`` when the current action is ``compose_response`` or
+    ``ask_user``, or when ``num_turns`` has reached ``max_turns``; otherwise
+    returns the routing key ``"execute_action"``.
+    """
+    # A missing or None ``current_action`` is treated as an empty action.
+    current = state.get("current_action") or {}
+    if current.get("action") in {"compose_response", "ask_user"}:
         return "end"
     if state["num_turns"] >= state["max_turns"]:
         return "end"
     return "execute_action"
 def coach_node(state: NutritionState, coach_agent) -> NutritionState:
+    """Graph node: hand ``state`` to ``coach_agent.handle_task`` and return its result."""
     return coach_agent.handle_task(state)
+def execute_action_node(state: NutritionState, agents: Dict[str, Any], tools: Dict[str, Any]) -> NutritionState:
+    """Graph node: carry out ``state["current_action"]``.
+    Dispatches on the action name (``call_agent``, ``call_tool``,
+    ``write_memory``, ``compose_response``, ``ask_user``); any other name is
+    reported as an unknown action. ``conversation_history``,
+    ``previous_actions`` and ``memory`` inside ``state`` are mutated in place.
+    Args:
+        state: Current graph state.
+        agents: Agents by name; looked up with ``params["agent_name"]``.
+        tools: Tools by name; looked up with ``params["tool_name"]``.
+    Returns:
+        ``state`` unchanged when there is no action name; otherwise a shallow
+        copy of ``state`` with ``agent_result`` set. Exceptions raised while
+        executing the action are logged and reported through ``agent_result``
+        instead of propagating.
+    """
+    action = state.get("current_action") or {}
+    action_name = action.get("action")
+    params = action.get("params", {}) or {}
+    if not action_name:
         return state
+    settings = get_settings()
+    if settings.debug_mode:
+        _logger.debug("Executing Action: %s", action_name)
+    else:
+        if action_name == "ask_user":
+            _logger.info("❓ Asking user: %s", params.get("prompt"))
+        elif action_name == "write_memory":
+            _logger.info("Writing to memory partition: %s", params.get("partition"))
+    # An action flagged with ``_parse_error`` is answered with a fixed apology
+    # and returns before ``previous_actions`` is touched.
     if action.get("_parse_error"):
         error_message = "I encountered an error processing the request. Let me try a different approach."
         state["conversation_history"].append({"role": "assistant", "content": error_message})
         return {**state, "agent_result": error_message}
+    if "previous_actions" not in state:
+        state["previous_actions"] = []
     try:
+        if action_name == "call_agent":
+            agent_name = params["agent_name"]
+            task = params["task"]
             agent_result = agents[agent_name].handle_task(task, state["memory"])
+            # Only a success/failure summary is surfaced, chosen by the
+            # truthiness of the agent's return value.
+            success_message = (
+                f"{agent_name} task completed and stored in the memory successfully"
+                if agent_result
+                else f"{agent_name} task failed"
+            )
+            state["previous_actions"].append(f"Called agent {agent_name} with task: {task}")
             return {**state, "agent_result": success_message}
+        if action_name == "call_tool":
+            tool_name = params["tool_name"]
+            task = params["task"]
+            # Unlike agents, an unknown tool name yields a message rather
+            # than a KeyError.
+            tool_result = (
+                tools[tool_name].handle_task(task) if tool_name in tools else f"Unknown tool: {tool_name}"
+            )
+            state["previous_actions"].append(f"Called tool {tool_name} with task: {task}")
             return {**state, "agent_result": tool_result}
+        if action_name == "write_memory":
+            partition = params["partition"]
+            data = params["data"]
+            # The written data is stamped with a ``last_updated`` timestamp.
             updated_data = {**data, "last_updated": datetime.now().isoformat()}
             set_nested(state["memory"], partition, updated_data)
+            state["previous_actions"].append(f"Wrote to memory partition: {partition}")
             return {**state, "agent_result": "Memory updated successfully"}
+        if action_name == "compose_response":
+            # Either ``text`` or ``response`` may carry the reply.
+            response_text = params.get("text") or params.get("response")
             if not response_text:
                 raise ValueError("Missing 'text' or 'response' in params for compose_response")
             state["conversation_history"].append({"role": "assistant", "content": response_text})
+            state["previous_actions"].append("Composed response to user")
             return {**state, "agent_result": response_text}
+        if action_name == "ask_user":
+            prompt_text = params["prompt"]
             state["conversation_history"].append({"role": "assistant", "content": prompt_text})
+            state["previous_actions"].append(f"Asked user: {prompt_text}")
             return {**state, "agent_result": "User prompted for input"}
+        # Fallthrough: the action name matched none of the branches above.
+        state["previous_actions"].append(f"Executed {action_name} with params: {params}")
+        return {**state, "agent_result": f"Unknown action: {action_name}"}
+    except Exception as e:  # noqa: BLE001
+        # Any failure (missing param key, unknown agent name, error inside
+        # the agent or tool) is logged and returned as ``agent_result``.
+        _logger.exception("Error executing %s", action_name)
+        state["previous_actions"].append(f"Attempted {action_name} with params: {params}")
+        return {**state, "agent_result": f"Error executing {action_name}: {str(e)}"}
+def setup_workflow(coach_agent, agents: Dict[str, Any], tools: Dict[str, Any], persistence_dir: str | None = None):
+    """Build and compile the two-node ``coach`` / ``execute_action`` graph.
+    Args:
+        coach_agent: Object whose ``handle_task(state)`` drives the ``coach`` node.
+        agents: Agents by name, passed to ``execute_action_node``.
+        tools: Tools by name, passed to ``execute_action_node``.
+        persistence_dir: When truthy, checkpoints use
+            ``FileCheckpointSaver(persistence_dir)``; otherwise ``MemorySaver()``.
+    Returns:
+        The result of ``workflow.compile(checkpointer=...)``.
+    """
     workflow = StateGraph(NutritionState)
     workflow.add_node("coach", lambda state: coach_node(state, coach_agent))
     workflow.add_node("execute_action", lambda state: execute_action_node(state, agents, tools))
     workflow.set_entry_point("coach")
     workflow.add_edge("coach", "execute_action")
+    # ``should_continue`` returns "execute_action" to keep looping; that key
+    # routes back to the ``coach`` node, while "end" terminates the graph.
+    workflow.add_conditional_edges(
+        "execute_action",
+        should_continue,
+        {"execute_action": "coach", "end": END},
+    )
     if persistence_dir:
         checkpointer = FileCheckpointSaver(persistence_dir)
+        _logger.info("MAS workflow compiled with file-based persistence at %s.", persistence_dir)
     else:
         checkpointer = MemorySaver()
+        _logger.info("MAS workflow compiled with in-memory persistence.")
+    return workflow.compile(checkpointer=checkpointer)