Spaces:
Runtime error
Runtime error
| from typing import Dict, Any, Optional | |
| from src.utils.llm_client import GroqClient | |
| from src.data.schema import get_schema_info | |
| from src.utils.validator import QueryValidator | |
| import json | |
| class QueryPlanner: | |
| """ | |
| Generates execution plans (SQL/Python code) from natural language queries. | |
| Attributes: | |
| llm: GroqClient instance for LLM interactions | |
| schema: Dataset schema metadata | |
| validator: QueryValidator for code safety checks | |
| """ | |
| def __init__(self) -> None: | |
| self.llm: GroqClient = GroqClient() | |
| self.schema: Dict[str, Any] = get_schema_info() | |
| self.validator: QueryValidator = QueryValidator() | |
| def generate_plan(self, user_query: str) -> Dict[str, Optional[str]]: | |
| """ | |
| Generates a python code snippet to answer the user query. | |
| Args: | |
| user_query: Natural language question about health data | |
| Returns: | |
| Dictionary containing: | |
| - query_code: Generated Python/Pandas code | |
| - explanation: Human-readable plan description | |
| - error: Error message if generation failed, None otherwise | |
| """ | |
| system_prompt = f""" | |
| You are an expert Python Data Analyst. | |
| Your task is to generate Python/Pandas code based on the provided dataset schema. | |
| DATASET SCHEMAS: | |
| {json.dumps(self.schema, indent=2)} | |
| HEALTH METRIC INTERPRETATION GUIDE: | |
| - "Perfect/Normal Hemoglobin": Male(13.8-17.2 g/dL), Female(12.1-15.1 g/dL). Use 12-17 as a general filter. | |
| - "Abnormal Blood Pressure": Use 'Blood_Pressure_Abnormality' == 1. | |
| - "High Stress": Use 'Level_of_Stress' == 3. | |
| - "Obese": BMI >= 30. | |
| - "Smoker": 'Smoking' == 1. | |
| AVAILABLE DATAFRAMES: | |
| - df1 (Health Metrics) | |
| - df2 (Physical Activity) | |
| RULES: | |
| 1. Use ONLY pandas/numpy operations. | |
| 2. If you need data from both, join them on 'Patient_Number' (df1.merge(df2, on='Patient_Number')). | |
| 3. Variable 'result' must contain the final answer. | |
| 4. Return ONLY the python code inside markdown blocks. | |
| EXAMPLE: "Find female smokers over 90" | |
| ```python | |
| result = df1[(df1['Sex'] == 1) & (df1['Smoking'] == 1) & (df1['Age'] > 90)] | |
| ``` | |
| """ | |
| llm_response = self.llm.generate(user_query, system_message=system_prompt) | |
| # Check for LLM generation failure | |
| if llm_response.startswith("ERROR_LLM_GEN_FAILED"): | |
| return { | |
| "query_code": "", | |
| "explanation": "LLM Generation failed after all fallbacks.", | |
| "error": llm_response | |
| } | |
| # Parse logic | |
| code = self.validator.clean_code(llm_response) | |
| # Validate logic | |
| is_safe, message = self.validator.validate(code) | |
| if not is_safe: | |
| return { | |
| "query_code": "", | |
| "explanation": "Query generation failed safety/syntax checks.", | |
| "error": message | |
| } | |
| return { | |
| "query_code": code, | |
| "explanation": "Generated pandas query based on schema and health thresholds.", | |
| "error": None | |
| } | |