""" Intent Parser - Layer 1: User Intent Understanding This module implements the first layer of the three-layer architecture. It uses LLM to understand user's natural language goals and converts them to structured AnalysisIntent objects. CRITICAL BOUNDARIES: - ✅ LLM is used ONLY for semantic understanding - ❌ NO numerical calculations - ❌ NO regulatory decisions - ❌ NO data sufficiency checks """ import json import re from typing import Dict, Any, Optional, Tuple, List from dataclasses import asdict from schemas.analysis_intent import ( AnalysisIntent, AnalysisType, AnalysisPurpose, UserPreferences, HardConstraints, ExtractedDataSummary, ) from prompts.intent_prompts import IntentPrompts from layers.model_invoker import ModelInvoker class IntentParser: """ Layer 1: User Intent Understanding. Converts natural language analysis goals into structured AnalysisIntent. Uses LLM purely for semantic understanding - no calculations or decisions. """ def __init__(self, model_invoker: Optional[ModelInvoker] = None): """ Initialize the intent parser. Args: model_invoker: LLM invoker instance. Creates new one if not provided. """ self.model_invoker = model_invoker or ModelInvoker() def parse( self, user_goal: str, data_summary: Optional[ExtractedDataSummary] = None ) -> AnalysisIntent: """ Parse user's natural language goal into structured intent. Args: user_goal: User's raw analysis goal text data_summary: Optional summary of extracted data Returns: AnalysisIntent structure for Layer 2 """ if data_summary is None: data_summary = ExtractedDataSummary() # Get prompts system_prompt, user_prompt = IntentPrompts.get_intent_prompt( user_goal=user_goal, n_batches=len(data_summary.batch_ids), n_conditions=len(data_summary.conditions), cqa_list=", ".join(data_summary.cqa_list) if data_summary.cqa_list else "未知", max_timepoint=max(data_summary.available_timepoints) if data_summary.available_timepoints else 0 ) # Call LLM response = self.model_invoker.invoke( system_prompt=system_prompt, user_prompt=user_prompt, temperature=0.1 # Low temperature for consistent parsing ) if not response.success: # Fallback to default intent return self._create_default_intent(user_goal, data_summary) # Parse LLM response try: parsed = self._extract_json(response.content) return self._build_intent(user_goal, parsed, data_summary) except Exception as e: # Fallback on parse error return self._create_default_intent(user_goal, data_summary, str(e)) def _extract_json(self, text: str) -> Dict[str, Any]: """Extract JSON from LLM response (handles markdown code blocks).""" # Try to find JSON in markdown code block json_match = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', text) if json_match: json_str = json_match.group(1) else: # Try to find raw JSON json_str = text.strip() # Clean up common issues json_str = json_str.strip() if json_str.startswith('{') and json_str.endswith('}'): return json.loads(json_str) raise ValueError(f"Could not extract valid JSON from: {text[:200]}") def _build_intent( self, raw_goal: str, parsed: Dict[str, Any], data_summary: ExtractedDataSummary ) -> AnalysisIntent: """Build AnalysisIntent from parsed JSON.""" # Map analysis type analysis_type_str = parsed.get("analysis_type", "trend_assessment") try: analysis_type = AnalysisType(analysis_type_str) except ValueError: analysis_type = AnalysisType.TREND_ASSESSMENT # Map purpose purpose_str = parsed.get("purpose", "rd_reference") try: purpose = AnalysisPurpose(purpose_str) except ValueError: purpose = AnalysisPurpose.RD_REFERENCE # Build preferences preferences = UserPreferences( allow_extrapolation=parsed.get("allow_extrapolation", True), target_timepoints=parsed.get("target_timepoints", [24, 36]), required_confidence=parsed.get("required_confidence", 0.95) ) # Build constraints spec_limit = parsed.get("specification_limit") if spec_limit is None or spec_limit <= 0: spec_limit = 0.5 # Default constraints = HardConstraints( purpose=purpose, primary_cqa=parsed.get("primary_cqa", "总杂质"), specification_limit=spec_limit ) # Build intent intent = AnalysisIntent( raw_goal=raw_goal, analysis_type=analysis_type, preferences=preferences, constraints=constraints, data_summary=data_summary, parse_confidence=parsed.get("parse_confidence", 0.5), ambiguities=parsed.get("ambiguities", []) ) return intent def _create_default_intent( self, raw_goal: str, data_summary: ExtractedDataSummary, error_msg: str = None ) -> AnalysisIntent: """Create a default intent when parsing fails.""" # Try simple keyword matching for analysis type goal_lower = raw_goal.lower() if any(kw in goal_lower for kw in ["预测", "货架期", "shelf"]): analysis_type = AnalysisType.SHELF_LIFE_PREDICTION elif any(kw in goal_lower for kw in ["对比", "筛选", "最优", "比较"]): analysis_type = AnalysisType.BATCH_COMPARISON elif any(kw in goal_lower for kw in ["风险", "超标", "合规"]): analysis_type = AnalysisType.RISK_EVALUATION else: analysis_type = AnalysisType.TREND_ASSESSMENT # Extract timepoints if mentioned timepoints = [24, 36] # Default tp_match = re.findall(r'(\d+)\s*[Mm月]', raw_goal) if tp_match: timepoints = [int(t) for t in tp_match] ambiguities = [] if error_msg: ambiguities.append(f"LLM解析失败: {error_msg}") return AnalysisIntent( raw_goal=raw_goal, analysis_type=analysis_type, preferences=UserPreferences( target_timepoints=timepoints ), constraints=HardConstraints(), data_summary=data_summary, parse_confidence=0.3, # Low confidence for fallback ambiguities=ambiguities ) def parse_with_confirmation( self, user_goal: str, data_summary: Optional[ExtractedDataSummary] = None ) -> Tuple[AnalysisIntent, bool, str]: """ Parse intent and determine if user confirmation is needed. Returns: Tuple of (intent, needs_confirmation, confirmation_prompt) """ intent = self.parse(user_goal, data_summary) if intent.is_high_confidence(): return intent, False, "" # Build confirmation prompt confirmation_parts = [ "请确认以下解析结果:", f"- 分析类型: {intent.analysis_type.value}", f"- 目标时间点: {intent.preferences.target_timepoints}", f"- 主要CQA: {intent.constraints.primary_cqa}", f"- 分析目的: {intent.constraints.purpose.value}", ] if intent.ambiguities: confirmation_parts.append("\n⚠️ 存在以下不确定项:") for amb in intent.ambiguities: confirmation_parts.append(f" - {amb}") confirmation_prompt = "\n".join(confirmation_parts) return intent, True, confirmation_prompt