# Spaces: Kevinshh / Preformu
# Running
# File size: 8,589 Bytes
# e335526

"""
Intent Parser - Layer 1: User Intent Understanding

This module implements the first layer of the three-layer architecture.
It uses an LLM to understand the user's natural-language goals and converts
them into structured AnalysisIntent objects.

CRITICAL BOUNDARIES:
- ✅ The LLM is used ONLY for semantic understanding
- ❌ NO numerical calculations
- ❌ NO regulatory decisions
- ❌ NO data sufficiency checks
"""

import json
import re
from typing import Dict, Any, Optional, Tuple, List
from dataclasses import asdict

from schemas.analysis_intent import (
    AnalysisIntent,
    AnalysisType,
    AnalysisPurpose,
    UserPreferences,
    HardConstraints,
    ExtractedDataSummary,
)
from prompts.intent_prompts import IntentPrompts
from layers.model_invoker import ModelInvoker


class IntentParser:
    """
    Layer 1: User Intent Understanding.

    Converts natural-language analysis goals into a structured AnalysisIntent.
    The LLM is used purely for semantic understanding - no calculations or
    decisions are made here. When the LLM call fails, or its reply cannot be
    turned into an intent, a keyword-based default intent is returned instead
    of raising.
    """

    # Defaults applied when the LLM reply omits a field or sets it to null.
    _DEFAULT_TIMEPOINTS = (24, 36)
    _DEFAULT_SPEC_LIMIT = 0.5

    # Content of the first markdown code fence (optionally tagged "json").
    _FENCED_BLOCK_RE = re.compile(r'```(?:json)?\s*([\s\S]*?)\s*```')
    # A number followed by a month marker: "24M", "24 m", "24月", "24个月".
    _TIMEPOINT_RE = re.compile(r'(\d+)\s*(?:个月|[Mm月])')

    def __init__(self, model_invoker: Optional[ModelInvoker] = None):
        """
        Initialize the intent parser.

        Args:
            model_invoker: LLM invoker instance. A new ModelInvoker is
                created when none is provided.
        """
        self.model_invoker = model_invoker or ModelInvoker()

    def parse(
        self,
        user_goal: str,
        data_summary: Optional[ExtractedDataSummary] = None
    ) -> AnalysisIntent:
        """
        Parse the user's natural-language goal into a structured intent.

        Args:
            user_goal: User's raw analysis goal text.
            data_summary: Optional summary of extracted data. An empty
                ExtractedDataSummary is used when omitted.

        Returns:
            AnalysisIntent built from the LLM reply, or a keyword-based
            default intent when the LLM call fails or its reply cannot be
            parsed.
        """
        if data_summary is None:
            data_summary = ExtractedDataSummary()

        # Get prompts
        system_prompt, user_prompt = IntentPrompts.get_intent_prompt(
            user_goal=user_goal,
            n_batches=len(data_summary.batch_ids),
            n_conditions=len(data_summary.conditions),
            cqa_list=", ".join(data_summary.cqa_list) if data_summary.cqa_list else "未知",
            max_timepoint=max(data_summary.available_timepoints) if data_summary.available_timepoints else 0
        )

        # Call LLM
        response = self.model_invoker.invoke(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            temperature=0.1  # Low temperature for consistent parsing
        )

        if not response.success:
            # Fallback to default intent
            return self._create_default_intent(user_goal, data_summary)

        # Parse LLM response
        try:
            parsed = self._extract_json(response.content)
            return self._build_intent(user_goal, parsed, data_summary)
        except Exception as e:
            # Deliberate best-effort boundary: any failure while interpreting
            # the reply degrades to the keyword fallback, and the error text
            # is recorded in the intent's ambiguities.
            return self._create_default_intent(user_goal, data_summary, str(e))

    def _extract_json(self, text: str) -> Dict[str, Any]:
        """
        Extract a JSON object from an LLM response.

        The content of the first markdown code fence is tried first, then the
        whole response. In each candidate the span from the first '{' to the
        last '}' is decoded, so explanatory prose around the object is
        tolerated.

        Args:
            text: Raw LLM response text.

        Returns:
            The decoded JSON object.

        Raises:
            ValueError: If no '{...}' span is found, or the span is not valid
                JSON (json.JSONDecodeError is a ValueError subclass).
        """
        candidates = []
        fenced = self._FENCED_BLOCK_RE.search(text)
        if fenced:
            candidates.append(fenced.group(1))
        candidates.append(text)

        for candidate in candidates:
            start = candidate.find('{')
            end = candidate.rfind('}')
            if start != -1 and end > start:
                return json.loads(candidate[start:end + 1])

        raise ValueError(f"Could not extract valid JSON from: {text[:200]}")

    @staticmethod
    def _value_or_default(parsed: Dict[str, Any], key: str, default: Any) -> Any:
        """Return parsed[key], or default when the key is missing or null."""
        value = parsed.get(key)
        return default if value is None else value

    @staticmethod
    def _coerce_spec_limit(value: Any, default: float = _DEFAULT_SPEC_LIMIT) -> float:
        """
        Return value as a positive, finite specification limit, else default.

        Numeric strings are converted with float(); booleans, non-numeric
        values, NaN, infinities and values <= 0 all yield the default.
        """
        if isinstance(value, bool):
            return default
        if isinstance(value, str):
            try:
                value = float(value.strip())
            except ValueError:
                return default
        # The chained comparison is False for NaN and excludes +inf.
        if isinstance(value, (int, float)) and 0 < value < float("inf"):
            return value
        return default

    @classmethod
    def _extract_timepoints(cls, text: str) -> List[int]:
        """
        Extract month timepoints mentioned in text.

        Returns the numbers that are followed by a month marker, in order of
        first appearance and without duplicates, or the default timepoints
        when none are mentioned.
        """
        found = [int(number) for number in cls._TIMEPOINT_RE.findall(text)]
        if not found:
            return list(cls._DEFAULT_TIMEPOINTS)
        # dict.fromkeys drops duplicates while preserving order.
        return list(dict.fromkeys(found))

    def _build_intent(
        self,
        raw_goal: str,
        parsed: Dict[str, Any],
        data_summary: ExtractedDataSummary
    ) -> AnalysisIntent:
        """
        Build an AnalysisIntent from the parsed LLM JSON.

        Missing or null fields fall back to defaults, unknown enum values
        fall back to trend assessment / R&D reference, and an unusable
        specification limit falls back to the default limit.

        Args:
            raw_goal: The user's original goal text.
            parsed: JSON object decoded from the LLM reply.
            data_summary: Summary of the extracted data, stored on the intent.

        Returns:
            The populated AnalysisIntent.
        """
        pick = self._value_or_default

        # Map analysis type
        try:
            analysis_type = AnalysisType(pick(parsed, "analysis_type", "trend_assessment"))
        except ValueError:
            analysis_type = AnalysisType.TREND_ASSESSMENT

        # Map purpose
        try:
            purpose = AnalysisPurpose(pick(parsed, "purpose", "rd_reference"))
        except ValueError:
            purpose = AnalysisPurpose.RD_REFERENCE

        # Build preferences
        preferences = UserPreferences(
            allow_extrapolation=pick(parsed, "allow_extrapolation", True),
            target_timepoints=pick(parsed, "target_timepoints", list(self._DEFAULT_TIMEPOINTS)),
            required_confidence=pick(parsed, "required_confidence", 0.95)
        )

        # Build constraints
        constraints = HardConstraints(
            purpose=purpose,
            primary_cqa=pick(parsed, "primary_cqa", "总杂质"),
            specification_limit=self._coerce_spec_limit(
                parsed.get("specification_limit"), self._DEFAULT_SPEC_LIMIT
            )
        )

        # Build intent
        return AnalysisIntent(
            raw_goal=raw_goal,
            analysis_type=analysis_type,
            preferences=preferences,
            constraints=constraints,
            data_summary=data_summary,
            parse_confidence=pick(parsed, "parse_confidence", 0.5),
            ambiguities=pick(parsed, "ambiguities", [])
        )

    def _create_default_intent(
        self,
        raw_goal: str,
        data_summary: ExtractedDataSummary,
        error_msg: Optional[str] = None
    ) -> AnalysisIntent:
        """
        Create a default intent when the LLM call or parsing fails.

        The analysis type is guessed from keywords in the goal and target
        timepoints are taken from month mentions in the goal text.

        Args:
            raw_goal: The user's original goal text.
            data_summary: Summary of the extracted data, stored on the intent.
            error_msg: Optional failure description; when given it is
                recorded as an ambiguity on the intent.

        Returns:
            An AnalysisIntent with parse_confidence 0.3 and default
            HardConstraints.
        """
        # Try simple keyword matching for analysis type
        goal_lower = raw_goal.lower()

        if any(kw in goal_lower for kw in ["预测", "货架期", "shelf"]):
            analysis_type = AnalysisType.SHELF_LIFE_PREDICTION
        elif any(kw in goal_lower for kw in ["对比", "筛选", "最优", "比较"]):
            analysis_type = AnalysisType.BATCH_COMPARISON
        elif any(kw in goal_lower for kw in ["风险", "超标", "合规"]):
            analysis_type = AnalysisType.RISK_EVALUATION
        else:
            analysis_type = AnalysisType.TREND_ASSESSMENT

        ambiguities = []
        if error_msg:
            ambiguities.append(f"LLM解析失败: {error_msg}")

        return AnalysisIntent(
            raw_goal=raw_goal,
            analysis_type=analysis_type,
            preferences=UserPreferences(
                target_timepoints=self._extract_timepoints(raw_goal)
            ),
            constraints=HardConstraints(),
            data_summary=data_summary,
            parse_confidence=0.3,  # Low confidence for fallback
            ambiguities=ambiguities
        )

    def parse_with_confirmation(
        self,
        user_goal: str,
        data_summary: Optional[ExtractedDataSummary] = None
    ) -> Tuple[AnalysisIntent, bool, str]:
        """
        Parse the intent and determine whether user confirmation is needed.

        Args:
            user_goal: User's raw analysis goal text.
            data_summary: Optional summary of extracted data.

        Returns:
            Tuple of (intent, needs_confirmation, confirmation_prompt). The
            prompt is empty when intent.is_high_confidence() is true;
            otherwise it lists the parsed fields and any ambiguities.
        """
        intent = self.parse(user_goal, data_summary)

        if intent.is_high_confidence():
            return intent, False, ""

        # Build confirmation prompt
        confirmation_parts = [
            "请确认以下解析结果：",
            f"- 分析类型: {intent.analysis_type.value}",
            f"- 目标时间点: {intent.preferences.target_timepoints}",
            f"- 主要CQA: {intent.constraints.primary_cqa}",
            f"- 分析目的: {intent.constraints.purpose.value}",
        ]

        if intent.ambiguities:
            confirmation_parts.append("\n⚠️ 存在以下不确定项：")
            confirmation_parts.extend(f"  - {amb}" for amb in intent.ambiguities)

        confirmation_prompt = "\n".join(confirmation_parts)

        return intent, True, confirmation_prompt