File size: 7,936 Bytes
401b16c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import openai
import os
import dirtyjson as json
from typing import Dict, Any, Optional, Tuple
from enum import Enum
from pydantic import BaseModel

class IntentType(str, Enum):
    """Closed set of intents a user message can be classified into.

    Members also subclass ``str``, so each one compares equal to its
    lowercase wire value (the value the classifier prompt asks the model to
    return in the "intent" field).
    """

    # Record a purchase or a sale.
    TRANSACTION = "transaction"
    # Retrieve or aggregate structured data from the database tables.
    QUERY = "query"
    # Look up contextual / unstructured information (tasks, meetings, notes).
    SEMANTIC_SEARCH = "semantic_search"
    # Store a note, reminder or other general business information.
    GENERAL_INFO = "general_info"

class IntentResult(BaseModel):
    """Outcome of classifying a single user message."""

    # Category assigned to the message.
    intent: IntentType
    # Certainty of the classification. The classifier prompt requests a value
    # in 0.0-1.0; this class itself does not enforce that range.
    confidence: float
    # Short explanation of why the intent was chosen (or why a fallback ran).
    reasoning: str
    # Optional free-text list of entities detected in the message; None when
    # nothing was reported.
    entities_hint: Optional[str] = None

class IntentClassifier:
    def __init__(self, api_key: Optional[str] = None):
        """Create the OpenAI client used for intent classification.

        Args:
            api_key: Explicit API key. When omitted (or otherwise falsy) the
                value of the OPENAI_API_KEY environment variable is used.
        """
        resolved_key = api_key if api_key else os.getenv('OPENAI_API_KEY')
        self.client = openai.OpenAI(api_key=resolved_key)
    
    def classify_intent(self, user_message: str) -> IntentResult:
        """
        Classify user intent using OpenAI API
        Returns: IntentResult with intent type, confidence, and reasoning
        """
        
        system_prompt = """You are an expert intent classifier for a business chatbot that handles sales, purchases, and general information storage.

Given a user message, classify it into one of these intents:

1. **QUERY**: User wants to retrieve or analyze STRUCTURED data from SQL database tables
   - Examples: "How many USB drives did we buy?" (counts from purchases table)
   - Examples: "What's the total value of all sales?" (sum from sales table)
   - Examples: "Show me recent transactions" (list from transactions table)
   - Examples: "List all customers" (data from customers table)
   - Key indicators: Asking for counts, totals, lists, recent data from business transactions
   - Must be answerable from structured database tables (purchases, sales, customers, suppliers, products)

2. **SEMANTIC_SEARCH**: User wants to find contextual information, tasks, or unstructured data
   - Examples: "What does Mark need to do?" (searching for task/context info)
   - Examples: "Find events related to supplier meetings" (contextual search)
   - Examples: "When do I have the meeting with George?" (calendar/scheduling info)
   - Examples: "Show me similar purchases to this one" (similarity search)
   - Examples: "What did we discuss in the last meeting?" (meeting notes/context)
   - Key indicators: Questions about tasks, meetings, discussions, or contextual information
   - Information that would NOT be in structured database tables

3. **TRANSACTION**: User wants to record a business transaction (purchase or sale)
   - Examples: "Add a purchase of 20 USB drives from TechMart at €5 each"
   - Examples: "Sold 10 laptops to John Smith at €800 each"
   - Contains: product names, quantities, suppliers/customers, prices
   - Action: Recording new business data

4. **GENERAL_INFO**: User wants to store general business information or notes
   - It cannot be a question.
   - Examples: "Meeting with new supplier scheduled for next week"
   - Examples: "Remember to check inventory levels before next order"
   - Examples: "Mark needs to call the supplier tomorrow"
   - Contains: notes, reminders, general business information, task assignments

Return your response in this exact JSON format:
{
    "intent": "transaction|query|semantic_search|general_info",
    "confidence": 0.0-1.0,
    "reasoning": "Brief explanation of why you chose this intent",
    "entities_hint": "Optional: Key entities you detected (for transaction intent)"
}

Be precise and consider context carefully. If unsure, choose the most likely intent and indicate lower confidence."""

        user_prompt = f'Classify the intent of this user message: "{user_message}"'

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.1,
                max_tokens=300
            )
            
            response_text = response.choices[0].message.content.strip()
            
            # Clean JSON response more carefully
            if response_text.startswith("```json"):
                response_text = response_text[7:]  
            if response_text.startswith("```"):
                response_text = response_text[3:]   
            if response_text.endswith("```"):
                response_text = response_text[:-3]  
            
            response_text = response_text.strip()
            
            # Parse JSON response
            try:
                result_dict = json.loads(response_text)
                
                # Validate intent value
                intent_value = result_dict.get("intent", "").lower()
                if intent_value not in [e.value for e in IntentType]:
                    print(f"Invalid intent value: {intent_value}")
                    return self._fallback_classification(user_message, f"Invalid intent: {intent_value}")
                
                return IntentResult(
                    intent=IntentType(intent_value),
                    confidence=float(result_dict.get("confidence", 0.5)),
                    reasoning=result_dict.get("reasoning", "No reasoning provided"),
                    entities_hint=result_dict.get("entities_hint")
                )
            except Exception as e:
                # Fallback if JSON parsing fails
                print(f"JSON parsing error: {e}")
                print(f"Raw response: {response_text}")
                return self._fallback_classification(user_message, f"JSON parsing failed: {str(e)}")
            
        except Exception as e:
            print(f"Error in intent classification: {e}")
            return self._fallback_classification(user_message, str(e))
    
    def _fallback_classification(self, user_message: str, error_info: str) -> IntentResult:
        """Classify a message with keyword heuristics when the OpenAI path fails.

        Categories are tried in a fixed order (transaction, query, semantic
        search) and the first one with a matching keyword wins; a message that
        matches none of them is treated as general information.

        Keywords are matched at the start of a word, so inflected forms such
        as "purchased" or "meetings" still count, while the short prepositions
        "from" and "to" must appear as whole words. Plain substring matching
        let "to" fire inside words like "customers" or "total", which pushed
        almost every message into the transaction category.

        Args:
            user_message: The message that could not be classified by the API.
            error_info: Description of the failure; its first 100 characters
                are embedded in the returned reasoning.

        Returns:
            IntentResult with confidence 0.6 for a keyword match or 0.5 for
            the general-information default; entities_hint is always None.
        """
        message_lower = user_message.lower()

        # Replace every non-alphanumeric character with a space, collapse the
        # whitespace and pad both ends, so " kw" means "a word starts with kw"
        # and " kw " means "kw is a whole word".
        words = "".join(ch if ch.isalnum() else " " for ch in message_lower).split()
        padded = " " + " ".join(words) + " "

        def has_word_start(keywords) -> bool:
            return any(f" {keyword}" in padded for keyword in keywords)

        def has_whole_word(keywords) -> bool:
            return any(f" {keyword} " in padded for keyword in keywords)

        # Simple keyword-based fallback
        transaction_stems = ("purchase", "buy", "sold", "sale")
        transaction_words = ("from", "to")
        # Currency symbols are not alphanumeric, so look for them in the raw text.
        currency_symbols = ("€", "$")
        query_stems = ("how many", "total", "list all", "recent transactions", "count")
        search_stems = ("similar", "like", "related", "about", "need to do", "meeting", "discuss", "task")

        # NOTE(review): because transaction is checked first and "to" is one of
        # its keywords, a phrase such as "need to do" is still classified as a
        # transaction. Category order is left unchanged here - confirm whether
        # "from"/"to" should remain transaction indicators at all.
        if (
            has_word_start(transaction_stems)
            or has_whole_word(transaction_words)
            or any(symbol in message_lower for symbol in currency_symbols)
        ):
            intent = IntentType.TRANSACTION
            confidence = 0.6
        elif has_word_start(query_stems):
            intent = IntentType.QUERY
            confidence = 0.6
        elif has_word_start(search_stems):
            intent = IntentType.SEMANTIC_SEARCH
            confidence = 0.6
        else:
            intent = IntentType.GENERAL_INFO
            confidence = 0.5

        return IntentResult(
            intent=intent,
            confidence=confidence,
            reasoning=f"Fallback classification due to API error: {error_info[:100]}",
            entities_hint=None
        )
    
    def get_intent_description(self, intent: IntentType) -> str:
        """Return a short, human-readable explanation of an intent.

        Args:
            intent: The intent to describe.

        Returns:
            The description for the given intent, or "Unknown intent type"
            when the value is not one of the known intents.
        """
        labels = {}
        labels[IntentType.TRANSACTION] = "Recording a business transaction (purchase or sale)"
        labels[IntentType.QUERY] = "Retrieving or analyzing data from the database"
        labels[IntentType.SEMANTIC_SEARCH] = "Finding similar events or information"
        labels[IntentType.GENERAL_INFO] = "Storing general business information or notes"

        try:
            return labels[intent]
        except KeyError:
            return "Unknown intent type"
    
    def batch_classify(self, messages: list[str]) -> list[IntentResult]:
        """Classify each message in turn with classify_intent.

        Args:
            messages: Messages to classify.

        Returns:
            One IntentResult per input message, in the same order.
        """
        return [self.classify_intent(text) for text in messages]