# Spaces: Sleeping
# File size: 7,936 Bytes
# 401b16c
import openai
import os
import dirtyjson as json
from typing import Dict, Any, Optional, Tuple
from enum import Enum
from pydantic import BaseModel
class IntentType(str, Enum):
    """Closed set of intents a user message can be classified into.

    The members mix in ``str``, so each one compares equal to its lowercase
    wire value (for example ``IntentType.QUERY == "query"``) and can be built
    from that value with ``IntentType("query")``.
    """

    # Record a purchase or a sale.
    TRANSACTION = "transaction"
    # Retrieve or analyze structured data.
    QUERY = "query"
    # Look up contextual / unstructured information.
    SEMANTIC_SEARCH = "semantic_search"
    # Store a general note or reminder.
    GENERAL_INFO = "general_info"
class IntentResult(BaseModel):
    """Outcome of classifying a single user message."""

    # Category the message was assigned to.
    intent: IntentType
    # Score attached to the classification by whoever produced the result.
    confidence: float
    # Short explanation of why this intent was chosen.
    reasoning: str
    # Entities noticed in the message, when reported; None otherwise.
    entities_hint: Optional[str] = None
class IntentClassifier:
    """Classify chatbot messages into an ``IntentType`` with an OpenAI chat model.

    ``classify_intent`` asks the model for a JSON verdict and converts it into
    an ``IntentResult``.  Whenever the request fails or the reply cannot be
    used, a keyword heuristic (``_fallback_classification``) answers instead,
    so callers always receive a result rather than an exception.
    """

    # Model and sampling settings used for every classification request.
    _MODEL = "gpt-4o-mini"
    _TEMPERATURE = 0.1
    _MAX_TOKENS = 300

    # Confidence used when the model omits the field or sends an unusable value.
    _DEFAULT_CONFIDENCE = 0.5

    # Keyword groups for the offline fallback, matched against the lowercased
    # message.  They are consulted in the order query -> search -> transaction.
    _QUERY_KEYWORDS = ("how many", "total", "list all", "recent transactions", "count")
    _SEARCH_KEYWORDS = (
        "similar", "like", "related", "about", "need to do", "meeting", "discuss", "task",
    )
    _TRANSACTION_KEYWORDS = ("purchase", "buy", "sold", "sale", "from", "to", "€", "$")

    # Sent verbatim as the system message.  The text is part of the program's
    # behavior, so its wording and layout are kept exactly as found in the file.
    _SYSTEM_PROMPT = """You are an expert intent classifier for a business chatbot that handles sales, purchases, and general information storage.
Given a user message, classify it into one of these intents:
1. **QUERY**: User wants to retrieve or analyze STRUCTURED data from SQL database tables
- Examples: "How many USB drives did we buy?" (counts from purchases table)
- Examples: "What's the total value of all sales?" (sum from sales table)
- Examples: "Show me recent transactions" (list from transactions table)
- Examples: "List all customers" (data from customers table)
- Key indicators: Asking for counts, totals, lists, recent data from business transactions
- Must be answerable from structured database tables (purchases, sales, customers, suppliers, products)
2. **SEMANTIC_SEARCH**: User wants to find contextual information, tasks, or unstructured data
- Examples: "What does Mark need to do?" (searching for task/context info)
- Examples: "Find events related to supplier meetings" (contextual search)
- Examples: "When do I have the meeting with George?" (calendar/scheduling info)
- Examples: "Show me similar purchases to this one" (similarity search)
- Examples: "What did we discuss in the last meeting?" (meeting notes/context)
- Key indicators: Questions about tasks, meetings, discussions, or contextual information
- Information that would NOT be in structured database tables
3. **TRANSACTION**: User wants to record a business transaction (purchase or sale)
- Examples: "Add a purchase of 20 USB drives from TechMart at €5 each"
- Examples: "Sold 10 laptops to John Smith at €800 each"
- Contains: product names, quantities, suppliers/customers, prices
- Action: Recording new business data
4. **GENERAL_INFO**: User wants to store general business information or notes
- It cannot be a question.
- Examples: "Meeting with new supplier scheduled for next week"
- Examples: "Remember to check inventory levels before next order"
- Examples: "Mark needs to call the supplier tomorrow"
- Contains: notes, reminders, general business information, task assignments
Return your response in this exact JSON format:
{
"intent": "transaction|query|semantic_search|general_info",
"confidence": 0.0-1.0,
"reasoning": "Brief explanation of why you chose this intent",
"entities_hint": "Optional: Key entities you detected (for transaction intent)"
}
Be precise and consider context carefully. If unsure, choose the most likely intent and indicate lower confidence."""

    def __init__(self, api_key: Optional[str] = None):
        """Create the OpenAI client used for intent classification.

        Args:
            api_key: Key to authenticate with.  When omitted (or empty), the
                ``OPENAI_API_KEY`` environment variable is used instead.
        """
        self.client = openai.OpenAI(
            api_key=api_key or os.getenv('OPENAI_API_KEY')
        )

    def classify_intent(self, user_message: str) -> IntentResult:
        """Classify ``user_message`` with the OpenAI API.

        Args:
            user_message: Raw text typed by the user.

        Returns:
            An ``IntentResult`` carrying the intent, a confidence clamped to
            the range 0.0-1.0, the model's reasoning and an optional entity
            hint.  If the request fails, the reply is not parseable, or the
            reported intent is unknown, the keyword-based fallback result is
            returned instead; this method does not propagate those errors.
        """
        user_prompt = f'Classify the intent of this user message: "{user_message}"'

        try:
            response = self.client.chat.completions.create(
                model=self._MODEL,
                messages=[
                    {"role": "system", "content": self._SYSTEM_PROMPT},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=self._TEMPERATURE,
                max_tokens=self._MAX_TOKENS,
            )
            # The SDK types ``content`` as optional; treat a missing body as
            # empty text so it is reported as a parsing problem below.
            response_text = self._extract_json_text(
                response.choices[0].message.content or ""
            )
        except Exception as e:
            # Best-effort boundary: any API/transport problem degrades to the
            # keyword heuristic instead of failing the caller.
            print(f"Error in intent classification: {e}")
            return self._fallback_classification(user_message, str(e))

        try:
            result_dict = json.loads(response_text)

            # Normalise before validating so "Query" or " query " are accepted.
            intent_value = str(result_dict.get("intent", "")).strip().lower()
            if intent_value not in {member.value for member in IntentType}:
                print(f"Invalid intent value: {intent_value}")
                return self._fallback_classification(
                    user_message, f"Invalid intent: {intent_value}"
                )

            # A usable intent should not be thrown away because a secondary
            # field has an unexpected type, so coerce those fields leniently.
            entities_hint = result_dict.get("entities_hint")
            if entities_hint is not None and not isinstance(entities_hint, str):
                entities_hint = str(entities_hint)

            return IntentResult(
                intent=IntentType(intent_value),
                confidence=self._coerce_confidence(
                    result_dict.get("confidence"), self._DEFAULT_CONFIDENCE
                ),
                reasoning=str(result_dict.get("reasoning") or "No reasoning provided"),
                entities_hint=entities_hint,
            )
        except Exception as e:
            # Covers malformed JSON as well as replies that are not an object.
            print(f"JSON parsing error: {e}")
            print(f"Raw response: {response_text}")
            return self._fallback_classification(
                user_message, f"JSON parsing failed: {str(e)}"
            )

    @staticmethod
    def _extract_json_text(raw_text: str) -> str:
        """Return the outermost ``{...}`` span of ``raw_text``.

        This drops Markdown code fences (with any language tag) and prose the
        model may have wrapped around the object.  When no brace pair exists,
        the stripped text is returned unchanged so the parser reports the error.
        """
        text = raw_text.strip()
        start = text.find("{")
        end = text.rfind("}")
        if 0 <= start < end:
            return text[start:end + 1]
        return text

    @staticmethod
    def _coerce_confidence(raw_value: Any, default: float = 0.5) -> float:
        """Convert a model-supplied confidence into a float within [0.0, 1.0].

        Missing, null, non-numeric and NaN values yield ``default``; numeric
        values outside the range are clamped to the nearest bound.
        """
        try:
            value = float(raw_value)
        except (TypeError, ValueError):
            return default
        if value != value:  # NaN is the only float that differs from itself
            return default
        return min(1.0, max(0.0, value))

    @staticmethod
    def _contains_keyword(text: str, keywords) -> bool:
        """Tell whether ``text`` mentions any of ``keywords`` as whole words.

        Keywords that start with a letter or digit must sit on word boundaries
        (a trailing "s", "es", "d", "ed" or "ing" is tolerated), so "to" no
        longer matches inside "total" or "customers".  Symbol keywords such as
        "€" are matched as plain substrings because they normally touch a
        number ("€5").
        """
        import re  # local import keeps this block independent of file-level imports

        for keyword in keywords:
            if keyword[0].isalnum():
                pattern = rf"(?<!\w){re.escape(keyword)}(?:e?s|e?d|ing)?(?!\w)"
                if re.search(pattern, text):
                    return True
            elif keyword in text:
                return True
        return False

    def _fallback_classification(self, user_message: str, error_info: str) -> IntentResult:
        """Classify with a keyword heuristic when the model cannot be used.

        Args:
            user_message: Raw text typed by the user.
            error_info: Description of what went wrong; its first 100
                characters are embedded in the result's reasoning.

        Returns:
            An ``IntentResult`` with confidence 0.6 for a keyword hit, or
            ``GENERAL_INFO`` with confidence 0.5 when nothing matches.
        """
        message_lower = (user_message or "").lower()

        # Order matters: the specific query/search phrases are tested before
        # the transaction group.  The transaction group contains very common
        # words ("to", "from"), and testing it first made phrases such as
        # "total" and "need to do" unreachable.
        # NOTE(review): "to"/"from" still match many ordinary sentences;
        # consider whether they belong in the transaction group at all.
        if self._contains_keyword(message_lower, self._QUERY_KEYWORDS):
            intent = IntentType.QUERY
            confidence = 0.6
        elif self._contains_keyword(message_lower, self._SEARCH_KEYWORDS):
            intent = IntentType.SEMANTIC_SEARCH
            confidence = 0.6
        elif self._contains_keyword(message_lower, self._TRANSACTION_KEYWORDS):
            intent = IntentType.TRANSACTION
            confidence = 0.6
        else:
            intent = IntentType.GENERAL_INFO
            confidence = 0.5

        return IntentResult(
            intent=intent,
            confidence=confidence,
            reasoning=f"Fallback classification due to API error: {error_info[:100]}",
            entities_hint=None,
        )

    def get_intent_description(self, intent: IntentType) -> str:
        """Return a human-readable description of ``intent``.

        Values that are not a known ``IntentType`` yield "Unknown intent type".
        """
        descriptions = {
            IntentType.TRANSACTION: "Recording a business transaction (purchase or sale)",
            IntentType.QUERY: "Retrieving or analyzing data from the database",
            IntentType.SEMANTIC_SEARCH: "Finding similar events or information",
            IntentType.GENERAL_INFO: "Storing general business information or notes",
        }
        return descriptions.get(intent, "Unknown intent type")

    def batch_classify(self, messages: list[str]) -> list[IntentResult]:
        """Classify several messages, one ``classify_intent`` call per message.

        The messages are processed sequentially and the results are returned
        in the same order as the input.
        """
        return [self.classify_intent(message) for message in messages]