Spaces:

Ancastal
/

Business_Chatbot

Sleeping

File size: 7,936 Bytes

401b16c

import os
import re
from enum import Enum
from typing import Dict, Any, Optional, Tuple

import dirtyjson as json
import openai
from pydantic import BaseModel

class IntentType(str, Enum):
    """Closed set of intents a user message can be classified into.

    The ``str`` mixin makes every member compare equal to its lowercase
    string value, which is also the spelling the classifier prompt asks the
    model to return.
    """

    # Record a new purchase or sale.
    TRANSACTION = "transaction"
    # Retrieve or aggregate structured data from the database tables.
    QUERY = "query"
    # Look up contextual / unstructured information (tasks, meetings, notes).
    SEMANTIC_SEARCH = "semantic_search"
    # Store a general note, reminder or task assignment.
    GENERAL_INFO = "general_info"

# Result of classifying one user message: the chosen intent plus the
# supporting metadata produced by IntentClassifier.
class IntentResult(BaseModel):
    # Intent category selected for the message.
    intent: IntentType
    # Confidence score; the classifier prompt asks the model for 0.0-1.0 and
    # the keyword fallback reports 0.6 (keyword hit) or 0.5 (default).
    confidence: float
    # Short explanation of why the intent was chosen (or why the fallback ran).
    reasoning: str
    # Optional free-text list of detected entities; None when not provided.
    entities_hint: Optional[str] = None

class IntentClassifier:
    """Classify chatbot messages into an ``IntentType`` via the OpenAI chat API.

    If the API call fails, or its reply cannot be parsed or validated, the
    classifier falls back to a local keyword heuristic so that callers still
    receive an ``IntentResult``.
    """

    # Instructions sent as the system message on every classification request.
    _SYSTEM_PROMPT = """You are an expert intent classifier for a business chatbot that handles sales, purchases, and general information storage.

Given a user message, classify it into one of these intents:

1. **QUERY**: User wants to retrieve or analyze STRUCTURED data from SQL database tables
   - Examples: "How many USB drives did we buy?" (counts from purchases table)
   - Examples: "What's the total value of all sales?" (sum from sales table)
   - Examples: "Show me recent transactions" (list from transactions table)
   - Examples: "List all customers" (data from customers table)
   - Key indicators: Asking for counts, totals, lists, recent data from business transactions
   - Must be answerable from structured database tables (purchases, sales, customers, suppliers, products)

2. **SEMANTIC_SEARCH**: User wants to find contextual information, tasks, or unstructured data
   - Examples: "What does Mark need to do?" (searching for task/context info)
   - Examples: "Find events related to supplier meetings" (contextual search)
   - Examples: "When do I have the meeting with George?" (calendar/scheduling info)
   - Examples: "Show me similar purchases to this one" (similarity search)
   - Examples: "What did we discuss in the last meeting?" (meeting notes/context)
   - Key indicators: Questions about tasks, meetings, discussions, or contextual information
   - Information that would NOT be in structured database tables

3. **TRANSACTION**: User wants to record a business transaction (purchase or sale)
   - Examples: "Add a purchase of 20 USB drives from TechMart at €5 each"
   - Examples: "Sold 10 laptops to John Smith at €800 each"
   - Contains: product names, quantities, suppliers/customers, prices
   - Action: Recording new business data

4. **GENERAL_INFO**: User wants to store general business information or notes
   - It cannot be a question.
   - Examples: "Meeting with new supplier scheduled for next week"
   - Examples: "Remember to check inventory levels before next order"
   - Examples: "Mark needs to call the supplier tomorrow"
   - Contains: notes, reminders, general business information, task assignments

Return your response in this exact JSON format:
{
    "intent": "transaction|query|semantic_search|general_info",
    "confidence": 0.0-1.0,
    "reasoning": "Brief explanation of why you chose this intent",
    "entities_hint": "Optional: Key entities you detected (for transaction intent)"
}

Be precise and consider context carefully. If unsure, choose the most likely intent and indicate lower confidence."""

    # Keyword groups for the offline fallback; they are checked in the order
    # transaction -> query -> semantic search, first hit wins.
    _TRANSACTION_KEYWORDS = ("purchase", "buy", "sold", "sale", "from", "to", "€", "$")
    _QUERY_KEYWORDS = ("how many", "total", "list all", "recent transactions", "count")
    _SEARCH_KEYWORDS = ("similar", "like", "related", "about", "need to do", "meeting", "discuss", "task")

    def __init__(self, api_key: Optional[str] = None):
        """Create the OpenAI client used for classification.

        Args:
            api_key: OpenAI API key. When omitted (or empty), the value of the
                ``OPENAI_API_KEY`` environment variable is used instead.
        """
        self.client = openai.OpenAI(
            api_key=api_key or os.getenv('OPENAI_API_KEY')
        )

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Remove a surrounding Markdown code fence (``` or ```json) from *text*."""
        cleaned = text.strip()
        cleaned = cleaned.removeprefix("```json").removeprefix("```")
        cleaned = cleaned.removesuffix("```")
        return cleaned.strip()

    @staticmethod
    def _contains_keyword(text: str, keyword: str) -> bool:
        """Return True if *keyword* occurs in *text* as a word or phrase.

        Alphanumeric keywords must start and end on a word boundary, with an
        optional simple inflection suffix ("sale" matches "sales", "discuss"
        matches "discussed"). A plain substring test would let "to" match
        "total" or "customers" and "count" match "discount". Keywords made
        only of symbols (currency signs) have no word boundaries, so they
        keep plain substring matching.

        Both arguments are expected to be lowercase already.
        """
        if not any(ch.isalnum() for ch in keyword):
            return keyword in text
        pattern = rf"\b{re.escape(keyword)}(?:s|es|d|ed|ing)?\b"
        return re.search(pattern, text) is not None

    def classify_intent(self, user_message: str) -> IntentResult:
        """Classify *user_message* with the OpenAI API.

        Args:
            user_message: Raw text typed by the user.

        Returns:
            An ``IntentResult`` holding the intent, a confidence clamped to
            the 0.0-1.0 range, the model's reasoning and an optional entity
            hint. If the request fails or the reply is unusable, the result
            of the keyword fallback is returned instead.
        """
        user_prompt = f'Classify the intent of this user message: "{user_message}"'

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": self._SYSTEM_PROMPT},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=0.1,
                max_tokens=300,
            )
            # The message content is optional in the API response; treat a
            # missing body as empty text so it is reported as a parse failure.
            raw_content = response.choices[0].message.content or ""
        except Exception as e:
            # Boundary with the remote service: any failure degrades to the
            # local heuristic rather than propagating to the chatbot.
            print(f"Error in intent classification: {e}")
            return self._fallback_classification(user_message, str(e))

        response_text = self._strip_code_fence(raw_content)

        try:
            result_dict = json.loads(response_text)

            # Validate intent value (tolerate stray whitespace / casing).
            intent_value = str(result_dict.get("intent", "")).strip().lower()
            if intent_value not in [e.value for e in IntentType]:
                print(f"Invalid intent value: {intent_value}")
                return self._fallback_classification(user_message, f"Invalid intent: {intent_value}")

            # The prompt asks for 0.0-1.0; clamp so downstream thresholds
            # never see an out-of-range score.
            confidence = float(result_dict.get("confidence", 0.5))
            confidence = max(0.0, min(1.0, confidence))

            # A null/empty reasoning or a non-string entity hint (e.g. a list)
            # should not invalidate an otherwise usable classification.
            reasoning = str(result_dict.get("reasoning") or "No reasoning provided")
            entities_hint = result_dict.get("entities_hint")
            if entities_hint is not None and not isinstance(entities_hint, str):
                entities_hint = str(entities_hint)

            return IntentResult(
                intent=IntentType(intent_value),
                confidence=confidence,
                reasoning=reasoning,
                entities_hint=entities_hint,
            )
        except Exception as e:
            # Malformed JSON, a non-object payload or a validation error.
            print(f"JSON parsing error: {e}")
            print(f"Raw response: {response_text}")
            return self._fallback_classification(user_message, f"JSON parsing failed: {str(e)}")

    def _fallback_classification(self, user_message: str, error_info: str) -> IntentResult:
        """Classify *user_message* with a keyword heuristic.

        Used when the OpenAI request fails or its reply is unusable.

        Args:
            user_message: Raw text typed by the user; ``None`` is treated as
                an empty message so the fallback itself cannot fail on it.
            error_info: Description of what went wrong; its first 100
                characters are embedded in the returned reasoning.

        Returns:
            An ``IntentResult`` with confidence 0.6 for the first keyword
            group that matches, or ``GENERAL_INFO`` at 0.5 when none does.
        """
        message_lower = (user_message or "").lower()

        keyword_groups = (
            (self._TRANSACTION_KEYWORDS, IntentType.TRANSACTION),
            (self._QUERY_KEYWORDS, IntentType.QUERY),
            (self._SEARCH_KEYWORDS, IntentType.SEMANTIC_SEARCH),
        )

        intent, confidence = IntentType.GENERAL_INFO, 0.5
        for keywords, candidate in keyword_groups:
            if any(self._contains_keyword(message_lower, keyword) for keyword in keywords):
                intent, confidence = candidate, 0.6
                break

        return IntentResult(
            intent=intent,
            confidence=confidence,
            reasoning=f"Fallback classification due to API error: {str(error_info)[:100]}",
            entities_hint=None
        )

    def get_intent_description(self, intent: IntentType) -> str:
        """Return a human-readable description of *intent*.

        Unrecognised values yield ``"Unknown intent type"``.
        """
        descriptions = {
            IntentType.TRANSACTION: "Recording a business transaction (purchase or sale)",
            IntentType.QUERY: "Retrieving or analyzing data from the database",
            IntentType.SEMANTIC_SEARCH: "Finding similar events or information",
            IntentType.GENERAL_INFO: "Storing general business information or notes"
        }
        return descriptions.get(intent, "Unknown intent type")

    def batch_classify(self, messages: list[str]) -> list[IntentResult]:
        """Classify each message in *messages*, preserving order.

        Messages are processed one after another, with one
        ``classify_intent`` call (and therefore one API request) per message.
        """
        return [self.classify_intent(message) for message in messages]