Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .env.example +5 -0
- .github/workflows/deploy.yml +169 -0
- .github/workflows/update_space.yml +28 -0
- .gitignore +215 -0
- README.md +181 -7
- add_sample_data.py +65 -0
- config.yaml +155 -0
- gui/gradio_interface.py +627 -0
- main.py +61 -0
- populate_sample_data.py +81 -0
- requirements.txt +15 -0
- reset_database.py +66 -0
- run_gui.py +119 -0
- src/chatbot.py +402 -0
- src/config_manager.py +121 -0
- src/database_manager.py +255 -0
- src/entity_extractor.py +204 -0
- src/intent_classifier.py +173 -0
- src/models.py +104 -0
- src/nl_to_sql.py +201 -0
- src/rag_handler.py +204 -0
- src/transaction_clarifier.py +281 -0
- src/vector_store.py +214 -0
- tests/test_chatbot.py +49 -0
- tests/test_intent_classifier.py +98 -0
- tests/test_interactive_transactions.py +179 -0
- tests/test_nl_search.py +76 -0
- tests/test_rag_search.py +132 -0
.env.example
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# OpenAI API Configuration
|
| 2 |
+
OPENAI_API_KEY=your_openai_api_key_here
|
| 3 |
+
|
| 4 |
+
# Optional: Change the model used for NL to SQL conversion
|
| 5 |
+
# OPENAI_MODEL=gpt-3.5-turbo
|
.github/workflows/deploy.yml
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Deploy Gradio App
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: [ main ]
|
| 6 |
+
pull_request:
|
| 7 |
+
branches: [ main ]
|
| 8 |
+
workflow_dispatch:
|
| 9 |
+
|
| 10 |
+
jobs:
|
| 11 |
+
deploy:
|
| 12 |
+
runs-on: ubuntu-latest
|
| 13 |
+
|
| 14 |
+
steps:
|
| 15 |
+
- name: Checkout code
|
| 16 |
+
uses: actions/checkout@v4
|
| 17 |
+
|
| 18 |
+
- name: Set up Python
|
| 19 |
+
uses: actions/setup-python@v4
|
| 20 |
+
with:
|
| 21 |
+
python-version: '3.11'
|
| 22 |
+
|
| 23 |
+
- name: Cache pip dependencies
|
| 24 |
+
uses: actions/cache@v3
|
| 25 |
+
with:
|
| 26 |
+
path: ~/.cache/pip
|
| 27 |
+
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
|
| 28 |
+
restore-keys: |
|
| 29 |
+
${{ runner.os }}-pip-
|
| 30 |
+
|
| 31 |
+
- name: Install dependencies
|
| 32 |
+
run: |
|
| 33 |
+
python -m pip install --upgrade pip
|
| 34 |
+
pip install -r requirements.txt
|
| 35 |
+
|
| 36 |
+
- name: Download spaCy model
|
| 37 |
+
run: python -m spacy download en_core_web_sm
|
| 38 |
+
|
| 39 |
+
- name: Create config file
|
| 40 |
+
run: |
|
| 41 |
+
cat > config.yaml << EOF
|
| 42 |
+
openai:
|
| 43 |
+
api_key: ${{ secrets.OPENAI_API_KEY }}
|
| 44 |
+
model: "gpt-3.5-turbo"
|
| 45 |
+
max_tokens: 1500
|
| 46 |
+
temperature: 0.7
|
| 47 |
+
|
| 48 |
+
database:
|
| 49 |
+
url: "sqlite:///chatbot.db"
|
| 50 |
+
|
| 51 |
+
vector_store:
|
| 52 |
+
persist_directory: "./chroma_db"
|
| 53 |
+
collection_name: "business_transactions"
|
| 54 |
+
|
| 55 |
+
intent_classifier:
|
| 56 |
+
confidence_threshold: 0.7
|
| 57 |
+
|
| 58 |
+
entity_extraction:
|
| 59 |
+
spacy_model: "en_core_web_sm"
|
| 60 |
+
EOF
|
| 61 |
+
|
| 62 |
+
- name: Initialize database
|
| 63 |
+
run: python -c "from src.database_manager import DatabaseManager; db = DatabaseManager(); db.create_tables()"
|
| 64 |
+
|
| 65 |
+
- name: Run tests (if available)
|
| 66 |
+
run: |
|
| 67 |
+
if [ -d "tests" ] && [ -n "$(ls -A tests/*.py 2>/dev/null)" ]; then
|
| 68 |
+
python -m pytest tests/ -v
|
| 69 |
+
else
|
| 70 |
+
echo "No tests found, skipping test step"
|
| 71 |
+
fi
|
| 72 |
+
continue-on-error: true
|
| 73 |
+
|
| 74 |
+
- name: Deploy to Hugging Face Spaces
|
| 75 |
+
if: github.ref == 'refs/heads/main'
|
| 76 |
+
env:
|
| 77 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 78 |
+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
| 79 |
+
run: |
|
| 80 |
+
# Install huggingface_hub
|
| 81 |
+
pip install huggingface_hub
|
| 82 |
+
|
| 83 |
+
# Create a simple app.py for HF Spaces
|
| 84 |
+
cat > app.py << 'EOF'
|
| 85 |
+
#!/usr/bin/env python3
|
| 86 |
+
import os
|
| 87 |
+
import sys
|
| 88 |
+
from pathlib import Path
|
| 89 |
+
|
| 90 |
+
# Add gui directory to path
|
| 91 |
+
gui_dir = Path(__file__).parent / "gui"
|
| 92 |
+
sys.path.append(str(gui_dir))
|
| 93 |
+
|
| 94 |
+
if __name__ == "__main__":
|
| 95 |
+
from gradio_interface import GradioInterface
|
| 96 |
+
|
| 97 |
+
gui = GradioInterface()
|
| 98 |
+
gui.launch(
|
| 99 |
+
server_name="0.0.0.0",
|
| 100 |
+
server_port=7860,
|
| 101 |
+
share=False,
|
| 102 |
+
debug=False
|
| 103 |
+
)
|
| 104 |
+
EOF
|
| 105 |
+
|
| 106 |
+
# Create requirements.txt for HF Spaces
|
| 107 |
+
cp requirements.txt requirements_hf.txt
|
| 108 |
+
|
| 109 |
+
# Upload to Hugging Face Spaces
|
| 110 |
+
python -c "
|
| 111 |
+
from huggingface_hub import HfApi, upload_folder
|
| 112 |
+
import os
|
| 113 |
+
|
| 114 |
+
api = HfApi(token=os.environ['HF_TOKEN'])
|
| 115 |
+
|
| 116 |
+
# Create or update the space
|
| 117 |
+
try:
|
| 118 |
+
api.create_repo(
|
| 119 |
+
repo_id='${{ github.repository_owner }}/llm-chatbot',
|
| 120 |
+
repo_type='space',
|
| 121 |
+
space_sdk='gradio',
|
| 122 |
+
exist_ok=True
|
| 123 |
+
)
|
| 124 |
+
print('Space created/updated successfully')
|
| 125 |
+
except Exception as e:
|
| 126 |
+
print(f'Error creating space: {e}')
|
| 127 |
+
|
| 128 |
+
# Upload files
|
| 129 |
+
try:
|
| 130 |
+
upload_folder(
|
| 131 |
+
folder_path='.',
|
| 132 |
+
repo_id='${{ github.repository_owner }}/llm-chatbot',
|
| 133 |
+
repo_type='space',
|
| 134 |
+
token=os.environ['HF_TOKEN'],
|
| 135 |
+
ignore_patterns=['.git*', '__pycache__', '*.pyc', 'chroma_db', '*.db']
|
| 136 |
+
)
|
| 137 |
+
print('Files uploaded successfully')
|
| 138 |
+
except Exception as e:
|
| 139 |
+
print(f'Error uploading files: {e}')
|
| 140 |
+
"
|
| 141 |
+
|
| 142 |
+
- name: Deploy to Railway (Alternative)
|
| 143 |
+
if: github.ref == 'refs/heads/main' && env.RAILWAY_TOKEN != ''
|
| 144 |
+
env:
|
| 145 |
+
RAILWAY_TOKEN: ${{ secrets.RAILWAY_TOKEN }}
|
| 146 |
+
run: |
|
| 147 |
+
# Install Railway CLI
|
| 148 |
+
npm install -g @railway/cli
|
| 149 |
+
|
| 150 |
+
# Create Procfile for Railway
|
| 151 |
+
echo "web: python run_gui.py --host 0.0.0.0 --port \$PORT" > Procfile
|
| 152 |
+
|
| 153 |
+
# Deploy to Railway
|
| 154 |
+
railway login --token $RAILWAY_TOKEN
|
| 155 |
+
railway up
|
| 156 |
+
continue-on-error: true
|
| 157 |
+
|
| 158 |
+
- name: Deploy Summary
|
| 159 |
+
run: |
|
| 160 |
+
echo "π Deployment completed!"
|
| 161 |
+
echo "π± Your Gradio app should be available at:"
|
| 162 |
+
echo " - Hugging Face Spaces: https://huggingface.co/spaces/${{ github.repository_owner }}/llm-chatbot"
|
| 163 |
+
if [ -n "${{ secrets.RAILWAY_TOKEN }}" ]; then
|
| 164 |
+
echo " - Railway: Check Railway dashboard for URL"
|
| 165 |
+
fi
|
| 166 |
+
echo "π§ Make sure to set the required secrets in your repository:"
|
| 167 |
+
echo " - OPENAI_API_KEY: Your OpenAI API key"
|
| 168 |
+
echo " - HF_TOKEN: Your Hugging Face token"
|
| 169 |
+
echo " - RAILWAY_TOKEN: Your Railway token (optional)"
|
.github/workflows/update_space.yml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Run Python script
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- main
|
| 7 |
+
|
| 8 |
+
jobs:
|
| 9 |
+
build:
|
| 10 |
+
runs-on: ubuntu-latest
|
| 11 |
+
|
| 12 |
+
steps:
|
| 13 |
+
- name: Checkout
|
| 14 |
+
uses: actions/checkout@v2
|
| 15 |
+
|
| 16 |
+
- name: Set up Python
|
| 17 |
+
uses: actions/setup-python@v2
|
| 18 |
+
with:
|
| 19 |
+
python-version: '3.9'
|
| 20 |
+
|
| 21 |
+
- name: Install Gradio
|
| 22 |
+
run: python -m pip install gradio
|
| 23 |
+
|
| 24 |
+
- name: Log in to Hugging Face
|
| 25 |
+
run: python -c 'import huggingface_hub; huggingface_hub.login(token="${{ secrets.hf_token }}")'
|
| 26 |
+
|
| 27 |
+
- name: Deploy to Spaces
|
| 28 |
+
run: gradio deploy
|
.gitignore
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.sqlite
|
| 2 |
+
*.db
|
| 3 |
+
*.json
|
| 4 |
+
*.sql
|
| 5 |
+
*.bin
|
| 6 |
+
chroma_db/*
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# Byte-compiled / optimized / DLL files
|
| 10 |
+
__pycache__/
|
| 11 |
+
*.py[codz]
|
| 12 |
+
*$py.class
|
| 13 |
+
|
| 14 |
+
# C extensions
|
| 15 |
+
*.so
|
| 16 |
+
|
| 17 |
+
# Distribution / packaging
|
| 18 |
+
.Python
|
| 19 |
+
build/
|
| 20 |
+
develop-eggs/
|
| 21 |
+
dist/
|
| 22 |
+
downloads/
|
| 23 |
+
eggs/
|
| 24 |
+
.eggs/
|
| 25 |
+
lib/
|
| 26 |
+
lib64/
|
| 27 |
+
parts/
|
| 28 |
+
sdist/
|
| 29 |
+
var/
|
| 30 |
+
wheels/
|
| 31 |
+
share/python-wheels/
|
| 32 |
+
*.egg-info/
|
| 33 |
+
.installed.cfg
|
| 34 |
+
*.egg
|
| 35 |
+
MANIFEST
|
| 36 |
+
|
| 37 |
+
# PyInstaller
|
| 38 |
+
# Usually these files are written by a python script from a template
|
| 39 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 40 |
+
*.manifest
|
| 41 |
+
*.spec
|
| 42 |
+
|
| 43 |
+
# Installer logs
|
| 44 |
+
pip-log.txt
|
| 45 |
+
pip-delete-this-directory.txt
|
| 46 |
+
|
| 47 |
+
# Unit test / coverage reports
|
| 48 |
+
htmlcov/
|
| 49 |
+
.tox/
|
| 50 |
+
.nox/
|
| 51 |
+
.coverage
|
| 52 |
+
.coverage.*
|
| 53 |
+
.cache
|
| 54 |
+
nosetests.xml
|
| 55 |
+
coverage.xml
|
| 56 |
+
*.cover
|
| 57 |
+
*.py.cover
|
| 58 |
+
.hypothesis/
|
| 59 |
+
.pytest_cache/
|
| 60 |
+
cover/
|
| 61 |
+
|
| 62 |
+
# Translations
|
| 63 |
+
*.mo
|
| 64 |
+
*.pot
|
| 65 |
+
|
| 66 |
+
# Django stuff:
|
| 67 |
+
*.log
|
| 68 |
+
local_settings.py
|
| 69 |
+
db.sqlite3
|
| 70 |
+
db.sqlite3-journal
|
| 71 |
+
|
| 72 |
+
# Flask stuff:
|
| 73 |
+
instance/
|
| 74 |
+
.webassets-cache
|
| 75 |
+
|
| 76 |
+
# Scrapy stuff:
|
| 77 |
+
.scrapy
|
| 78 |
+
|
| 79 |
+
# Sphinx documentation
|
| 80 |
+
docs/_build/
|
| 81 |
+
|
| 82 |
+
# PyBuilder
|
| 83 |
+
.pybuilder/
|
| 84 |
+
target/
|
| 85 |
+
|
| 86 |
+
# Jupyter Notebook
|
| 87 |
+
.ipynb_checkpoints
|
| 88 |
+
|
| 89 |
+
# IPython
|
| 90 |
+
profile_default/
|
| 91 |
+
ipython_config.py
|
| 92 |
+
|
| 93 |
+
# pyenv
|
| 94 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 95 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 96 |
+
# .python-version
|
| 97 |
+
|
| 98 |
+
# pipenv
|
| 99 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 100 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 101 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 102 |
+
# install all needed dependencies.
|
| 103 |
+
#Pipfile.lock
|
| 104 |
+
|
| 105 |
+
# UV
|
| 106 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 107 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 108 |
+
# commonly ignored for libraries.
|
| 109 |
+
#uv.lock
|
| 110 |
+
|
| 111 |
+
# poetry
|
| 112 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 113 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 114 |
+
# commonly ignored for libraries.
|
| 115 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 116 |
+
#poetry.lock
|
| 117 |
+
#poetry.toml
|
| 118 |
+
|
| 119 |
+
# pdm
|
| 120 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 121 |
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
| 122 |
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
| 123 |
+
#pdm.lock
|
| 124 |
+
#pdm.toml
|
| 125 |
+
.pdm-python
|
| 126 |
+
.pdm-build/
|
| 127 |
+
|
| 128 |
+
# pixi
|
| 129 |
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
| 130 |
+
#pixi.lock
|
| 131 |
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
| 132 |
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
| 133 |
+
.pixi
|
| 134 |
+
|
| 135 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 136 |
+
__pypackages__/
|
| 137 |
+
|
| 138 |
+
# Celery stuff
|
| 139 |
+
celerybeat-schedule
|
| 140 |
+
celerybeat.pid
|
| 141 |
+
|
| 142 |
+
# SageMath parsed files
|
| 143 |
+
*.sage.py
|
| 144 |
+
|
| 145 |
+
# Environments
|
| 146 |
+
.env
|
| 147 |
+
.envrc
|
| 148 |
+
.venv
|
| 149 |
+
env/
|
| 150 |
+
venv/
|
| 151 |
+
ENV/
|
| 152 |
+
env.bak/
|
| 153 |
+
venv.bak/
|
| 154 |
+
|
| 155 |
+
# Spyder project settings
|
| 156 |
+
.spyderproject
|
| 157 |
+
.spyproject
|
| 158 |
+
|
| 159 |
+
# Rope project settings
|
| 160 |
+
.ropeproject
|
| 161 |
+
|
| 162 |
+
# mkdocs documentation
|
| 163 |
+
/site
|
| 164 |
+
|
| 165 |
+
# mypy
|
| 166 |
+
.mypy_cache/
|
| 167 |
+
.dmypy.json
|
| 168 |
+
dmypy.json
|
| 169 |
+
|
| 170 |
+
# Pyre type checker
|
| 171 |
+
.pyre/
|
| 172 |
+
|
| 173 |
+
# pytype static type analyzer
|
| 174 |
+
.pytype/
|
| 175 |
+
|
| 176 |
+
# Cython debug symbols
|
| 177 |
+
cython_debug/
|
| 178 |
+
|
| 179 |
+
# PyCharm
|
| 180 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 181 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 182 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 183 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 184 |
+
#.idea/
|
| 185 |
+
|
| 186 |
+
# Abstra
|
| 187 |
+
# Abstra is an AI-powered process automation framework.
|
| 188 |
+
# Ignore directories containing user credentials, local state, and settings.
|
| 189 |
+
# Learn more at https://abstra.io/docs
|
| 190 |
+
.abstra/
|
| 191 |
+
|
| 192 |
+
# Visual Studio Code
|
| 193 |
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
| 194 |
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
| 195 |
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
| 196 |
+
# you could uncomment the following to ignore the entire vscode folder
|
| 197 |
+
# .vscode/
|
| 198 |
+
|
| 199 |
+
# Ruff stuff:
|
| 200 |
+
.ruff_cache/
|
| 201 |
+
|
| 202 |
+
# PyPI configuration file
|
| 203 |
+
.pypirc
|
| 204 |
+
|
| 205 |
+
# Cursor
|
| 206 |
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
| 207 |
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
| 208 |
+
# refer to https://docs.cursor.com/context/ignore-files
|
| 209 |
+
.cursorignore
|
| 210 |
+
.cursorindexingignore
|
| 211 |
+
|
| 212 |
+
# Marimo
|
| 213 |
+
marimo/_static/
|
| 214 |
+
marimo/_lsp/
|
| 215 |
+
__marimo__/
|
README.md
CHANGED
|
@@ -1,12 +1,186 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
|
| 4 |
-
colorFrom: blue
|
| 5 |
-
colorTo: green
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.34.2
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
---
|
|
|
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Business_Chatbot
|
| 3 |
+
app_file: main.py
|
|
|
|
|
|
|
| 4 |
sdk: gradio
|
| 5 |
sdk_version: 5.34.2
|
|
|
|
|
|
|
| 6 |
---
|
| 7 |
+
# LLM Chatbot with SQL Database and Vector Store
|
| 8 |
|
| 9 |
+
A research-grade chatbot system that processes user queries to extract entities, store transactions in SQL database, and maintain semantic search capabilities through vector storage.
|
| 10 |
+
|
| 11 |
+
## Features
|
| 12 |
+
|
| 13 |
+
- **Entity Extraction**: Automatically extracts products, quantities, suppliers, customers, and prices from natural language
|
| 14 |
+
- **Interactive Transaction Completion**: LLM-powered clarification for missing information
|
| 15 |
+
- **SQL Database**: Stores structured sales and purchase data with relationships
|
| 16 |
+
- **Natural Language to SQL**: Uses OpenAI GPT to convert plain English queries to SQL
|
| 17 |
+
- **RAG (Retrieval-Augmented Generation)**: Intelligent responses using LLM with retrieved context
|
| 18 |
+
- **Vector Store**: Enables semantic search of events and transactions
|
| 19 |
+
- **Query Validation**: Ensures generated SQL queries are safe and valid
|
| 20 |
+
|
| 21 |
+
## Architecture
|
| 22 |
+
|
| 23 |
+
```
|
| 24 |
+
User Input: "Add a purchase of 20 USB drives from TechMart at β¬5 each"
|
| 25 |
+
β
|
| 26 |
+
1. Entity Extraction β product: USB drives, quantity: 20, supplier: TechMart, unit price: β¬5
|
| 27 |
+
β
|
| 28 |
+
2. SQL Generation β INSERT INTO purchases (supplier_id, product_id, quantity, unit_price, total_cost)
|
| 29 |
+
β
|
| 30 |
+
3. Vector Storage β Embed and store semantic summary of the event
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
## Setup
|
| 34 |
+
|
| 35 |
+
1. Install dependencies:
|
| 36 |
+
```bash
|
| 37 |
+
pip install -r requirements.txt
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
2. Download spaCy model:
|
| 41 |
+
```bash
|
| 42 |
+
python -m spacy download en_core_web_sm
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
3. Set up OpenAI API key:
|
| 46 |
+
```bash
|
| 47 |
+
export OPENAI_API_KEY='your-openai-api-key-here'
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
4. Run the chatbot:
|
| 51 |
+
```bash
|
| 52 |
+
python main.py
|
| 53 |
+
```
|
| 54 |
+
|
| 55 |
+
5. Test the system:
|
| 56 |
+
```bash
|
| 57 |
+
python test_chatbot.py
|
| 58 |
+
python test_nl_search.py # Test natural language search
|
| 59 |
+
python test_intent_classifier.py # Test intent classification
|
| 60 |
+
python test_rag_search.py # Test RAG functionality
|
| 61 |
+
python test_interactive_transactions.py # Test interactive transaction completion
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
## Usage Examples
|
| 65 |
+
|
| 66 |
+
### Adding Transactions (Interactive)
|
| 67 |
+
- `"Add a purchase of 20 USB drives from TechMart at β¬5 each"` (Complete)
|
| 68 |
+
- `"I bought some laptops"` (Will ask for: quantity, supplier, price)
|
| 69 |
+
- `"Sold items to a customer"` (Will ask for: product, quantity, customer, price)
|
| 70 |
+
- User can respond with specific details or say "N/A" for optional fields
|
| 71 |
+
|
| 72 |
+
### Querying Data (Natural Language to SQL)
|
| 73 |
+
- `"How many USB drives did we purchase?"`
|
| 74 |
+
- `"What's the total value of all purchases?"`
|
| 75 |
+
- `"Show me all sales to John Smith"`
|
| 76 |
+
- `"Which suppliers have we bought from?"`
|
| 77 |
+
- `"What's our total spending on electronics?"`
|
| 78 |
+
- `"Show me the most expensive purchases"`
|
| 79 |
+
|
| 80 |
+
### Semantic Search (RAG-powered)
|
| 81 |
+
- `"When is my meeting with George?"`
|
| 82 |
+
- `"What do we know about TechMart as a supplier?"`
|
| 83 |
+
- `"Tell me about recent meetings and discussions"`
|
| 84 |
+
- `"Show me customer feedback and satisfaction information"`
|
| 85 |
+
|
| 86 |
+
### General Information
|
| 87 |
+
- `"Meeting with new supplier scheduled for next week"`
|
| 88 |
+
- `"Important: Check inventory levels before next order"`
|
| 89 |
+
|
| 90 |
+
## Database Schema
|
| 91 |
+
|
| 92 |
+
- **suppliers**: Company information
|
| 93 |
+
- **customers**: Customer details
|
| 94 |
+
- **products**: Product catalog
|
| 95 |
+
- **purchases**: Purchase transactions
|
| 96 |
+
- **sales**: Sales transactions
|
| 97 |
+
|
| 98 |
+
## Vector Store
|
| 99 |
+
|
| 100 |
+
Uses ChromaDB with sentence transformers for semantic similarity search of:
|
| 101 |
+
- Transaction summaries
|
| 102 |
+
- General business events
|
| 103 |
+
- Meeting notes and reminders
|
| 104 |
+
|
| 105 |
+
## Files Structure
|
| 106 |
+
|
| 107 |
+
```
|
| 108 |
+
βββ src/
|
| 109 |
+
β βββ models.py # Data models and schemas
|
| 110 |
+
β βββ entity_extractor.py # NLP entity extraction
|
| 111 |
+
β βββ database_manager.py # SQL database operations
|
| 112 |
+
β βββ vector_store.py # Semantic search functionality
|
| 113 |
+
β βββ nl_to_sql.py # OpenAI-powered natural language to SQL
|
| 114 |
+
β βββ intent_classifier.py # OpenAI-powered intent classification
|
| 115 |
+
β βββ rag_handler.py # RAG (Retrieval-Augmented Generation)
|
| 116 |
+
β βββ transaction_clarifier.py # Interactive transaction completion
|
| 117 |
+
β βββ chatbot.py # Main chatbot logic
|
| 118 |
+
βββ database/
|
| 119 |
+
β βββ schema.sql # Database schema
|
| 120 |
+
βββ main.py # Interactive chatbot interface
|
| 121 |
+
βββ test_chatbot.py # Test suite
|
| 122 |
+
βββ test_nl_search.py # Natural language search tests
|
| 123 |
+
βββ test_intent_classifier.py # Intent classification tests
|
| 124 |
+
βββ test_rag_search.py # RAG functionality tests
|
| 125 |
+
βββ test_interactive_transactions.py # Interactive transaction tests
|
| 126 |
+
βββ .env.example # Environment variables template
|
| 127 |
+
βββ requirements.txt # Python dependencies
|
| 128 |
+
```
|
| 129 |
+
|
| 130 |
+
## Research Applications
|
| 131 |
+
|
| 132 |
+
This system demonstrates:
|
| 133 |
+
- Multi-modal data storage (structured + vector)
|
| 134 |
+
- LLM-powered natural language to SQL conversion
|
| 135 |
+
- RAG (Retrieval-Augmented Generation) for intelligent responses
|
| 136 |
+
- Interactive transaction completion with missing information handling
|
| 137 |
+
- OpenAI-based intent classification
|
| 138 |
+
- Multi-turn conversation state management
|
| 139 |
+
- Semantic similarity search with embeddings
|
| 140 |
+
- Named entity recognition and extraction
|
| 141 |
+
- Query validation and SQL injection prevention
|
| 142 |
+
- Conversational business process automation
|
| 143 |
+
|
| 144 |
+
## Future Works
|
| 145 |
+
|
| 146 |
+
### Intent Classification Improvements
|
| 147 |
+
|
| 148 |
+
The current system uses OpenAI API for intent classification, which provides excellent accuracy but has some limitations:
|
| 149 |
+
|
| 150 |
+
**Current Limitations:**
|
| 151 |
+
- Requires internet connectivity and API calls for each message
|
| 152 |
+
- Dependent on OpenAI service availability and costs
|
| 153 |
+
- May have latency for real-time applications
|
| 154 |
+
- Limited customization for domain-specific intents
|
| 155 |
+
|
| 156 |
+
**Potential Improvements:**
|
| 157 |
+
|
| 158 |
+
1. **Fine-tuned Classification Models**
|
| 159 |
+
- Train a smaller, specialized model on business transaction data
|
| 160 |
+
- Use frameworks like Hugging Face Transformers with custom datasets
|
| 161 |
+
- Deploy locally for faster inference and offline capability
|
| 162 |
+
- Examples: DistilBERT, RoBERTa fine-tuned on business intent data
|
| 163 |
+
|
| 164 |
+
2. **Local LLM Integration**
|
| 165 |
+
- Replace OpenAI API with local models (Llama, Mistral, etc.)
|
| 166 |
+
- Use frameworks like Ollama, LangChain, or vLLM for local deployment
|
| 167 |
+
- Maintain privacy while reducing external dependencies
|
| 168 |
+
- Cost-effective for high-volume applications
|
| 169 |
+
|
| 170 |
+
3. **Intent Embedding Approaches**
|
| 171 |
+
- Create vector embeddings for known intent patterns
|
| 172 |
+
- Use similarity search instead of generative classification
|
| 173 |
+
- Combine with few-shot learning for new intent types
|
| 174 |
+
- More efficient for simple intent detection scenarios
|
| 175 |
+
|
| 176 |
+
4. **Hybrid Approaches**
|
| 177 |
+
- Combine rule-based filtering with LLM classification
|
| 178 |
+
- Use confidence thresholds to decide when to query LLM
|
| 179 |
+
- Cache common patterns to reduce API calls
|
| 180 |
+
- Implement progressive enhancement from simple to complex classification
|
| 181 |
+
|
| 182 |
+
5. **Domain-Specific Enhancements**
|
| 183 |
+
- Add business context and domain knowledge
|
| 184 |
+
- Implement multi-intent detection for complex queries
|
| 185 |
+
- Add conversation history context for better classification
|
| 186 |
+
- Support for industry-specific terminology and patterns
|
add_sample_data.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
Add sample data to the chatbot database for testing the dashboard.
|
| 5 |
+
This script adds realistic business transactions to populate the dashboard.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import sys
|
| 9 |
+
import os
|
| 10 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 11 |
+
|
| 12 |
+
from chatbot import Chatbot
|
| 13 |
+
from models import ChatbotRequest
|
| 14 |
+
|
| 15 |
+
def add_sample_data():
|
| 16 |
+
"""Add sample transactions to the database."""
|
| 17 |
+
print("π Adding sample data to the database...")
|
| 18 |
+
|
| 19 |
+
chatbot = Chatbot()
|
| 20 |
+
|
| 21 |
+
# Sample purchases
|
| 22 |
+
purchases = [
|
| 23 |
+
"Add a purchase of 10 USB drives from TechMart at β¬5 each",
|
| 24 |
+
"Add a purchase of 5 laptops from Electronics Plus at β¬800 each",
|
| 25 |
+
"Add a purchase of 20 keyboards from Office Supplies Co at β¬25 each",
|
| 26 |
+
"Add a purchase of 8 monitors from TechMart at β¬200 each",
|
| 27 |
+
"Add a purchase of 15 webcams from Electronics Plus at β¬45 each"
|
| 28 |
+
]
|
| 29 |
+
|
| 30 |
+
# Sample sales
|
| 31 |
+
sales = [
|
| 32 |
+
"Sold 8 USB drives to ABC Corp at β¬12 each",
|
| 33 |
+
"Sold 3 laptops to XYZ Ltd at β¬1200 each",
|
| 34 |
+
"Sold 12 keyboards to StartupTech at β¬40 each",
|
| 35 |
+
"Sold 5 monitors to Creative Agency at β¬350 each",
|
| 36 |
+
"Sold 10 webcams to Remote Work Solutions at β¬75 each",
|
| 37 |
+
"Sold 6 USB drives to Local Business at β¬15 each",
|
| 38 |
+
"Sold 2 laptops to Consulting Firm at β¬1100 each"
|
| 39 |
+
]
|
| 40 |
+
|
| 41 |
+
print("π¦ Adding purchase transactions...")
|
| 42 |
+
for purchase in purchases:
|
| 43 |
+
try:
|
| 44 |
+
request = ChatbotRequest(message=purchase)
|
| 45 |
+
response = chatbot.process_message(request)
|
| 46 |
+
print(f" β
{purchase}")
|
| 47 |
+
except Exception as e:
|
| 48 |
+
print(f" β Failed: {purchase} - {e}")
|
| 49 |
+
|
| 50 |
+
print("π° Adding sales transactions...")
|
| 51 |
+
for sale in sales:
|
| 52 |
+
try:
|
| 53 |
+
request = ChatbotRequest(message=sale)
|
| 54 |
+
response = chatbot.process_message(request)
|
| 55 |
+
print(f" β
{sale}")
|
| 56 |
+
except Exception as e:
|
| 57 |
+
print(f" β Failed: {sale} - {e}")
|
| 58 |
+
|
| 59 |
+
chatbot.close()
|
| 60 |
+
print("β
Sample data added successfully!")
|
| 61 |
+
print("π You can now launch the GUI to see the populated dashboard:")
|
| 62 |
+
print(" python run_gui.py")
|
| 63 |
+
|
| 64 |
+
if __name__ == "__main__":
|
| 65 |
+
add_sample_data()
|
config.yaml
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# LLM Chatbot Configuration
|
| 2 |
+
# This file contains all configurable settings for the chatbot application
|
| 3 |
+
|
| 4 |
+
# Database Configuration
|
| 5 |
+
database:
|
| 6 |
+
path: "chatbot.db"
|
| 7 |
+
connection_string: "sqlite:///{path}"
|
| 8 |
+
default_suppliers:
|
| 9 |
+
- "TechMart"
|
| 10 |
+
- "Office Supplies Co"
|
| 11 |
+
- "Electronics Plus"
|
| 12 |
+
default_products:
|
| 13 |
+
- name: "USB drives"
|
| 14 |
+
category: "Electronics"
|
| 15 |
+
- name: "Office chairs"
|
| 16 |
+
category: "Furniture"
|
| 17 |
+
- name: "Laptops"
|
| 18 |
+
category: "Electronics"
|
| 19 |
+
- name: "Monitors"
|
| 20 |
+
category: "Electronics"
|
| 21 |
+
- name: "Keyboards"
|
| 22 |
+
category: "Electronics"
|
| 23 |
+
|
| 24 |
+
# OpenAI API Configuration
|
| 25 |
+
openai:
|
| 26 |
+
# Intent Classification
|
| 27 |
+
intent_classifier:
|
| 28 |
+
model: "gpt-4o-mini"
|
| 29 |
+
temperature: 0.1
|
| 30 |
+
max_tokens: 300
|
| 31 |
+
|
| 32 |
+
# Natural Language to SQL
|
| 33 |
+
nl_to_sql:
|
| 34 |
+
model: "gpt-4o-mini"
|
| 35 |
+
temperature: 0.1
|
| 36 |
+
max_tokens: 500
|
| 37 |
+
|
| 38 |
+
# SQL Explanation
|
| 39 |
+
sql_explanation:
|
| 40 |
+
model: "gpt-3.5-turbo"
|
| 41 |
+
temperature: 0.3
|
| 42 |
+
max_tokens: 200
|
| 43 |
+
|
| 44 |
+
# RAG Handler
|
| 45 |
+
rag_handler:
|
| 46 |
+
model: "gpt-4o-mini"
|
| 47 |
+
temperature: 0.3
|
| 48 |
+
max_tokens: 800
|
| 49 |
+
|
| 50 |
+
# Query Enhancement
|
| 51 |
+
query_enhancement:
|
| 52 |
+
model: "gpt-4o-mini"
|
| 53 |
+
temperature: 0.2
|
| 54 |
+
max_tokens: 100
|
| 55 |
+
|
| 56 |
+
# Transaction Clarifier
|
| 57 |
+
transaction_clarifier:
|
| 58 |
+
model: "gpt-4o-mini"
|
| 59 |
+
temperature: 0.3
|
| 60 |
+
max_tokens: 400
|
| 61 |
+
|
| 62 |
+
# Transaction Validation
|
| 63 |
+
transaction_validation:
|
| 64 |
+
model: "gpt-4o-mini"
|
| 65 |
+
temperature: 0.1
|
| 66 |
+
max_tokens: 300
|
| 67 |
+
|
| 68 |
+
# Vector Store Configuration
|
| 69 |
+
vector_store:
|
| 70 |
+
collection_name: "chatbot_events"
|
| 71 |
+
persistence_path: "./chroma_db"
|
| 72 |
+
embedding_model: "all-MiniLM-L6-v2"
|
| 73 |
+
|
| 74 |
+
# Search and Query Configuration
|
| 75 |
+
search:
|
| 76 |
+
# Default number of vector search results
|
| 77 |
+
vector_search_results: 8
|
| 78 |
+
|
| 79 |
+
# Default number of recent search results
|
| 80 |
+
recent_events_limit: 10
|
| 81 |
+
|
| 82 |
+
# Default limit for database queries
|
| 83 |
+
default_query_limit: 10
|
| 84 |
+
|
| 85 |
+
# Maximum SQL results to display
|
| 86 |
+
max_sql_results_display: 20
|
| 87 |
+
|
| 88 |
+
# Recent transactions display limit
|
| 89 |
+
recent_transactions_limit: 10
|
| 90 |
+
|
| 91 |
+
# Entity Extraction Configuration
|
| 92 |
+
entity_extraction:
|
| 93 |
+
spacy_model: "en_core_web_sm"
|
| 94 |
+
|
| 95 |
+
# Fallback classification keywords
|
| 96 |
+
purchase_keywords:
|
| 97 |
+
- "buy"
|
| 98 |
+
- "purchase"
|
| 99 |
+
- "acquire"
|
| 100 |
+
- "order"
|
| 101 |
+
- "procure"
|
| 102 |
+
|
| 103 |
+
sale_keywords:
|
| 104 |
+
- "sell"
|
| 105 |
+
- "sale"
|
| 106 |
+
- "sold"
|
| 107 |
+
- "revenue"
|
| 108 |
+
- "income"
|
| 109 |
+
|
| 110 |
+
# Business Logic Configuration
|
| 111 |
+
business_logic:
|
| 112 |
+
# Required fields for transaction types
|
| 113 |
+
required_fields:
|
| 114 |
+
purchase:
|
| 115 |
+
- "product"
|
| 116 |
+
- "quantity"
|
| 117 |
+
- "supplier"
|
| 118 |
+
- "unit_price"
|
| 119 |
+
sale:
|
| 120 |
+
- "product"
|
| 121 |
+
- "quantity"
|
| 122 |
+
- "customer"
|
| 123 |
+
- "unit_price"
|
| 124 |
+
|
| 125 |
+
# Cancellation keywords
|
| 126 |
+
cancellation_keywords:
|
| 127 |
+
- "cancel"
|
| 128 |
+
- "quit"
|
| 129 |
+
- "stop"
|
| 130 |
+
- "abort"
|
| 131 |
+
|
| 132 |
+
# Dangerous SQL keywords (for security)
|
| 133 |
+
dangerous_sql_keywords:
|
| 134 |
+
- "drop"
|
| 135 |
+
- "delete"
|
| 136 |
+
- "truncate"
|
| 137 |
+
- "alter"
|
| 138 |
+
- "create"
|
| 139 |
+
- "insert"
|
| 140 |
+
- "update"
|
| 141 |
+
|
| 142 |
+
# Application Settings
|
| 143 |
+
app:
|
| 144 |
+
# Enable/disable features
|
| 145 |
+
features:
|
| 146 |
+
vector_storage: true
|
| 147 |
+
intent_classification: true
|
| 148 |
+
entity_extraction: true
|
| 149 |
+
transaction_clarification: true
|
| 150 |
+
rag_search: true
|
| 151 |
+
|
| 152 |
+
# Logging configuration
|
| 153 |
+
logging:
|
| 154 |
+
level: "INFO"
|
| 155 |
+
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
gui/gradio_interface.py
ADDED
|
@@ -0,0 +1,627 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import sys
|
| 5 |
+
import os
|
| 6 |
+
from typing import List, Tuple
|
| 7 |
+
from sqlalchemy import text
|
| 8 |
+
|
| 9 |
+
# Add the src directory to the path to import existing modules
|
| 10 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src'))
|
| 11 |
+
|
| 12 |
+
from chatbot import Chatbot
|
| 13 |
+
from models import ChatbotRequest
|
| 14 |
+
|
| 15 |
+
class GradioInterface:
|
| 16 |
+
"""Gradio GUI interface for the LLM Chatbot."""
|
| 17 |
+
|
| 18 |
+
    def __init__(self):
        """Initialize the Gradio interface with the existing chatbot."""
        # Backing chatbot engine; owns the DB session and vector store.
        # Its resources are released in launch()'s finally block.
        self.chatbot = Chatbot()
        # NOTE(review): never read or written elsewhere in this class —
        # Gradio's Chatbot component keeps its own history. Confirm no
        # external caller relies on this attribute before removing.
        self.conversation_history = []
|
| 22 |
+
|
| 23 |
+
def process_message(self, message: str, history: List[Tuple[str, str]]) -> Tuple[str, List[Tuple[str, str]]]:
|
| 24 |
+
"""
|
| 25 |
+
Process a user message and return the response with updated history.
|
| 26 |
+
|
| 27 |
+
Args:
|
| 28 |
+
message: User input message
|
| 29 |
+
history: Chat history as list of (user_msg, bot_response) tuples
|
| 30 |
+
|
| 31 |
+
Returns:
|
| 32 |
+
Tuple of (empty_string_for_input, updated_history)
|
| 33 |
+
"""
|
| 34 |
+
if not message.strip():
|
| 35 |
+
return "", history
|
| 36 |
+
|
| 37 |
+
# Handle quit/exit commands
|
| 38 |
+
if message.lower().strip() in ['quit', 'exit', 'bye']:
|
| 39 |
+
bot_response = "π Goodbye! Refresh the page to start a new session."
|
| 40 |
+
history.append((message, bot_response))
|
| 41 |
+
return "", history
|
| 42 |
+
|
| 43 |
+
try:
|
| 44 |
+
# Process the message using the existing chatbot
|
| 45 |
+
request = ChatbotRequest(message=message)
|
| 46 |
+
response = chatbot_response = self.chatbot.process_message(request)
|
| 47 |
+
|
| 48 |
+
# Build the response with additional information
|
| 49 |
+
response_text = f"π€ {response.response}"
|
| 50 |
+
|
| 51 |
+
# Add extracted entities information
|
| 52 |
+
if response.entities_extracted:
|
| 53 |
+
entities_info = (
|
| 54 |
+
f"\n\nπ **Extracted Information:**\n"
|
| 55 |
+
f"- Type: {response.entities_extracted.transaction_type}\n"
|
| 56 |
+
f"- Product: {response.entities_extracted.product}\n"
|
| 57 |
+
f"- Quantity: {response.entities_extracted.quantity}\n"
|
| 58 |
+
f"- Total Amount: β¬{response.entities_extracted.total_amount}"
|
| 59 |
+
)
|
| 60 |
+
response_text += entities_info
|
| 61 |
+
|
| 62 |
+
# Add vector storage confirmation
|
| 63 |
+
if response.vector_stored:
|
| 64 |
+
response_text += "\n\nπΎ Information stored in vector database for future semantic search"
|
| 65 |
+
|
| 66 |
+
# Add intent detection information
|
| 67 |
+
if response.intent_detected:
|
| 68 |
+
response_text += f"\n\nπ― **Intent Detected:** {response.intent_detected} (confidence: {response.intent_confidence:.2f})"
|
| 69 |
+
|
| 70 |
+
# Add clarification prompt
|
| 71 |
+
if response.awaiting_clarification:
|
| 72 |
+
response_text += "\n\nβ³ **Waiting for your response to complete the transaction...**"
|
| 73 |
+
|
| 74 |
+
# Update history
|
| 75 |
+
history.append((message, response_text))
|
| 76 |
+
|
| 77 |
+
except Exception as e:
|
| 78 |
+
error_response = f"β Error processing message: {str(e)}"
|
| 79 |
+
history.append((message, error_response))
|
| 80 |
+
|
| 81 |
+
return "", history
|
| 82 |
+
|
| 83 |
+
def clear_chat(self) -> Tuple[str, List]:
|
| 84 |
+
"""Clear the chat history and reset the conversation."""
|
| 85 |
+
return "", []
|
| 86 |
+
|
| 87 |
+
    def get_dashboard_data(self):
        """Collect dashboard aggregates via direct SQL against the chatbot DB.

        Returns:
            dict with keys: total_purchases / total_sales (row counts),
            total_revenue / total_expenses / profit (rounded to 2 decimals),
            recent_transactions (last 5 rows across both tables) and
            top_products (top 5 by combined purchase+sale quantity).
            On any database error a zeroed dict of the same shape is
            returned so the dashboard still renders.
        """
        try:
            # Bypass the NL pipeline and query the SQLAlchemy session directly.
            db_manager = self.chatbot.db_manager

            # Aggregates; `or 0` converts a NULL scalar (empty table) to 0.
            total_purchases = db_manager.session.execute(
                text("SELECT COUNT(*) FROM purchases")
            ).scalar() or 0

            total_sales = db_manager.session.execute(
                text("SELECT COUNT(*) FROM sales")
            ).scalar() or 0

            total_revenue = db_manager.session.execute(
                text("SELECT SUM(total_amount) FROM sales")
            ).scalar() or 0

            total_expenses = db_manager.session.execute(
                text("SELECT SUM(total_cost) FROM purchases")
            ).scalar() or 0

            # Last 5 transactions, newest first, merging purchases and sales
            # into a common (type, product, qty, amount, partner, date) shape.
            recent_transactions = db_manager.session.execute(
                text("""
                    SELECT 'purchase' as transaction_type, p.name as product, pu.quantity,
                           pu.total_cost as total_amount, s.name as partner, pu.purchase_date as created_at
                    FROM purchases pu
                    LEFT JOIN products p ON pu.product_id = p.id
                    LEFT JOIN suppliers s ON pu.supplier_id = s.id
                    UNION ALL
                    SELECT 'sale' as transaction_type, p.name as product, sa.quantity,
                           sa.total_amount, c.name as partner, sa.sale_date as created_at
                    FROM sales sa
                    LEFT JOIN products p ON sa.product_id = p.id
                    LEFT JOIN customers c ON sa.customer_id = c.id
                    ORDER BY created_at DESC
                    LIMIT 5
                """)
            ).fetchall()

            # Top 5 products by total quantity moved (purchases + sales).
            top_products = db_manager.session.execute(
                text("""
                    SELECT p.name as product, SUM(combined.quantity) as total_qty, COUNT(*) as transaction_count
                    FROM (
                        SELECT product_id, quantity FROM purchases
                        UNION ALL
                        SELECT product_id, quantity FROM sales
                    ) combined
                    LEFT JOIN products p ON combined.product_id = p.id
                    GROUP BY p.name
                    ORDER BY total_qty DESC
                    LIMIT 5
                """)
            ).fetchall()

            return {
                'total_purchases': total_purchases,
                'total_sales': total_sales,
                'total_revenue': round(total_revenue, 2),
                'total_expenses': round(total_expenses, 2),
                'profit': round(total_revenue - total_expenses, 2),
                'recent_transactions': recent_transactions,
                'top_products': top_products
            }

        except Exception as e:
            # NOTE(review): the exception is swallowed silently and the UI
            # shows all-zero data — consider logging `e` so DB failures
            # are diagnosable.
            return {
                'total_purchases': 0,
                'total_sales': 0,
                'total_revenue': 0.0,
                'total_expenses': 0.0,
                'profit': 0.0,
                'recent_transactions': [],
                'top_products': []
            }
|
| 165 |
+
|
| 166 |
+
def create_revenue_chart(self, data):
|
| 167 |
+
"""Create revenue vs expenses chart."""
|
| 168 |
+
import plotly.graph_objects as go
|
| 169 |
+
|
| 170 |
+
fig = go.Figure(data=[
|
| 171 |
+
go.Bar(name='Revenue', x=['Financial Summary'], y=[data['total_revenue']], marker_color='green'),
|
| 172 |
+
go.Bar(name='Expenses', x=['Financial Summary'], y=[data['total_expenses']], marker_color='red'),
|
| 173 |
+
go.Bar(name='Profit', x=['Financial Summary'], y=[data['profit']], marker_color='blue')
|
| 174 |
+
])
|
| 175 |
+
|
| 176 |
+
fig.update_layout(
|
| 177 |
+
title='Financial Overview',
|
| 178 |
+
barmode='group',
|
| 179 |
+
height=300
|
| 180 |
+
)
|
| 181 |
+
|
| 182 |
+
return fig
|
| 183 |
+
|
| 184 |
+
def create_transaction_chart(self, data):
|
| 185 |
+
"""Create transaction count pie chart."""
|
| 186 |
+
import plotly.graph_objects as go
|
| 187 |
+
|
| 188 |
+
fig = go.Figure(data=[go.Pie(
|
| 189 |
+
labels=['Purchases', 'Sales'],
|
| 190 |
+
values=[data['total_purchases'], data['total_sales']],
|
| 191 |
+
marker_colors=['lightcoral', 'lightgreen']
|
| 192 |
+
)])
|
| 193 |
+
|
| 194 |
+
fig.update_layout(
|
| 195 |
+
title='Transaction Distribution',
|
| 196 |
+
height=300
|
| 197 |
+
)
|
| 198 |
+
|
| 199 |
+
return fig
|
| 200 |
+
|
| 201 |
+
def create_top_products_chart(self, data):
|
| 202 |
+
"""Create top products bar chart."""
|
| 203 |
+
import plotly.graph_objects as go
|
| 204 |
+
|
| 205 |
+
if not data['top_products']:
|
| 206 |
+
fig = go.Figure()
|
| 207 |
+
fig.add_annotation(text="No product data available",
|
| 208 |
+
xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
|
| 209 |
+
fig.update_layout(title='Top Products', height=300)
|
| 210 |
+
return fig
|
| 211 |
+
|
| 212 |
+
products = [row[0] for row in data['top_products']]
|
| 213 |
+
quantities = [row[1] for row in data['top_products']]
|
| 214 |
+
|
| 215 |
+
fig = go.Figure(data=[
|
| 216 |
+
go.Bar(x=products, y=quantities, marker_color='skyblue')
|
| 217 |
+
])
|
| 218 |
+
|
| 219 |
+
fig.update_layout(
|
| 220 |
+
title='Top Products by Quantity',
|
| 221 |
+
xaxis_title='Products',
|
| 222 |
+
yaxis_title='Total Quantity',
|
| 223 |
+
height=300
|
| 224 |
+
)
|
| 225 |
+
|
| 226 |
+
return fig
|
| 227 |
+
|
| 228 |
+
def structured_purchase(self, product, quantity, supplier, unit_price):
|
| 229 |
+
"""Handle structured purchase entry."""
|
| 230 |
+
if not all([product, quantity, supplier, unit_price]):
|
| 231 |
+
return "", [("System", "β οΈ Please fill in all fields for the purchase.")], ""
|
| 232 |
+
|
| 233 |
+
message = f"Add a purchase of {quantity} {product} from {supplier} at β¬{unit_price} each"
|
| 234 |
+
request = ChatbotRequest(message=message)
|
| 235 |
+
response = self.chatbot.process_message(request)
|
| 236 |
+
|
| 237 |
+
history = [("Purchase Entry", message), ("System", f"β
{response.response}")]
|
| 238 |
+
return "", history, "Purchase recorded successfully!"
|
| 239 |
+
|
| 240 |
+
def structured_sale(self, product, quantity, customer, unit_price):
|
| 241 |
+
"""Handle structured sale entry."""
|
| 242 |
+
if not all([product, quantity, customer, unit_price]):
|
| 243 |
+
return "", [("System", "β οΈ Please fill in all fields for the sale.")], ""
|
| 244 |
+
|
| 245 |
+
message = f"Sold {quantity} {product} to {customer} at β¬{unit_price} each"
|
| 246 |
+
request = ChatbotRequest(message=message)
|
| 247 |
+
response = self.chatbot.process_message(request)
|
| 248 |
+
|
| 249 |
+
history = [("Sale Entry", message), ("System", f"β
{response.response}")]
|
| 250 |
+
return "", history, "Sale recorded successfully!"
|
| 251 |
+
|
| 252 |
+
def search_records(self, search_query, search_type):
|
| 253 |
+
"""Handle structured search."""
|
| 254 |
+
if not search_query:
|
| 255 |
+
return [("System", "β οΈ Please enter a search query.")]
|
| 256 |
+
|
| 257 |
+
if search_type == "Products":
|
| 258 |
+
message = f"Find {search_query}"
|
| 259 |
+
elif search_type == "Suppliers":
|
| 260 |
+
message = f"Search supplier {search_query}"
|
| 261 |
+
elif search_type == "Customers":
|
| 262 |
+
message = f"Search customer {search_query}"
|
| 263 |
+
else:
|
| 264 |
+
message = f"Search {search_query}"
|
| 265 |
+
|
| 266 |
+
request = ChatbotRequest(message=message)
|
| 267 |
+
response = self.chatbot.process_message(request)
|
| 268 |
+
|
| 269 |
+
return [("Search Query", message), ("Results", response.response)]
|
| 270 |
+
|
| 271 |
+
    def create_interface(self) -> gr.Interface:
        """Create and configure the Gradio interface.

        Builds a four-tab Blocks app (Dashboard, AI Chat, Transactions,
        Search & Reports, plus Help & Settings) and wires all event handlers.

        NOTE(review): the annotation says gr.Interface but the function
        actually returns the gr.Blocks context object — confirm and align.
        """

        with gr.Blocks(
            title="Business AI Assistant",
            theme=gr.themes.Default()
        ) as interface:

            # Header
            gr.Markdown("# πΌ Business AI Assistant")
            gr.Markdown("**Intelligent transaction management and business intelligence platform**")

            # Main tabbed interface
            with gr.Tabs() as tabs:

                # Dashboard Tab: metrics, charts and quick-action buttons.
                with gr.Tab("π Dashboard"):
                    # Key Metrics Row (read-only number widgets)
                    with gr.Row():
                        metrics_purchases = gr.Number(label="Total Purchases", interactive=False)
                        metrics_sales = gr.Number(label="Total Sales", interactive=False)
                        metrics_revenue = gr.Number(label="Revenue (β¬)", interactive=False)
                        metrics_profit = gr.Number(label="Profit (β¬)", interactive=False)

                    # Charts Row
                    with gr.Row():
                        with gr.Column():
                            financial_chart = gr.Plot(label="Financial Overview")
                        with gr.Column():
                            transaction_chart = gr.Plot(label="Transaction Distribution")

                    with gr.Row():
                        with gr.Column():
                            products_chart = gr.Plot(label="Top Products")
                        with gr.Column():
                            # Recent Transactions Table
                            recent_table = gr.Dataframe(
                                headers=["Type", "Product", "Qty", "Amount (β¬)", "Partner"],
                                datatype=["str", "str", "number", "number", "str"],
                                label="Recent Transactions",
                            )

                    # Action Buttons
                    with gr.Row():
                        refresh_dashboard = gr.Button("π Refresh Data", variant="secondary")
                        dash_new_purchase = gr.Button("β New Purchase", variant="primary")
                        dash_new_sale = gr.Button("π° New Sale", variant="primary")
                        # NOTE(review): "outline" is not a documented Button
                        # variant (primary/secondary/stop) — verify it renders
                        # as intended on the installed Gradio version.
                        dash_search = gr.Button("π Search Records", variant="outline")

                # Chat Tab: free-form conversation with the chatbot backend.
                with gr.Tab("π¬ AI Chat"):
                    gr.Markdown("### Conversational Business Assistant")
                    gr.Markdown("*Ask questions, add transactions, search records, or get insights in natural language*")

                    chatbot_ui = gr.Chatbot(
                        value=[],
                        height=500,
                        label="Conversation",
                        show_label=False,
                        container=True,
                        show_copy_button=True
                    )

                    with gr.Row():
                        msg_input = gr.Textbox(
                            placeholder="Ask me anything about your business... (e.g., 'Show recent sales', 'Add 10 laptops from TechMart')",
                            label="Message",
                            lines=2,
                            max_lines=4,
                            scale=5
                        )
                        send_btn = gr.Button("Send", variant="primary", scale=1)

                    with gr.Row():
                        clear_chat_btn = gr.Button("Clear Chat", variant="secondary")

                        # Example prompts (pre-fill the input box on click)
                        example_1 = gr.Button("π‘ Example: Add Purchase", variant="outline", size="sm")
                        example_2 = gr.Button("π‘ Example: Search Products", variant="outline", size="sm")
                        example_3 = gr.Button("π‘ Example: View Transactions", variant="outline", size="sm")

                # Transactions Tab: structured purchase/sale entry forms.
                with gr.Tab("π Transactions"):
                    with gr.Row():
                        # Purchase Form
                        with gr.Column():
                            gr.Markdown("### β Add Purchase")
                            purchase_product = gr.Textbox(label="Product", placeholder="e.g., Laptops")
                            purchase_quantity = gr.Number(label="Quantity", value=1, minimum=1)
                            purchase_supplier = gr.Textbox(label="Supplier", placeholder="e.g., TechMart")
                            purchase_price = gr.Number(label="Unit Price (β¬)", value=0.00, minimum=0)
                            purchase_btn = gr.Button("Add Purchase", variant="primary")
                            purchase_status = gr.Markdown("")

                        # Sale Form
                        with gr.Column():
                            gr.Markdown("### π° Add Sale")
                            sale_product = gr.Textbox(label="Product", placeholder="e.g., USB Drives")
                            sale_quantity = gr.Number(label="Quantity", value=1, minimum=1)
                            sale_customer = gr.Textbox(label="Customer", placeholder="e.g., ABC Corp")
                            sale_price = gr.Number(label="Unit Price (β¬)", value=0.00, minimum=0)
                            sale_btn = gr.Button("Add Sale", variant="primary")
                            sale_status = gr.Markdown("")

                    # Transaction Results (log of structured entries)
                    gr.Markdown("### Transaction Results")
                    transaction_results = gr.Chatbot(
                        value=[],
                        height=300,
                        label="Transaction Log",
                        show_copy_button=True
                    )

                # Search & Reports Tab: structured search over all records.
                with gr.Tab("π Search & Reports"):
                    gr.Markdown("### Advanced Search")

                    with gr.Row():
                        search_query = gr.Textbox(
                            label="Search Query",
                            placeholder="Enter product name, supplier, customer, or keywords...",
                            scale=3
                        )
                        search_type = gr.Dropdown(
                            choices=["All Records", "Products", "Suppliers", "Customers", "Transactions"],
                            value="All Records",
                            label="Search Type",
                            scale=1
                        )
                        search_btn = gr.Button("Search", variant="primary", scale=1)

                    # Search Results
                    search_results = gr.Chatbot(
                        value=[],
                        height=400,
                        label="Search Results",
                        show_copy_button=True
                    )

                    # Quick Search Buttons
                    with gr.Row():
                        gr.Markdown("### Quick Searches")
                    with gr.Row():
                        recent_purchases = gr.Button("Recent Purchases", variant="outline")
                        recent_sales = gr.Button("Recent Sales", variant="outline")
                        # NOTE(review): top_products and supplier_summary have
                        # no click handlers wired below — currently inert.
                        top_products = gr.Button("Top Products", variant="outline")
                        supplier_summary = gr.Button("Supplier Summary", variant="outline")

                # Help & Settings Tab: static documentation.
                with gr.Tab("β Help & Settings"):
                    with gr.Row():
                        with gr.Column():
                            gr.Markdown("""
                            ### π User Guide

                            **π― Getting Started**
                            - Use the **Dashboard** for quick overview and actions
                            - **AI Chat** for natural language interactions
                            - **Transactions** for structured data entry
                            - **Search & Reports** for finding information

                            **π¬ Chat Examples**
                            - "Add a purchase of 20 USB drives from TechMart at β¬5 each"
                            - "Show me recent sales to ABC Corp"
                            - "Find all laptop transactions"
                            - "What's my total revenue this month?"

                            **π Features**
                            - Smart entity extraction from natural language
                            - Intelligent search across all records
                            - Transaction categorization and analysis
                            - Export capabilities for reports
                            """)

                        with gr.Column():
                            gr.Markdown("""
                            ### βοΈ System Information

                            **Status**: π’ Online and Ready

                            **Capabilities**:
                            - ✅ Natural language processing
                            - ✅ Transaction management
                            - ✅ Intelligent search
                            - ✅ Data export
                            - ✅ Real-time analytics

                            **Supported Operations**:
                            - Purchase tracking
                            - Sales recording
                            - Inventory searches
                            - Supplier management
                            - Customer records
                            - Financial reporting

                            **Data Security**: π All data processed locally
                            """)

                    gr.Markdown("---")
                    gr.Markdown("*Business AI Assistant v1.0 β’ Built with Gradio β’ Powered by OpenAI*")

            # Event Handlers

            # Dashboard events
            def load_dashboard():
                # Fetch aggregates and render the three dashboard charts plus
                # the recent-transactions table in one round trip.
                data = self.get_dashboard_data()

                # Create charts
                financial_fig = self.create_revenue_chart(data)
                transaction_fig = self.create_transaction_chart(data)
                products_fig = self.create_top_products_chart(data)

                # Prepare recent transactions table
                recent_data = []
                for row in data['recent_transactions']:
                    recent_data.append([
                        row[0].title(),  # transaction_type
                        row[1],  # product
                        row[2],  # quantity
                        f"β¬{row[3]:.2f}",  # total_amount
                        row[4] or "N/A"  # partner (supplier/customer)
                    ])

                # Order must match the `outputs` lists wired below.
                return (
                    data['total_purchases'],
                    data['total_sales'],
                    data['total_revenue'],
                    data['profit'],
                    financial_fig,
                    transaction_fig,
                    products_fig,
                    recent_data
                )

            refresh_dashboard.click(
                fn=load_dashboard,
                outputs=[
                    metrics_purchases, metrics_sales, metrics_revenue, metrics_profit,
                    financial_chart, transaction_chart, products_chart, recent_table
                ]
            )

            # Chat events (Enter key and Send button share one handler)
            msg_input.submit(
                fn=self.process_message,
                inputs=[msg_input, chatbot_ui],
                outputs=[msg_input, chatbot_ui]
            )

            send_btn.click(
                fn=self.process_message,
                inputs=[msg_input, chatbot_ui],
                outputs=[msg_input, chatbot_ui]
            )

            clear_chat_btn.click(
                fn=self.clear_chat,
                outputs=[msg_input, chatbot_ui]
            )

            # Example prompts: pre-fill the input box and clear the chat.
            example_1.click(
                fn=lambda: ("Add a purchase of 10 laptops from TechMart at β¬800 each", []),
                outputs=[msg_input, chatbot_ui]
            )

            example_2.click(
                fn=lambda: ("Find all USB drive transactions", []),
                outputs=[msg_input, chatbot_ui]
            )

            example_3.click(
                fn=lambda: ("Show recent transactions", []),
                outputs=[msg_input, chatbot_ui]
            )

            # Transaction events
            purchase_btn.click(
                fn=self.structured_purchase,
                inputs=[purchase_product, purchase_quantity, purchase_supplier, purchase_price],
                outputs=[purchase_product, transaction_results, purchase_status]
            )

            sale_btn.click(
                fn=self.structured_sale,
                inputs=[sale_product, sale_quantity, sale_customer, sale_price],
                outputs=[sale_product, transaction_results, sale_status]
            )

            # Search events
            search_btn.click(
                fn=self.search_records,
                inputs=[search_query, search_type],
                outputs=[search_results]
            )

            # Quick search events
            recent_purchases.click(
                fn=lambda: self.search_records("recent purchases", "Transactions"),
                outputs=[search_results]
            )

            recent_sales.click(
                fn=lambda: self.search_records("recent sales", "Transactions"),
                outputs=[search_results]
            )

            # Dashboard navigation events
            # NOTE(review): these lambdas return gr.Tabs.update(...) but no
            # `outputs` component is declared, so the update is discarded and
            # the buttons are likely no-ops; also gr.Tabs.update is removed in
            # Gradio 4.x. Confirm against the installed Gradio version.
            dash_new_purchase.click(fn=lambda: gr.Tabs.update(selected=2))
            dash_new_sale.click(fn=lambda: gr.Tabs.update(selected=2))
            dash_search.click(fn=lambda: gr.Tabs.update(selected=3))

            # Load initial dashboard data on page open.
            interface.load(
                fn=load_dashboard,
                outputs=[
                    metrics_purchases, metrics_sales, metrics_revenue, metrics_profit,
                    financial_chart, transaction_chart, products_chart, recent_table
                ]
            )

        return interface
|
| 593 |
+
|
| 594 |
+
def launch(self, **kwargs):
|
| 595 |
+
"""Launch the Gradio interface."""
|
| 596 |
+
interface = self.create_interface()
|
| 597 |
+
|
| 598 |
+
# Default launch configuration
|
| 599 |
+
launch_config = {
|
| 600 |
+
'server_name': '0.0.0.0',
|
| 601 |
+
'server_port': 7860,
|
| 602 |
+
'share': False,
|
| 603 |
+
'debug': False,
|
| 604 |
+
'show_error': True,
|
| 605 |
+
'quiet': False
|
| 606 |
+
}
|
| 607 |
+
|
| 608 |
+
# Update with any provided kwargs
|
| 609 |
+
launch_config.update(kwargs)
|
| 610 |
+
|
| 611 |
+
print("π Starting Gradio GUI for Business Chatbot...")
|
| 612 |
+
print(f"π± Access the interface at: http://localhost:{launch_config['server_port']}")
|
| 613 |
+
print("π‘ Press Ctrl+C to stop the server")
|
| 614 |
+
|
| 615 |
+
try:
|
| 616 |
+
interface.launch(**launch_config)
|
| 617 |
+
finally:
|
| 618 |
+
# Clean up chatbot resources
|
| 619 |
+
self.chatbot.close()
|
| 620 |
+
|
| 621 |
+
def main():
    """Entry point: construct the GUI wrapper and start the web server."""
    GradioInterface().launch()


if __name__ == "__main__":
    main()
|
main.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 6 |
+
|
| 7 |
+
from chatbot import Chatbot
|
| 8 |
+
from models import ChatbotRequest
|
| 9 |
+
|
| 10 |
+
def main():
    """Run the interactive CLI loop for the business chatbot.

    Prints a usage banner, then reads user messages until 'quit'/'exit'/'bye'
    or Ctrl+C, echoing the bot reply plus any extracted metadata. Chatbot
    resources are always released on exit.
    """
    banner = [
        "🤖 Business Chatbot with SQL Database and Vector Store",
        "=" * 60,
        "I can help you with:",
        "• Adding purchases: 'Add a purchase of 20 USB drives from TechMart at €5 each'",
        "• Adding sales: 'Sold 10 laptops to John Smith at €800 each'",
        "• Viewing recent transactions: 'Show recent transactions'",
        "• Searching: 'Find USB drives' or 'Search TechMart'",
        "• Storing general info: 'Meeting with supplier scheduled for next week'",
        "• Type 'quit' to exit",
        "=" * 60,
    ]
    for line in banner:
        print(line)

    chatbot = Chatbot()
    try:
        while True:
            user_input = input("\n💬 You: ").strip()

            if user_input.lower() in ['quit', 'exit', 'bye']:
                print("👋 Goodbye!")
                break
            if not user_input:
                continue

            # Process the message through the full pipeline.
            response = chatbot.process_message(ChatbotRequest(message=user_input))
            print(f"\n🤖 Bot: {response.response}")

            # Surface optional diagnostics attached to the response.
            entities = response.entities_extracted
            if entities:
                print(f"📊 Extracted: {entities.transaction_type} - {entities.product} ({entities.quantity}x) - €{entities.total_amount}")
            if response.vector_stored:
                print("💾 Information stored in vector database for future semantic search")
            if response.intent_detected:
                print(f"🎯 Intent: {response.intent_detected} (confidence: {response.intent_confidence:.2f})")
            if response.awaiting_clarification:
                print("⏳ Waiting for your response to complete the transaction...")

    except KeyboardInterrupt:
        print("\n👋 Goodbye!")
    finally:
        chatbot.close()


if __name__ == "__main__":
    main()
|
populate_sample_data.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Sample data population script for the LLM Chatbot database.
|
| 4 |
+
This script adds realistic sample transactions to help test the dashboard.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import sys
|
| 8 |
+
import os
|
| 9 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 10 |
+
|
| 11 |
+
from chatbot import Chatbot
|
| 12 |
+
from models import ChatbotRequest
|
| 13 |
+
|
| 14 |
+
def populate_sample_data():
    """Feed a fixed list of realistic purchase/sale messages through the chatbot.

    Each message is processed like normal user input; a reply containing
    'recorded' counts as success. A summary is printed at the end and the
    chatbot is always closed.
    """
    print("🔧 Populating database with sample transactions...")

    sample_transactions = [
        # Purchases
        "Add a purchase of 100 wireless mice from TechMart at €25 each",
        "Add a purchase of 50 laptop stands from Office Supplies Co at €35 each",
        "Add a purchase of 30 webcams from Electronics Plus at €80 each",
        "Add a purchase of 75 desk lamps from Office Supplies Co at €40 each",
        "Add a purchase of 20 printers from TechMart at €200 each",
        "Add a purchase of 60 surge protectors from Electronics Plus at €15 each",
        "Add a purchase of 40 ethernet cables from TechMart at €12 each",
        "Add a purchase of 15 projectors from Electronics Plus at €450 each",

        # Sales
        "Sold 80 wireless mice to StartupTech Corp at €35 each",
        "Sold 30 laptop stands to Creative Agency Ltd at €50 each",
        "Sold 25 webcams to Remote Work Solutions at €120 each",
        "Sold 50 desk lamps to Modern Office Inc at €55 each",
        "Sold 12 printers to Small Business Hub at €280 each",
        "Sold 45 surge protectors to Tech Solutions Ltd at €25 each",
        "Sold 35 ethernet cables to Network Systems Corp at €18 each",
        "Sold 10 projectors to Conference Center Co at €650 each",
        "Sold 5 laptops to Freelance Collective at €1400 each",
        "Sold 25 monitors to Design Studio Ltd at €380 each",
    ]

    chatbot = Chatbot()
    try:
        successful_transactions = 0
        failed_transactions = 0

        for text in sample_transactions:
            try:
                print(f"📝 Processing: {text}")
                reply = chatbot.process_message(ChatbotRequest(message=text))

                # The DB manager confirms bookings with a '...recorded' message.
                if "recorded" in reply.response.lower():
                    successful_transactions += 1
                    print(f"✅ Success: {reply.response}")
                else:
                    failed_transactions += 1
                    print(f"⚠️ Warning: {reply.response}")
            except Exception as exc:
                failed_transactions += 1
                print(f"❌ Error processing transaction: {exc}")

        print("\n📊 Summary:")
        print(f"✅ Successful transactions: {successful_transactions}")
        print(f"❌ Failed transactions: {failed_transactions}")
        print(f"🎯 Total attempted: {len(sample_transactions)}")

        if successful_transactions > 0:
            print(f"\n🎉 Database populated with {successful_transactions} sample transactions!")
            print("💡 You can now run the dashboard to see meaningful data.")
            print("🚀 Run 'python run_gui.py' to launch the Gradio interface.")
    finally:
        chatbot.close()


if __name__ == "__main__":
    populate_sample_data()
|
requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openai>=1.0.0
|
| 2 |
+
sqlalchemy
|
| 3 |
+
# NOTE: sqlite3 is part of the Python standard library; the PyPI package "db-sqlite3" is unnecessary and was removed
|
| 4 |
+
pandas>=2.0.0
|
| 5 |
+
numpy>=1.24.0
|
| 6 |
+
sentence-transformers>=2.2.0
|
| 7 |
+
chromadb>=0.4.0
|
| 8 |
+
spacy>=3.6.0
|
| 9 |
+
python-dateutil>=2.8.0
|
| 10 |
+
pydantic>=2.0.0
|
| 11 |
+
fastapi>=0.100.0
|
| 12 |
+
uvicorn>=0.23.0
|
| 13 |
+
gradio>=4.0.0
|
| 14 |
+
pyyaml>=6.0
|
| 15 |
+
plotly>=5.0.0
|
reset_database.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Database reset script for the LLM Chatbot.
|
| 4 |
+
This script clears all transaction data while keeping the basic structure intact.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import sqlite3
|
| 8 |
+
import os
|
| 9 |
+
|
| 10 |
+
def reset_database(db_path: str = "chatbot.db") -> None:
    """Reset the chatbot database by clearing all transaction data.

    Deletes every row from ``sales``, ``purchases`` and ``customers``,
    resets the SQLite auto-increment counters for the transaction tables,
    and leaves the schema (plus the ``suppliers``/``products`` tables)
    untouched. Progress and errors are reported on stdout.

    Args:
        db_path: Path to the SQLite database file. Defaults to "chatbot.db"
            (the value previously hard-coded), so existing callers are
            unaffected.
    """
    if not os.path.exists(db_path):
        print(f"❌ Database file '{db_path}' not found.")
        return

    # BUG FIX: previously `conn` was only assigned inside the try block, so
    # if sqlite3.connect() raised, the finally clause hit a NameError that
    # masked the real error. Initialise it first and guard the close.
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        print("🗑️ Clearing transaction data...")

        # Clear all transaction data.
        cursor.execute("DELETE FROM sales")
        cursor.execute("DELETE FROM purchases")

        # Reset auto-increment counters for the cleared tables.
        cursor.execute("DELETE FROM sqlite_sequence WHERE name IN ('sales', 'purchases')")

        # Clear customers created during testing (suppliers/products are
        # intentionally preserved).
        cursor.execute("DELETE FROM customers")

        conn.commit()

        # Report post-reset row counts so the user can confirm success.
        cursor.execute("SELECT COUNT(*) FROM purchases")
        purchases_count = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM sales")
        sales_count = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM customers")
        customers_count = cursor.fetchone()[0]

        print("✅ Database reset complete!")
        print(f"   - Purchases: {purchases_count}")
        print(f"   - Sales: {sales_count}")
        print(f"   - Customers: {customers_count}")
        print("💡 You can now add new sample data using 'python populate_sample_data.py'")

    except Exception as e:
        # Best-effort tool: report the failure instead of crashing.
        print(f"❌ Error resetting database: {e}")
    finally:
        if conn is not None:
            conn.close()


if __name__ == "__main__":
    response = input("⚠️ This will delete all transaction data. Continue? (y/N): ")
    if response.lower() in ['y', 'yes']:
        reset_database()
    else:
        print("🚫 Operation cancelled.")
|
run_gui.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
GUI Launcher for LLM Business Chatbot
|
| 5 |
+
|
| 6 |
+
This script launches the Gradio web interface for the chatbot application.
|
| 7 |
+
It provides a web-based GUI that wraps around the existing CLI chatbot
|
| 8 |
+
without modifying any of the original code.
|
| 9 |
+
|
| 10 |
+
Usage:
|
| 11 |
+
python run_gui.py # Launch with default settings
|
| 12 |
+
python run_gui.py --port 8080 # Launch on custom port
|
| 13 |
+
python run_gui.py --share # Create public sharing link
|
| 14 |
+
python run_gui.py --debug # Enable debug mode
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import sys
|
| 18 |
+
import os
|
| 19 |
+
import argparse
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
|
| 22 |
+
# Add gui directory to path
|
| 23 |
+
gui_dir = Path(__file__).parent / "gui"
|
| 24 |
+
sys.path.append(str(gui_dir))
|
| 25 |
+
|
| 26 |
+
def main():
    """Parse command-line options and launch the Gradio web interface.

    Exits with status 1 on missing dependencies or launch failure, and
    status 0 on Ctrl+C.
    """
    parser = argparse.ArgumentParser(
        description="Launch Gradio GUI for LLM Business Chatbot",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python run_gui.py                  # Default: localhost:7860
  python run_gui.py --port 8080      # Custom port
  python run_gui.py --share          # Public sharing link
  python run_gui.py --host 0.0.0.0   # Accept external connections
  python run_gui.py --debug          # Enable debug mode
        """,
    )
    parser.add_argument("--host", default="0.0.0.0",
                        help="Host address to bind to (default: 0.0.0.0)")
    parser.add_argument("--port", type=int, default=7860,
                        help="Port number to run the server on (default: 7860)")
    parser.add_argument("--share", action="store_true",
                        help="Create a public sharing link via Gradio")
    parser.add_argument("--debug", action="store_true",
                        help="Enable debug mode")
    parser.add_argument("--quiet", action="store_true",
                        help="Suppress startup messages")
    args = parser.parse_args()

    # Startup banner (suppressed with --quiet).
    if not args.quiet:
        print("=" * 70)
        print("🤖 LLM Business Chatbot - Gradio GUI")
        print("=" * 70)
        print("🚀 Starting web interface...")
        print(f"🌐 Host: {args.host}")
        print(f"🔌 Port: {args.port}")
        print(f"🔗 Share: {'Yes' if args.share else 'No'}")
        print(f"🐛 Debug: {'Yes' if args.debug else 'No'}")
        print("-" * 70)

    try:
        # Imported lazily so a missing dependency yields a friendly message.
        from gradio_interface import GradioInterface

        GradioInterface().launch(
            server_name=args.host,
            server_port=args.port,
            share=args.share,
            debug=args.debug,
            quiet=args.quiet,
            show_error=True,
        )
    except ImportError as e:
        print("❌ Error: Missing dependencies. Please install requirements:")
        print("   pip install -r requirements.txt")
        print(f"   Error details: {e}")
        sys.exit(1)
    except KeyboardInterrupt:
        if not args.quiet:
            print("\n👋 Shutting down Gradio interface...")
        sys.exit(0)
    except Exception as e:
        print(f"❌ Error launching GUI: {e}")
        if args.debug:
            import traceback
            traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
|
src/chatbot.py
ADDED
|
@@ -0,0 +1,402 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, Any, Optional
|
| 2 |
+
from entity_extractor import EntityExtractor
|
| 3 |
+
from database_manager import DatabaseManager
|
| 4 |
+
from vector_store import VectorStore
|
| 5 |
+
from nl_to_sql import NaturalLanguageToSQL
|
| 6 |
+
from intent_classifier import IntentClassifier, IntentType
|
| 7 |
+
from rag_handler import RAGHandler
|
| 8 |
+
from transaction_clarifier import TransactionClarifier, ClarificationStatus
|
| 9 |
+
from models import ChatbotRequest, ChatbotResponse, PendingTransaction
|
| 10 |
+
|
| 11 |
+
class Chatbot:
|
| 12 |
+
    def __init__(self):
        """Wire up all chatbot sub-systems.

        Each collaborator owns one concern: entity extraction from free
        text, SQL persistence, vector-store persistence, NL→SQL
        translation, intent classification, RAG answering, and interactive
        transaction clarification.
        """
        self.entity_extractor = EntityExtractor()
        self.db_manager = DatabaseManager()
        self.vector_store = VectorStore()
        self.nl_to_sql = NaturalLanguageToSQL()
        self.intent_classifier = IntentClassifier()
        self.rag_handler = RAGHandler()
        self.transaction_clarifier = TransactionClarifier()

        # Store pending transactions by session_id.
        # In-memory only (lost on restart); at most one pending transaction
        # per session — see process_message / _handle_transaction_clarification.
        self.pending_transactions: Dict[str, PendingTransaction] = {}
|
| 23 |
+
|
| 24 |
+
def process_message(self, request: ChatbotRequest) -> ChatbotResponse:
|
| 25 |
+
"""Process a user message and return appropriate response"""
|
| 26 |
+
message = request.message.strip()
|
| 27 |
+
session_id = request.session_id or "default"
|
| 28 |
+
|
| 29 |
+
# Check if we're waiting for clarification on a pending transaction
|
| 30 |
+
if session_id in self.pending_transactions:
|
| 31 |
+
print("A transaction is pending...")
|
| 32 |
+
return self._handle_transaction_clarification(message, session_id)
|
| 33 |
+
|
| 34 |
+
# Classify intent using OpenAI
|
| 35 |
+
intent_result = self.intent_classifier.classify_intent(message)
|
| 36 |
+
|
| 37 |
+
print(f"π― Intent: {intent_result.intent.value} (confidence: {intent_result.confidence:.2f})")
|
| 38 |
+
print(f"π Reasoning: {intent_result.reasoning}")
|
| 39 |
+
|
| 40 |
+
# Route to appropriate handler based on classified intent
|
| 41 |
+
if intent_result.intent == IntentType.TRANSACTION:
|
| 42 |
+
response = self._handle_transaction_request(message, session_id)
|
| 43 |
+
elif intent_result.intent == IntentType.QUERY:
|
| 44 |
+
response = self._handle_query_request(message)
|
| 45 |
+
elif intent_result.intent == IntentType.SEMANTIC_SEARCH:
|
| 46 |
+
response = self._handle_search_request(message)
|
| 47 |
+
else: # GENERAL_INFO
|
| 48 |
+
response = self._handle_general_information(message)
|
| 49 |
+
|
| 50 |
+
# Add intent information to response
|
| 51 |
+
response.intent_detected = intent_result.intent.value
|
| 52 |
+
response.intent_confidence = intent_result.confidence
|
| 53 |
+
|
| 54 |
+
return response
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
    def _handle_transaction_request(self, message: str, session_id: str) -> ChatbotResponse:
        """Handle transaction requests (purchases/sales) with interactive clarification.

        Extracts structured entities from the message, then either completes
        the transaction immediately, parks it as a pending transaction while
        asking the user for missing fields, or reports a cancellation.
        """
        try:
            # Extract structured fields (product, quantity, price, ...) from free text.
            entities = self.entity_extractor.extract_entities(message)

            # Check if the transaction has all required fields.
            status, clarification = self.transaction_clarifier.analyze_transaction_completeness(entities)

            if status == ClarificationStatus.COMPLETE:
                # Transaction is complete, process it.
                return self._complete_transaction(entities, message)

            elif status == ClarificationStatus.NEEDS_CLARIFICATION:
                # Park the partial transaction keyed by session so the next
                # message from this session is routed to
                # _handle_transaction_clarification by process_message.
                pending = PendingTransaction(
                    entities=entities,
                    missing_fields=clarification.missing_fields,
                    session_id=session_id,
                    original_message=message
                )
                self.pending_transactions[session_id] = pending

                clarification_message = self.transaction_clarifier.format_clarification_message(clarification)

                return ChatbotResponse(
                    response=clarification_message,
                    entities_extracted=entities,
                    awaiting_clarification=True
                )

            else:
                # Any other status aborts the transaction.
                return ChatbotResponse(
                    response="Transaction cancelled.",
                    entities_extracted=entities
                )

        except Exception as e:
            # NOTE(review): broad catch keeps the chat loop alive; the error
            # text is surfaced to the user instead of propagating.
            return ChatbotResponse(
                response=f"Error processing transaction: {str(e)}",
                sql_executed=None,
                entities_extracted=None,
                vector_stored=False
            )
|
| 101 |
+
|
| 102 |
+
    def _complete_transaction(self, entities, original_message: str) -> ChatbotResponse:
        """Complete a transaction that has all required information.

        Writes the transaction to the SQL database, then mirrors it into
        the vector store linked via the returned SQL transaction id so it
        is discoverable by later semantic searches.
        """
        try:
            # Process transaction in database and get the SQL transaction ID.
            transaction_id, result_message = self.db_manager.process_transaction(entities)

            # Store in vector store with the SQL transaction ID for linking.
            transaction_data = {
                "type": entities.transaction_type,
                "product": entities.product,
                "quantity": entities.quantity,
                "supplier": entities.supplier,
                "customer": entities.customer,
                "unit_price": entities.unit_price,
                "total": entities.total_amount
            }

            vector_stored = self.vector_store.add_transaction_event(
                transaction_data,
                original_message,
                sql_transaction_id=transaction_id
            )

            return ChatbotResponse(
                response=result_message,
                sql_executed="Transaction processed successfully",
                entities_extracted=entities,
                vector_stored=vector_stored
            )

        except Exception as e:
            # NOTE(review): the DB write may have succeeded while the vector
            # write failed; only the error text is reported to the user.
            return ChatbotResponse(
                response=f"Error completing transaction: {str(e)}",
                entities_extracted=entities
            )
|
| 137 |
+
|
| 138 |
+
    def _handle_transaction_clarification(self, message: str, session_id: str) -> ChatbotResponse:
        """Handle a user's answer to a transaction clarification question.

        Supports cancellation keywords, accumulates every clarification
        answer on the pending transaction, and either completes the
        transaction (passing the full original+clarification context along)
        or asks again for whatever is still missing. The pending entry is
        removed from ``self.pending_transactions`` on completion,
        cancellation, or error.
        """
        try:
            pending = self.pending_transactions.get(session_id)
            if not pending:
                return ChatbotResponse(
                    response="No pending transaction found. Please start a new transaction."
                )

            # Check if user wants to cancel.
            if message.lower() in ['cancel', 'quit', 'stop', 'abort']:
                del self.pending_transactions[session_id]
                return ChatbotResponse(
                    response="Transaction cancelled. You can start a new one anytime."
                )

            # Add this clarification response to the accumulated responses.
            pending.clarification_responses.append(message)

            # Merge the answer into the pending entities.
            updated_entities, is_complete = self.transaction_clarifier.process_clarification_response(
                pending.entities,
                pending.missing_fields,
                message
            )

            if is_complete:
                # Transaction is now complete: combine the original message
                # with all clarification responses for complete context.
                clarifications = "\n".join([f"Clarification {i+1}: {resp}" for i, resp in enumerate(pending.clarification_responses)])
                full_context = f"{pending.original_message}\n\n{clarifications}"
                del self.pending_transactions[session_id]
                return self._complete_transaction(updated_entities, full_context)
            else:
                # Still need more information — re-analyse what is missing.
                status, clarification = self.transaction_clarifier.analyze_transaction_completeness(updated_entities)

                if status == ClarificationStatus.NEEDS_CLARIFICATION:
                    # Update the pending transaction and ask again.
                    pending.entities = updated_entities
                    pending.missing_fields = clarification.missing_fields

                    clarification_message = self.transaction_clarifier.format_clarification_message(clarification)

                    return ChatbotResponse(
                        response=f"Thank you! I still need a bit more information:\n\n{clarification_message}",
                        entities_extracted=updated_entities,
                        awaiting_clarification=True
                    )
                else:
                    # Unexpected state (clarifier disagrees with is_complete):
                    # complete anyway, still including all clarification context.
                    clarifications = "\n".join([f"Clarification {i+1}: {resp}" for i, resp in enumerate(pending.clarification_responses)])
                    full_context = f"{pending.original_message}\n\n{clarifications}"
                    del self.pending_transactions[session_id]
                    return self._complete_transaction(updated_entities, full_context)

        except Exception as e:
            # Clean up on error so the session isn't stuck mid-clarification.
            if session_id in self.pending_transactions:
                del self.pending_transactions[session_id]

            return ChatbotResponse(
                response=f"Error processing your response: {str(e)}. Please start a new transaction."
            )
|
| 203 |
+
|
| 204 |
+
    def _handle_query_request(self, message: str) -> ChatbotResponse:
        """Handle query requests using an OpenAI LLM to generate SQL.

        Pipeline: NL→SQL conversion, SQL validation, execution, result
        formatting. Every failure path returns a ChatbotResponse carrying
        the generated SQL in ``sql_executed`` so callers can inspect what
        was attempted.
        """
        try:
            # Use OpenAI to convert natural language to SQL.
            sql_query, explanation = self.nl_to_sql.convert_to_sql(message)

            # Validate the generated SQL before touching the database.
            is_valid, validation_message = self.nl_to_sql.validate_sql(sql_query)

            if not is_valid:
                # Ask for a corrected-phrasing suggestion to show the user.
                suggestion = self.nl_to_sql.suggest_corrections(message, validation_message)
                return ChatbotResponse(
                    response=f"I couldn't process that query: {validation_message}\n\n{suggestion}",
                    sql_executed=sql_query
                )

            # Execute the SQL query.
            results = self.db_manager.query_data(sql_query)

            # Format and return results.
            if not results:
                return ChatbotResponse(
                    response="No results found for your query.",
                    sql_executed=sql_query
                )

            # query_data reports execution failures as a single-row
            # [{"error": ...}] payload — surface it with the SQL.
            if len(results) == 1 and "error" in results[0]:
                return ChatbotResponse(
                    response=f"Query execution error: {results[0]['error']}\n\nGenerated SQL: {sql_query}",
                    sql_executed=sql_query
                )

            # Format successful results.
            formatted_response = self._format_sql_results(results, explanation)

            return ChatbotResponse(
                response=formatted_response,
                sql_executed=sql_query
            )

        except Exception as e:
            return ChatbotResponse(response=f"Error processing query: {str(e)}")
|
| 247 |
+
|
| 248 |
+
def _handle_search_request(self, message: str) -> ChatbotResponse:
|
| 249 |
+
"""Handle semantic search requests using RAG"""
|
| 250 |
+
try:
|
| 251 |
+
# Enhance the search query for better retrieval
|
| 252 |
+
enhanced_query = self.rag_handler.enhance_search_query(message)
|
| 253 |
+
print(f"π Enhanced query: {enhanced_query}")
|
| 254 |
+
|
| 255 |
+
# Search vector store for similar events
|
| 256 |
+
results = self.vector_store.search_similar_events(enhanced_query, 8)
|
| 257 |
+
|
| 258 |
+
if not results:
|
| 259 |
+
return ChatbotResponse(response="I couldn't find any relevant information to answer your query.")
|
| 260 |
+
|
| 261 |
+
# Use RAG to generate an intelligent response
|
| 262 |
+
rag_response = self.rag_handler.generate_rag_response(message, results)
|
| 263 |
+
|
| 264 |
+
return ChatbotResponse(
|
| 265 |
+
response=rag_response,
|
| 266 |
+
vector_stored=False
|
| 267 |
+
)
|
| 268 |
+
|
| 269 |
+
except Exception as e:
|
| 270 |
+
return ChatbotResponse(response=f"Error processing your search: {str(e)}")
|
| 271 |
+
|
| 272 |
+
def _handle_general_information(self, message: str) -> ChatbotResponse:
|
| 273 |
+
"""Handle general information storage"""
|
| 274 |
+
try:
|
| 275 |
+
# Store in vector store
|
| 276 |
+
stored = self.vector_store.add_general_event(message, "general_info")
|
| 277 |
+
|
| 278 |
+
if stored:
|
| 279 |
+
return ChatbotResponse(
|
| 280 |
+
response="Information stored successfully. I can help you find similar information later.",
|
| 281 |
+
vector_stored=True
|
| 282 |
+
)
|
| 283 |
+
else:
|
| 284 |
+
return ChatbotResponse(
|
| 285 |
+
response="Information noted, but vector storage is not available.",
|
| 286 |
+
vector_stored=False
|
| 287 |
+
)
|
| 288 |
+
|
| 289 |
+
except Exception as e:
|
| 290 |
+
return ChatbotResponse(response=f"Error storing information: {str(e)}")
|
| 291 |
+
|
| 292 |
+
def _format_recent_transactions(self, data: Dict[str, list]) -> str:
|
| 293 |
+
"""Format recent transactions for display"""
|
| 294 |
+
response = "Recent Transactions:\n\n"
|
| 295 |
+
|
| 296 |
+
# Combine and sort all transactions
|
| 297 |
+
all_transactions = []
|
| 298 |
+
for purchase in data.get("purchases", []):
|
| 299 |
+
all_transactions.append(purchase)
|
| 300 |
+
for sale in data.get("sales", []):
|
| 301 |
+
all_transactions.append(sale)
|
| 302 |
+
|
| 303 |
+
# Sort by date
|
| 304 |
+
all_transactions.sort(key=lambda x: x.get("date", ""), reverse=True)
|
| 305 |
+
|
| 306 |
+
if not all_transactions:
|
| 307 |
+
return "No recent transactions found."
|
| 308 |
+
|
| 309 |
+
for transaction in all_transactions[:10]: # Show top 10
|
| 310 |
+
trans_type = transaction.get("type", "unknown").upper()
|
| 311 |
+
date = transaction.get("date", "")[:10] # Just the date part
|
| 312 |
+
|
| 313 |
+
if trans_type == "PURCHASE":
|
| 314 |
+
response += f"π {date} - PURCHASE: {transaction.get('quantity', 0)}x {transaction.get('product', 'Unknown')} from {transaction.get('supplier', 'Unknown')} - β¬{transaction.get('total_cost', 0)}\n"
|
| 315 |
+
else:
|
| 316 |
+
response += f"π° {date} - SALE: {transaction.get('quantity', 0)}x {transaction.get('product', 'Unknown')} to {transaction.get('customer', 'Unknown')} - β¬{transaction.get('total_amount', 0)}\n"
|
| 317 |
+
|
| 318 |
+
return response
|
| 319 |
+
|
| 320 |
+
def _format_search_results(self, results: list, search_term: str) -> str:
|
| 321 |
+
"""Format search results for display"""
|
| 322 |
+
if not results:
|
| 323 |
+
return f"No transactions found for '{search_term}'."
|
| 324 |
+
|
| 325 |
+
response = f"Found {len(results)} transaction(s) for '{search_term}':\n\n"
|
| 326 |
+
|
| 327 |
+
for transaction in results:
|
| 328 |
+
trans_type = transaction.get("type", "unknown").upper()
|
| 329 |
+
date = transaction.get("date", "")[:10]
|
| 330 |
+
|
| 331 |
+
if trans_type == "PURCHASE":
|
| 332 |
+
response += f"π {date} - {transaction.get('quantity', 0)}x {transaction.get('product', 'Unknown')} from {transaction.get('supplier', 'Unknown')} - β¬{transaction.get('total', 0)}\n"
|
| 333 |
+
else:
|
| 334 |
+
response += f"π° {date} - {transaction.get('quantity', 0)}x {transaction.get('product', 'Unknown')} to {transaction.get('customer', 'Unknown')} - β¬{transaction.get('total', 0)}\n"
|
| 335 |
+
|
| 336 |
+
return response
|
| 337 |
+
|
| 338 |
+
def _format_sql_results(self, results: list, explanation: str) -> str:
|
| 339 |
+
"""Format SQL query results for display"""
|
| 340 |
+
response = f"π Query Results:\n{explanation}\n\n"
|
| 341 |
+
|
| 342 |
+
if not results:
|
| 343 |
+
return response + "No data found."
|
| 344 |
+
|
| 345 |
+
# Handle single value results (like COUNT, SUM)
|
| 346 |
+
if len(results) == 1 and len(results[0]) == 1:
|
| 347 |
+
key, value = list(results[0].items())[0]
|
| 348 |
+
return response + f"**{key.replace('_', ' ').title()}:** {value}"
|
| 349 |
+
|
| 350 |
+
# Handle multiple rows
|
| 351 |
+
response += "```\n"
|
| 352 |
+
|
| 353 |
+
# Add headers
|
| 354 |
+
if results:
|
| 355 |
+
headers = list(results[0].keys())
|
| 356 |
+
response += " | ".join(f"{header.replace('_', ' ').title():<15}" for header in headers) + "\n"
|
| 357 |
+
response += "-" * (len(headers) * 17) + "\n"
|
| 358 |
+
|
| 359 |
+
# Add data rows
|
| 360 |
+
for row in results[:20]: # Limit to first 20 rows
|
| 361 |
+
formatted_row = []
|
| 362 |
+
for value in row.values():
|
| 363 |
+
if value is None:
|
| 364 |
+
formatted_row.append("N/A".ljust(15))
|
| 365 |
+
elif isinstance(value, float):
|
| 366 |
+
formatted_row.append(f"{value:.2f}".ljust(15))
|
| 367 |
+
else:
|
| 368 |
+
formatted_row.append(str(value)[:15].ljust(15))
|
| 369 |
+
response += " | ".join(formatted_row) + "\n"
|
| 370 |
+
|
| 371 |
+
if len(results) > 20:
|
| 372 |
+
response += f"\n... and {len(results) - 20} more rows\n"
|
| 373 |
+
|
| 374 |
+
response += "```"
|
| 375 |
+
|
| 376 |
+
return response
|
| 377 |
+
|
| 378 |
+
def get_linked_transaction_data(self, sql_transaction_id: int, transaction_type: str) -> Optional[Dict[str, Any]]:
    """Retrieve complete transaction data from both SQL and vector stores.

    Returns a dict with 'sql_data', 'vector_data' and a 'linked' flag, or
    None when the SQL record is missing or a lookup fails.
    """
    try:
        # Query both stores; both lookups are attempted unconditionally so
        # a failure in either surfaces here.
        record = self.db_manager.get_transaction_by_id(sql_transaction_id, transaction_type)
        embedding = self.vector_store.get_transaction_by_sql_id(sql_transaction_id, transaction_type)

        if not record:
            return None
        return {
            "sql_data": record,
            "vector_data": embedding,
            "linked": embedding is not None
        }
    except Exception as e:
        print(f"Error retrieving linked transaction data: {e}")
        return None
|
| 399 |
+
|
| 400 |
+
def close(self):
    """Clean up resources by closing the underlying database manager's session."""
    self.db_manager.close()
|
src/config_manager.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import yaml
|
| 4 |
+
import os
|
| 5 |
+
from typing import Dict, Any, List
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
class ConfigManager:
    """Manages configuration loading and access for the chatbot application.

    Parses a YAML file once at construction time and serves lookups from the
    in-memory dict via dot-separated key paths.
    """

    def __init__(self, config_path: str = None):
        """Initialize the configuration manager.

        Args:
            config_path: Path to the configuration file. Defaults to
                config.yaml in the project root.
        """
        if config_path is None:
            # Project root is two levels above this module (src/..).
            config_path = Path(__file__).parent.parent / "config.yaml"
        self.config_path = Path(config_path)
        self._config = self._load_config()

    def _load_config(self) -> Dict[str, Any]:
        """Load and parse the YAML file, raising descriptive errors on failure."""
        try:
            with open(self.config_path, 'r', encoding='utf-8') as fh:
                # An empty file parses to None; normalize to an empty dict.
                return yaml.safe_load(fh) or {}
        except FileNotFoundError:
            raise FileNotFoundError(f"Configuration file not found: {self.config_path}")
        except yaml.YAMLError as e:
            raise ValueError(f"Error parsing configuration file: {e}")

    def get(self, key_path: str, default: Any = None) -> Any:
        """Get a configuration value using dot notation.

        Args:
            key_path: Dot-separated path (e.g. 'database.path').
            default: Value returned when any segment of the path is missing.

        Returns:
            The configuration value, or *default* if not found.
        """
        node = self._config
        for part in key_path.split('.'):
            try:
                node = node[part]
            except (KeyError, TypeError):
                # Missing key, or we hit a non-dict mid-path.
                return default
        return node

    def get_database_config(self) -> Dict[str, Any]:
        """Return the 'database' section (empty dict if absent)."""
        return self.get('database', {})

    def get_openai_config(self, component: str = None) -> Dict[str, Any]:
        """Return the 'openai' section, or one of its component sub-sections.

        Args:
            component: Optional component name (e.g. 'intent_classifier').
        """
        key = f'openai.{component}' if component else 'openai'
        return self.get(key, {})

    def get_vector_store_config(self) -> Dict[str, Any]:
        """Return the 'vector_store' section."""
        return self.get('vector_store', {})

    def get_search_config(self) -> Dict[str, Any]:
        """Return the 'search' section."""
        return self.get('search', {})

    def get_entity_extraction_config(self) -> Dict[str, Any]:
        """Return the 'entity_extraction' section."""
        return self.get('entity_extraction', {})

    def get_business_logic_config(self) -> Dict[str, Any]:
        """Return the 'business_logic' section."""
        return self.get('business_logic', {})

    def get_app_config(self) -> Dict[str, Any]:
        """Return the 'app' section."""
        return self.get('app', {})

    def is_feature_enabled(self, feature_name: str) -> bool:
        """Check whether app.features.<feature_name> is enabled.

        Unknown features default to enabled (True).
        """
        return self.get(f'app.features.{feature_name}', True)
|
| 107 |
+
|
| 108 |
+
# Global configuration instance
# Module-level singleton so every component shares one parsed config.
_config_manager = None

def get_config() -> ConfigManager:
    """Get the global configuration manager instance.

    Lazily constructs a ConfigManager on first call and caches it for
    all subsequent callers.
    """
    global _config_manager
    if _config_manager is None:
        _config_manager = ConfigManager()
    return _config_manager

def reload_config():
    """Reload the configuration from file.

    Replaces the cached singleton; callers holding a reference to the old
    instance keep seeing the old values until they call get_config() again.
    """
    global _config_manager
    _config_manager = ConfigManager()
|
src/database_manager.py
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
from typing import Optional, List, Dict, Any
|
| 3 |
+
from sqlalchemy import create_engine, text
|
| 4 |
+
from sqlalchemy.orm import sessionmaker
|
| 5 |
+
from models import Base, Supplier, Customer, Product, Purchase, Sale, EntityExtraction
|
| 6 |
+
|
| 7 |
+
class DatabaseManager:
    """SQLite persistence layer (via SQLAlchemy ORM) for transactions.

    Creates the schema on construction, seeds default suppliers/products,
    and exposes helpers to record and query purchases and sales. Owns a
    single long-lived session; call close() at shutdown.
    """

    def __init__(self, db_path: str = "chatbot.db"):
        # One engine + one session per manager instance.
        self.db_path = db_path
        self.engine = create_engine(f"sqlite:///{db_path}")
        Base.metadata.create_all(self.engine)
        Session = sessionmaker(bind=self.engine)
        self.session = Session()
        self._initialize_data()

    def _initialize_data(self):
        """Initialize database with sample data (idempotent: skips rows that exist)."""
        # Add default suppliers if they don't exist
        suppliers = ["TechMart", "Office Supplies Co", "Electronics Plus"]
        for supplier_name in suppliers:
            existing = self.session.query(Supplier).filter_by(name=supplier_name).first()
            if not existing:
                supplier = Supplier(name=supplier_name)
                self.session.add(supplier)

        # Add default products
        products = [
            ("USB drives", "Electronics"),
            ("Office chairs", "Furniture"),
            ("Laptops", "Electronics"),
            ("Monitors", "Electronics"),
            ("Keyboards", "Electronics")
        ]
        for product_name, category in products:
            existing = self.session.query(Product).filter_by(name=product_name).first()
            if not existing:
                product = Product(name=product_name, category=category)
                self.session.add(product)

        self.session.commit()

    def process_transaction(self, entities: EntityExtraction) -> tuple:
        """Process a transaction based on extracted entities.

        Returns:
            (record_id, message) on success, or (None, error_message) when
            the type is unknown or the insert fails.
        """
        try:
            if entities.transaction_type == "purchase":
                return self._process_purchase(entities)
            elif entities.transaction_type == "sale":
                return self._process_sale(entities)
            else:
                return None, "Could not determine transaction type"
        except Exception as e:
            # Roll back so a failed insert doesn't poison the shared session.
            self.session.rollback()
            return None, f"Error processing transaction: {str(e)}"

    def _process_purchase(self, entities: EntityExtraction) -> tuple:
        """Process a purchase transaction; returns (purchase_id, confirmation message)."""
        # Get or create supplier
        supplier = None
        if entities.supplier:
            supplier = self.session.query(Supplier).filter_by(name=entities.supplier).first()
            if not supplier:
                supplier = Supplier(name=entities.supplier)
                self.session.add(supplier)
                self.session.flush()  # populate supplier.id before use

        # Get or create product
        product = None
        if entities.product:
            product = self.session.query(Product).filter_by(name=entities.product).first()
            if not product:
                product = Product(name=entities.product)
                self.session.add(product)
                self.session.flush()  # populate product.id before use

        # Create purchase record; total falls back to quantity * unit price.
        purchase = Purchase(
            supplier_id=supplier.id if supplier else None,
            product_id=product.id if product else None,
            quantity=entities.quantity or 1,
            unit_price=entities.unit_price or 0,
            total_cost=entities.total_amount or (entities.quantity or 1) * (entities.unit_price or 0),
            notes=entities.notes
        )

        self.session.add(purchase)
        self.session.commit()

        return purchase.id, f"Purchase recorded: {entities.quantity or 1}x {entities.product or 'Unknown'} from {entities.supplier or 'Unknown'} for β¬{entities.total_amount or 0}"

    def _process_sale(self, entities: EntityExtraction) -> tuple:
        """Process a sale transaction; returns (sale_id, confirmation message)."""
        # Get or create customer
        customer = None
        if entities.customer:
            customer = self.session.query(Customer).filter_by(name=entities.customer).first()
            if not customer:
                customer = Customer(name=entities.customer)
                self.session.add(customer)
                self.session.flush()  # populate customer.id before use

        # Get or create product
        product = None
        if entities.product:
            product = self.session.query(Product).filter_by(name=entities.product).first()
            if not product:
                product = Product(name=entities.product)
                self.session.add(product)
                self.session.flush()  # populate product.id before use

        # Create sale record; total falls back to quantity * unit price.
        sale = Sale(
            customer_id=customer.id if customer else None,
            product_id=product.id if product else None,
            quantity=entities.quantity or 1,
            unit_price=entities.unit_price or 0,
            total_amount=entities.total_amount or (entities.quantity or 1) * (entities.unit_price or 0),
            notes=entities.notes
        )

        self.session.add(sale)
        self.session.commit()

        return sale.id, f"Sale recorded: {entities.quantity or 1}x {entities.product or 'Unknown'} to {entities.customer or 'Unknown'} for β¬{entities.total_amount or 0}"

    def query_data(self, query: str) -> List[Dict[str, Any]]:
        """Execute a raw SQL query and return rows as dicts.

        On failure the error is reported in-band as [{"error": message}]
        rather than raised, so callers can render it directly.
        """
        try:
            result = self.session.execute(text(query))
            columns = result.keys()
            rows = result.fetchall()
            return [dict(zip(columns, row)) for row in rows]
        except Exception as e:
            return [{"error": str(e)}]

    def get_recent_transactions(self, limit: int = 10) -> Dict[str, List[Dict]]:
        """Get the most recent purchases and sales (up to *limit* of each)."""
        purchases = self.session.query(Purchase).order_by(Purchase.purchase_date.desc()).limit(limit).all()
        sales = self.session.query(Sale).order_by(Sale.sale_date.desc()).limit(limit).all()

        purchase_data = []
        for p in purchases:
            purchase_data.append({
                "id": p.id,
                "supplier": p.supplier.name if p.supplier else "Unknown",
                "product": p.product.name if p.product else "Unknown",
                "quantity": p.quantity,
                "unit_price": float(p.unit_price),
                "total_cost": float(p.total_cost),
                "date": p.purchase_date.isoformat(),
                "type": "purchase"
            })

        sale_data = []
        for s in sales:
            sale_data.append({
                "id": s.id,
                "customer": s.customer.name if s.customer else "Unknown",
                "product": s.product.name if s.product else "Unknown",
                "quantity": s.quantity,
                "unit_price": float(s.unit_price),
                "total_amount": float(s.total_amount),
                "date": s.sale_date.isoformat(),
                "type": "sale"
            })

        return {"purchases": purchase_data, "sales": sale_data}

    def search_transactions(self, search_term: str) -> List[Dict[str, Any]]:
        """Search transactions by supplier, customer, product, or notes substring.

        Returns purchases and sales merged, sorted newest first.
        """
        results = []

        # Search purchases (outer joins so rows with missing refs still match on notes)
        purchases = self.session.query(Purchase).join(Supplier, Purchase.supplier_id == Supplier.id, isouter=True)\
            .join(Product, Purchase.product_id == Product.id, isouter=True)\
            .filter(
                (Supplier.name.contains(search_term)) |
                (Product.name.contains(search_term)) |
                (Purchase.notes.contains(search_term))
            ).all()

        for p in purchases:
            results.append({
                "id": p.id,
                "type": "purchase",
                "supplier": p.supplier.name if p.supplier else "Unknown",
                "product": p.product.name if p.product else "Unknown",
                "quantity": p.quantity,
                "unit_price": float(p.unit_price),
                "total": float(p.total_cost),
                "date": p.purchase_date.isoformat()
            })

        # Search sales
        sales = self.session.query(Sale).join(Customer, Sale.customer_id == Customer.id, isouter=True)\
            .join(Product, Sale.product_id == Product.id, isouter=True)\
            .filter(
                (Customer.name.contains(search_term)) |
                (Product.name.contains(search_term)) |
                (Sale.notes.contains(search_term))
            ).all()

        for s in sales:
            results.append({
                "id": s.id,
                "type": "sale",
                "customer": s.customer.name if s.customer else "Unknown",
                "product": s.product.name if s.product else "Unknown",
                "quantity": s.quantity,
                "unit_price": float(s.unit_price),
                "total": float(s.total_amount),
                "date": s.sale_date.isoformat()
            })

        return sorted(results, key=lambda x: x["date"], reverse=True)

    def get_transaction_by_id(self, transaction_id: int, transaction_type: str) -> Optional[Dict[str, Any]]:
        """Retrieve a specific transaction as a dict by ID and type.

        Args:
            transaction_id: Primary key of the purchase or sale row.
            transaction_type: "purchase" or "sale".

        Returns:
            A dict of the row's fields, or None if not found / on error.
        """
        try:
            if transaction_type == "purchase":
                transaction = self.session.query(Purchase).filter_by(id=transaction_id).first()
                if transaction:
                    return {
                        "id": transaction.id,
                        "type": "purchase",
                        "supplier_id": transaction.supplier_id,
                        "product_id": transaction.product_id,
                        "quantity": transaction.quantity,
                        "unit_price": transaction.unit_price,
                        "total_cost": transaction.total_cost,
                        "purchase_date": transaction.purchase_date.isoformat() if transaction.purchase_date else None,
                        "notes": transaction.notes
                    }
            elif transaction_type == "sale":
                transaction = self.session.query(Sale).filter_by(id=transaction_id).first()
                if transaction:
                    return {
                        "id": transaction.id,
                        "type": "sale",
                        "customer_id": transaction.customer_id,
                        "product_id": transaction.product_id,
                        "quantity": transaction.quantity,
                        "unit_price": transaction.unit_price,
                        "total_amount": transaction.total_amount,
                        "sale_date": transaction.sale_date.isoformat() if transaction.sale_date else None,
                        "notes": transaction.notes
                    }

            return None
        except Exception as e:
            print(f"Error retrieving transaction by ID: {e}")
            return None

    def close(self):
        """Close database connection (the SQLAlchemy session)."""
        self.session.close()
|
src/entity_extractor.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import spacy
|
| 3 |
+
from typing import Optional, Dict, Any
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
from dateutil import parser as date_parser
|
| 6 |
+
from models import EntityExtraction
|
| 7 |
+
|
| 8 |
+
class EntityExtractor:
    """Rule-based entity extractor for transaction descriptions.

    Uses regex patterns (with an optional spaCy NER fallback) to pull the
    product, quantity, unit, counterparty and prices out of free text and
    package them as an EntityExtraction.
    """

    def __init__(self):
        # spaCy is optional: without the model we fall back to regex-only extraction.
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            print("Warning: spaCy model not found. Install with: python -m spacy download en_core_web_sm")
            self.nlp = None

    def extract_entities(self, text: str) -> EntityExtraction:
        """Extract entities from user input text.

        Returns:
            EntityExtraction with whatever fields could be recognized; the
            original text is preserved in .notes.
        """
        text_lower = text.lower()

        # The transaction type decides whether we look for a supplier
        # (purchase) or a customer (sale).
        transaction_type = self._detect_transaction_type(text_lower)

        product = self._extract_product(text)
        quantity = self._extract_quantity(text)
        unit = self._extract_unit(text)
        supplier = self._extract_supplier(text) if transaction_type == "purchase" else None
        customer = self._extract_customer(text) if transaction_type == "sale" else None
        unit_price = self._extract_unit_price(text)
        total_amount = self._calculate_total(quantity, unit_price)

        return EntityExtraction(
            product=product,
            quantity=quantity,
            unit=unit,
            supplier=supplier,
            customer=customer,
            unit_price=unit_price,
            total_amount=total_amount,
            transaction_type=transaction_type,
            notes=text
        )

    def _detect_transaction_type(self, text: str) -> str:
        """Classify lowercase text as 'purchase' or 'sale' by keyword counts (ties -> purchase)."""
        purchase_keywords = ["purchase", "buy", "bought", "order", "from", "supplier"]
        sale_keywords = ["sale", "sell", "sold", "to", "customer", "client"]

        purchase_score = sum(1 for keyword in purchase_keywords if keyword in text)
        sale_score = sum(1 for keyword in sale_keywords if keyword in text)

        return "purchase" if purchase_score >= sale_score else "sale"

    def _extract_product(self, text: str) -> Optional[str]:
        """Extract product name from text via ordered regex patterns, then spaCy NER."""
        # Enhanced product patterns to handle various formats
        product_patterns = [
            # Pattern for "X units of Y" format (e.g., "20 tons of Apples")
            r"(?:\d+)\s*(?:tons?|kg|kilograms?|pounds?|lbs?|pieces?|units?|items?|boxes?)\s+of\s+([a-zA-Z\s]+?)(?:\s+from|\s+at|\s+for|\s*β¬|\s*\$|$)",

            # Pattern for "bought/purchased X Y" format
            r"(?:bought|purchased|buy|purchase|sold|sale|sell)\s+(?:\d+\s*(?:tons?|kg|pieces?|units?)?\s+)?(?:of\s+)?([a-zA-Z\s]+?)(?:\s+from|\s+to|\s+at|\s+for|\s*β¬|\s*\$)",

            # Pattern for quantity followed by product
            r"(?:\d+)\s*(?:x\s+)?([a-zA-Z\s]+?)(?:\s+from|\s+at|\s+for|\s*β¬|\s*\$)",

            # Pattern for standalone capitalized product names
            r"\b([A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*)\b(?!\s+(?:from|at|for|β¬|\$))",
        ]

        for pattern in product_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                product = match.group(1).strip()
                # Filter out common non-product words
                if product.lower() not in ['from', 'at', 'for', 'to', 'we', 'i', 'you', 'the', 'a', 'an', 'and', 'or']:
                    return product

        # Use spaCy for named entity recognition if available
        if self.nlp:
            doc = self.nlp(text)
            for ent in doc.ents:
                if ent.label_ in ["PRODUCT", "ORG"] and len(ent.text) > 2:
                    return ent.text

        return None

    def _extract_quantity(self, text: str) -> Optional[int]:
        """Extract quantity from text; decimal quantities are rounded to int."""
        # Enhanced quantity patterns to handle various units
        quantity_patterns = [
            # Numbers with explicit units
            r"(\d+(?:\.\d+)?)\s*(?:tons?|kg|kilograms?|pounds?|lbs?|pieces?|units?|items?|boxes?)",
            # Numbers followed by "of" or "x"
            r"(\d+(?:\.\d+)?)\s*(?:of|x)\s+",
            # Numbers in transaction context
            r"(?:bought|purchased|buy|purchase|sold|sale|sell)\s+(?:of\s+)?(\d+(?:\.\d+)?)",
            # Standalone numbers at start
            r"^(\d+(?:\.\d+)?)\s+",
        ]

        for pattern in quantity_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                try:
                    # Convert to int, handling decimal quantities
                    quantity = float(match.group(1))
                    return int(quantity) if quantity.is_integer() else int(round(quantity))
                except (ValueError, AttributeError):
                    continue

        return None

    def _extract_unit(self, text: str) -> Optional[str]:
        """Extract and normalize the unit (tons, kg, pieces, ...) following a number."""
        # Common unit patterns
        unit_patterns = [
            r"\d+(?:\.\d+)?\s*(tons?|kg|kilograms?|pounds?|lbs?|pieces?|units?|items?|boxes?|liters?|gallons?)",
        ]

        for pattern in unit_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                unit = match.group(1).lower()
                # Normalize singular/variant spellings to a canonical plural form.
                unit_mapping = {
                    'ton': 'tons', 'kg': 'kg', 'kilogram': 'kg', 'kilograms': 'kg',
                    'pound': 'lbs', 'pounds': 'lbs', 'lb': 'lbs', 'lbs': 'lbs',
                    'piece': 'pieces', 'pieces': 'pieces',
                    'unit': 'units', 'units': 'units',
                    'item': 'items', 'items': 'items',
                    'box': 'boxes', 'boxes': 'boxes',
                    'liter': 'liters', 'liters': 'liters',
                    'gallon': 'gallons', 'gallons': 'gallons'
                }
                return unit_mapping.get(unit, unit)

        return None

    def _extract_supplier(self, text: str) -> Optional[str]:
        """Extract supplier name from "from X" / "supplier X" phrasings, then spaCy ORG."""
        supplier_patterns = [
            r"from\s+([A-Za-z\s]+?)(?:\s+at|\s+for|\s*β¬|\s*\$|$)",
            r"supplier\s+([A-Za-z\s]+?)(?:\s+at|\s+for|\s*β¬|\s*\$|$)",
        ]

        for pattern in supplier_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                return match.group(1).strip()

        # Use spaCy for organization detection
        if self.nlp:
            doc = self.nlp(text)
            for ent in doc.ents:
                if ent.label_ == "ORG":
                    return ent.text

        return None

    def _extract_customer(self, text: str) -> Optional[str]:
        """Extract customer name from "to X" / "customer X" phrasings, then spaCy PERSON."""
        customer_patterns = [
            r"to\s+([A-Za-z\s]+?)(?:\s+at|\s+for|\s*β¬|\s*\$|$)",
            r"customer\s+([A-Za-z\s]+?)(?:\s+at|\s+for|\s*β¬|\s*\$|$)",
        ]

        for pattern in customer_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                return match.group(1).strip()

        # Use spaCy for person detection
        if self.nlp:
            doc = self.nlp(text)
            for ent in doc.ents:
                if ent.label_ == "PERSON":
                    return ent.text

        return None

    def _extract_unit_price(self, text: str) -> Optional[float]:
        """Extract unit price from text.

        Fix: the original patterns used a bare '$' inside the currency
        alternations; unescaped, '$' is the end-of-string anchor, so prices
        written with a dollar sign were never matched (and the last pattern
        spuriously matched bare trailing numbers). It is now escaped as '\\$'.
        """
        price_patterns = [
            r"(?:at|for|β¬|\$)\s*(\d+(?:\.\d{2})?)\s*(?:each|per|unit)?",
            r"(\d+(?:\.\d{2})?)\s*(?:β¬|\$)\s*(?:each|per|unit)",
            r"(?:price|cost)?\s*(?:of)?\s*(\d+(?:\.\d{2})?)\s*(?:β¬|\$)",
        ]

        for pattern in price_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                return float(match.group(1))

        return None

    def _calculate_total(self, quantity: Optional[int], unit_price: Optional[float]) -> Optional[float]:
        """Calculate total amount; None unless both quantity and price are known and nonzero."""
        if quantity and unit_price:
            return quantity * unit_price
        return None
|
src/intent_classifier.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import openai
|
| 2 |
+
import os
|
| 3 |
+
import dirtyjson as json
|
| 4 |
+
from typing import Dict, Any, Optional, Tuple
|
| 5 |
+
from enum import Enum
|
| 6 |
+
from pydantic import BaseModel
|
| 7 |
+
|
| 8 |
+
class IntentType(str, Enum):
    """Routing categories produced by the intent classifier.

    Subclassing ``str`` lets members compare/serialize as their plain string
    values (e.g. in JSON payloads and pydantic models).
    """
    TRANSACTION = "transaction"          # record a purchase or sale
    QUERY = "query"                      # structured lookup against SQL tables
    SEMANTIC_SEARCH = "semantic_search"  # contextual / vector-store search
    GENERAL_INFO = "general_info"        # store a note or reminder
|
| 13 |
+
|
| 14 |
+
class IntentResult(BaseModel):
    """Outcome of a single intent classification."""
    intent: IntentType
    confidence: float  # 0.0-1.0, as reported by the model (or the fallback heuristic)
    reasoning: str  # brief justification for the chosen intent
    entities_hint: Optional[str] = None  # key entities spotted (transaction intent only)
|
| 19 |
+
|
| 20 |
+
class IntentClassifier:
    """Classifies user messages into IntentType categories via the OpenAI API,
    with a keyword-based fallback when the API call or JSON parsing fails."""

    def __init__(self, api_key: Optional[str] = None):
        """Initialize OpenAI client for intent classification"""
        # Falls back to the OPENAI_API_KEY environment variable when no
        # explicit key is provided.
        self.client = openai.OpenAI(
            api_key=api_key or os.getenv('OPENAI_API_KEY')
        )

    def classify_intent(self, user_message: str) -> IntentResult:
        """
        Classify user intent using OpenAI API
        Returns: IntentResult with intent type, confidence, and reasoning
        """

        system_prompt = """You are an expert intent classifier for a business chatbot that handles sales, purchases, and general information storage.

Given a user message, classify it into one of these intents:

1. **QUERY**: User wants to retrieve or analyze STRUCTURED data from SQL database tables
- Examples: "How many USB drives did we buy?" (counts from purchases table)
- Examples: "What's the total value of all sales?" (sum from sales table)
- Examples: "Show me recent transactions" (list from transactions table)
- Examples: "List all customers" (data from customers table)
- Key indicators: Asking for counts, totals, lists, recent data from business transactions
- Must be answerable from structured database tables (purchases, sales, customers, suppliers, products)

2. **SEMANTIC_SEARCH**: User wants to find contextual information, tasks, or unstructured data
- Examples: "What does Mark need to do?" (searching for task/context info)
- Examples: "Find events related to supplier meetings" (contextual search)
- Examples: "When do I have the meeting with George?" (calendar/scheduling info)
- Examples: "Show me similar purchases to this one" (similarity search)
- Examples: "What did we discuss in the last meeting?" (meeting notes/context)
- Key indicators: Questions about tasks, meetings, discussions, or contextual information
- Information that would NOT be in structured database tables

3. **TRANSACTION**: User wants to record a business transaction (purchase or sale)
- Examples: "Add a purchase of 20 USB drives from TechMart at β¬5 each"
- Examples: "Sold 10 laptops to John Smith at β¬800 each"
- Contains: product names, quantities, suppliers/customers, prices
- Action: Recording new business data

4. **GENERAL_INFO**: User wants to store general business information or notes
- It cannot be a question.
- Examples: "Meeting with new supplier scheduled for next week"
- Examples: "Remember to check inventory levels before next order"
- Examples: "Mark needs to call the supplier tomorrow"
- Contains: notes, reminders, general business information, task assignments

Return your response in this exact JSON format:
{
"intent": "transaction|query|semantic_search|general_info",
"confidence": 0.0-1.0,
"reasoning": "Brief explanation of why you chose this intent",
"entities_hint": "Optional: Key entities you detected (for transaction intent)"
}

Be precise and consider context carefully. If unsure, choose the most likely intent and indicate lower confidence."""

        user_prompt = f'Classify the intent of this user message: "{user_message}"'

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                # Low temperature for deterministic-ish classification output.
                temperature=0.1,
                max_tokens=300
            )

            response_text = response.choices[0].message.content.strip()

            # Clean JSON response more carefully
            # Strip optional markdown code fences (```json ... ``` or ``` ... ```)
            # that the model sometimes wraps around the JSON payload.
            if response_text.startswith("```json"):
                response_text = response_text[7:]
            if response_text.startswith("```"):
                response_text = response_text[3:]
            if response_text.endswith("```"):
                response_text = response_text[:-3]

            response_text = response_text.strip()

            # Parse JSON response
            # NOTE: `json` here is the dirtyjson package (see file imports),
            # which tolerates minor JSON malformations from the LLM.
            try:
                result_dict = json.loads(response_text)

                # Validate intent value against the known IntentType values;
                # anything unexpected routes to the keyword fallback.
                intent_value = result_dict.get("intent", "").lower()
                if intent_value not in [e.value for e in IntentType]:
                    print(f"Invalid intent value: {intent_value}")
                    return self._fallback_classification(user_message, f"Invalid intent: {intent_value}")

                return IntentResult(
                    intent=IntentType(intent_value),
                    confidence=float(result_dict.get("confidence", 0.5)),
                    reasoning=result_dict.get("reasoning", "No reasoning provided"),
                    entities_hint=result_dict.get("entities_hint")
                )
            except Exception as e:
                # Fallback if JSON parsing fails
                print(f"JSON parsing error: {e}")
                print(f"Raw response: {response_text}")
                return self._fallback_classification(user_message, f"JSON parsing failed: {str(e)}")

        except Exception as e:
            # API/network failure: degrade gracefully to keyword matching.
            print(f"Error in intent classification: {e}")
            return self._fallback_classification(user_message, str(e))

    def _fallback_classification(self, user_message: str, error_info: str) -> IntentResult:
        """Fallback classification when OpenAI API fails"""
        message_lower = user_message.lower()

        # Simple keyword-based fallback
        # NOTE(review): matching is substring-based and order-dependent
        # (transaction keywords are checked first), so e.g. any message
        # containing "to" classifies as TRANSACTION — confirm intended.
        transaction_keywords = ["purchase", "buy", "sold", "sale", "from", "to", "β¬", "$"]
        query_keywords = ["how many", "total", "list all", "recent transactions", "count"]
        search_keywords = ["similar", "like", "related", "about", "need to do", "meeting", "discuss", "task"]

        if any(keyword in message_lower for keyword in transaction_keywords):
            intent = IntentType.TRANSACTION
            confidence = 0.6
        elif any(keyword in message_lower for keyword in query_keywords):
            intent = IntentType.QUERY
            confidence = 0.6
        elif any(keyword in message_lower for keyword in search_keywords):
            intent = IntentType.SEMANTIC_SEARCH
            confidence = 0.6
        else:
            intent = IntentType.GENERAL_INFO
            confidence = 0.5

        return IntentResult(
            intent=intent,
            confidence=confidence,
            # error_info is truncated to keep the reasoning string short.
            reasoning=f"Fallback classification due to API error: {error_info[:100]}",
            entities_hint=None
        )

    def get_intent_description(self, intent: IntentType) -> str:
        """Get human-readable description of intent type"""
        descriptions = {
            IntentType.TRANSACTION: "Recording a business transaction (purchase or sale)",
            IntentType.QUERY: "Retrieving or analyzing data from the database",
            IntentType.SEMANTIC_SEARCH: "Finding similar events or information",
            IntentType.GENERAL_INFO: "Storing general business information or notes"
        }
        return descriptions.get(intent, "Unknown intent type")

    def batch_classify(self, messages: list[str]) -> list[IntentResult]:
        """Classify multiple messages efficiently"""
        # NOTE: sequential one-call-per-message; each message incurs a
        # separate API round trip.
        results = []
        for message in messages:
            result = self.classify_intent(message)
            results.append(result)
        return results
|
src/models.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
from typing import Optional, List
|
| 3 |
+
from pydantic import BaseModel, Field
|
| 4 |
+
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, Numeric, Text, create_engine
|
| 5 |
+
from sqlalchemy.ext.declarative import declarative_base
|
| 6 |
+
from sqlalchemy.orm import relationship, sessionmaker
|
| 7 |
+
|
| 8 |
+
Base = declarative_base()
|
| 9 |
+
|
| 10 |
+
class Supplier(Base):
    """SQLAlchemy model for a purchasing supplier (table ``suppliers``)."""
    __tablename__ = "suppliers"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String(255), nullable=False, unique=True)  # unique business name
    contact_info = Column(Text)
    created_at = Column(DateTime, default=datetime.utcnow)  # naive UTC timestamp

    # One-to-many: all purchases sourced from this supplier.
    purchases = relationship("Purchase", back_populates="supplier")
|
| 19 |
+
|
| 20 |
+
class Customer(Base):
    """SQLAlchemy model for a sales customer (table ``customers``)."""
    __tablename__ = "customers"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String(255), nullable=False)  # not unique, unlike Supplier.name
    email = Column(String(255))
    phone = Column(String(50))
    address = Column(Text)
    created_at = Column(DateTime, default=datetime.utcnow)  # naive UTC timestamp

    # One-to-many: all sales made to this customer.
    sales = relationship("Sale", back_populates="customer")
|
| 31 |
+
|
| 32 |
+
class Product(Base):
    """SQLAlchemy model for a traded product (table ``products``)."""
    __tablename__ = "products"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String(255), nullable=False)
    description = Column(Text)
    category = Column(String(100))
    created_at = Column(DateTime, default=datetime.utcnow)  # naive UTC timestamp

    # A product appears in both purchase and sale transactions.
    purchases = relationship("Purchase", back_populates="product")
    sales = relationship("Sale", back_populates="product")
|
| 43 |
+
|
| 44 |
+
class Purchase(Base):
    """SQLAlchemy model for a stock purchase from a supplier (table ``purchases``)."""
    __tablename__ = "purchases"

    id = Column(Integer, primary_key=True, autoincrement=True)
    supplier_id = Column(Integer, ForeignKey("suppliers.id"))
    product_id = Column(Integer, ForeignKey("products.id"))
    quantity = Column(Integer, nullable=False)
    unit_price = Column(Numeric(10, 2), nullable=False)  # price per unit
    total_cost = Column(Numeric(10, 2), nullable=False)  # stored, not computed
    purchase_date = Column(DateTime, default=datetime.utcnow)  # naive UTC timestamp
    notes = Column(Text)

    supplier = relationship("Supplier", back_populates="purchases")
    product = relationship("Product", back_populates="purchases")
|
| 58 |
+
|
| 59 |
+
class Sale(Base):
    """SQLAlchemy model for a sale to a customer (table ``sales``)."""
    __tablename__ = "sales"

    id = Column(Integer, primary_key=True, autoincrement=True)
    customer_id = Column(Integer, ForeignKey("customers.id"))
    product_id = Column(Integer, ForeignKey("products.id"))
    quantity = Column(Integer, nullable=False)
    unit_price = Column(Numeric(10, 2), nullable=False)  # price per unit
    total_amount = Column(Numeric(10, 2), nullable=False)  # stored, not computed
    sale_date = Column(DateTime, default=datetime.utcnow)  # naive UTC timestamp
    notes = Column(Text)

    customer = relationship("Customer", back_populates="sales")
    product = relationship("Product", back_populates="sales")
|
| 73 |
+
|
| 74 |
+
# Pydantic models for API
|
| 75 |
+
class EntityExtraction(BaseModel):
    """Entities extracted from a free-text transaction message.

    All fields except ``transaction_type`` are optional because extraction
    may be partial; missing fields trigger a clarification flow.
    """
    product: Optional[str] = None
    quantity: Optional[int] = None
    unit: Optional[str] = None  # e.g., "tons", "pieces", "kg"
    supplier: Optional[str] = None  # set for purchases
    customer: Optional[str] = None  # set for sales
    unit_price: Optional[float] = None
    total_amount: Optional[float] = None
    transaction_type: str = Field(..., description="'purchase' or 'sale'")
    notes: Optional[str] = None
|
| 85 |
+
|
| 86 |
+
class ChatbotRequest(BaseModel):
    """Incoming chatbot message, optionally tied to a conversation session."""
    message: str
    session_id: Optional[str] = None  # used to correlate multi-turn clarification
|
| 89 |
+
|
| 90 |
+
class PendingTransaction(BaseModel):
    """A partially extracted transaction awaiting user clarification."""
    entities: EntityExtraction  # what has been extracted so far
    missing_fields: List[str]  # field names still required before committing
    session_id: str
    original_message: str
    # Pydantic deep-copies mutable defaults per instance, so the shared-list
    # pitfall of plain Python defaults does not apply here.
    clarification_responses: List[str] = []
|
| 96 |
+
|
| 97 |
+
class ChatbotResponse(BaseModel):
    """Structured chatbot reply with debugging/telemetry metadata."""
    response: str  # user-facing text
    sql_executed: Optional[str] = None  # populated for QUERY intents
    entities_extracted: Optional[EntityExtraction] = None  # populated for TRANSACTION intents
    vector_stored: bool = False  # True when the message was written to the vector store
    intent_detected: Optional[str] = None
    intent_confidence: Optional[float] = None
    awaiting_clarification: bool = False  # True when more input is needed
|
src/nl_to_sql.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import openai
|
| 2 |
+
import os
|
| 3 |
+
from typing import Dict, Any, Optional, Tuple
|
| 4 |
+
import re
|
| 5 |
+
import json
|
| 6 |
+
|
| 7 |
+
class NaturalLanguageToSQL:
    """Converts natural-language business questions into read-only SQLite
    queries via the OpenAI API, with basic validation and explanations."""

    def __init__(self, api_key: Optional[str] = None):
        """Initialize OpenAI client for natural language to SQL conversion"""
        # Falls back to the OPENAI_API_KEY environment variable.
        self.client = openai.OpenAI(
            api_key=api_key or os.getenv('OPENAI_API_KEY')
        )

        # Database schema description for the LLM
        # Kept in sync by hand with the SQLAlchemy models in src/models.py.
        self.schema_description = """
Database Schema:

Table: suppliers
- id (INTEGER PRIMARY KEY)
- name (VARCHAR(255)) - Supplier company name
- contact_info (TEXT) - Contact information
- created_at (TIMESTAMP)

Table: customers
- id (INTEGER PRIMARY KEY)
- name (VARCHAR(255)) - Customer name
- email (VARCHAR(255))
- phone (VARCHAR(50))
- address (TEXT)
- created_at (TIMESTAMP)

Table: products
- id (INTEGER PRIMARY KEY)
- name (VARCHAR(255)) - Product name
- description (TEXT)
- category (VARCHAR(100)) - Product category
- created_at (TIMESTAMP)

Table: purchases
- id (INTEGER PRIMARY KEY)
- supplier_id (INTEGER) - Foreign key to suppliers table
- product_id (INTEGER) - Foreign key to products table
- quantity (INTEGER) - Number of items purchased
- unit_price (DECIMAL(10,2)) - Price per unit
- total_cost (DECIMAL(10,2)) - Total purchase cost
- purchase_date (TIMESTAMP) - When purchase was made
- notes (TEXT) - Additional notes

Table: sales
- id (INTEGER PRIMARY KEY)
- customer_id (INTEGER) - Foreign key to customers table
- product_id (INTEGER) - Foreign key to products table
- quantity (INTEGER) - Number of items sold
- unit_price (DECIMAL(10,2)) - Price per unit
- total_amount (DECIMAL(10,2)) - Total sale amount
- sale_date (TIMESTAMP) - When sale was made
- notes (TEXT) - Additional notes

Relationships:
- purchases.supplier_id β suppliers.id
- purchases.product_id β products.id
- sales.customer_id β customers.id
- sales.product_id β products.id
"""

    def convert_to_sql(self, natural_language_query: str) -> Tuple[str, str]:
        """
        Convert natural language query to SQL
        Returns: (sql_query, explanation)

        On API failure the first element is a SQL comment beginning with
        "-- Error generating SQL:" rather than a runnable query.
        """

        system_prompt = f"""You are an expert SQL query generator. Given a natural language question about a business database, generate the appropriate SQL query.

{self.schema_description}

Guidelines:
1. Generate valid SQLite syntax
2. Use JOINs when accessing related data across tables
3. Use appropriate WHERE clauses for filtering
4. Use aggregate functions (COUNT, SUM, AVG) when appropriate
5. Use ORDER BY for sorting results
6. Use LIMIT for restricting result count when reasonable
7. Always use proper table aliases for clarity
8. Handle date ranges using DATE() function for SQLite
9. Use LIKE with % wildcards for text searches
10. Return only the SQL query, no explanations unless specifically requested

Example queries:
- "Show all USB drives purchased" β SELECT p.name, pu.quantity, pu.unit_price, s.name as supplier FROM purchases pu JOIN products p ON pu.product_id = p.id JOIN suppliers s ON pu.supplier_id = s.id WHERE p.name LIKE '%USB%'
- "Total sales this month" β SELECT SUM(total_amount) FROM sales WHERE DATE(sale_date) >= DATE('now', 'start of month')
- "Top 5 customers by sales" β SELECT c.name, SUM(s.total_amount) as total FROM sales s JOIN customers c ON s.customer_id = c.id GROUP BY c.id, c.name ORDER BY total DESC LIMIT 5
"""

        user_prompt = f"""Convert this natural language query to SQL:

"{natural_language_query}"

Return ONLY the SQL query, nothing else."""

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                # Low temperature: SQL generation should be deterministic-ish.
                temperature=0.1,
                max_tokens=500
            )

            sql_query = response.choices[0].message.content.strip()

            # Clean up the SQL query (remove markdown formatting if present)
            sql_query = re.sub(r'^```sql\s*', '', sql_query)
            sql_query = re.sub(r'\s*```$', '', sql_query)
            sql_query = sql_query.strip()

            # Generate explanation
            explanation = self._generate_explanation(natural_language_query, sql_query)

            return sql_query, explanation

        except Exception as e:
            return f"-- Error generating SQL: {str(e)}", f"Failed to convert query: {str(e)}"

    def _generate_explanation(self, nl_query: str, sql_query: str) -> str:
        """Generate a human-readable explanation of what the SQL query does"""

        system_prompt = """You are a helpful assistant that explains SQL queries in simple terms.
Given a natural language question and the corresponding SQL query, provide a brief explanation of what the SQL query does."""

        user_prompt = f"""Natural language query: "{nl_query}"

SQL query: {sql_query}

Provide a brief explanation of what this SQL query does:"""

        try:
            response = self.client.chat.completions.create(
                # Cheaper model is sufficient for a short explanation.
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.3,
                max_tokens=200
            )

            return response.choices[0].message.content.strip()

        except Exception as e:
            # Best-effort: a generic explanation is better than an error here.
            return f"Generated SQL query for: {nl_query}"

    def validate_sql(self, sql_query: str) -> Tuple[bool, str]:
        """
        Basic validation of SQL query structure
        Returns: (is_valid, error_message)

        NOTE(review): keyword checks are substring-based, so e.g. a column
        named "created_at" contains "create"; this only bites for non-SELECT
        queries, which are rejected anyway. The "--" injection pattern also
        rejects any legitimate query containing a SQL comment — confirm
        that is the intended trade-off.
        """

        # Basic checks
        sql_lower = sql_query.lower().strip()

        # Check for dangerous operations
        dangerous_keywords = ['drop', 'delete', 'truncate', 'alter', 'create', 'insert', 'update']
        for keyword in dangerous_keywords:
            if keyword in sql_lower and not sql_lower.startswith('select'):
                return False, f"Query contains potentially dangerous keyword: {keyword}"

        # Check if it starts with SELECT (read-only queries only)
        if not sql_lower.startswith('select'):
            return False, "Only SELECT queries are allowed for security"

        # Basic syntax checks
        if sql_query.count('(') != sql_query.count(')'):
            return False, "Unmatched parentheses in query"

        # Check for basic SQL injection patterns
        injection_patterns = [r";\s*(drop|delete|insert|update)", r"--", r"/\*.*\*/"]
        for pattern in injection_patterns:
            if re.search(pattern, sql_lower):
                return False, f"Query contains potentially unsafe pattern: {pattern}"

        return True, "Query appears valid"

    def suggest_corrections(self, natural_language_query: str, error_message: str) -> str:
        """Suggest how to rephrase the query if it fails"""

        # Keyed on substrings expected in typical SQL error messages.
        suggestions = {
            "table": "Make sure you're asking about purchases, sales, customers, suppliers, or products",
            "column": "Try using terms like 'name', 'quantity', 'price', 'date', 'total'",
            "syntax": "Try rephrasing your question more simply",
            "ambiguous": "Be more specific about what data you want to see"
        }

        error_lower = error_message.lower()

        for key, suggestion in suggestions.items():
            if key in error_lower:
                return f"Suggestion: {suggestion}"

        return "Try rephrasing your question or ask for help with available data"
|
src/rag_handler.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import openai
|
| 2 |
+
import os
|
| 3 |
+
from typing import List, Dict, Any, Optional
|
| 4 |
+
import json
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
|
| 7 |
+
class RAGHandler:
|
| 8 |
+
    def __init__(self, api_key: Optional[str] = None):
        """Initialize OpenAI client for RAG responses"""
        # Falls back to the OPENAI_API_KEY environment variable when no
        # explicit key is given.
        self.client = openai.OpenAI(
            api_key=api_key or os.getenv('OPENAI_API_KEY')
        )
|
| 13 |
+
|
| 14 |
+
    def generate_rag_response(self, user_query: str, retrieved_documents: List[Dict[str, Any]]) -> str:
        """
        Generate a response using RAG (Retrieval-Augmented Generation)

        Args:
            user_query: The user's original query
            retrieved_documents: List of documents from vector store with similarity scores

        Returns:
            Generated response based on retrieved context

        On API failure, returns an error message plus a plain-text listing of
        the retrieved records (see _format_fallback_response).
        """

        if not retrieved_documents:
            return "I couldn't find any relevant information to answer your query."

        # Format retrieved documents for context
        context = self._format_context(retrieved_documents)

        system_prompt = """You are a helpful business assistant with access to a company's transaction history and business information.

Your role is to answer user questions based on the provided context from the company's records.

Guidelines:
1. Answer based ONLY on the provided context
2. If the context doesn't contain enough information, say so clearly
3. Be specific and cite relevant details from the context
4. Maintain a professional, helpful tone
5. If asked about specific dates, transactions, or events, reference the exact information from context
6. If the context contains multiple relevant items, summarize them appropriately
7. Don't make up information not present in the context

Context format: Each document has a 'document' field with the actual content and 'metadata' with additional details like timestamps."""

        user_prompt = f"""Based on the following business records, please answer this question: "{user_query}"

Context from company records:
{context}

Please provide a comprehensive answer based on the available information."""

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                # Slightly higher temperature than classification: answers
                # should read naturally while staying grounded in context.
                temperature=0.3,
                max_tokens=800
            )

            return response.choices[0].message.content.strip()

        except Exception as e:
            return f"I encountered an error while processing your query: {str(e)}\n\nHowever, I found these relevant records:\n{self._format_fallback_response(retrieved_documents)}"
|
| 69 |
+
|
| 70 |
+
def _format_context(self, documents: List[Dict[str, Any]]) -> str:
|
| 71 |
+
"""Format retrieved documents as context for the LLM"""
|
| 72 |
+
if not documents:
|
| 73 |
+
return "No relevant documents found."
|
| 74 |
+
|
| 75 |
+
context_parts = []
|
| 76 |
+
|
| 77 |
+
for i, doc in enumerate(documents, 1):
|
| 78 |
+
doc_content = doc.get('document', 'No content available')
|
| 79 |
+
metadata = doc.get('metadata', {})
|
| 80 |
+
distance = doc.get('distance', 'Unknown')
|
| 81 |
+
|
| 82 |
+
# Format document entry
|
| 83 |
+
context_entry = f"Document {i}:\n"
|
| 84 |
+
context_entry += f"Content: {doc_content}\n"
|
| 85 |
+
|
| 86 |
+
# Add metadata if available
|
| 87 |
+
if metadata:
|
| 88 |
+
if 'timestamp' in metadata:
|
| 89 |
+
try:
|
| 90 |
+
# Format timestamp nicely
|
| 91 |
+
timestamp = metadata['timestamp']
|
| 92 |
+
if isinstance(timestamp, str):
|
| 93 |
+
date_part = timestamp[:10] if len(timestamp) >= 10 else timestamp
|
| 94 |
+
context_entry += f"Date: {date_part}\n"
|
| 95 |
+
except:
|
| 96 |
+
pass
|
| 97 |
+
|
| 98 |
+
if 'type' in metadata:
|
| 99 |
+
context_entry += f"Type: {metadata['type']}\n"
|
| 100 |
+
|
| 101 |
+
# Add transaction data if available
|
| 102 |
+
if 'data' in metadata:
|
| 103 |
+
try:
|
| 104 |
+
data = json.loads(metadata['data']) if isinstance(metadata['data'], str) else metadata['data']
|
| 105 |
+
if isinstance(data, dict):
|
| 106 |
+
relevant_fields = ['product', 'quantity', 'supplier', 'customer', 'total', 'unit_price']
|
| 107 |
+
data_parts = []
|
| 108 |
+
for field in relevant_fields:
|
| 109 |
+
if field in data and data[field] is not None:
|
| 110 |
+
data_parts.append(f"{field}: {data[field]}")
|
| 111 |
+
if data_parts:
|
| 112 |
+
context_entry += f"Details: {', '.join(data_parts)}\n"
|
| 113 |
+
except:
|
| 114 |
+
pass
|
| 115 |
+
|
| 116 |
+
# Add similarity score
|
| 117 |
+
if distance is not None and distance != 'Unknown':
|
| 118 |
+
try:
|
| 119 |
+
similarity = 1 - float(distance) # Convert distance to similarity
|
| 120 |
+
context_entry += f"Relevance: {similarity:.2f}\n"
|
| 121 |
+
except:
|
| 122 |
+
pass
|
| 123 |
+
|
| 124 |
+
context_parts.append(context_entry)
|
| 125 |
+
|
| 126 |
+
return "\n" + "-" * 50 + "\n".join(context_parts)
|
| 127 |
+
|
| 128 |
+
def _format_fallback_response(self, documents: List[Dict[str, Any]]) -> str:
|
| 129 |
+
"""Create a fallback response when LLM fails"""
|
| 130 |
+
if not documents:
|
| 131 |
+
return "No relevant information found."
|
| 132 |
+
|
| 133 |
+
response_parts = []
|
| 134 |
+
|
| 135 |
+
for i, doc in enumerate(documents, 1):
|
| 136 |
+
doc_content = doc.get('document', 'No content available')
|
| 137 |
+
metadata = doc.get('metadata', {})
|
| 138 |
+
|
| 139 |
+
entry = f"{i}. {doc_content}"
|
| 140 |
+
|
| 141 |
+
if metadata.get('timestamp'):
|
| 142 |
+
try:
|
| 143 |
+
date_part = metadata['timestamp'][:10]
|
| 144 |
+
entry += f" (Date: {date_part})"
|
| 145 |
+
except:
|
| 146 |
+
pass
|
| 147 |
+
|
| 148 |
+
response_parts.append(entry)
|
| 149 |
+
|
| 150 |
+
return "\n".join(response_parts)
|
| 151 |
+
|
| 152 |
+
def enhance_search_query(self, user_query: str) -> str:
    """
    Enhance the user's search query for better vector retrieval.

    Args:
        user_query: Original user query

    Returns:
        Enhanced query for better semantic search; falls back to the
        original query when the LLM call fails or returns nothing usable.
    """

    # NOTE: the example arrows below were mojibake ("β") in the original
    # source; restored to the intended "→" so the prompt reads correctly.
    system_prompt = """You are an expert at reformulating search queries for business records retrieval.

Given a user's question, create an enhanced search query that will better match relevant business documents in a vector database.

Guidelines:
1. Extract key business concepts (products, suppliers, customers, dates, amounts)
2. Add relevant synonyms and related terms
3. Focus on business transaction terminology
4. Keep it concise but comprehensive
5. Don't change the core intent of the original query

Examples:
- "When is my meeting with George?" → "meeting George supplier customer appointment scheduled"
- "Show me laptop purchases" → "laptop computer purchase buy bought supplier transaction"
- "Similar sales to John" → "John customer sale sold transaction similar"

Return only the enhanced query, nothing else."""

    user_prompt = f'Enhance this search query for better business records retrieval: "{user_query}"'

    try:
        response = self.client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.2,  # low temperature keeps the rewrite close to the original intent
            max_tokens=100
        )

        enhanced_query = response.choices[0].message.content.strip()

        # Fall back to the original if enhancement produced an empty/degenerate result.
        if not enhanced_query or len(enhanced_query) < 3:
            return user_query

        return enhanced_query

    except Exception as e:
        # Best-effort feature: any API failure degrades gracefully to the
        # user's original query instead of breaking the search pipeline.
        print(f"Query enhancement failed: {e}")
        return user_query
|
src/transaction_clarifier.py
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import openai
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from typing import Dict, Any, Optional, List, Tuple
|
| 5 |
+
from enum import Enum
|
| 6 |
+
from pydantic import BaseModel
|
| 7 |
+
from models import EntityExtraction
|
| 8 |
+
|
| 9 |
+
class ClarificationStatus(str, Enum):
    """Outcome of a transaction-completeness check.

    Inherits from ``str`` so members compare equal to (and serialize as)
    their plain string values.
    """

    COMPLETE = "complete"                        # all required fields present
    NEEDS_CLARIFICATION = "needs_clarification"  # must ask the user for more info
    CANCELLED = "cancelled"                      # user abandoned the transaction
|
| 13 |
+
|
| 14 |
+
class ClarificationRequest(BaseModel):
    """Questions to ask the user about a transaction's missing fields."""

    # Names of the required fields that could not be extracted.
    missing_fields: List[str]
    # Natural-language questions, one (or more) per missing field.
    questions: List[str]
    # Optional field -> suggested value hints to show alongside the questions.
    suggested_values: Dict[str, Any] = {}
    # Short user-facing explanation of why the information is needed.
    explanation: str
|
| 19 |
+
|
| 20 |
+
class TransactionClarifier:
    """LLM-assisted helper for completing purchase/sale transactions.

    Checks whether extracted entities contain every required field, generates
    follow-up questions for anything missing, and folds the user's answers
    back into the entity model.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Initialize OpenAI client for transaction clarification.

        Falls back to the OPENAI_API_KEY environment variable when no key
        is passed explicitly.
        """
        self.client = openai.OpenAI(
            api_key=api_key or os.getenv('OPENAI_API_KEY')
        )

    @staticmethod
    def _parse_llm_json(response_text: str) -> Dict[str, Any]:
        """Parse JSON from an LLM reply, tolerating Markdown code fences.

        Models frequently wrap JSON in ```json ... ``` fences; stripping them
        first avoids needlessly falling back to the non-LLM code paths.

        Raises:
            json.JSONDecodeError: if the payload still isn't valid JSON.
        """
        text = response_text.strip()
        if text.startswith("```"):
            lines = text.splitlines()
            # Drop the opening fence line (possibly "```json") ...
            lines = lines[1:]
            # ... and a closing fence line, when present.
            if lines and lines[-1].strip() == "```":
                lines = lines[:-1]
            text = "\n".join(lines).strip()
        return json.loads(text)

    def analyze_transaction_completeness(self, entities: EntityExtraction) -> Tuple[ClarificationStatus, Optional[ClarificationRequest]]:
        """
        Analyze if a transaction has all necessary information.

        Args:
            entities: Extracted entities from user input

        Returns:
            Tuple of (status, clarification_request); the request is None
            when the transaction is already complete.
        """

        # Required/optional fields depend on the transaction type.
        if entities.transaction_type == "purchase":
            required_fields = ["product", "quantity", "supplier", "unit_price"]
            optional_fields = ["total_amount"]  # derivable from quantity * unit_price
        elif entities.transaction_type == "sale":
            required_fields = ["product", "quantity", "customer", "unit_price"]
            optional_fields = ["total_amount"]
        else:
            # Non-transactional input: nothing to clarify.
            return ClarificationStatus.COMPLETE, None

        # Collect required fields that are absent or falsy.
        missing_fields = []
        entity_dict = entities.dict()

        for field in required_fields:
            if not entity_dict.get(field):
                missing_fields.append(field)

        # All required fields present -> transaction is complete.
        if not missing_fields:
            return ClarificationStatus.COMPLETE, None

        # Otherwise build intelligent follow-up questions.
        clarification = self._generate_clarification_request(entities, missing_fields)

        return ClarificationStatus.NEEDS_CLARIFICATION, clarification

    def _generate_clarification_request(self, entities: EntityExtraction, missing_fields: List[str]) -> ClarificationRequest:
        """Generate intelligent questions for missing information via the LLM.

        Falls back to template questions when the API call or JSON parsing fails.
        """

        # Context for the prompt: everything we already know (except free-form notes).
        known_info = {}
        entity_dict = entities.dict()

        for field, value in entity_dict.items():
            if value is not None and field != "notes":
                known_info[field] = value

        system_prompt = f"""You are a helpful business assistant helping complete a {entities.transaction_type} transaction.

Generate natural, conversational questions to gather missing information. The user should be able to:
1. Provide the missing information
2. Say "N/A" or "skip" if the information is not available/applicable
3. Ask for suggestions if they're unsure

Create personalized questions based on the context of what we already know.

Return your response in this exact JSON format:
{{
    "questions": ["question1", "question2", ...],
    "suggested_values": {{"field": "suggested_value", ...}},
    "explanation": "Brief explanation of why we need this information"
}}

Missing fields to ask about: {missing_fields}
Transaction type: {entities.transaction_type}
"""

        user_prompt = f"""We're processing a {entities.transaction_type} transaction and need to gather some missing information.

What we already know:
{json.dumps(known_info, indent=2)}

Missing fields: {missing_fields}

Generate friendly, specific questions to gather the missing information. Make suggestions when appropriate."""

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.3,
                max_tokens=400
            )

            response_text = response.choices[0].message.content.strip()

            try:
                # Tolerate Markdown-fenced JSON replies.
                result_dict = self._parse_llm_json(response_text)
                return ClarificationRequest(
                    missing_fields=missing_fields,
                    questions=result_dict.get("questions", []),
                    suggested_values=result_dict.get("suggested_values", {}),
                    explanation=result_dict.get("explanation", "I need some additional information to complete this transaction.")
                )
            except (json.JSONDecodeError, KeyError):
                # Malformed LLM output -> deterministic template questions.
                return self._generate_fallback_questions(entities, missing_fields)

        except Exception as e:
            print(f"Error generating clarification: {e}")
            return self._generate_fallback_questions(entities, missing_fields)

    def _generate_fallback_questions(self, entities: EntityExtraction, missing_fields: List[str]) -> ClarificationRequest:
        """Generate deterministic template questions when the LLM fails."""

        question_templates = {
            "product": "What product or item is involved in this transaction?",
            "quantity": f"How many units {'were purchased' if entities.transaction_type == 'purchase' else 'were sold'}?",
            "supplier": "Which supplier or vendor is this purchase from?",
            "customer": "Who is the customer for this sale?",
            "unit_price": "What is the price per unit?",
            "total_amount": "What is the total amount for this transaction?"
        }

        questions = []
        for field in missing_fields:
            # Generic phrasing for any field without a dedicated template.
            questions.append(question_templates.get(field, f"What is the {field.replace('_', ' ')}?"))

        return ClarificationRequest(
            missing_fields=missing_fields,
            questions=questions,
            suggested_values={},
            explanation="I need some additional information to complete this transaction."
        )

    def process_clarification_response(self, original_entities: EntityExtraction,
                                       missing_fields: List[str],
                                       user_response: str) -> Tuple[EntityExtraction, bool]:
        """
        Process user's response to clarification questions.

        Args:
            original_entities: Original extracted entities
            missing_fields: Fields we asked about
            user_response: User's response to our questions

        Returns:
            Tuple of (updated_entities, is_complete). On any failure the
            original entities are returned unchanged with is_complete=False.
        """

        system_prompt = f"""You are processing a user's response to clarification questions about a {original_entities.transaction_type} transaction.

Extract the missing information from the user's response. The user may:
1. Provide specific values for the missing fields
2. Say "N/A", "skip", "not applicable", or similar to indicate the field should be null
3. Ask for help or say they don't know

Missing fields we asked about: {missing_fields}

Return a JSON object with the extracted values. Use null for fields that are N/A or skipped.

Example response format:
{{
    "product": "extracted product name",
    "quantity": 10,
    "supplier": null,
    "unit_price": 5.99,
    "interpretation": "Brief explanation of what you extracted"
}}"""

        user_prompt = f"""Original transaction: {original_entities.transaction_type}
Missing fields: {missing_fields}
User's response: "{user_response}"

Extract the values for the missing fields from the user's response."""

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.1,  # near-deterministic extraction
                max_tokens=300
            )

            response_text = response.choices[0].message.content.strip()

            try:
                # Tolerate Markdown-fenced JSON replies.
                extracted_values = self._parse_llm_json(response_text)

                # Merge extracted values into the original entities.
                updated_entities = self._update_entities(original_entities, extracted_values, missing_fields)

                # Re-run the completeness check on the merged result.
                status, _ = self.analyze_transaction_completeness(updated_entities)
                is_complete = (status == ClarificationStatus.COMPLETE)

                return updated_entities, is_complete

            except (json.JSONDecodeError, KeyError) as e:
                print(f"Error parsing clarification response: {e}")
                return original_entities, False

        except Exception as e:
            print(f"Error processing clarification: {e}")
            return original_entities, False

    def _update_entities(self, original_entities: EntityExtraction,
                         extracted_values: Dict[str, Any],
                         missing_fields: List[str]) -> EntityExtraction:
        """Return a new EntityExtraction with clarification values merged in.

        Numeric fields are coerced (quantity -> int, prices -> float); values
        that fail coercion become None. The total is recomputed whenever both
        quantity and unit_price are known, keeping it consistent.
        """

        # Work on a plain dict for easier manipulation.
        entity_dict = original_entities.dict()

        # Only fields we actually asked about are updated.
        for field in missing_fields:
            if field in extracted_values:
                value = extracted_values[field]

                if field in ["quantity"] and value is not None:
                    try:
                        entity_dict[field] = int(value)
                    except (ValueError, TypeError):
                        entity_dict[field] = None
                elif field in ["unit_price", "total_amount"] and value is not None:
                    try:
                        entity_dict[field] = float(value)
                    except (ValueError, TypeError):
                        entity_dict[field] = None
                else:
                    entity_dict[field] = value

        # Keep the total consistent with quantity * unit_price.
        if entity_dict.get("quantity") and entity_dict.get("unit_price"):
            entity_dict["total_amount"] = entity_dict["quantity"] * entity_dict["unit_price"]

        return EntityExtraction(**entity_dict)

    def format_clarification_message(self, clarification: ClarificationRequest) -> str:
        """Format a clarification request as a user-friendly chat message.

        NOTE: the emoji below were mojibake in the original source and have
        been restored to the intended characters.
        """

        message = f"📋 {clarification.explanation}\n\n"

        for i, question in enumerate(clarification.questions, 1):
            message += f"{i}. {question}\n"

        # Show suggested values, if the LLM produced any.
        if clarification.suggested_values:
            message += "\n💡 Suggestions:\n"
            for field, suggestion in clarification.suggested_values.items():
                message += f"  • {field.replace('_', ' ').title()}: {suggestion}\n"

        message += "\n✨ You can say 'N/A' or 'skip' for any information that's not available."
        message += "\n📝 Please provide the missing information in your next message."

        return message
|
src/vector_store.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import chromadb
|
| 2 |
+
from sentence_transformers import SentenceTransformer
|
| 3 |
+
from typing import List, Dict, Any, Optional
|
| 4 |
+
import json
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
|
| 7 |
+
class VectorStore:
    """ChromaDB-backed semantic store for business transactions and events.

    Embeddings are computed locally with a SentenceTransformer model; if the
    model cannot be loaded, write/search operations degrade to no-ops.
    """

    def __init__(self, collection_name: str = "chatbot_events"):
        # Persist the vector data on disk next to the application.
        self.client = chromadb.PersistentClient(path="./chroma_db")
        self.collection = self.client.get_or_create_collection(name=collection_name)
        try:
            self.model = SentenceTransformer('all-MiniLM-L6-v2')
        except Exception as e:
            # Missing model/network: keep running with embeddings disabled.
            print(f"Warning: Could not load sentence transformer model: {e}")
            self.model = None

    def add_transaction_event(self, transaction_data: Dict[str, Any], user_query: str, sql_transaction_id: Optional[int] = None) -> bool:
        """Add a transaction event to the vector store.

        Args:
            transaction_data: Parsed transaction fields (type, product, ...).
            user_query: The original user message, stored for context.
            sql_transaction_id: Optional row id in the SQL store, used to
                link the vector entry back to the relational record.

        Returns:
            True on success, False when embeddings are unavailable or the
            write failed.
        """
        if not self.model:
            return False

        try:
            # Create a semantic summary of the event to embed and store.
            summary = self._create_event_summary(transaction_data, user_query)

            embedding = self.model.encode(summary).tolist()

            # Document ID includes the SQL id (when known) for easier linking.
            doc_id = f"transaction_{sql_transaction_id or 'unknown'}_{datetime.now().isoformat()}_{hash(summary) % 10000}"

            metadata = {
                "type": "transaction",
                "transaction_type": transaction_data.get("type", "unknown"),
                "timestamp": datetime.now().isoformat(),
                "user_query": user_query,
                # Full payload serialized for later retrieval.
                "data": json.dumps(transaction_data)
            }

            # Cross-reference to the relational row, when available.
            if sql_transaction_id is not None:
                metadata["sql_transaction_id"] = sql_transaction_id
                metadata["sql_table"] = f"{transaction_data.get('type', 'unknown')}s"  # purchases or sales

            self.collection.add(
                documents=[summary],
                embeddings=[embedding],
                metadatas=[metadata],
                ids=[doc_id]
            )

            return True
        except Exception as e:
            print(f"Error adding transaction event: {e}")
            return False

    def get_transaction_by_sql_id(self, sql_transaction_id: int, transaction_type: str) -> Optional[Dict[str, Any]]:
        """Retrieve the vector entry linked to a specific SQL transaction id.

        Returns a dict with 'id', 'document' and 'metadata', or None when no
        match exists or the lookup fails.
        """
        try:
            # Chroma requires an explicit $and operator when filtering on
            # more than one metadata field (a flat multi-key dict is rejected
            # by current versions).
            results = self.collection.get(
                where={"$and": [
                    {"sql_transaction_id": sql_transaction_id},
                    {"transaction_type": transaction_type},
                ]},
                limit=1
            )

            if results and results['documents']:
                return {
                    "id": results['ids'][0],
                    "document": results['documents'][0],
                    "metadata": results['metadatas'][0]
                }

            return None
        except Exception as e:
            print(f"Error retrieving transaction by SQL ID: {e}")
            return None

    def add_general_event(self, event_text: str, event_type: str = "general") -> bool:
        """Add a free-form event or note to the vector store.

        Returns True on success, False when embeddings are unavailable or
        the write failed.
        """
        if not self.model:
            return False

        try:
            embedding = self.model.encode(event_text).tolist()

            doc_id = f"event_{datetime.now().isoformat()}_{hash(event_text) % 10000}"

            self.collection.add(
                documents=[event_text],
                embeddings=[embedding],
                metadatas=[{
                    "type": event_type,
                    "timestamp": datetime.now().isoformat()
                }],
                ids=[doc_id]
            )

            return True
        except Exception as e:
            print(f"Error adding general event: {e}")
            return False

    def search_similar_events(self, query: str, n_results: int = 5) -> List[Dict[str, Any]]:
        """Search for events semantically similar to the query string.

        Returns a list of dicts with 'document', 'distance' (smaller is more
        similar) and 'metadata'; empty on failure or when embeddings are
        unavailable.
        """
        if not self.model:
            return []

        try:
            query_embedding = self.model.encode(query).tolist()

            results = self.collection.query(
                query_embeddings=[query_embedding],
                n_results=n_results
            )

            # Chroma returns parallel lists nested per-query; flatten for query 0.
            formatted_results = []
            if results['documents'] and results['documents'][0]:
                for i, doc in enumerate(results['documents'][0]):
                    result = {
                        "document": doc,
                        "distance": results['distances'][0][i] if results['distances'] else None,
                        "metadata": results['metadatas'][0][i] if results['metadatas'] else {}
                    }
                    formatted_results.append(result)

            return formatted_results
        except Exception as e:
            print(f"Error searching events: {e}")
            return []

    def get_recent_events(self, n_results: int = 10) -> List[Dict[str, Any]]:
        """Fetch up to n_results events, newest first (by stored timestamp)."""
        try:
            results = self.collection.get(
                limit=n_results,
                include=["documents", "metadatas"]
            )

            formatted_results = []
            if results['documents']:
                for i, doc in enumerate(results['documents']):
                    result = {
                        "document": doc,
                        "metadata": results['metadatas'][i] if results['metadatas'] else {}
                    }
                    formatted_results.append(result)

            # Chroma's get() has no ordering; sort here by ISO timestamp,
            # which sorts correctly as a string.
            formatted_results.sort(
                key=lambda x: x.get('metadata', {}).get('timestamp', ''),
                reverse=True
            )

            return formatted_results
        except Exception as e:
            print(f"Error getting recent events: {e}")
            return []

    def _create_event_summary(self, transaction_data: Dict[str, Any], user_query: str) -> str:
        """Build the human-readable summary string that gets embedded.

        Pipe-separated fields keep the summary compact while preserving the
        key facts (type, product, parties, amount) plus the original request.
        """
        summary_parts = []

        trans_type = transaction_data.get("type", "transaction")
        summary_parts.append(f"Business {trans_type} event:")

        if "product" in transaction_data:
            summary_parts.append(f"Product: {transaction_data['product']}")

        if "quantity" in transaction_data:
            summary_parts.append(f"Quantity: {transaction_data['quantity']}")

        if "supplier" in transaction_data:
            summary_parts.append(f"Supplier: {transaction_data['supplier']}")

        if "customer" in transaction_data:
            summary_parts.append(f"Customer: {transaction_data['customer']}")

        if "total" in transaction_data:
            # "€" restored here (was mojibake in the original source).
            summary_parts.append(f"Total amount: €{transaction_data['total']}")

        # Keep the raw user phrasing for extra semantic signal.
        summary_parts.append(f"Original request: {user_query}")

        return " | ".join(summary_parts)

    def delete_collection(self):
        """Delete the entire collection (use with caution). Returns success flag."""
        try:
            self.client.delete_collection(name=self.collection.name)
            return True
        except Exception as e:
            print(f"Error deleting collection: {e}")
            return False

    def get_collection_count(self) -> int:
        """Return the number of documents in the collection (0 on failure)."""
        try:
            return self.collection.count()
        except Exception as e:
            print(f"Error getting collection count: {e}")
            return 0
|
tests/test_chatbot.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 6 |
+
|
| 7 |
+
from chatbot import Chatbot
|
| 8 |
+
from models import ChatbotRequest
|
| 9 |
+
|
| 10 |
+
def test_chatbot():
    """End-to-end smoke test: feed representative messages through the chatbot.

    NOTE: the emoji and the euro sign in the strings below were mojibake in
    the original source and have been restored to the intended characters.
    """
    print("🧪 Testing Chatbot System")
    print("=" * 50)

    chatbot = Chatbot()

    # A mix of transactions, queries, searches and general info messages.
    test_cases = [
        "Add a purchase of 20 USB drives from TechMart at €5 each",
        "Sold 10 laptops to John Smith at €800 each",
        "Purchase 5 office chairs from Office Supplies Co at €150 per chair",
        "Show recent transactions",
        "Find USB drives",
        "Search TechMart",
        "Meeting with new supplier scheduled for next week"
    ]

    for i, test_message in enumerate(test_cases, 1):
        print(f"\n📝 Test {i}: {test_message}")
        print("-" * 50)

        request = ChatbotRequest(message=test_message)
        response = chatbot.process_message(request)

        print(f"Response: {response.response}")

        if response.entities_extracted:
            entities = response.entities_extracted
            print(f"Entities: {entities.transaction_type} - {entities.product} ({entities.quantity}x) - €{entities.total_amount}")

        if response.vector_stored:
            print("✅ Stored in vector database")

        print()

    chatbot.close()
    print("✅ All tests completed!")

if __name__ == "__main__":
    test_chatbot()
|
tests/test_intent_classifier.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 6 |
+
|
| 7 |
+
from intent_classifier import IntentClassifier, IntentType
|
| 8 |
+
|
| 9 |
+
def test_intent_classification():
|
| 10 |
+
print("π§ͺ Testing OpenAI Intent Classification")
|
| 11 |
+
print("="*60)
|
| 12 |
+
print("Note: Make sure to set OPENAI_API_KEY environment variable")
|
| 13 |
+
print("="*60)
|
| 14 |
+
|
| 15 |
+
classifier = IntentClassifier()
|
| 16 |
+
|
| 17 |
+
# Test cases with expected intents
|
| 18 |
+
test_cases = [
|
| 19 |
+
# Transaction intents
|
| 20 |
+
("Add a purchase of 20 USB drives from TechMart at β¬5 each", IntentType.TRANSACTION),
|
| 21 |
+
("Sold 10 laptops to John Smith at β¬800 each", IntentType.TRANSACTION),
|
| 22 |
+
("Purchase 5 office chairs from Office Supplies Co at β¬150 per chair", IntentType.TRANSACTION),
|
| 23 |
+
("We bought 100 pens from Staples for $2 each", IntentType.TRANSACTION),
|
| 24 |
+
|
| 25 |
+
# Query intents
|
| 26 |
+
("How many USB drives did we purchase?", IntentType.QUERY),
|
| 27 |
+
("What's the total value of all purchases?", IntentType.QUERY),
|
| 28 |
+
("Show me all sales to John Smith", IntentType.QUERY),
|
| 29 |
+
("List recent transactions", IntentType.QUERY),
|
| 30 |
+
("What's our total spending on electronics?", IntentType.QUERY),
|
| 31 |
+
|
| 32 |
+
# Semantic search intents
|
| 33 |
+
("Show me similar purchases to this one", IntentType.SEMANTIC_SEARCH),
|
| 34 |
+
("Find events related to supplier meetings", IntentType.SEMANTIC_SEARCH),
|
| 35 |
+
("What's similar to our last laptop purchase?", IntentType.SEMANTIC_SEARCH),
|
| 36 |
+
("Show me related transactions", IntentType.SEMANTIC_SEARCH),
|
| 37 |
+
|
| 38 |
+
# General info intents
|
| 39 |
+
("Meeting with new supplier scheduled for next week", IntentType.GENERAL_INFO),
|
| 40 |
+
("Remember to check inventory levels before next order", IntentType.GENERAL_INFO),
|
| 41 |
+
("The conference call went well today", IntentType.GENERAL_INFO),
|
| 42 |
+
("Don't forget to update the quarterly report", IntentType.GENERAL_INFO),
|
| 43 |
+
|
| 44 |
+
# Edge cases
|
| 45 |
+
("Hello", IntentType.GENERAL_INFO),
|
| 46 |
+
("What's the weather like?", IntentType.GENERAL_INFO),
|
| 47 |
+
("Can you help me?", IntentType.GENERAL_INFO),
|
| 48 |
+
]
|
| 49 |
+
|
| 50 |
+
correct_predictions = 0
|
| 51 |
+
total_predictions = len(test_cases)
|
| 52 |
+
|
| 53 |
+
for i, (message, expected_intent) in enumerate(test_cases, 1):
|
| 54 |
+
print(f"\nπ Test {i}: {message}")
|
| 55 |
+
print("-" * 60)
|
| 56 |
+
|
| 57 |
+
result = classifier.classify_intent(message)
|
| 58 |
+
|
| 59 |
+
print(f"Expected: {expected_intent.value}")
|
| 60 |
+
print(f"Predicted: {result.intent.value}")
|
| 61 |
+
print(f"Confidence: {result.confidence:.2f}")
|
| 62 |
+
print(f"Reasoning: {result.reasoning}")
|
| 63 |
+
|
| 64 |
+
if result.entities_hint:
|
| 65 |
+
print(f"Entities: {result.entities_hint}")
|
| 66 |
+
|
| 67 |
+
is_correct = result.intent == expected_intent
|
| 68 |
+
if is_correct:
|
| 69 |
+
print("β
CORRECT")
|
| 70 |
+
correct_predictions += 1
|
| 71 |
+
else:
|
| 72 |
+
print("β INCORRECT")
|
| 73 |
+
|
| 74 |
+
print()
|
| 75 |
+
|
| 76 |
+
# Summary
|
| 77 |
+
accuracy = correct_predictions / total_predictions
|
| 78 |
+
print("="*60)
|
| 79 |
+
print(f"π Results Summary:")
|
| 80 |
+
print(f"Correct predictions: {correct_predictions}/{total_predictions}")
|
| 81 |
+
print(f"Accuracy: {accuracy:.2%}")
|
| 82 |
+
print("="*60)
|
| 83 |
+
|
| 84 |
+
if accuracy >= 0.8:
|
| 85 |
+
print("π Excellent accuracy! Intent classification is working well.")
|
| 86 |
+
elif accuracy >= 0.6:
|
| 87 |
+
print("π Good accuracy. Consider refining prompts for better results.")
|
| 88 |
+
else:
|
| 89 |
+
print("β οΈ Low accuracy. Review and improve the classification prompts.")
|
| 90 |
+
|
| 91 |
+
if __name__ == "__main__":
|
| 92 |
+
if not os.getenv('OPENAI_API_KEY'):
|
| 93 |
+
print("β Error: OPENAI_API_KEY environment variable not set")
|
| 94 |
+
print("Please set your OpenAI API key:")
|
| 95 |
+
print("export OPENAI_API_KEY='your-key-here'")
|
| 96 |
+
sys.exit(1)
|
| 97 |
+
|
| 98 |
+
test_intent_classification()
|
tests/test_interactive_transactions.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 6 |
+
|
| 7 |
+
from chatbot import Chatbot
|
| 8 |
+
from models import ChatbotRequest
|
| 9 |
+
|
| 10 |
+
def test_interactive_transactions():
|
| 11 |
+
print("π§ͺ Testing Interactive Transaction Completion")
|
| 12 |
+
print("="*70)
|
| 13 |
+
print("Note: Make sure to set OPENAI_API_KEY environment variable")
|
| 14 |
+
print("="*70)
|
| 15 |
+
|
| 16 |
+
chatbot = Chatbot()
|
| 17 |
+
|
| 18 |
+
test_scenarios = [
|
| 19 |
+
{
|
| 20 |
+
"name": "Complete Purchase Transaction",
|
| 21 |
+
"initial": "I bought 20 USB drives from TechMart at β¬5 each",
|
| 22 |
+
"expected_complete": True,
|
| 23 |
+
"description": "Should be complete with all required fields"
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"name": "Incomplete Purchase - Missing Supplier",
|
| 27 |
+
"initial": "I bought 10 laptops at β¬800 each",
|
| 28 |
+
"clarifications": ["Electronics Plus"],
|
| 29 |
+
"expected_questions": ["supplier"],
|
| 30 |
+
"description": "Should ask for supplier information"
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"name": "Incomplete Purchase - Missing Multiple Fields",
|
| 34 |
+
"initial": "I bought some office chairs",
|
| 35 |
+
"clarifications": ["15 chairs", "Office Supplies Co", "β¬150 per chair"],
|
| 36 |
+
"expected_questions": ["quantity", "supplier", "unit_price"],
|
| 37 |
+
"description": "Should ask for quantity, supplier, and price"
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"name": "Sale with Missing Customer",
|
| 41 |
+
"initial": "Sold 5 laptops at β¬900 each",
|
| 42 |
+
"clarifications": ["ABC Corporation"],
|
| 43 |
+
"expected_questions": ["customer"],
|
| 44 |
+
"description": "Should ask for customer information"
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"name": "Transaction with N/A Fields",
|
| 48 |
+
"initial": "Bought 100 pens",
|
| 49 |
+
"clarifications": ["Staples", "$2 each", "N/A"],
|
| 50 |
+
"expected_questions": ["supplier", "unit_price"],
|
| 51 |
+
"description": "Should handle N/A responses gracefully"
|
| 52 |
+
}
|
| 53 |
+
]
|
| 54 |
+
|
| 55 |
+
for i, scenario in enumerate(test_scenarios, 1):
|
| 56 |
+
print(f"\nπ Test Scenario {i}: {scenario['name']}")
|
| 57 |
+
print("-" * 60)
|
| 58 |
+
print(f"Description: {scenario['description']}")
|
| 59 |
+
print(f"Initial input: {scenario['initial']}")
|
| 60 |
+
|
| 61 |
+
# Test initial transaction request
|
| 62 |
+
session_id = f"test_session_{i}"
|
| 63 |
+
request = ChatbotRequest(message=scenario['initial'], session_id=session_id)
|
| 64 |
+
response = chatbot.process_message(request)
|
| 65 |
+
|
| 66 |
+
print(f"\nπ€ Initial Response:")
|
| 67 |
+
print(response.response)
|
| 68 |
+
|
| 69 |
+
if response.awaiting_clarification:
|
| 70 |
+
print(f"β
Correctly identified as incomplete transaction")
|
| 71 |
+
|
| 72 |
+
# Process clarifications if provided
|
| 73 |
+
if "clarifications" in scenario:
|
| 74 |
+
print(f"\nπ Providing clarifications...")
|
| 75 |
+
|
| 76 |
+
for j, clarification in enumerate(scenario["clarifications"], 1):
|
| 77 |
+
print(f"\n Clarification {j}: {clarification}")
|
| 78 |
+
|
| 79 |
+
clarification_request = ChatbotRequest(
|
| 80 |
+
message=clarification,
|
| 81 |
+
session_id=session_id
|
| 82 |
+
)
|
| 83 |
+
clarification_response = chatbot.process_message(clarification_request)
|
| 84 |
+
|
| 85 |
+
print(f" π€ Response: {clarification_response.response[:100]}{'...' if len(clarification_response.response) > 100 else ''}")
|
| 86 |
+
|
| 87 |
+
if not clarification_response.awaiting_clarification:
|
| 88 |
+
print(f" β
Transaction completed!")
|
| 89 |
+
break
|
| 90 |
+
else:
|
| 91 |
+
print(f" β³ Still waiting for more information...")
|
| 92 |
+
else:
|
| 93 |
+
if scenario.get("expected_complete", False):
|
| 94 |
+
print(f"β
Correctly completed transaction without clarification")
|
| 95 |
+
else:
|
| 96 |
+
print(f"β Expected clarification but transaction was completed")
|
| 97 |
+
|
| 98 |
+
print(f"\nIntent detected: {response.intent_detected}")
|
| 99 |
+
if response.entities_extracted:
|
| 100 |
+
entities = response.entities_extracted
|
| 101 |
+
print(f"Entities: {entities.transaction_type} - {entities.product} ({entities.quantity}x) - β¬{entities.total_amount}")
|
| 102 |
+
|
| 103 |
+
print("\n" + "="*60)
|
| 104 |
+
|
| 105 |
+
print("\nπ§ͺ Testing Edge Cases")
|
| 106 |
+
print("-" * 40)
|
| 107 |
+
|
| 108 |
+
# Test cancellation
|
| 109 |
+
print("\nπ Testing Transaction Cancellation")
|
| 110 |
+
request = ChatbotRequest(message="I bought some items", session_id="cancel_test")
|
| 111 |
+
response = chatbot.process_message(request)
|
| 112 |
+
|
| 113 |
+
if response.awaiting_clarification:
|
| 114 |
+
print("β
Transaction requires clarification")
|
| 115 |
+
cancel_request = ChatbotRequest(message="cancel", session_id="cancel_test")
|
| 116 |
+
cancel_response = chatbot.process_message(cancel_request)
|
| 117 |
+
print(f"π€ Cancel response: {cancel_response.response}")
|
| 118 |
+
|
| 119 |
+
if not cancel_response.awaiting_clarification:
|
| 120 |
+
print("β
Transaction successfully cancelled")
|
| 121 |
+
else:
|
| 122 |
+
print("β Transaction not properly cancelled")
|
| 123 |
+
|
| 124 |
+
# Test invalid session
|
| 125 |
+
print("\nπ Testing Invalid Session Response")
|
| 126 |
+
invalid_request = ChatbotRequest(message="More information here", session_id="nonexistent")
|
| 127 |
+
invalid_response = chatbot.process_message(invalid_request)
|
| 128 |
+
print(f"π€ Invalid session response: {invalid_response.response}")
|
| 129 |
+
|
| 130 |
+
chatbot.close()
|
| 131 |
+
print("\nβ
Interactive transaction tests completed!")
|
| 132 |
+
|
| 133 |
+
def test_clarification_quality():
|
| 134 |
+
print("\n㪠Testing Clarification Question Quality")
|
| 135 |
+
print("-" * 50)
|
| 136 |
+
|
| 137 |
+
chatbot = Chatbot()
|
| 138 |
+
|
| 139 |
+
# Test various incomplete scenarios to see question quality
|
| 140 |
+
incomplete_scenarios = [
|
| 141 |
+
"I bought something expensive",
|
| 142 |
+
"Purchase from TechMart",
|
| 143 |
+
"Sold items to a customer",
|
| 144 |
+
"β¬1000 transaction yesterday",
|
| 145 |
+
"Bought 50 units"
|
| 146 |
+
]
|
| 147 |
+
|
| 148 |
+
for i, scenario in enumerate(incomplete_scenarios, 1):
|
| 149 |
+
print(f"\nπ Scenario {i}: {scenario}")
|
| 150 |
+
print("-" * 30)
|
| 151 |
+
|
| 152 |
+
request = ChatbotRequest(message=scenario, session_id=f"quality_test_{i}")
|
| 153 |
+
response = chatbot.process_message(request)
|
| 154 |
+
|
| 155 |
+
if response.awaiting_clarification:
|
| 156 |
+
print("π Clarification questions generated:")
|
| 157 |
+
# Extract questions from response for analysis
|
| 158 |
+
lines = response.response.split('\n')
|
| 159 |
+
questions = [line.strip() for line in lines if line.strip() and any(char.isdigit() and line.strip().startswith(char) for char in '123456789')]
|
| 160 |
+
|
| 161 |
+
for q in questions[:3]: # Show first 3 questions
|
| 162 |
+
print(f" β’ {q}")
|
| 163 |
+
|
| 164 |
+
print(f"β
Generated {len(questions)} clarification questions")
|
| 165 |
+
else:
|
| 166 |
+
print("β No clarification requested (unexpected)")
|
| 167 |
+
|
| 168 |
+
chatbot.close()
|
| 169 |
+
print("\nβ
Clarification quality tests completed!")
|
| 170 |
+
|
| 171 |
+
if __name__ == "__main__":
|
| 172 |
+
if not os.getenv('OPENAI_API_KEY'):
|
| 173 |
+
print("β Error: OPENAI_API_KEY environment variable not set")
|
| 174 |
+
print("Please set your OpenAI API key:")
|
| 175 |
+
print("export OPENAI_API_KEY='your-key-here'")
|
| 176 |
+
sys.exit(1)
|
| 177 |
+
|
| 178 |
+
test_interactive_transactions()
|
| 179 |
+
test_clarification_quality()
|
tests/test_nl_search.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 6 |
+
|
| 7 |
+
from chatbot import Chatbot
|
| 8 |
+
from models import ChatbotRequest
|
| 9 |
+
|
| 10 |
+
def test_natural_language_search():
|
| 11 |
+
print("π§ͺ Testing Natural Language to SQL Search")
|
| 12 |
+
print("="*60)
|
| 13 |
+
print("Note: Make sure to set OPENAI_API_KEY environment variable")
|
| 14 |
+
print("="*60)
|
| 15 |
+
|
| 16 |
+
chatbot = Chatbot()
|
| 17 |
+
|
| 18 |
+
# First add some test data
|
| 19 |
+
setup_queries = [
|
| 20 |
+
"Add a purchase of 20 USB drives from TechMart at β¬5 each",
|
| 21 |
+
"Add a purchase of 10 laptops from Electronics Plus at β¬800 each",
|
| 22 |
+
"Sold 5 USB drives to John Smith at β¬7 each",
|
| 23 |
+
"Sold 2 laptops to ABC Corp at β¬900 each"
|
| 24 |
+
]
|
| 25 |
+
|
| 26 |
+
print("π Setting up test data...")
|
| 27 |
+
for query in setup_queries:
|
| 28 |
+
request = ChatbotRequest(message=query)
|
| 29 |
+
response = chatbot.process_message(request)
|
| 30 |
+
print(f"β {query}")
|
| 31 |
+
|
| 32 |
+
print("\nπ Testing Natural Language Queries...")
|
| 33 |
+
print("-" * 60)
|
| 34 |
+
|
| 35 |
+
# Test natural language search queries
|
| 36 |
+
test_queries = [
|
| 37 |
+
"How many USB drives did we purchase?",
|
| 38 |
+
"What's the total value of all purchases?",
|
| 39 |
+
"Show me all sales to John Smith",
|
| 40 |
+
"Which suppliers have we bought from?",
|
| 41 |
+
"What products did we sell this month?",
|
| 42 |
+
"Show me the most expensive purchases",
|
| 43 |
+
"How much revenue did we generate from laptop sales?",
|
| 44 |
+
"List all transactions with TechMart",
|
| 45 |
+
"What's our total spending on electronics?",
|
| 46 |
+
"Show me customers who bought laptops"
|
| 47 |
+
]
|
| 48 |
+
|
| 49 |
+
for i, query in enumerate(test_queries, 1):
|
| 50 |
+
print(f"\nπ Test {i}: {query}")
|
| 51 |
+
print("-" * 50)
|
| 52 |
+
|
| 53 |
+
request = ChatbotRequest(message=query)
|
| 54 |
+
response = chatbot.process_message(request)
|
| 55 |
+
|
| 56 |
+
print(f"Response: {response.response}")
|
| 57 |
+
|
| 58 |
+
if response.sql_executed:
|
| 59 |
+
print(f"Generated SQL: {response.sql_executed}")
|
| 60 |
+
|
| 61 |
+
if response.intent_detected:
|
| 62 |
+
print(f"Intent: {response.intent_detected} (confidence: {response.intent_confidence:.2f})")
|
| 63 |
+
|
| 64 |
+
print()
|
| 65 |
+
|
| 66 |
+
chatbot.close()
|
| 67 |
+
print("β
Natural language search tests completed!")
|
| 68 |
+
|
| 69 |
+
if __name__ == "__main__":
|
| 70 |
+
if not os.getenv('OPENAI_API_KEY'):
|
| 71 |
+
print("β Error: OPENAI_API_KEY environment variable not set")
|
| 72 |
+
print("Please set your OpenAI API key:")
|
| 73 |
+
print("export OPENAI_API_KEY='your-key-here'")
|
| 74 |
+
sys.exit(1)
|
| 75 |
+
|
| 76 |
+
test_natural_language_search()
|
tests/test_rag_search.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 6 |
+
|
| 7 |
+
from chatbot import Chatbot
|
| 8 |
+
from models import ChatbotRequest
|
| 9 |
+
|
| 10 |
+
def test_rag_functionality():
|
| 11 |
+
print("π§ͺ Testing RAG (Retrieval-Augmented Generation) Functionality")
|
| 12 |
+
print("="*70)
|
| 13 |
+
print("Note: Make sure to set OPENAI_API_KEY environment variable")
|
| 14 |
+
print("="*70)
|
| 15 |
+
|
| 16 |
+
chatbot = Chatbot()
|
| 17 |
+
|
| 18 |
+
# First, populate the system with diverse data
|
| 19 |
+
setup_data = [
|
| 20 |
+
# Transaction data
|
| 21 |
+
"Add a purchase of 20 USB drives from TechMart at β¬5 each",
|
| 22 |
+
"Add a purchase of 10 laptops from Electronics Plus at β¬800 each",
|
| 23 |
+
"Sold 5 USB drives to John Smith at β¬7 each",
|
| 24 |
+
"Sold 2 laptops to ABC Corp at β¬900 each",
|
| 25 |
+
"Purchase 15 office chairs from Office Supplies Co at β¬150 per chair",
|
| 26 |
+
|
| 27 |
+
# Business events and meetings
|
| 28 |
+
"Meeting with George scheduled for next Tuesday at 2 PM to discuss new laptop supplier contract",
|
| 29 |
+
"Conference call with TechMart went well - they agreed to bulk discounts for USB drives",
|
| 30 |
+
"Quarterly review meeting completed - need to increase laptop inventory before Q4",
|
| 31 |
+
"Supplier evaluation: Electronics Plus provides excellent laptops but delivery times are slow",
|
| 32 |
+
"Team meeting notes: Focus on ergonomic office furniture for the new office space",
|
| 33 |
+
"Customer feedback: John Smith very satisfied with USB drive quality and pricing",
|
| 34 |
+
"Important reminder: Check inventory levels before placing next electronics order",
|
| 35 |
+
"Budget planning: Allocate β¬50,000 for office equipment in next quarter"
|
| 36 |
+
]
|
| 37 |
+
|
| 38 |
+
print("π Setting up test data...")
|
| 39 |
+
for i, data in enumerate(setup_data, 1):
|
| 40 |
+
request = ChatbotRequest(message=data)
|
| 41 |
+
response = chatbot.process_message(request)
|
| 42 |
+
print(f"β {i:2d}. {data[:60]}{'...' if len(data) > 60 else ''}")
|
| 43 |
+
|
| 44 |
+
print(f"\nβ
Setup complete! Added {len(setup_data)} records.")
|
| 45 |
+
print("\nπ Testing RAG-powered semantic search...")
|
| 46 |
+
print("-" * 70)
|
| 47 |
+
|
| 48 |
+
# Test various types of semantic search queries
|
| 49 |
+
test_queries = [
|
| 50 |
+
# Meeting and event queries
|
| 51 |
+
"When is my meeting with George?",
|
| 52 |
+
"What was discussed in the TechMart meeting?",
|
| 53 |
+
"Tell me about recent meetings and discussions",
|
| 54 |
+
|
| 55 |
+
# Product and supplier queries
|
| 56 |
+
"What do we know about TechMart as a supplier?",
|
| 57 |
+
"Show me information about laptop purchases and suppliers",
|
| 58 |
+
"What feedback have we received about our products?",
|
| 59 |
+
|
| 60 |
+
# Business planning queries
|
| 61 |
+
"What are our budget plans for next quarter?",
|
| 62 |
+
"What inventory considerations should I be aware of?",
|
| 63 |
+
"Tell me about office equipment and furniture plans",
|
| 64 |
+
|
| 65 |
+
# Customer information
|
| 66 |
+
"What do we know about John Smith?",
|
| 67 |
+
"Show me customer feedback and satisfaction information",
|
| 68 |
+
|
| 69 |
+
# Operational queries
|
| 70 |
+
"What reminders and important notes do I have?",
|
| 71 |
+
"Tell me about supplier evaluations and performance",
|
| 72 |
+
"What are the key business insights from recent records?"
|
| 73 |
+
]
|
| 74 |
+
|
| 75 |
+
for i, query in enumerate(test_queries, 1):
|
| 76 |
+
print(f"\nπ Test {i}: {query}")
|
| 77 |
+
print("-" * 50)
|
| 78 |
+
|
| 79 |
+
request = ChatbotRequest(message=query)
|
| 80 |
+
response = chatbot.process_message(request)
|
| 81 |
+
|
| 82 |
+
print(f"π€ Response: {response.response}")
|
| 83 |
+
|
| 84 |
+
if response.intent_detected:
|
| 85 |
+
print(f"π― Intent: {response.intent_detected} (confidence: {response.intent_confidence:.2f})")
|
| 86 |
+
|
| 87 |
+
print()
|
| 88 |
+
|
| 89 |
+
chatbot.close()
|
| 90 |
+
print("β
RAG functionality tests completed!")
|
| 91 |
+
print("\nπ Expected Behavior:")
|
| 92 |
+
print("- RAG should provide contextual, specific answers based on stored information")
|
| 93 |
+
print("- Responses should cite relevant details from business records")
|
| 94 |
+
print("- Should handle queries about meetings, suppliers, customers, and business plans")
|
| 95 |
+
print("- Should indicate when information is not available in the records")
|
| 96 |
+
|
| 97 |
+
def test_rag_edge_cases():
|
| 98 |
+
print("\n㪠Testing RAG Edge Cases")
|
| 99 |
+
print("-" * 40)
|
| 100 |
+
|
| 101 |
+
chatbot = Chatbot()
|
| 102 |
+
|
| 103 |
+
edge_case_queries = [
|
| 104 |
+
"Tell me about suppliers we've never worked with",
|
| 105 |
+
"What happened in 1995?",
|
| 106 |
+
"Show me information about flying cars",
|
| 107 |
+
"What's the weather like today?",
|
| 108 |
+
"Tell me about George's favorite color"
|
| 109 |
+
]
|
| 110 |
+
|
| 111 |
+
for i, query in enumerate(edge_case_queries, 1):
|
| 112 |
+
print(f"\nπ Edge Case {i}: {query}")
|
| 113 |
+
print("-" * 30)
|
| 114 |
+
|
| 115 |
+
request = ChatbotRequest(message=query)
|
| 116 |
+
response = chatbot.process_message(request)
|
| 117 |
+
|
| 118 |
+
print(f"π€ Response: {response.response}")
|
| 119 |
+
print()
|
| 120 |
+
|
| 121 |
+
chatbot.close()
|
| 122 |
+
print("β
Edge case testing completed!")
|
| 123 |
+
|
| 124 |
+
if __name__ == "__main__":
|
| 125 |
+
if not os.getenv('OPENAI_API_KEY'):
|
| 126 |
+
print("β Error: OPENAI_API_KEY environment variable not set")
|
| 127 |
+
print("Please set your OpenAI API key:")
|
| 128 |
+
print("export OPENAI_API_KEY='your-key-here'")
|
| 129 |
+
sys.exit(1)
|
| 130 |
+
|
| 131 |
+
test_rag_functionality()
|
| 132 |
+
test_rag_edge_cases()
|