Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .env.example +5 -0
- .github/workflows/deploy.yml +169 -0
- .github/workflows/update_space.yml +28 -0
- .gitignore +215 -0
- README.md +181 -7
- add_sample_data.py +65 -0
- config.yaml +155 -0
- gui/gradio_interface.py +627 -0
- main.py +61 -0
- populate_sample_data.py +81 -0
- requirements.txt +15 -0
- reset_database.py +66 -0
- run_gui.py +119 -0
- src/chatbot.py +402 -0
- src/config_manager.py +121 -0
- src/database_manager.py +255 -0
- src/entity_extractor.py +204 -0
- src/intent_classifier.py +173 -0
- src/models.py +104 -0
- src/nl_to_sql.py +201 -0
- src/rag_handler.py +204 -0
- src/transaction_clarifier.py +281 -0
- src/vector_store.py +214 -0
- tests/test_chatbot.py +49 -0
- tests/test_intent_classifier.py +98 -0
- tests/test_interactive_transactions.py +179 -0
- tests/test_nl_search.py +76 -0
- tests/test_rag_search.py +132 -0
.env.example
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# OpenAI API Configuration
|
| 2 |
+
OPENAI_API_KEY=your_openai_api_key_here
|
| 3 |
+
|
| 4 |
+
# Optional: Change the model used for NL to SQL conversion
|
| 5 |
+
# OPENAI_MODEL=gpt-3.5-turbo
|
.github/workflows/deploy.yml
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Deploy Gradio App
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: [ main ]
|
| 6 |
+
pull_request:
|
| 7 |
+
branches: [ main ]
|
| 8 |
+
workflow_dispatch:
|
| 9 |
+
|
| 10 |
+
jobs:
|
| 11 |
+
deploy:
|
| 12 |
+
runs-on: ubuntu-latest
|
| 13 |
+
|
| 14 |
+
steps:
|
| 15 |
+
- name: Checkout code
|
| 16 |
+
uses: actions/checkout@v4
|
| 17 |
+
|
| 18 |
+
- name: Set up Python
|
| 19 |
+
uses: actions/setup-python@v4
|
| 20 |
+
with:
|
| 21 |
+
python-version: '3.11'
|
| 22 |
+
|
| 23 |
+
- name: Cache pip dependencies
|
| 24 |
+
uses: actions/cache@v3
|
| 25 |
+
with:
|
| 26 |
+
path: ~/.cache/pip
|
| 27 |
+
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
|
| 28 |
+
restore-keys: |
|
| 29 |
+
${{ runner.os }}-pip-
|
| 30 |
+
|
| 31 |
+
- name: Install dependencies
|
| 32 |
+
run: |
|
| 33 |
+
python -m pip install --upgrade pip
|
| 34 |
+
pip install -r requirements.txt
|
| 35 |
+
|
| 36 |
+
- name: Download spaCy model
|
| 37 |
+
run: python -m spacy download en_core_web_sm
|
| 38 |
+
|
| 39 |
+
- name: Create config file
|
| 40 |
+
run: |
|
| 41 |
+
cat > config.yaml << EOF
|
| 42 |
+
openai:
|
| 43 |
+
api_key: ${{ secrets.OPENAI_API_KEY }}
|
| 44 |
+
model: "gpt-3.5-turbo"
|
| 45 |
+
max_tokens: 1500
|
| 46 |
+
temperature: 0.7
|
| 47 |
+
|
| 48 |
+
database:
|
| 49 |
+
url: "sqlite:///chatbot.db"
|
| 50 |
+
|
| 51 |
+
vector_store:
|
| 52 |
+
persist_directory: "./chroma_db"
|
| 53 |
+
collection_name: "business_transactions"
|
| 54 |
+
|
| 55 |
+
intent_classifier:
|
| 56 |
+
confidence_threshold: 0.7
|
| 57 |
+
|
| 58 |
+
entity_extraction:
|
| 59 |
+
spacy_model: "en_core_web_sm"
|
| 60 |
+
EOF
|
| 61 |
+
|
| 62 |
+
- name: Initialize database
|
| 63 |
+
run: python -c "from src.database_manager import DatabaseManager; db = DatabaseManager(); db.create_tables()"
|
| 64 |
+
|
| 65 |
+
- name: Run tests (if available)
|
| 66 |
+
run: |
|
| 67 |
+
if [ -d "tests" ] && [ -n "$(ls -A tests/*.py 2>/dev/null)" ]; then
|
| 68 |
+
python -m pytest tests/ -v
|
| 69 |
+
else
|
| 70 |
+
echo "No tests found, skipping test step"
|
| 71 |
+
fi
|
| 72 |
+
continue-on-error: true
|
| 73 |
+
|
| 74 |
+
- name: Deploy to Hugging Face Spaces
|
| 75 |
+
if: github.ref == 'refs/heads/main'
|
| 76 |
+
env:
|
| 77 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 78 |
+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
| 79 |
+
run: |
|
| 80 |
+
# Install huggingface_hub
|
| 81 |
+
pip install huggingface_hub
|
| 82 |
+
|
| 83 |
+
# Create a simple app.py for HF Spaces
|
| 84 |
+
cat > app.py << 'EOF'
|
| 85 |
+
#!/usr/bin/env python3
|
| 86 |
+
import os
|
| 87 |
+
import sys
|
| 88 |
+
from pathlib import Path
|
| 89 |
+
|
| 90 |
+
# Add gui directory to path
|
| 91 |
+
gui_dir = Path(__file__).parent / "gui"
|
| 92 |
+
sys.path.append(str(gui_dir))
|
| 93 |
+
|
| 94 |
+
if __name__ == "__main__":
|
| 95 |
+
from gradio_interface import GradioInterface
|
| 96 |
+
|
| 97 |
+
gui = GradioInterface()
|
| 98 |
+
gui.launch(
|
| 99 |
+
server_name="0.0.0.0",
|
| 100 |
+
server_port=7860,
|
| 101 |
+
share=False,
|
| 102 |
+
debug=False
|
| 103 |
+
)
|
| 104 |
+
EOF
|
| 105 |
+
|
| 106 |
+
# Create requirements.txt for HF Spaces
|
| 107 |
+
cp requirements.txt requirements_hf.txt
|
| 108 |
+
|
| 109 |
+
# Upload to Hugging Face Spaces
|
| 110 |
+
python -c "
|
| 111 |
+
from huggingface_hub import HfApi, upload_folder
|
| 112 |
+
import os
|
| 113 |
+
|
| 114 |
+
api = HfApi(token=os.environ['HF_TOKEN'])
|
| 115 |
+
|
| 116 |
+
# Create or update the space
|
| 117 |
+
try:
|
| 118 |
+
api.create_repo(
|
| 119 |
+
repo_id='${{ github.repository_owner }}/llm-chatbot',
|
| 120 |
+
repo_type='space',
|
| 121 |
+
space_sdk='gradio',
|
| 122 |
+
exist_ok=True
|
| 123 |
+
)
|
| 124 |
+
print('Space created/updated successfully')
|
| 125 |
+
except Exception as e:
|
| 126 |
+
print(f'Error creating space: {e}')
|
| 127 |
+
|
| 128 |
+
# Upload files
|
| 129 |
+
try:
|
| 130 |
+
upload_folder(
|
| 131 |
+
folder_path='.',
|
| 132 |
+
repo_id='${{ github.repository_owner }}/llm-chatbot',
|
| 133 |
+
repo_type='space',
|
| 134 |
+
token=os.environ['HF_TOKEN'],
|
| 135 |
+
ignore_patterns=['.git*', '__pycache__', '*.pyc', 'chroma_db', '*.db']
|
| 136 |
+
)
|
| 137 |
+
print('Files uploaded successfully')
|
| 138 |
+
except Exception as e:
|
| 139 |
+
print(f'Error uploading files: {e}')
|
| 140 |
+
"
|
| 141 |
+
|
| 142 |
+
- name: Deploy to Railway (Alternative)
|
| 143 |
+
if: github.ref == 'refs/heads/main' && env.RAILWAY_TOKEN != ''
|
| 144 |
+
env:
|
| 145 |
+
RAILWAY_TOKEN: ${{ secrets.RAILWAY_TOKEN }}
|
| 146 |
+
run: |
|
| 147 |
+
# Install Railway CLI
|
| 148 |
+
npm install -g @railway/cli
|
| 149 |
+
|
| 150 |
+
# Create Procfile for Railway
|
| 151 |
+
echo "web: python run_gui.py --host 0.0.0.0 --port \$PORT" > Procfile
|
| 152 |
+
|
| 153 |
+
# Deploy to Railway
|
| 154 |
+
railway login --token $RAILWAY_TOKEN
|
| 155 |
+
railway up
|
| 156 |
+
continue-on-error: true
|
| 157 |
+
|
| 158 |
+
- name: Deploy Summary
|
| 159 |
+
run: |
|
| 160 |
+
echo "π Deployment completed!"
|
| 161 |
+
echo "π± Your Gradio app should be available at:"
|
| 162 |
+
echo " - Hugging Face Spaces: https://huggingface.co/spaces/${{ github.repository_owner }}/llm-chatbot"
|
| 163 |
+
if [ -n "${{ secrets.RAILWAY_TOKEN }}" ]; then
|
| 164 |
+
echo " - Railway: Check Railway dashboard for URL"
|
| 165 |
+
fi
|
| 166 |
+
echo "π§ Make sure to set the required secrets in your repository:"
|
| 167 |
+
echo " - OPENAI_API_KEY: Your OpenAI API key"
|
| 168 |
+
echo " - HF_TOKEN: Your Hugging Face token"
|
| 169 |
+
echo " - RAILWAY_TOKEN: Your Railway token (optional)"
|
.github/workflows/update_space.yml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Run Python script
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- main
|
| 7 |
+
|
| 8 |
+
jobs:
|
| 9 |
+
build:
|
| 10 |
+
runs-on: ubuntu-latest
|
| 11 |
+
|
| 12 |
+
steps:
|
| 13 |
+
- name: Checkout
|
| 14 |
+
uses: actions/checkout@v2
|
| 15 |
+
|
| 16 |
+
- name: Set up Python
|
| 17 |
+
uses: actions/setup-python@v2
|
| 18 |
+
with:
|
| 19 |
+
python-version: '3.9'
|
| 20 |
+
|
| 21 |
+
- name: Install Gradio
|
| 22 |
+
run: python -m pip install gradio
|
| 23 |
+
|
| 24 |
+
- name: Log in to Hugging Face
|
| 25 |
+
run: python -c 'import huggingface_hub; huggingface_hub.login(token="${{ secrets.hf_token }}")'
|
| 26 |
+
|
| 27 |
+
- name: Deploy to Spaces
|
| 28 |
+
run: gradio deploy
|
.gitignore
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.sqlite
|
| 2 |
+
*.db
|
| 3 |
+
*.json
|
| 4 |
+
*.sql
|
| 5 |
+
*.bin
|
| 6 |
+
chroma_db/*
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# Byte-compiled / optimized / DLL files
|
| 10 |
+
__pycache__/
|
| 11 |
+
*.py[codz]
|
| 12 |
+
*$py.class
|
| 13 |
+
|
| 14 |
+
# C extensions
|
| 15 |
+
*.so
|
| 16 |
+
|
| 17 |
+
# Distribution / packaging
|
| 18 |
+
.Python
|
| 19 |
+
build/
|
| 20 |
+
develop-eggs/
|
| 21 |
+
dist/
|
| 22 |
+
downloads/
|
| 23 |
+
eggs/
|
| 24 |
+
.eggs/
|
| 25 |
+
lib/
|
| 26 |
+
lib64/
|
| 27 |
+
parts/
|
| 28 |
+
sdist/
|
| 29 |
+
var/
|
| 30 |
+
wheels/
|
| 31 |
+
share/python-wheels/
|
| 32 |
+
*.egg-info/
|
| 33 |
+
.installed.cfg
|
| 34 |
+
*.egg
|
| 35 |
+
MANIFEST
|
| 36 |
+
|
| 37 |
+
# PyInstaller
|
| 38 |
+
# Usually these files are written by a python script from a template
|
| 39 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 40 |
+
*.manifest
|
| 41 |
+
*.spec
|
| 42 |
+
|
| 43 |
+
# Installer logs
|
| 44 |
+
pip-log.txt
|
| 45 |
+
pip-delete-this-directory.txt
|
| 46 |
+
|
| 47 |
+
# Unit test / coverage reports
|
| 48 |
+
htmlcov/
|
| 49 |
+
.tox/
|
| 50 |
+
.nox/
|
| 51 |
+
.coverage
|
| 52 |
+
.coverage.*
|
| 53 |
+
.cache
|
| 54 |
+
nosetests.xml
|
| 55 |
+
coverage.xml
|
| 56 |
+
*.cover
|
| 57 |
+
*.py.cover
|
| 58 |
+
.hypothesis/
|
| 59 |
+
.pytest_cache/
|
| 60 |
+
cover/
|
| 61 |
+
|
| 62 |
+
# Translations
|
| 63 |
+
*.mo
|
| 64 |
+
*.pot
|
| 65 |
+
|
| 66 |
+
# Django stuff:
|
| 67 |
+
*.log
|
| 68 |
+
local_settings.py
|
| 69 |
+
db.sqlite3
|
| 70 |
+
db.sqlite3-journal
|
| 71 |
+
|
| 72 |
+
# Flask stuff:
|
| 73 |
+
instance/
|
| 74 |
+
.webassets-cache
|
| 75 |
+
|
| 76 |
+
# Scrapy stuff:
|
| 77 |
+
.scrapy
|
| 78 |
+
|
| 79 |
+
# Sphinx documentation
|
| 80 |
+
docs/_build/
|
| 81 |
+
|
| 82 |
+
# PyBuilder
|
| 83 |
+
.pybuilder/
|
| 84 |
+
target/
|
| 85 |
+
|
| 86 |
+
# Jupyter Notebook
|
| 87 |
+
.ipynb_checkpoints
|
| 88 |
+
|
| 89 |
+
# IPython
|
| 90 |
+
profile_default/
|
| 91 |
+
ipython_config.py
|
| 92 |
+
|
| 93 |
+
# pyenv
|
| 94 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 95 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 96 |
+
# .python-version
|
| 97 |
+
|
| 98 |
+
# pipenv
|
| 99 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 100 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 101 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 102 |
+
# install all needed dependencies.
|
| 103 |
+
#Pipfile.lock
|
| 104 |
+
|
| 105 |
+
# UV
|
| 106 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 107 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 108 |
+
# commonly ignored for libraries.
|
| 109 |
+
#uv.lock
|
| 110 |
+
|
| 111 |
+
# poetry
|
| 112 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 113 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 114 |
+
# commonly ignored for libraries.
|
| 115 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 116 |
+
#poetry.lock
|
| 117 |
+
#poetry.toml
|
| 118 |
+
|
| 119 |
+
# pdm
|
| 120 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 121 |
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
| 122 |
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
| 123 |
+
#pdm.lock
|
| 124 |
+
#pdm.toml
|
| 125 |
+
.pdm-python
|
| 126 |
+
.pdm-build/
|
| 127 |
+
|
| 128 |
+
# pixi
|
| 129 |
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
| 130 |
+
#pixi.lock
|
| 131 |
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
| 132 |
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
| 133 |
+
.pixi
|
| 134 |
+
|
| 135 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 136 |
+
__pypackages__/
|
| 137 |
+
|
| 138 |
+
# Celery stuff
|
| 139 |
+
celerybeat-schedule
|
| 140 |
+
celerybeat.pid
|
| 141 |
+
|
| 142 |
+
# SageMath parsed files
|
| 143 |
+
*.sage.py
|
| 144 |
+
|
| 145 |
+
# Environments
|
| 146 |
+
.env
|
| 147 |
+
.envrc
|
| 148 |
+
.venv
|
| 149 |
+
env/
|
| 150 |
+
venv/
|
| 151 |
+
ENV/
|
| 152 |
+
env.bak/
|
| 153 |
+
venv.bak/
|
| 154 |
+
|
| 155 |
+
# Spyder project settings
|
| 156 |
+
.spyderproject
|
| 157 |
+
.spyproject
|
| 158 |
+
|
| 159 |
+
# Rope project settings
|
| 160 |
+
.ropeproject
|
| 161 |
+
|
| 162 |
+
# mkdocs documentation
|
| 163 |
+
/site
|
| 164 |
+
|
| 165 |
+
# mypy
|
| 166 |
+
.mypy_cache/
|
| 167 |
+
.dmypy.json
|
| 168 |
+
dmypy.json
|
| 169 |
+
|
| 170 |
+
# Pyre type checker
|
| 171 |
+
.pyre/
|
| 172 |
+
|
| 173 |
+
# pytype static type analyzer
|
| 174 |
+
.pytype/
|
| 175 |
+
|
| 176 |
+
# Cython debug symbols
|
| 177 |
+
cython_debug/
|
| 178 |
+
|
| 179 |
+
# PyCharm
|
| 180 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 181 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 182 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 183 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 184 |
+
#.idea/
|
| 185 |
+
|
| 186 |
+
# Abstra
|
| 187 |
+
# Abstra is an AI-powered process automation framework.
|
| 188 |
+
# Ignore directories containing user credentials, local state, and settings.
|
| 189 |
+
# Learn more at https://abstra.io/docs
|
| 190 |
+
.abstra/
|
| 191 |
+
|
| 192 |
+
# Visual Studio Code
|
| 193 |
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
| 194 |
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
| 195 |
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
| 196 |
+
# you could uncomment the following to ignore the entire vscode folder
|
| 197 |
+
# .vscode/
|
| 198 |
+
|
| 199 |
+
# Ruff stuff:
|
| 200 |
+
.ruff_cache/
|
| 201 |
+
|
| 202 |
+
# PyPI configuration file
|
| 203 |
+
.pypirc
|
| 204 |
+
|
| 205 |
+
# Cursor
|
| 206 |
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
| 207 |
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
| 208 |
+
# refer to https://docs.cursor.com/context/ignore-files
|
| 209 |
+
.cursorignore
|
| 210 |
+
.cursorindexingignore
|
| 211 |
+
|
| 212 |
+
# Marimo
|
| 213 |
+
marimo/_static/
|
| 214 |
+
marimo/_lsp/
|
| 215 |
+
__marimo__/
|
README.md
CHANGED
|
@@ -1,12 +1,186 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
|
| 4 |
-
colorFrom: blue
|
| 5 |
-
colorTo: green
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.34.2
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
---
|
|
|
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Business_Chatbot
|
| 3 |
+
app_file: main.py
|
|
|
|
|
|
|
| 4 |
sdk: gradio
|
| 5 |
sdk_version: 5.34.2
|
|
|
|
|
|
|
| 6 |
---
|
| 7 |
+
# LLM Chatbot with SQL Database and Vector Store
|
| 8 |
|
| 9 |
+
A research-grade chatbot system that processes user queries to extract entities, store transactions in SQL database, and maintain semantic search capabilities through vector storage.
|
| 10 |
+
|
| 11 |
+
## Features
|
| 12 |
+
|
| 13 |
+
- **Entity Extraction**: Automatically extracts products, quantities, suppliers, customers, and prices from natural language
|
| 14 |
+
- **Interactive Transaction Completion**: LLM-powered clarification for missing information
|
| 15 |
+
- **SQL Database**: Stores structured sales and purchase data with relationships
|
| 16 |
+
- **Natural Language to SQL**: Uses OpenAI GPT to convert plain English queries to SQL
|
| 17 |
+
- **RAG (Retrieval-Augmented Generation)**: Intelligent responses using LLM with retrieved context
|
| 18 |
+
- **Vector Store**: Enables semantic search of events and transactions
|
| 19 |
+
- **Query Validation**: Ensures generated SQL queries are safe and valid
|
| 20 |
+
|
| 21 |
+
## Architecture
|
| 22 |
+
|
| 23 |
+
```
|
| 24 |
+
User Input: "Add a purchase of 20 USB drives from TechMart at β¬5 each"
|
| 25 |
+
β
|
| 26 |
+
1. Entity Extraction β product: USB drives, quantity: 20, supplier: TechMart, unit price: β¬5
|
| 27 |
+
β
|
| 28 |
+
2. SQL Generation β INSERT INTO purchases (supplier_id, product_id, quantity, unit_price, total_cost)
|
| 29 |
+
β
|
| 30 |
+
3. Vector Storage β Embed and store semantic summary of the event
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
## Setup
|
| 34 |
+
|
| 35 |
+
1. Install dependencies:
|
| 36 |
+
```bash
|
| 37 |
+
pip install -r requirements.txt
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
2. Download spaCy model:
|
| 41 |
+
```bash
|
| 42 |
+
python -m spacy download en_core_web_sm
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
3. Set up OpenAI API key:
|
| 46 |
+
```bash
|
| 47 |
+
export OPENAI_API_KEY='your-openai-api-key-here'
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
4. Run the chatbot:
|
| 51 |
+
```bash
|
| 52 |
+
python main.py
|
| 53 |
+
```
|
| 54 |
+
|
| 55 |
+
5. Test the system:
|
| 56 |
+
```bash
|
| 57 |
+
python test_chatbot.py
|
| 58 |
+
python test_nl_search.py # Test natural language search
|
| 59 |
+
python test_intent_classifier.py # Test intent classification
|
| 60 |
+
python test_rag_search.py # Test RAG functionality
|
| 61 |
+
python test_interactive_transactions.py # Test interactive transaction completion
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
## Usage Examples
|
| 65 |
+
|
| 66 |
+
### Adding Transactions (Interactive)
|
| 67 |
+
- `"Add a purchase of 20 USB drives from TechMart at β¬5 each"` (Complete)
|
| 68 |
+
- `"I bought some laptops"` (Will ask for: quantity, supplier, price)
|
| 69 |
+
- `"Sold items to a customer"` (Will ask for: product, quantity, customer, price)
|
| 70 |
+
- User can respond with specific details or say "N/A" for optional fields
|
| 71 |
+
|
| 72 |
+
### Querying Data (Natural Language to SQL)
|
| 73 |
+
- `"How many USB drives did we purchase?"`
|
| 74 |
+
- `"What's the total value of all purchases?"`
|
| 75 |
+
- `"Show me all sales to John Smith"`
|
| 76 |
+
- `"Which suppliers have we bought from?"`
|
| 77 |
+
- `"What's our total spending on electronics?"`
|
| 78 |
+
- `"Show me the most expensive purchases"`
|
| 79 |
+
|
| 80 |
+
### Semantic Search (RAG-powered)
|
| 81 |
+
- `"When is my meeting with George?"`
|
| 82 |
+
- `"What do we know about TechMart as a supplier?"`
|
| 83 |
+
- `"Tell me about recent meetings and discussions"`
|
| 84 |
+
- `"Show me customer feedback and satisfaction information"`
|
| 85 |
+
|
| 86 |
+
### General Information
|
| 87 |
+
- `"Meeting with new supplier scheduled for next week"`
|
| 88 |
+
- `"Important: Check inventory levels before next order"`
|
| 89 |
+
|
| 90 |
+
## Database Schema
|
| 91 |
+
|
| 92 |
+
- **suppliers**: Company information
|
| 93 |
+
- **customers**: Customer details
|
| 94 |
+
- **products**: Product catalog
|
| 95 |
+
- **purchases**: Purchase transactions
|
| 96 |
+
- **sales**: Sales transactions
|
| 97 |
+
|
| 98 |
+
## Vector Store
|
| 99 |
+
|
| 100 |
+
Uses ChromaDB with sentence transformers for semantic similarity search of:
|
| 101 |
+
- Transaction summaries
|
| 102 |
+
- General business events
|
| 103 |
+
- Meeting notes and reminders
|
| 104 |
+
|
| 105 |
+
## Files Structure
|
| 106 |
+
|
| 107 |
+
```
|
| 108 |
+
βββ src/
|
| 109 |
+
β βββ models.py # Data models and schemas
|
| 110 |
+
β βββ entity_extractor.py # NLP entity extraction
|
| 111 |
+
β βββ database_manager.py # SQL database operations
|
| 112 |
+
β βββ vector_store.py # Semantic search functionality
|
| 113 |
+
β βββ nl_to_sql.py # OpenAI-powered natural language to SQL
|
| 114 |
+
β βββ intent_classifier.py # OpenAI-powered intent classification
|
| 115 |
+
β βββ rag_handler.py # RAG (Retrieval-Augmented Generation)
|
| 116 |
+
β βββ transaction_clarifier.py # Interactive transaction completion
|
| 117 |
+
β βββ chatbot.py # Main chatbot logic
|
| 118 |
+
βββ database/
|
| 119 |
+
β βββ schema.sql # Database schema
|
| 120 |
+
βββ main.py # Interactive chatbot interface
|
| 121 |
+
βββ test_chatbot.py # Test suite
|
| 122 |
+
βββ test_nl_search.py # Natural language search tests
|
| 123 |
+
βββ test_intent_classifier.py # Intent classification tests
|
| 124 |
+
βββ test_rag_search.py # RAG functionality tests
|
| 125 |
+
βββ test_interactive_transactions.py # Interactive transaction tests
|
| 126 |
+
βββ .env.example # Environment variables template
|
| 127 |
+
βββ requirements.txt # Python dependencies
|
| 128 |
+
```
|
| 129 |
+
|
| 130 |
+
## Research Applications
|
| 131 |
+
|
| 132 |
+
This system demonstrates:
|
| 133 |
+
- Multi-modal data storage (structured + vector)
|
| 134 |
+
- LLM-powered natural language to SQL conversion
|
| 135 |
+
- RAG (Retrieval-Augmented Generation) for intelligent responses
|
| 136 |
+
- Interactive transaction completion with missing information handling
|
| 137 |
+
- OpenAI-based intent classification
|
| 138 |
+
- Multi-turn conversation state management
|
| 139 |
+
- Semantic similarity search with embeddings
|
| 140 |
+
- Named entity recognition and extraction
|
| 141 |
+
- Query validation and SQL injection prevention
|
| 142 |
+
- Conversational business process automation
|
| 143 |
+
|
| 144 |
+
## Future Works
|
| 145 |
+
|
| 146 |
+
### Intent Classification Improvements
|
| 147 |
+
|
| 148 |
+
The current system uses OpenAI API for intent classification, which provides excellent accuracy but has some limitations:
|
| 149 |
+
|
| 150 |
+
**Current Limitations:**
|
| 151 |
+
- Requires internet connectivity and API calls for each message
|
| 152 |
+
- Dependent on OpenAI service availability and costs
|
| 153 |
+
- May have latency for real-time applications
|
| 154 |
+
- Limited customization for domain-specific intents
|
| 155 |
+
|
| 156 |
+
**Potential Improvements:**
|
| 157 |
+
|
| 158 |
+
1. **Fine-tuned Classification Models**
|
| 159 |
+
- Train a smaller, specialized model on business transaction data
|
| 160 |
+
- Use frameworks like Hugging Face Transformers with custom datasets
|
| 161 |
+
- Deploy locally for faster inference and offline capability
|
| 162 |
+
- Examples: DistilBERT, RoBERTa fine-tuned on business intent data
|
| 163 |
+
|
| 164 |
+
2. **Local LLM Integration**
|
| 165 |
+
- Replace OpenAI API with local models (Llama, Mistral, etc.)
|
| 166 |
+
- Use frameworks like Ollama, LangChain, or vLLM for local deployment
|
| 167 |
+
- Maintain privacy while reducing external dependencies
|
| 168 |
+
- Cost-effective for high-volume applications
|
| 169 |
+
|
| 170 |
+
3. **Intent Embedding Approaches**
|
| 171 |
+
- Create vector embeddings for known intent patterns
|
| 172 |
+
- Use similarity search instead of generative classification
|
| 173 |
+
- Combine with few-shot learning for new intent types
|
| 174 |
+
- More efficient for simple intent detection scenarios
|
| 175 |
+
|
| 176 |
+
4. **Hybrid Approaches**
|
| 177 |
+
- Combine rule-based filtering with LLM classification
|
| 178 |
+
- Use confidence thresholds to decide when to query LLM
|
| 179 |
+
- Cache common patterns to reduce API calls
|
| 180 |
+
- Implement progressive enhancement from simple to complex classification
|
| 181 |
+
|
| 182 |
+
5. **Domain-Specific Enhancements**
|
| 183 |
+
- Add business context and domain knowledge
|
| 184 |
+
- Implement multi-intent detection for complex queries
|
| 185 |
+
- Add conversation history context for better classification
|
| 186 |
+
- Support for industry-specific terminology and patterns
|
add_sample_data.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
Add sample data to the chatbot database for testing the dashboard.
|
| 5 |
+
This script adds realistic business transactions to populate the dashboard.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import sys
|
| 9 |
+
import os
|
| 10 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 11 |
+
|
| 12 |
+
from chatbot import Chatbot
|
| 13 |
+
from models import ChatbotRequest
|
| 14 |
+
|
| 15 |
+
def add_sample_data():
|
| 16 |
+
"""Add sample transactions to the database."""
|
| 17 |
+
print("π Adding sample data to the database...")
|
| 18 |
+
|
| 19 |
+
chatbot = Chatbot()
|
| 20 |
+
|
| 21 |
+
# Sample purchases
|
| 22 |
+
purchases = [
|
| 23 |
+
"Add a purchase of 10 USB drives from TechMart at β¬5 each",
|
| 24 |
+
"Add a purchase of 5 laptops from Electronics Plus at β¬800 each",
|
| 25 |
+
"Add a purchase of 20 keyboards from Office Supplies Co at β¬25 each",
|
| 26 |
+
"Add a purchase of 8 monitors from TechMart at β¬200 each",
|
| 27 |
+
"Add a purchase of 15 webcams from Electronics Plus at β¬45 each"
|
| 28 |
+
]
|
| 29 |
+
|
| 30 |
+
# Sample sales
|
| 31 |
+
sales = [
|
| 32 |
+
"Sold 8 USB drives to ABC Corp at β¬12 each",
|
| 33 |
+
"Sold 3 laptops to XYZ Ltd at β¬1200 each",
|
| 34 |
+
"Sold 12 keyboards to StartupTech at β¬40 each",
|
| 35 |
+
"Sold 5 monitors to Creative Agency at β¬350 each",
|
| 36 |
+
"Sold 10 webcams to Remote Work Solutions at β¬75 each",
|
| 37 |
+
"Sold 6 USB drives to Local Business at β¬15 each",
|
| 38 |
+
"Sold 2 laptops to Consulting Firm at β¬1100 each"
|
| 39 |
+
]
|
| 40 |
+
|
| 41 |
+
print("π¦ Adding purchase transactions...")
|
| 42 |
+
for purchase in purchases:
|
| 43 |
+
try:
|
| 44 |
+
request = ChatbotRequest(message=purchase)
|
| 45 |
+
response = chatbot.process_message(request)
|
| 46 |
+
print(f" β
{purchase}")
|
| 47 |
+
except Exception as e:
|
| 48 |
+
print(f" β Failed: {purchase} - {e}")
|
| 49 |
+
|
| 50 |
+
print("π° Adding sales transactions...")
|
| 51 |
+
for sale in sales:
|
| 52 |
+
try:
|
| 53 |
+
request = ChatbotRequest(message=sale)
|
| 54 |
+
response = chatbot.process_message(request)
|
| 55 |
+
print(f" β
{sale}")
|
| 56 |
+
except Exception as e:
|
| 57 |
+
print(f" β Failed: {sale} - {e}")
|
| 58 |
+
|
| 59 |
+
chatbot.close()
|
| 60 |
+
print("β
Sample data added successfully!")
|
| 61 |
+
print("π You can now launch the GUI to see the populated dashboard:")
|
| 62 |
+
print(" python run_gui.py")
|
| 63 |
+
|
| 64 |
+
if __name__ == "__main__":
|
| 65 |
+
add_sample_data()
|
config.yaml
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# LLM Chatbot Configuration
|
| 2 |
+
# This file contains all configurable settings for the chatbot application
|
| 3 |
+
|
| 4 |
+
# Database Configuration
|
| 5 |
+
database:
|
| 6 |
+
path: "chatbot.db"
|
| 7 |
+
connection_string: "sqlite:///{path}"
|
| 8 |
+
default_suppliers:
|
| 9 |
+
- "TechMart"
|
| 10 |
+
- "Office Supplies Co"
|
| 11 |
+
- "Electronics Plus"
|
| 12 |
+
default_products:
|
| 13 |
+
- name: "USB drives"
|
| 14 |
+
category: "Electronics"
|
| 15 |
+
- name: "Office chairs"
|
| 16 |
+
category: "Furniture"
|
| 17 |
+
- name: "Laptops"
|
| 18 |
+
category: "Electronics"
|
| 19 |
+
- name: "Monitors"
|
| 20 |
+
category: "Electronics"
|
| 21 |
+
- name: "Keyboards"
|
| 22 |
+
category: "Electronics"
|
| 23 |
+
|
| 24 |
+
# OpenAI API Configuration
|
| 25 |
+
openai:
|
| 26 |
+
# Intent Classification
|
| 27 |
+
intent_classifier:
|
| 28 |
+
model: "gpt-4o-mini"
|
| 29 |
+
temperature: 0.1
|
| 30 |
+
max_tokens: 300
|
| 31 |
+
|
| 32 |
+
# Natural Language to SQL
|
| 33 |
+
nl_to_sql:
|
| 34 |
+
model: "gpt-4o-mini"
|
| 35 |
+
temperature: 0.1
|
| 36 |
+
max_tokens: 500
|
| 37 |
+
|
| 38 |
+
# SQL Explanation
|
| 39 |
+
sql_explanation:
|
| 40 |
+
model: "gpt-3.5-turbo"
|
| 41 |
+
temperature: 0.3
|
| 42 |
+
max_tokens: 200
|
| 43 |
+
|
| 44 |
+
# RAG Handler
|
| 45 |
+
rag_handler:
|
| 46 |
+
model: "gpt-4o-mini"
|
| 47 |
+
temperature: 0.3
|
| 48 |
+
max_tokens: 800
|
| 49 |
+
|
| 50 |
+
# Query Enhancement
|
| 51 |
+
query_enhancement:
|
| 52 |
+
model: "gpt-4o-mini"
|
| 53 |
+
temperature: 0.2
|
| 54 |
+
max_tokens: 100
|
| 55 |
+
|
| 56 |
+
# Transaction Clarifier
|
| 57 |
+
transaction_clarifier:
|
| 58 |
+
model: "gpt-4o-mini"
|
| 59 |
+
temperature: 0.3
|
| 60 |
+
max_tokens: 400
|
| 61 |
+
|
| 62 |
+
# Transaction Validation
|
| 63 |
+
transaction_validation:
|
| 64 |
+
model: "gpt-4o-mini"
|
| 65 |
+
temperature: 0.1
|
| 66 |
+
max_tokens: 300
|
| 67 |
+
|
| 68 |
+
# Vector Store Configuration
|
| 69 |
+
vector_store:
|
| 70 |
+
collection_name: "chatbot_events"
|
| 71 |
+
persistence_path: "./chroma_db"
|
| 72 |
+
embedding_model: "all-MiniLM-L6-v2"
|
| 73 |
+
|
| 74 |
+
# Search and Query Configuration
|
| 75 |
+
search:
|
| 76 |
+
# Default number of vector search results
|
| 77 |
+
vector_search_results: 8
|
| 78 |
+
|
| 79 |
+
# Default number of recent search results
|
| 80 |
+
recent_events_limit: 10
|
| 81 |
+
|
| 82 |
+
# Default limit for database queries
|
| 83 |
+
default_query_limit: 10
|
| 84 |
+
|
| 85 |
+
# Maximum SQL results to display
|
| 86 |
+
max_sql_results_display: 20
|
| 87 |
+
|
| 88 |
+
# Recent transactions display limit
|
| 89 |
+
recent_transactions_limit: 10
|
| 90 |
+
|
| 91 |
+
# Entity Extraction Configuration
|
| 92 |
+
entity_extraction:
|
| 93 |
+
spacy_model: "en_core_web_sm"
|
| 94 |
+
|
| 95 |
+
# Fallback classification keywords
|
| 96 |
+
purchase_keywords:
|
| 97 |
+
- "buy"
|
| 98 |
+
- "purchase"
|
| 99 |
+
- "acquire"
|
| 100 |
+
- "order"
|
| 101 |
+
- "procure"
|
| 102 |
+
|
| 103 |
+
sale_keywords:
|
| 104 |
+
- "sell"
|
| 105 |
+
- "sale"
|
| 106 |
+
- "sold"
|
| 107 |
+
- "revenue"
|
| 108 |
+
- "income"
|
| 109 |
+
|
| 110 |
+
# Business Logic Configuration
|
| 111 |
+
business_logic:
|
| 112 |
+
# Required fields for transaction types
|
| 113 |
+
required_fields:
|
| 114 |
+
purchase:
|
| 115 |
+
- "product"
|
| 116 |
+
- "quantity"
|
| 117 |
+
- "supplier"
|
| 118 |
+
- "unit_price"
|
| 119 |
+
sale:
|
| 120 |
+
- "product"
|
| 121 |
+
- "quantity"
|
| 122 |
+
- "customer"
|
| 123 |
+
- "unit_price"
|
| 124 |
+
|
| 125 |
+
# Cancellation keywords
|
| 126 |
+
cancellation_keywords:
|
| 127 |
+
- "cancel"
|
| 128 |
+
- "quit"
|
| 129 |
+
- "stop"
|
| 130 |
+
- "abort"
|
| 131 |
+
|
| 132 |
+
# Dangerous SQL keywords (for security)
|
| 133 |
+
dangerous_sql_keywords:
|
| 134 |
+
- "drop"
|
| 135 |
+
- "delete"
|
| 136 |
+
- "truncate"
|
| 137 |
+
- "alter"
|
| 138 |
+
- "create"
|
| 139 |
+
- "insert"
|
| 140 |
+
- "update"
|
| 141 |
+
|
| 142 |
+
# Application Settings
|
| 143 |
+
app:
|
| 144 |
+
# Enable/disable features
|
| 145 |
+
features:
|
| 146 |
+
vector_storage: true
|
| 147 |
+
intent_classification: true
|
| 148 |
+
entity_extraction: true
|
| 149 |
+
transaction_clarification: true
|
| 150 |
+
rag_search: true
|
| 151 |
+
|
| 152 |
+
# Logging configuration
|
| 153 |
+
logging:
|
| 154 |
+
level: "INFO"
|
| 155 |
+
format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
gui/gradio_interface.py
ADDED
|
@@ -0,0 +1,627 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import sys
|
| 5 |
+
import os
|
| 6 |
+
from typing import List, Tuple
|
| 7 |
+
from sqlalchemy import text
|
| 8 |
+
|
| 9 |
+
# Add the src directory to the path to import existing modules
|
| 10 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src'))
|
| 11 |
+
|
| 12 |
+
from chatbot import Chatbot
|
| 13 |
+
from models import ChatbotRequest
|
| 14 |
+
|
| 15 |
+
class GradioInterface:
|
| 16 |
+
"""Gradio GUI interface for the LLM Chatbot."""
|
| 17 |
+
|
| 18 |
+
    def __init__(self):
        """Initialize the Gradio interface with the existing chatbot."""
        # Backing chatbot engine; owns the DB session and vector store.
        # Its resources are released in launch()'s finally block.
        self.chatbot = Chatbot()
        # NOTE(review): never read or written elsewhere in this class —
        # Gradio's Chatbot component keeps its own history. Confirm no
        # external caller relies on this attribute before removing.
        self.conversation_history = []
|
| 22 |
+
|
| 23 |
+
def process_message(self, message: str, history: List[Tuple[str, str]]) -> Tuple[str, List[Tuple[str, str]]]:
|
| 24 |
+
"""
|
| 25 |
+
Process a user message and return the response with updated history.
|
| 26 |
+
|
| 27 |
+
Args:
|
| 28 |
+
message: User input message
|
| 29 |
+
history: Chat history as list of (user_msg, bot_response) tuples
|
| 30 |
+
|
| 31 |
+
Returns:
|
| 32 |
+
Tuple of (empty_string_for_input, updated_history)
|
| 33 |
+
"""
|
| 34 |
+
if not message.strip():
|
| 35 |
+
return "", history
|
| 36 |
+
|
| 37 |
+
# Handle quit/exit commands
|
| 38 |
+
if message.lower().strip() in ['quit', 'exit', 'bye']:
|
| 39 |
+
bot_response = "π Goodbye! Refresh the page to start a new session."
|
| 40 |
+
history.append((message, bot_response))
|
| 41 |
+
return "", history
|
| 42 |
+
|
| 43 |
+
try:
|
| 44 |
+
# Process the message using the existing chatbot
|
| 45 |
+
request = ChatbotRequest(message=message)
|
| 46 |
+
response = chatbot_response = self.chatbot.process_message(request)
|
| 47 |
+
|
| 48 |
+
# Build the response with additional information
|
| 49 |
+
response_text = f"π€ {response.response}"
|
| 50 |
+
|
| 51 |
+
# Add extracted entities information
|
| 52 |
+
if response.entities_extracted:
|
| 53 |
+
entities_info = (
|
| 54 |
+
f"\n\nπ **Extracted Information:**\n"
|
| 55 |
+
f"- Type: {response.entities_extracted.transaction_type}\n"
|
| 56 |
+
f"- Product: {response.entities_extracted.product}\n"
|
| 57 |
+
f"- Quantity: {response.entities_extracted.quantity}\n"
|
| 58 |
+
f"- Total Amount: β¬{response.entities_extracted.total_amount}"
|
| 59 |
+
)
|
| 60 |
+
response_text += entities_info
|
| 61 |
+
|
| 62 |
+
# Add vector storage confirmation
|
| 63 |
+
if response.vector_stored:
|
| 64 |
+
response_text += "\n\nπΎ Information stored in vector database for future semantic search"
|
| 65 |
+
|
| 66 |
+
# Add intent detection information
|
| 67 |
+
if response.intent_detected:
|
| 68 |
+
response_text += f"\n\nπ― **Intent Detected:** {response.intent_detected} (confidence: {response.intent_confidence:.2f})"
|
| 69 |
+
|
| 70 |
+
# Add clarification prompt
|
| 71 |
+
if response.awaiting_clarification:
|
| 72 |
+
response_text += "\n\nβ³ **Waiting for your response to complete the transaction...**"
|
| 73 |
+
|
| 74 |
+
# Update history
|
| 75 |
+
history.append((message, response_text))
|
| 76 |
+
|
| 77 |
+
except Exception as e:
|
| 78 |
+
error_response = f"β Error processing message: {str(e)}"
|
| 79 |
+
history.append((message, error_response))
|
| 80 |
+
|
| 81 |
+
return "", history
|
| 82 |
+
|
| 83 |
+
def clear_chat(self) -> Tuple[str, List]:
|
| 84 |
+
"""Clear the chat history and reset the conversation."""
|
| 85 |
+
return "", []
|
| 86 |
+
|
| 87 |
+
    def get_dashboard_data(self):
        """Collect dashboard aggregates via direct SQL against the chatbot DB.

        Returns:
            dict with keys: total_purchases / total_sales (row counts),
            total_revenue / total_expenses / profit (rounded to 2 decimals),
            recent_transactions (last 5 rows across both tables) and
            top_products (top 5 by combined purchase+sale quantity).
            On any database error a zeroed dict of the same shape is
            returned so the dashboard still renders.
        """
        try:
            # Bypass the NL pipeline and query the SQLAlchemy session directly.
            db_manager = self.chatbot.db_manager

            # Aggregates; `or 0` converts a NULL scalar (empty table) to 0.
            total_purchases = db_manager.session.execute(
                text("SELECT COUNT(*) FROM purchases")
            ).scalar() or 0

            total_sales = db_manager.session.execute(
                text("SELECT COUNT(*) FROM sales")
            ).scalar() or 0

            total_revenue = db_manager.session.execute(
                text("SELECT SUM(total_amount) FROM sales")
            ).scalar() or 0

            total_expenses = db_manager.session.execute(
                text("SELECT SUM(total_cost) FROM purchases")
            ).scalar() or 0

            # Last 5 transactions, newest first, merging purchases and sales
            # into a common (type, product, qty, amount, partner, date) shape.
            recent_transactions = db_manager.session.execute(
                text("""
                    SELECT 'purchase' as transaction_type, p.name as product, pu.quantity,
                           pu.total_cost as total_amount, s.name as partner, pu.purchase_date as created_at
                    FROM purchases pu
                    LEFT JOIN products p ON pu.product_id = p.id
                    LEFT JOIN suppliers s ON pu.supplier_id = s.id
                    UNION ALL
                    SELECT 'sale' as transaction_type, p.name as product, sa.quantity,
                           sa.total_amount, c.name as partner, sa.sale_date as created_at
                    FROM sales sa
                    LEFT JOIN products p ON sa.product_id = p.id
                    LEFT JOIN customers c ON sa.customer_id = c.id
                    ORDER BY created_at DESC
                    LIMIT 5
                """)
            ).fetchall()

            # Top 5 products by total quantity moved (purchases + sales).
            top_products = db_manager.session.execute(
                text("""
                    SELECT p.name as product, SUM(combined.quantity) as total_qty, COUNT(*) as transaction_count
                    FROM (
                        SELECT product_id, quantity FROM purchases
                        UNION ALL
                        SELECT product_id, quantity FROM sales
                    ) combined
                    LEFT JOIN products p ON combined.product_id = p.id
                    GROUP BY p.name
                    ORDER BY total_qty DESC
                    LIMIT 5
                """)
            ).fetchall()

            return {
                'total_purchases': total_purchases,
                'total_sales': total_sales,
                'total_revenue': round(total_revenue, 2),
                'total_expenses': round(total_expenses, 2),
                'profit': round(total_revenue - total_expenses, 2),
                'recent_transactions': recent_transactions,
                'top_products': top_products
            }

        except Exception as e:
            # NOTE(review): the exception is swallowed silently and the UI
            # shows all-zero data — consider logging `e` so DB failures
            # are diagnosable.
            return {
                'total_purchases': 0,
                'total_sales': 0,
                'total_revenue': 0.0,
                'total_expenses': 0.0,
                'profit': 0.0,
                'recent_transactions': [],
                'top_products': []
            }
|
| 165 |
+
|
| 166 |
+
def create_revenue_chart(self, data):
|
| 167 |
+
"""Create revenue vs expenses chart."""
|
| 168 |
+
import plotly.graph_objects as go
|
| 169 |
+
|
| 170 |
+
fig = go.Figure(data=[
|
| 171 |
+
go.Bar(name='Revenue', x=['Financial Summary'], y=[data['total_revenue']], marker_color='green'),
|
| 172 |
+
go.Bar(name='Expenses', x=['Financial Summary'], y=[data['total_expenses']], marker_color='red'),
|
| 173 |
+
go.Bar(name='Profit', x=['Financial Summary'], y=[data['profit']], marker_color='blue')
|
| 174 |
+
])
|
| 175 |
+
|
| 176 |
+
fig.update_layout(
|
| 177 |
+
title='Financial Overview',
|
| 178 |
+
barmode='group',
|
| 179 |
+
height=300
|
| 180 |
+
)
|
| 181 |
+
|
| 182 |
+
return fig
|
| 183 |
+
|
| 184 |
+
def create_transaction_chart(self, data):
|
| 185 |
+
"""Create transaction count pie chart."""
|
| 186 |
+
import plotly.graph_objects as go
|
| 187 |
+
|
| 188 |
+
fig = go.Figure(data=[go.Pie(
|
| 189 |
+
labels=['Purchases', 'Sales'],
|
| 190 |
+
values=[data['total_purchases'], data['total_sales']],
|
| 191 |
+
marker_colors=['lightcoral', 'lightgreen']
|
| 192 |
+
)])
|
| 193 |
+
|
| 194 |
+
fig.update_layout(
|
| 195 |
+
title='Transaction Distribution',
|
| 196 |
+
height=300
|
| 197 |
+
)
|
| 198 |
+
|
| 199 |
+
return fig
|
| 200 |
+
|
| 201 |
+
def create_top_products_chart(self, data):
|
| 202 |
+
"""Create top products bar chart."""
|
| 203 |
+
import plotly.graph_objects as go
|
| 204 |
+
|
| 205 |
+
if not data['top_products']:
|
| 206 |
+
fig = go.Figure()
|
| 207 |
+
fig.add_annotation(text="No product data available",
|
| 208 |
+
xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
|
| 209 |
+
fig.update_layout(title='Top Products', height=300)
|
| 210 |
+
return fig
|
| 211 |
+
|
| 212 |
+
products = [row[0] for row in data['top_products']]
|
| 213 |
+
quantities = [row[1] for row in data['top_products']]
|
| 214 |
+
|
| 215 |
+
fig = go.Figure(data=[
|
| 216 |
+
go.Bar(x=products, y=quantities, marker_color='skyblue')
|
| 217 |
+
])
|
| 218 |
+
|
| 219 |
+
fig.update_layout(
|
| 220 |
+
title='Top Products by Quantity',
|
| 221 |
+
xaxis_title='Products',
|
| 222 |
+
yaxis_title='Total Quantity',
|
| 223 |
+
height=300
|
| 224 |
+
)
|
| 225 |
+
|
| 226 |
+
return fig
|
| 227 |
+
|
| 228 |
+
def structured_purchase(self, product, quantity, supplier, unit_price):
|
| 229 |
+
"""Handle structured purchase entry."""
|
| 230 |
+
if not all([product, quantity, supplier, unit_price]):
|
| 231 |
+
return "", [("System", "β οΈ Please fill in all fields for the purchase.")], ""
|
| 232 |
+
|
| 233 |
+
message = f"Add a purchase of {quantity} {product} from {supplier} at β¬{unit_price} each"
|
| 234 |
+
request = ChatbotRequest(message=message)
|
| 235 |
+
response = self.chatbot.process_message(request)
|
| 236 |
+
|
| 237 |
+
history = [("Purchase Entry", message), ("System", f"β
{response.response}")]
|
| 238 |
+
return "", history, "Purchase recorded successfully!"
|
| 239 |
+
|
| 240 |
+
def structured_sale(self, product, quantity, customer, unit_price):
|
| 241 |
+
"""Handle structured sale entry."""
|
| 242 |
+
if not all([product, quantity, customer, unit_price]):
|
| 243 |
+
return "", [("System", "β οΈ Please fill in all fields for the sale.")], ""
|
| 244 |
+
|
| 245 |
+
message = f"Sold {quantity} {product} to {customer} at β¬{unit_price} each"
|
| 246 |
+
request = ChatbotRequest(message=message)
|
| 247 |
+
response = self.chatbot.process_message(request)
|
| 248 |
+
|
| 249 |
+
history = [("Sale Entry", message), ("System", f"β
{response.response}")]
|
| 250 |
+
return "", history, "Sale recorded successfully!"
|
| 251 |
+
|
| 252 |
+
def search_records(self, search_query, search_type):
|
| 253 |
+
"""Handle structured search."""
|
| 254 |
+
if not search_query:
|
| 255 |
+
return [("System", "β οΈ Please enter a search query.")]
|
| 256 |
+
|
| 257 |
+
if search_type == "Products":
|
| 258 |
+
message = f"Find {search_query}"
|
| 259 |
+
elif search_type == "Suppliers":
|
| 260 |
+
message = f"Search supplier {search_query}"
|
| 261 |
+
elif search_type == "Customers":
|
| 262 |
+
message = f"Search customer {search_query}"
|
| 263 |
+
else:
|
| 264 |
+
message = f"Search {search_query}"
|
| 265 |
+
|
| 266 |
+
request = ChatbotRequest(message=message)
|
| 267 |
+
response = self.chatbot.process_message(request)
|
| 268 |
+
|
| 269 |
+
return [("Search Query", message), ("Results", response.response)]
|
| 270 |
+
|
| 271 |
+
    def create_interface(self) -> gr.Interface:
        """Create and configure the Gradio interface.

        Builds a four-tab Blocks app (Dashboard, AI Chat, Transactions,
        Search & Reports, plus Help & Settings) and wires all event handlers.

        NOTE(review): the annotation says gr.Interface but the function
        actually returns the gr.Blocks context object — confirm and align.
        """

        with gr.Blocks(
            title="Business AI Assistant",
            theme=gr.themes.Default()
        ) as interface:

            # Header
            gr.Markdown("# πΌ Business AI Assistant")
            gr.Markdown("**Intelligent transaction management and business intelligence platform**")

            # Main tabbed interface
            with gr.Tabs() as tabs:

                # Dashboard Tab: metrics, charts and quick-action buttons.
                with gr.Tab("π Dashboard"):
                    # Key Metrics Row (read-only number widgets)
                    with gr.Row():
                        metrics_purchases = gr.Number(label="Total Purchases", interactive=False)
                        metrics_sales = gr.Number(label="Total Sales", interactive=False)
                        metrics_revenue = gr.Number(label="Revenue (β¬)", interactive=False)
                        metrics_profit = gr.Number(label="Profit (β¬)", interactive=False)

                    # Charts Row
                    with gr.Row():
                        with gr.Column():
                            financial_chart = gr.Plot(label="Financial Overview")
                        with gr.Column():
                            transaction_chart = gr.Plot(label="Transaction Distribution")

                    with gr.Row():
                        with gr.Column():
                            products_chart = gr.Plot(label="Top Products")
                        with gr.Column():
                            # Recent Transactions Table
                            recent_table = gr.Dataframe(
                                headers=["Type", "Product", "Qty", "Amount (β¬)", "Partner"],
                                datatype=["str", "str", "number", "number", "str"],
                                label="Recent Transactions",
                            )

                    # Action Buttons
                    with gr.Row():
                        refresh_dashboard = gr.Button("π Refresh Data", variant="secondary")
                        dash_new_purchase = gr.Button("β New Purchase", variant="primary")
                        dash_new_sale = gr.Button("π° New Sale", variant="primary")
                        # NOTE(review): "outline" is not a documented Button
                        # variant (primary/secondary/stop) — verify it renders
                        # as intended on the installed Gradio version.
                        dash_search = gr.Button("π Search Records", variant="outline")

                # Chat Tab: free-form conversation with the chatbot backend.
                with gr.Tab("π¬ AI Chat"):
                    gr.Markdown("### Conversational Business Assistant")
                    gr.Markdown("*Ask questions, add transactions, search records, or get insights in natural language*")

                    chatbot_ui = gr.Chatbot(
                        value=[],
                        height=500,
                        label="Conversation",
                        show_label=False,
                        container=True,
                        show_copy_button=True
                    )

                    with gr.Row():
                        msg_input = gr.Textbox(
                            placeholder="Ask me anything about your business... (e.g., 'Show recent sales', 'Add 10 laptops from TechMart')",
                            label="Message",
                            lines=2,
                            max_lines=4,
                            scale=5
                        )
                        send_btn = gr.Button("Send", variant="primary", scale=1)

                    with gr.Row():
                        clear_chat_btn = gr.Button("Clear Chat", variant="secondary")

                        # Example prompts (pre-fill the input box on click)
                        example_1 = gr.Button("π‘ Example: Add Purchase", variant="outline", size="sm")
                        example_2 = gr.Button("π‘ Example: Search Products", variant="outline", size="sm")
                        example_3 = gr.Button("π‘ Example: View Transactions", variant="outline", size="sm")

                # Transactions Tab: structured purchase/sale entry forms.
                with gr.Tab("π Transactions"):
                    with gr.Row():
                        # Purchase Form
                        with gr.Column():
                            gr.Markdown("### β Add Purchase")
                            purchase_product = gr.Textbox(label="Product", placeholder="e.g., Laptops")
                            purchase_quantity = gr.Number(label="Quantity", value=1, minimum=1)
                            purchase_supplier = gr.Textbox(label="Supplier", placeholder="e.g., TechMart")
                            purchase_price = gr.Number(label="Unit Price (β¬)", value=0.00, minimum=0)
                            purchase_btn = gr.Button("Add Purchase", variant="primary")
                            purchase_status = gr.Markdown("")

                        # Sale Form
                        with gr.Column():
                            gr.Markdown("### π° Add Sale")
                            sale_product = gr.Textbox(label="Product", placeholder="e.g., USB Drives")
                            sale_quantity = gr.Number(label="Quantity", value=1, minimum=1)
                            sale_customer = gr.Textbox(label="Customer", placeholder="e.g., ABC Corp")
                            sale_price = gr.Number(label="Unit Price (β¬)", value=0.00, minimum=0)
                            sale_btn = gr.Button("Add Sale", variant="primary")
                            sale_status = gr.Markdown("")

                    # Transaction Results (log of structured entries)
                    gr.Markdown("### Transaction Results")
                    transaction_results = gr.Chatbot(
                        value=[],
                        height=300,
                        label="Transaction Log",
                        show_copy_button=True
                    )

                # Search & Reports Tab: structured search over all records.
                with gr.Tab("π Search & Reports"):
                    gr.Markdown("### Advanced Search")

                    with gr.Row():
                        search_query = gr.Textbox(
                            label="Search Query",
                            placeholder="Enter product name, supplier, customer, or keywords...",
                            scale=3
                        )
                        search_type = gr.Dropdown(
                            choices=["All Records", "Products", "Suppliers", "Customers", "Transactions"],
                            value="All Records",
                            label="Search Type",
                            scale=1
                        )
                        search_btn = gr.Button("Search", variant="primary", scale=1)

                    # Search Results
                    search_results = gr.Chatbot(
                        value=[],
                        height=400,
                        label="Search Results",
                        show_copy_button=True
                    )

                    # Quick Search Buttons
                    with gr.Row():
                        gr.Markdown("### Quick Searches")
                    with gr.Row():
                        recent_purchases = gr.Button("Recent Purchases", variant="outline")
                        recent_sales = gr.Button("Recent Sales", variant="outline")
                        # NOTE(review): top_products and supplier_summary have
                        # no click handlers wired below — currently inert.
                        top_products = gr.Button("Top Products", variant="outline")
                        supplier_summary = gr.Button("Supplier Summary", variant="outline")

                # Help & Settings Tab: static documentation.
                with gr.Tab("β Help & Settings"):
                    with gr.Row():
                        with gr.Column():
                            gr.Markdown("""
                            ### π User Guide

                            **π― Getting Started**
                            - Use the **Dashboard** for quick overview and actions
                            - **AI Chat** for natural language interactions
                            - **Transactions** for structured data entry
                            - **Search & Reports** for finding information

                            **π¬ Chat Examples**
                            - "Add a purchase of 20 USB drives from TechMart at β¬5 each"
                            - "Show me recent sales to ABC Corp"
                            - "Find all laptop transactions"
                            - "What's my total revenue this month?"

                            **π Features**
                            - Smart entity extraction from natural language
                            - Intelligent search across all records
                            - Transaction categorization and analysis
                            - Export capabilities for reports
                            """)

                        with gr.Column():
                            gr.Markdown("""
                            ### βοΈ System Information

                            **Status**: π’ Online and Ready

                            **Capabilities**:
                            - ✅ Natural language processing
                            - ✅ Transaction management
                            - ✅ Intelligent search
                            - ✅ Data export
                            - ✅ Real-time analytics

                            **Supported Operations**:
                            - Purchase tracking
                            - Sales recording
                            - Inventory searches
                            - Supplier management
                            - Customer records
                            - Financial reporting

                            **Data Security**: π All data processed locally
                            """)

                    gr.Markdown("---")
                    gr.Markdown("*Business AI Assistant v1.0 β’ Built with Gradio β’ Powered by OpenAI*")

            # Event Handlers

            # Dashboard events
            def load_dashboard():
                # Fetch aggregates and render the three dashboard charts plus
                # the recent-transactions table in one round trip.
                data = self.get_dashboard_data()

                # Create charts
                financial_fig = self.create_revenue_chart(data)
                transaction_fig = self.create_transaction_chart(data)
                products_fig = self.create_top_products_chart(data)

                # Prepare recent transactions table
                recent_data = []
                for row in data['recent_transactions']:
                    recent_data.append([
                        row[0].title(),  # transaction_type
                        row[1],  # product
                        row[2],  # quantity
                        f"β¬{row[3]:.2f}",  # total_amount
                        row[4] or "N/A"  # partner (supplier/customer)
                    ])

                # Order must match the `outputs` lists wired below.
                return (
                    data['total_purchases'],
                    data['total_sales'],
                    data['total_revenue'],
                    data['profit'],
                    financial_fig,
                    transaction_fig,
                    products_fig,
                    recent_data
                )

            refresh_dashboard.click(
                fn=load_dashboard,
                outputs=[
                    metrics_purchases, metrics_sales, metrics_revenue, metrics_profit,
                    financial_chart, transaction_chart, products_chart, recent_table
                ]
            )

            # Chat events (Enter key and Send button share one handler)
            msg_input.submit(
                fn=self.process_message,
                inputs=[msg_input, chatbot_ui],
                outputs=[msg_input, chatbot_ui]
            )

            send_btn.click(
                fn=self.process_message,
                inputs=[msg_input, chatbot_ui],
                outputs=[msg_input, chatbot_ui]
            )

            clear_chat_btn.click(
                fn=self.clear_chat,
                outputs=[msg_input, chatbot_ui]
            )

            # Example prompts: pre-fill the input box and clear the chat.
            example_1.click(
                fn=lambda: ("Add a purchase of 10 laptops from TechMart at β¬800 each", []),
                outputs=[msg_input, chatbot_ui]
            )

            example_2.click(
                fn=lambda: ("Find all USB drive transactions", []),
                outputs=[msg_input, chatbot_ui]
            )

            example_3.click(
                fn=lambda: ("Show recent transactions", []),
                outputs=[msg_input, chatbot_ui]
            )

            # Transaction events
            purchase_btn.click(
                fn=self.structured_purchase,
                inputs=[purchase_product, purchase_quantity, purchase_supplier, purchase_price],
                outputs=[purchase_product, transaction_results, purchase_status]
            )

            sale_btn.click(
                fn=self.structured_sale,
                inputs=[sale_product, sale_quantity, sale_customer, sale_price],
                outputs=[sale_product, transaction_results, sale_status]
            )

            # Search events
            search_btn.click(
                fn=self.search_records,
                inputs=[search_query, search_type],
                outputs=[search_results]
            )

            # Quick search events
            recent_purchases.click(
                fn=lambda: self.search_records("recent purchases", "Transactions"),
                outputs=[search_results]
            )

            recent_sales.click(
                fn=lambda: self.search_records("recent sales", "Transactions"),
                outputs=[search_results]
            )

            # Dashboard navigation events
            # NOTE(review): these lambdas return gr.Tabs.update(...) but no
            # `outputs` component is declared, so the update is discarded and
            # the buttons are likely no-ops; also gr.Tabs.update is removed in
            # Gradio 4.x. Confirm against the installed Gradio version.
            dash_new_purchase.click(fn=lambda: gr.Tabs.update(selected=2))
            dash_new_sale.click(fn=lambda: gr.Tabs.update(selected=2))
            dash_search.click(fn=lambda: gr.Tabs.update(selected=3))

            # Load initial dashboard data on page open.
            interface.load(
                fn=load_dashboard,
                outputs=[
                    metrics_purchases, metrics_sales, metrics_revenue, metrics_profit,
                    financial_chart, transaction_chart, products_chart, recent_table
                ]
            )

        return interface
|
| 593 |
+
|
| 594 |
+
def launch(self, **kwargs):
|
| 595 |
+
"""Launch the Gradio interface."""
|
| 596 |
+
interface = self.create_interface()
|
| 597 |
+
|
| 598 |
+
# Default launch configuration
|
| 599 |
+
launch_config = {
|
| 600 |
+
'server_name': '0.0.0.0',
|
| 601 |
+
'server_port': 7860,
|
| 602 |
+
'share': False,
|
| 603 |
+
'debug': False,
|
| 604 |
+
'show_error': True,
|
| 605 |
+
'quiet': False
|
| 606 |
+
}
|
| 607 |
+
|
| 608 |
+
# Update with any provided kwargs
|
| 609 |
+
launch_config.update(kwargs)
|
| 610 |
+
|
| 611 |
+
print("π Starting Gradio GUI for Business Chatbot...")
|
| 612 |
+
print(f"π± Access the interface at: http://localhost:{launch_config['server_port']}")
|
| 613 |
+
print("π‘ Press Ctrl+C to stop the server")
|
| 614 |
+
|
| 615 |
+
try:
|
| 616 |
+
interface.launch(**launch_config)
|
| 617 |
+
finally:
|
| 618 |
+
# Clean up chatbot resources
|
| 619 |
+
self.chatbot.close()
|
| 620 |
+
|
| 621 |
+
def main():
    """Entry point: construct the GUI wrapper and start the web server."""
    GradioInterface().launch()


if __name__ == "__main__":
    main()
|
main.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 6 |
+
|
| 7 |
+
from chatbot import Chatbot
|
| 8 |
+
from models import ChatbotRequest
|
| 9 |
+
|
| 10 |
+
def main():
    """Run the interactive CLI loop for the business chatbot.

    Prints a usage banner, then reads user messages until 'quit'/'exit'/'bye'
    or Ctrl+C, echoing the bot reply plus any extracted metadata. Chatbot
    resources are always released on exit.
    """
    banner = [
        "🤖 Business Chatbot with SQL Database and Vector Store",
        "=" * 60,
        "I can help you with:",
        "• Adding purchases: 'Add a purchase of 20 USB drives from TechMart at €5 each'",
        "• Adding sales: 'Sold 10 laptops to John Smith at €800 each'",
        "• Viewing recent transactions: 'Show recent transactions'",
        "• Searching: 'Find USB drives' or 'Search TechMart'",
        "• Storing general info: 'Meeting with supplier scheduled for next week'",
        "• Type 'quit' to exit",
        "=" * 60,
    ]
    for line in banner:
        print(line)

    chatbot = Chatbot()
    try:
        while True:
            user_input = input("\n💬 You: ").strip()

            if user_input.lower() in ['quit', 'exit', 'bye']:
                print("👋 Goodbye!")
                break
            if not user_input:
                continue

            # Process the message through the full pipeline.
            response = chatbot.process_message(ChatbotRequest(message=user_input))
            print(f"\n🤖 Bot: {response.response}")

            # Surface optional diagnostics attached to the response.
            entities = response.entities_extracted
            if entities:
                print(f"📊 Extracted: {entities.transaction_type} - {entities.product} ({entities.quantity}x) - €{entities.total_amount}")
            if response.vector_stored:
                print("💾 Information stored in vector database for future semantic search")
            if response.intent_detected:
                print(f"🎯 Intent: {response.intent_detected} (confidence: {response.intent_confidence:.2f})")
            if response.awaiting_clarification:
                print("⏳ Waiting for your response to complete the transaction...")

    except KeyboardInterrupt:
        print("\n👋 Goodbye!")
    finally:
        chatbot.close()


if __name__ == "__main__":
    main()
|
populate_sample_data.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Sample data population script for the LLM Chatbot database.
|
| 4 |
+
This script adds realistic sample transactions to help test the dashboard.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import sys
|
| 8 |
+
import os
|
| 9 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 10 |
+
|
| 11 |
+
from chatbot import Chatbot
|
| 12 |
+
from models import ChatbotRequest
|
| 13 |
+
|
| 14 |
+
def populate_sample_data():
    """Feed a fixed list of realistic purchase/sale messages through the chatbot.

    Each message is processed like normal user input; a reply containing
    'recorded' counts as success. A summary is printed at the end and the
    chatbot is always closed.
    """
    print("🔧 Populating database with sample transactions...")

    sample_transactions = [
        # Purchases
        "Add a purchase of 100 wireless mice from TechMart at €25 each",
        "Add a purchase of 50 laptop stands from Office Supplies Co at €35 each",
        "Add a purchase of 30 webcams from Electronics Plus at €80 each",
        "Add a purchase of 75 desk lamps from Office Supplies Co at €40 each",
        "Add a purchase of 20 printers from TechMart at €200 each",
        "Add a purchase of 60 surge protectors from Electronics Plus at €15 each",
        "Add a purchase of 40 ethernet cables from TechMart at €12 each",
        "Add a purchase of 15 projectors from Electronics Plus at €450 each",

        # Sales
        "Sold 80 wireless mice to StartupTech Corp at €35 each",
        "Sold 30 laptop stands to Creative Agency Ltd at €50 each",
        "Sold 25 webcams to Remote Work Solutions at €120 each",
        "Sold 50 desk lamps to Modern Office Inc at €55 each",
        "Sold 12 printers to Small Business Hub at €280 each",
        "Sold 45 surge protectors to Tech Solutions Ltd at €25 each",
        "Sold 35 ethernet cables to Network Systems Corp at €18 each",
        "Sold 10 projectors to Conference Center Co at €650 each",
        "Sold 5 laptops to Freelance Collective at €1400 each",
        "Sold 25 monitors to Design Studio Ltd at €380 each",
    ]

    chatbot = Chatbot()
    try:
        successful_transactions = 0
        failed_transactions = 0

        for text in sample_transactions:
            try:
                print(f"📝 Processing: {text}")
                reply = chatbot.process_message(ChatbotRequest(message=text))

                # The DB manager confirms bookings with a '...recorded' message.
                if "recorded" in reply.response.lower():
                    successful_transactions += 1
                    print(f"✅ Success: {reply.response}")
                else:
                    failed_transactions += 1
                    print(f"⚠️ Warning: {reply.response}")
            except Exception as exc:
                failed_transactions += 1
                print(f"❌ Error processing transaction: {exc}")

        print("\n📊 Summary:")
        print(f"✅ Successful transactions: {successful_transactions}")
        print(f"❌ Failed transactions: {failed_transactions}")
        print(f"🎯 Total attempted: {len(sample_transactions)}")

        if successful_transactions > 0:
            print(f"\n🎉 Database populated with {successful_transactions} sample transactions!")
            print("💡 You can now run the dashboard to see meaningful data.")
            print("🚀 Run 'python run_gui.py' to launch the Gradio interface.")
    finally:
        chatbot.close()


if __name__ == "__main__":
    populate_sample_data()
|
requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openai>=1.0.0
|
| 2 |
+
sqlalchemy
|
| 3 |
+
# NOTE: sqlite3 is part of the Python standard library; the PyPI package "db-sqlite3" is unnecessary and was removed
|
| 4 |
+
pandas>=2.0.0
|
| 5 |
+
numpy>=1.24.0
|
| 6 |
+
sentence-transformers>=2.2.0
|
| 7 |
+
chromadb>=0.4.0
|
| 8 |
+
spacy>=3.6.0
|
| 9 |
+
python-dateutil>=2.8.0
|
| 10 |
+
pydantic>=2.0.0
|
| 11 |
+
fastapi>=0.100.0
|
| 12 |
+
uvicorn>=0.23.0
|
| 13 |
+
gradio>=4.0.0
|
| 14 |
+
pyyaml>=6.0
|
| 15 |
+
plotly>=5.0.0
|
reset_database.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Database reset script for the LLM Chatbot.
|
| 4 |
+
This script clears all transaction data while keeping the basic structure intact.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import sqlite3
|
| 8 |
+
import os
|
| 9 |
+
|
| 10 |
+
def reset_database(db_path: str = "chatbot.db") -> None:
    """Reset the chatbot database by clearing all transaction data.

    Deletes every row from ``sales``, ``purchases`` and ``customers``,
    resets the SQLite auto-increment counters for the transaction tables,
    and leaves the schema (plus the ``suppliers``/``products`` tables)
    untouched. Progress and errors are reported on stdout.

    Args:
        db_path: Path to the SQLite database file. Defaults to "chatbot.db"
            (the value previously hard-coded), so existing callers are
            unaffected.
    """
    if not os.path.exists(db_path):
        print(f"❌ Database file '{db_path}' not found.")
        return

    # BUG FIX: previously `conn` was only assigned inside the try block, so
    # if sqlite3.connect() raised, the finally clause hit a NameError that
    # masked the real error. Initialise it first and guard the close.
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        print("🗑️ Clearing transaction data...")

        # Clear all transaction data.
        cursor.execute("DELETE FROM sales")
        cursor.execute("DELETE FROM purchases")

        # Reset auto-increment counters for the cleared tables.
        cursor.execute("DELETE FROM sqlite_sequence WHERE name IN ('sales', 'purchases')")

        # Clear customers created during testing (suppliers/products are
        # intentionally preserved).
        cursor.execute("DELETE FROM customers")

        conn.commit()

        # Report post-reset row counts so the user can confirm success.
        cursor.execute("SELECT COUNT(*) FROM purchases")
        purchases_count = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM sales")
        sales_count = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM customers")
        customers_count = cursor.fetchone()[0]

        print("✅ Database reset complete!")
        print(f"   - Purchases: {purchases_count}")
        print(f"   - Sales: {sales_count}")
        print(f"   - Customers: {customers_count}")
        print("💡 You can now add new sample data using 'python populate_sample_data.py'")

    except Exception as e:
        # Best-effort tool: report the failure instead of crashing.
        print(f"❌ Error resetting database: {e}")
    finally:
        if conn is not None:
            conn.close()


if __name__ == "__main__":
    response = input("⚠️ This will delete all transaction data. Continue? (y/N): ")
    if response.lower() in ['y', 'yes']:
        reset_database()
    else:
        print("🚫 Operation cancelled.")
|
run_gui.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
GUI Launcher for LLM Business Chatbot
|
| 5 |
+
|
| 6 |
+
This script launches the Gradio web interface for the chatbot application.
|
| 7 |
+
It provides a web-based GUI that wraps around the existing CLI chatbot
|
| 8 |
+
without modifying any of the original code.
|
| 9 |
+
|
| 10 |
+
Usage:
|
| 11 |
+
python run_gui.py # Launch with default settings
|
| 12 |
+
python run_gui.py --port 8080 # Launch on custom port
|
| 13 |
+
python run_gui.py --share # Create public sharing link
|
| 14 |
+
python run_gui.py --debug # Enable debug mode
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import sys
|
| 18 |
+
import os
|
| 19 |
+
import argparse
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
|
| 22 |
+
# Add gui directory to path
|
| 23 |
+
gui_dir = Path(__file__).parent / "gui"
|
| 24 |
+
sys.path.append(str(gui_dir))
|
| 25 |
+
|
| 26 |
+
def main():
    """Parse command-line options and launch the Gradio web interface.

    Exits with status 1 on missing dependencies or launch failure, and
    status 0 on Ctrl+C.
    """
    parser = argparse.ArgumentParser(
        description="Launch Gradio GUI for LLM Business Chatbot",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python run_gui.py                  # Default: localhost:7860
  python run_gui.py --port 8080      # Custom port
  python run_gui.py --share          # Public sharing link
  python run_gui.py --host 0.0.0.0   # Accept external connections
  python run_gui.py --debug          # Enable debug mode
        """,
    )
    parser.add_argument("--host", default="0.0.0.0",
                        help="Host address to bind to (default: 0.0.0.0)")
    parser.add_argument("--port", type=int, default=7860,
                        help="Port number to run the server on (default: 7860)")
    parser.add_argument("--share", action="store_true",
                        help="Create a public sharing link via Gradio")
    parser.add_argument("--debug", action="store_true",
                        help="Enable debug mode")
    parser.add_argument("--quiet", action="store_true",
                        help="Suppress startup messages")
    args = parser.parse_args()

    # Startup banner (suppressed with --quiet).
    if not args.quiet:
        print("=" * 70)
        print("🤖 LLM Business Chatbot - Gradio GUI")
        print("=" * 70)
        print("🚀 Starting web interface...")
        print(f"🌐 Host: {args.host}")
        print(f"🔌 Port: {args.port}")
        print(f"🔗 Share: {'Yes' if args.share else 'No'}")
        print(f"🐛 Debug: {'Yes' if args.debug else 'No'}")
        print("-" * 70)

    try:
        # Imported lazily so a missing dependency yields a friendly message.
        from gradio_interface import GradioInterface

        GradioInterface().launch(
            server_name=args.host,
            server_port=args.port,
            share=args.share,
            debug=args.debug,
            quiet=args.quiet,
            show_error=True,
        )
    except ImportError as e:
        print("❌ Error: Missing dependencies. Please install requirements:")
        print("   pip install -r requirements.txt")
        print(f"   Error details: {e}")
        sys.exit(1)
    except KeyboardInterrupt:
        if not args.quiet:
            print("\n👋 Shutting down Gradio interface...")
        sys.exit(0)
    except Exception as e:
        print(f"❌ Error launching GUI: {e}")
        if args.debug:
            import traceback
            traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
|
src/chatbot.py
ADDED
|
@@ -0,0 +1,402 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, Any, Optional
|
| 2 |
+
from entity_extractor import EntityExtractor
|
| 3 |
+
from database_manager import DatabaseManager
|
| 4 |
+
from vector_store import VectorStore
|
| 5 |
+
from nl_to_sql import NaturalLanguageToSQL
|
| 6 |
+
from intent_classifier import IntentClassifier, IntentType
|
| 7 |
+
from rag_handler import RAGHandler
|
| 8 |
+
from transaction_clarifier import TransactionClarifier, ClarificationStatus
|
| 9 |
+
from models import ChatbotRequest, ChatbotResponse, PendingTransaction
|
| 10 |
+
|
| 11 |
+
class Chatbot:
|
| 12 |
+
    def __init__(self):
        """Wire up all chatbot sub-systems.

        Each collaborator owns one concern: entity extraction from free
        text, SQL persistence, vector-store persistence, NL→SQL
        translation, intent classification, RAG answering, and interactive
        transaction clarification.
        """
        self.entity_extractor = EntityExtractor()
        self.db_manager = DatabaseManager()
        self.vector_store = VectorStore()
        self.nl_to_sql = NaturalLanguageToSQL()
        self.intent_classifier = IntentClassifier()
        self.rag_handler = RAGHandler()
        self.transaction_clarifier = TransactionClarifier()

        # Store pending transactions by session_id.
        # In-memory only (lost on restart); at most one pending transaction
        # per session — see process_message / _handle_transaction_clarification.
        self.pending_transactions: Dict[str, PendingTransaction] = {}
|
| 23 |
+
|
| 24 |
+
def process_message(self, request: ChatbotRequest) -> ChatbotResponse:
|
| 25 |
+
"""Process a user message and return appropriate response"""
|
| 26 |
+
message = request.message.strip()
|
| 27 |
+
session_id = request.session_id or "default"
|
| 28 |
+
|
| 29 |
+
# Check if we're waiting for clarification on a pending transaction
|
| 30 |
+
if session_id in self.pending_transactions:
|
| 31 |
+
print("A transaction is pending...")
|
| 32 |
+
return self._handle_transaction_clarification(message, session_id)
|
| 33 |
+
|
| 34 |
+
# Classify intent using OpenAI
|
| 35 |
+
intent_result = self.intent_classifier.classify_intent(message)
|
| 36 |
+
|
| 37 |
+
print(f"π― Intent: {intent_result.intent.value} (confidence: {intent_result.confidence:.2f})")
|
| 38 |
+
print(f"π Reasoning: {intent_result.reasoning}")
|
| 39 |
+
|
| 40 |
+
# Route to appropriate handler based on classified intent
|
| 41 |
+
if intent_result.intent == IntentType.TRANSACTION:
|
| 42 |
+
response = self._handle_transaction_request(message, session_id)
|
| 43 |
+
elif intent_result.intent == IntentType.QUERY:
|
| 44 |
+
response = self._handle_query_request(message)
|
| 45 |
+
elif intent_result.intent == IntentType.SEMANTIC_SEARCH:
|
| 46 |
+
response = self._handle_search_request(message)
|
| 47 |
+
else: # GENERAL_INFO
|
| 48 |
+
response = self._handle_general_information(message)
|
| 49 |
+
|
| 50 |
+
# Add intent information to response
|
| 51 |
+
response.intent_detected = intent_result.intent.value
|
| 52 |
+
response.intent_confidence = intent_result.confidence
|
| 53 |
+
|
| 54 |
+
return response
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
    def _handle_transaction_request(self, message: str, session_id: str) -> ChatbotResponse:
        """Handle transaction requests (purchases/sales) with interactive clarification.

        Extracts structured entities from the message, then either completes
        the transaction immediately, parks it as a pending transaction while
        asking the user for missing fields, or reports a cancellation.
        """
        try:
            # Extract structured fields (product, quantity, price, ...) from free text.
            entities = self.entity_extractor.extract_entities(message)

            # Check if the transaction has all required fields.
            status, clarification = self.transaction_clarifier.analyze_transaction_completeness(entities)

            if status == ClarificationStatus.COMPLETE:
                # Transaction is complete, process it.
                return self._complete_transaction(entities, message)

            elif status == ClarificationStatus.NEEDS_CLARIFICATION:
                # Park the partial transaction keyed by session so the next
                # message from this session is routed to
                # _handle_transaction_clarification by process_message.
                pending = PendingTransaction(
                    entities=entities,
                    missing_fields=clarification.missing_fields,
                    session_id=session_id,
                    original_message=message
                )
                self.pending_transactions[session_id] = pending

                clarification_message = self.transaction_clarifier.format_clarification_message(clarification)

                return ChatbotResponse(
                    response=clarification_message,
                    entities_extracted=entities,
                    awaiting_clarification=True
                )

            else:
                # Any other status aborts the transaction.
                return ChatbotResponse(
                    response="Transaction cancelled.",
                    entities_extracted=entities
                )

        except Exception as e:
            # NOTE(review): broad catch keeps the chat loop alive; the error
            # text is surfaced to the user instead of propagating.
            return ChatbotResponse(
                response=f"Error processing transaction: {str(e)}",
                sql_executed=None,
                entities_extracted=None,
                vector_stored=False
            )
|
| 101 |
+
|
| 102 |
+
    def _complete_transaction(self, entities, original_message: str) -> ChatbotResponse:
        """Complete a transaction that has all required information.

        Writes the transaction to the SQL database, then mirrors it into
        the vector store linked via the returned SQL transaction id so it
        is discoverable by later semantic searches.
        """
        try:
            # Process transaction in database and get the SQL transaction ID.
            transaction_id, result_message = self.db_manager.process_transaction(entities)

            # Store in vector store with the SQL transaction ID for linking.
            transaction_data = {
                "type": entities.transaction_type,
                "product": entities.product,
                "quantity": entities.quantity,
                "supplier": entities.supplier,
                "customer": entities.customer,
                "unit_price": entities.unit_price,
                "total": entities.total_amount
            }

            vector_stored = self.vector_store.add_transaction_event(
                transaction_data,
                original_message,
                sql_transaction_id=transaction_id
            )

            return ChatbotResponse(
                response=result_message,
                sql_executed="Transaction processed successfully",
                entities_extracted=entities,
                vector_stored=vector_stored
            )

        except Exception as e:
            # NOTE(review): the DB write may have succeeded while the vector
            # write failed; only the error text is reported to the user.
            return ChatbotResponse(
                response=f"Error completing transaction: {str(e)}",
                entities_extracted=entities
            )
|
| 137 |
+
|
| 138 |
+
    def _handle_transaction_clarification(self, message: str, session_id: str) -> ChatbotResponse:
        """Handle a user's answer to a transaction clarification question.

        Supports cancellation keywords, accumulates every clarification
        answer on the pending transaction, and either completes the
        transaction (passing the full original+clarification context along)
        or asks again for whatever is still missing. The pending entry is
        removed from ``self.pending_transactions`` on completion,
        cancellation, or error.
        """
        try:
            pending = self.pending_transactions.get(session_id)
            if not pending:
                return ChatbotResponse(
                    response="No pending transaction found. Please start a new transaction."
                )

            # Check if user wants to cancel.
            if message.lower() in ['cancel', 'quit', 'stop', 'abort']:
                del self.pending_transactions[session_id]
                return ChatbotResponse(
                    response="Transaction cancelled. You can start a new one anytime."
                )

            # Add this clarification response to the accumulated responses.
            pending.clarification_responses.append(message)

            # Merge the answer into the pending entities.
            updated_entities, is_complete = self.transaction_clarifier.process_clarification_response(
                pending.entities,
                pending.missing_fields,
                message
            )

            if is_complete:
                # Transaction is now complete: combine the original message
                # with all clarification responses for complete context.
                clarifications = "\n".join([f"Clarification {i+1}: {resp}" for i, resp in enumerate(pending.clarification_responses)])
                full_context = f"{pending.original_message}\n\n{clarifications}"
                del self.pending_transactions[session_id]
                return self._complete_transaction(updated_entities, full_context)
            else:
                # Still need more information — re-analyse what is missing.
                status, clarification = self.transaction_clarifier.analyze_transaction_completeness(updated_entities)

                if status == ClarificationStatus.NEEDS_CLARIFICATION:
                    # Update the pending transaction and ask again.
                    pending.entities = updated_entities
                    pending.missing_fields = clarification.missing_fields

                    clarification_message = self.transaction_clarifier.format_clarification_message(clarification)

                    return ChatbotResponse(
                        response=f"Thank you! I still need a bit more information:\n\n{clarification_message}",
                        entities_extracted=updated_entities,
                        awaiting_clarification=True
                    )
                else:
                    # Unexpected state (clarifier disagrees with is_complete):
                    # complete anyway, still including all clarification context.
                    clarifications = "\n".join([f"Clarification {i+1}: {resp}" for i, resp in enumerate(pending.clarification_responses)])
                    full_context = f"{pending.original_message}\n\n{clarifications}"
                    del self.pending_transactions[session_id]
                    return self._complete_transaction(updated_entities, full_context)

        except Exception as e:
            # Clean up on error so the session isn't stuck mid-clarification.
            if session_id in self.pending_transactions:
                del self.pending_transactions[session_id]

            return ChatbotResponse(
                response=f"Error processing your response: {str(e)}. Please start a new transaction."
            )
|
| 203 |
+
|
| 204 |
+
    def _handle_query_request(self, message: str) -> ChatbotResponse:
        """Handle query requests using an OpenAI LLM to generate SQL.

        Pipeline: NL→SQL conversion, SQL validation, execution, result
        formatting. Every failure path returns a ChatbotResponse carrying
        the generated SQL in ``sql_executed`` so callers can inspect what
        was attempted.
        """
        try:
            # Use OpenAI to convert natural language to SQL.
            sql_query, explanation = self.nl_to_sql.convert_to_sql(message)

            # Validate the generated SQL before touching the database.
            is_valid, validation_message = self.nl_to_sql.validate_sql(sql_query)

            if not is_valid:
                # Ask for a corrected-phrasing suggestion to show the user.
                suggestion = self.nl_to_sql.suggest_corrections(message, validation_message)
                return ChatbotResponse(
                    response=f"I couldn't process that query: {validation_message}\n\n{suggestion}",
                    sql_executed=sql_query
                )

            # Execute the SQL query.
            results = self.db_manager.query_data(sql_query)

            # Format and return results.
            if not results:
                return ChatbotResponse(
                    response="No results found for your query.",
                    sql_executed=sql_query
                )

            # query_data reports execution failures as a single-row
            # [{"error": ...}] payload — surface it with the SQL.
            if len(results) == 1 and "error" in results[0]:
                return ChatbotResponse(
                    response=f"Query execution error: {results[0]['error']}\n\nGenerated SQL: {sql_query}",
                    sql_executed=sql_query
                )

            # Format successful results.
            formatted_response = self._format_sql_results(results, explanation)

            return ChatbotResponse(
                response=formatted_response,
                sql_executed=sql_query
            )

        except Exception as e:
            return ChatbotResponse(response=f"Error processing query: {str(e)}")
|
| 247 |
+
|
| 248 |
+
def _handle_search_request(self, message: str) -> ChatbotResponse:
|
| 249 |
+
"""Handle semantic search requests using RAG"""
|
| 250 |
+
try:
|
| 251 |
+
# Enhance the search query for better retrieval
|
| 252 |
+
enhanced_query = self.rag_handler.enhance_search_query(message)
|
| 253 |
+
print(f"π Enhanced query: {enhanced_query}")
|
| 254 |
+
|
| 255 |
+
# Search vector store for similar events
|
| 256 |
+
results = self.vector_store.search_similar_events(enhanced_query, 8)
|
| 257 |
+
|
| 258 |
+
if not results:
|
| 259 |
+
return ChatbotResponse(response="I couldn't find any relevant information to answer your query.")
|
| 260 |
+
|
| 261 |
+
# Use RAG to generate an intelligent response
|
| 262 |
+
rag_response = self.rag_handler.generate_rag_response(message, results)
|
| 263 |
+
|
| 264 |
+
return ChatbotResponse(
|
| 265 |
+
response=rag_response,
|
| 266 |
+
vector_stored=False
|
| 267 |
+
)
|
| 268 |
+
|
| 269 |
+
except Exception as e:
|
| 270 |
+
return ChatbotResponse(response=f"Error processing your search: {str(e)}")
|
| 271 |
+
|
| 272 |
+
def _handle_general_information(self, message: str) -> ChatbotResponse:
|
| 273 |
+
"""Handle general information storage"""
|
| 274 |
+
try:
|
| 275 |
+
# Store in vector store
|
| 276 |
+
stored = self.vector_store.add_general_event(message, "general_info")
|
| 277 |
+
|
| 278 |
+
if stored:
|
| 279 |
+
return ChatbotResponse(
|
| 280 |
+
response="Information stored successfully. I can help you find similar information later.",
|
| 281 |
+
vector_stored=True
|
| 282 |
+
)
|
| 283 |
+
else:
|
| 284 |
+
return ChatbotResponse(
|
| 285 |
+
response="Information noted, but vector storage is not available.",
|
| 286 |
+
vector_stored=False
|
| 287 |
+
)
|
| 288 |
+
|
| 289 |
+
except Exception as e:
|
| 290 |
+
return ChatbotResponse(response=f"Error storing information: {str(e)}")
|
| 291 |
+
|
| 292 |
+
def _format_recent_transactions(self, data: Dict[str, list]) -> str:
|
| 293 |
+
"""Format recent transactions for display"""
|
| 294 |
+
response = "Recent Transactions:\n\n"
|
| 295 |
+
|
| 296 |
+
# Combine and sort all transactions
|
| 297 |
+
all_transactions = []
|
| 298 |
+
for purchase in data.get("purchases", []):
|
| 299 |
+
all_transactions.append(purchase)
|
| 300 |
+
for sale in data.get("sales", []):
|
| 301 |
+
all_transactions.append(sale)
|
| 302 |
+
|
| 303 |
+
# Sort by date
|
| 304 |
+
all_transactions.sort(key=lambda x: x.get("date", ""), reverse=True)
|
| 305 |
+
|
| 306 |
+
if not all_transactions:
|
| 307 |
+
return "No recent transactions found."
|
| 308 |
+
|
| 309 |
+
for transaction in all_transactions[:10]: # Show top 10
|
| 310 |
+
trans_type = transaction.get("type", "unknown").upper()
|
| 311 |
+
date = transaction.get("date", "")[:10] # Just the date part
|
| 312 |
+
|
| 313 |
+
if trans_type == "PURCHASE":
|
| 314 |
+
response += f"π {date} - PURCHASE: {transaction.get('quantity', 0)}x {transaction.get('product', 'Unknown')} from {transaction.get('supplier', 'Unknown')} - β¬{transaction.get('total_cost', 0)}\n"
|
| 315 |
+
else:
|
| 316 |
+
response += f"π° {date} - SALE: {transaction.get('quantity', 0)}x {transaction.get('product', 'Unknown')} to {transaction.get('customer', 'Unknown')} - β¬{transaction.get('total_amount', 0)}\n"
|
| 317 |
+
|
| 318 |
+
return response
|
| 319 |
+
|
| 320 |
+
def _format_search_results(self, results: list, search_term: str) -> str:
|
| 321 |
+
"""Format search results for display"""
|
| 322 |
+
if not results:
|
| 323 |
+
return f"No transactions found for '{search_term}'."
|
| 324 |
+
|
| 325 |
+
response = f"Found {len(results)} transaction(s) for '{search_term}':\n\n"
|
| 326 |
+
|
| 327 |
+
for transaction in results:
|
| 328 |
+
trans_type = transaction.get("type", "unknown").upper()
|
| 329 |
+
date = transaction.get("date", "")[:10]
|
| 330 |
+
|
| 331 |
+
if trans_type == "PURCHASE":
|
| 332 |
+
response += f"π {date} - {transaction.get('quantity', 0)}x {transaction.get('product', 'Unknown')} from {transaction.get('supplier', 'Unknown')} - β¬{transaction.get('total', 0)}\n"
|
| 333 |
+
else:
|
| 334 |
+
response += f"π° {date} - {transaction.get('quantity', 0)}x {transaction.get('product', 'Unknown')} to {transaction.get('customer', 'Unknown')} - β¬{transaction.get('total', 0)}\n"
|
| 335 |
+
|
| 336 |
+
return response
|
| 337 |
+
|
| 338 |
+
def _format_sql_results(self, results: list, explanation: str) -> str:
|
| 339 |
+
"""Format SQL query results for display"""
|
| 340 |
+
response = f"π Query Results:\n{explanation}\n\n"
|
| 341 |
+
|
| 342 |
+
if not results:
|
| 343 |
+
return response + "No data found."
|
| 344 |
+
|
| 345 |
+
# Handle single value results (like COUNT, SUM)
|
| 346 |
+
if len(results) == 1 and len(results[0]) == 1:
|
| 347 |
+
key, value = list(results[0].items())[0]
|
| 348 |
+
return response + f"**{key.replace('_', ' ').title()}:** {value}"
|
| 349 |
+
|
| 350 |
+
# Handle multiple rows
|
| 351 |
+
response += "```\n"
|
| 352 |
+
|
| 353 |
+
# Add headers
|
| 354 |
+
if results:
|
| 355 |
+
headers = list(results[0].keys())
|
| 356 |
+
response += " | ".join(f"{header.replace('_', ' ').title():<15}" for header in headers) + "\n"
|
| 357 |
+
response += "-" * (len(headers) * 17) + "\n"
|
| 358 |
+
|
| 359 |
+
# Add data rows
|
| 360 |
+
for row in results[:20]: # Limit to first 20 rows
|
| 361 |
+
formatted_row = []
|
| 362 |
+
for value in row.values():
|
| 363 |
+
if value is None:
|
| 364 |
+
formatted_row.append("N/A".ljust(15))
|
| 365 |
+
elif isinstance(value, float):
|
| 366 |
+
formatted_row.append(f"{value:.2f}".ljust(15))
|
| 367 |
+
else:
|
| 368 |
+
formatted_row.append(str(value)[:15].ljust(15))
|
| 369 |
+
response += " | ".join(formatted_row) + "\n"
|
| 370 |
+
|
| 371 |
+
if len(results) > 20:
|
| 372 |
+
response += f"\n... and {len(results) - 20} more rows\n"
|
| 373 |
+
|
| 374 |
+
response += "```"
|
| 375 |
+
|
| 376 |
+
return response
|
| 377 |
+
|
| 378 |
+
def get_linked_transaction_data(self, sql_transaction_id: int, transaction_type: str) -> Optional[Dict[str, Any]]:
    """Retrieve complete transaction data from both SQL and vector stores.

    Returns a dict with 'sql_data', 'vector_data' and a 'linked' flag, or
    None when the SQL record is missing or a lookup fails.
    """
    try:
        # Query both stores; both lookups are attempted unconditionally so
        # a failure in either surfaces here.
        record = self.db_manager.get_transaction_by_id(sql_transaction_id, transaction_type)
        embedding = self.vector_store.get_transaction_by_sql_id(sql_transaction_id, transaction_type)

        if not record:
            return None
        return {
            "sql_data": record,
            "vector_data": embedding,
            "linked": embedding is not None
        }
    except Exception as e:
        print(f"Error retrieving linked transaction data: {e}")
        return None
|
| 399 |
+
|
| 400 |
+
def close(self):
    """Clean up resources by closing the underlying database manager's session."""
    self.db_manager.close()
|
src/config_manager.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import yaml
|
| 4 |
+
import os
|
| 5 |
+
from typing import Dict, Any, List
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
class ConfigManager:
    """Manages configuration loading and access for the chatbot application.

    Parses a YAML file once at construction time and serves lookups from the
    in-memory dict via dot-separated key paths.
    """

    def __init__(self, config_path: str = None):
        """Initialize the configuration manager.

        Args:
            config_path: Path to the configuration file. Defaults to
                config.yaml in the project root.
        """
        if config_path is None:
            # Project root is two levels above this module (src/..).
            config_path = Path(__file__).parent.parent / "config.yaml"
        self.config_path = Path(config_path)
        self._config = self._load_config()

    def _load_config(self) -> Dict[str, Any]:
        """Load and parse the YAML file, raising descriptive errors on failure."""
        try:
            with open(self.config_path, 'r', encoding='utf-8') as fh:
                # An empty file parses to None; normalize to an empty dict.
                return yaml.safe_load(fh) or {}
        except FileNotFoundError:
            raise FileNotFoundError(f"Configuration file not found: {self.config_path}")
        except yaml.YAMLError as e:
            raise ValueError(f"Error parsing configuration file: {e}")

    def get(self, key_path: str, default: Any = None) -> Any:
        """Get a configuration value using dot notation.

        Args:
            key_path: Dot-separated path (e.g. 'database.path').
            default: Value returned when any segment of the path is missing.

        Returns:
            The configuration value, or *default* if not found.
        """
        node = self._config
        for part in key_path.split('.'):
            try:
                node = node[part]
            except (KeyError, TypeError):
                # Missing key, or we hit a non-dict mid-path.
                return default
        return node

    def get_database_config(self) -> Dict[str, Any]:
        """Return the 'database' section (empty dict if absent)."""
        return self.get('database', {})

    def get_openai_config(self, component: str = None) -> Dict[str, Any]:
        """Return the 'openai' section, or one of its component sub-sections.

        Args:
            component: Optional component name (e.g. 'intent_classifier').
        """
        key = f'openai.{component}' if component else 'openai'
        return self.get(key, {})

    def get_vector_store_config(self) -> Dict[str, Any]:
        """Return the 'vector_store' section."""
        return self.get('vector_store', {})

    def get_search_config(self) -> Dict[str, Any]:
        """Return the 'search' section."""
        return self.get('search', {})

    def get_entity_extraction_config(self) -> Dict[str, Any]:
        """Return the 'entity_extraction' section."""
        return self.get('entity_extraction', {})

    def get_business_logic_config(self) -> Dict[str, Any]:
        """Return the 'business_logic' section."""
        return self.get('business_logic', {})

    def get_app_config(self) -> Dict[str, Any]:
        """Return the 'app' section."""
        return self.get('app', {})

    def is_feature_enabled(self, feature_name: str) -> bool:
        """Check whether app.features.<feature_name> is enabled.

        Unknown features default to enabled (True).
        """
        return self.get(f'app.features.{feature_name}', True)
|
| 107 |
+
|
| 108 |
+
# Global configuration instance
# Module-level singleton so every component shares one parsed config.
_config_manager = None

def get_config() -> ConfigManager:
    """Get the global configuration manager instance.

    Lazily constructs a ConfigManager on first call and caches it for
    all subsequent callers.
    """
    global _config_manager
    if _config_manager is None:
        _config_manager = ConfigManager()
    return _config_manager

def reload_config():
    """Reload the configuration from file.

    Replaces the cached singleton; callers holding a reference to the old
    instance keep seeing the old values until they call get_config() again.
    """
    global _config_manager
    _config_manager = ConfigManager()
|
src/database_manager.py
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
from typing import Optional, List, Dict, Any
|
| 3 |
+
from sqlalchemy import create_engine, text
|
| 4 |
+
from sqlalchemy.orm import sessionmaker
|
| 5 |
+
from models import Base, Supplier, Customer, Product, Purchase, Sale, EntityExtraction
|
| 6 |
+
|
| 7 |
+
class DatabaseManager:
    """SQLite persistence layer (via SQLAlchemy ORM) for transactions.

    Creates the schema on construction, seeds default suppliers/products,
    and exposes helpers to record and query purchases and sales. Owns a
    single long-lived session; call close() at shutdown.
    """

    def __init__(self, db_path: str = "chatbot.db"):
        # One engine + one session per manager instance.
        self.db_path = db_path
        self.engine = create_engine(f"sqlite:///{db_path}")
        Base.metadata.create_all(self.engine)
        Session = sessionmaker(bind=self.engine)
        self.session = Session()
        self._initialize_data()

    def _initialize_data(self):
        """Initialize database with sample data (idempotent: skips rows that exist)."""
        # Add default suppliers if they don't exist
        suppliers = ["TechMart", "Office Supplies Co", "Electronics Plus"]
        for supplier_name in suppliers:
            existing = self.session.query(Supplier).filter_by(name=supplier_name).first()
            if not existing:
                supplier = Supplier(name=supplier_name)
                self.session.add(supplier)

        # Add default products
        products = [
            ("USB drives", "Electronics"),
            ("Office chairs", "Furniture"),
            ("Laptops", "Electronics"),
            ("Monitors", "Electronics"),
            ("Keyboards", "Electronics")
        ]
        for product_name, category in products:
            existing = self.session.query(Product).filter_by(name=product_name).first()
            if not existing:
                product = Product(name=product_name, category=category)
                self.session.add(product)

        self.session.commit()

    def process_transaction(self, entities: EntityExtraction) -> tuple:
        """Process a transaction based on extracted entities.

        Returns:
            (record_id, message) on success, or (None, error_message) when
            the type is unknown or the insert fails.
        """
        try:
            if entities.transaction_type == "purchase":
                return self._process_purchase(entities)
            elif entities.transaction_type == "sale":
                return self._process_sale(entities)
            else:
                return None, "Could not determine transaction type"
        except Exception as e:
            # Roll back so a failed insert doesn't poison the shared session.
            self.session.rollback()
            return None, f"Error processing transaction: {str(e)}"

    def _process_purchase(self, entities: EntityExtraction) -> tuple:
        """Process a purchase transaction; returns (purchase_id, confirmation message)."""
        # Get or create supplier
        supplier = None
        if entities.supplier:
            supplier = self.session.query(Supplier).filter_by(name=entities.supplier).first()
            if not supplier:
                supplier = Supplier(name=entities.supplier)
                self.session.add(supplier)
                self.session.flush()  # populate supplier.id before use

        # Get or create product
        product = None
        if entities.product:
            product = self.session.query(Product).filter_by(name=entities.product).first()
            if not product:
                product = Product(name=entities.product)
                self.session.add(product)
                self.session.flush()  # populate product.id before use

        # Create purchase record; total falls back to quantity * unit price.
        purchase = Purchase(
            supplier_id=supplier.id if supplier else None,
            product_id=product.id if product else None,
            quantity=entities.quantity or 1,
            unit_price=entities.unit_price or 0,
            total_cost=entities.total_amount or (entities.quantity or 1) * (entities.unit_price or 0),
            notes=entities.notes
        )

        self.session.add(purchase)
        self.session.commit()

        return purchase.id, f"Purchase recorded: {entities.quantity or 1}x {entities.product or 'Unknown'} from {entities.supplier or 'Unknown'} for β¬{entities.total_amount or 0}"

    def _process_sale(self, entities: EntityExtraction) -> tuple:
        """Process a sale transaction; returns (sale_id, confirmation message)."""
        # Get or create customer
        customer = None
        if entities.customer:
            customer = self.session.query(Customer).filter_by(name=entities.customer).first()
            if not customer:
                customer = Customer(name=entities.customer)
                self.session.add(customer)
                self.session.flush()  # populate customer.id before use

        # Get or create product
        product = None
        if entities.product:
            product = self.session.query(Product).filter_by(name=entities.product).first()
            if not product:
                product = Product(name=entities.product)
                self.session.add(product)
                self.session.flush()  # populate product.id before use

        # Create sale record; total falls back to quantity * unit price.
        sale = Sale(
            customer_id=customer.id if customer else None,
            product_id=product.id if product else None,
            quantity=entities.quantity or 1,
            unit_price=entities.unit_price or 0,
            total_amount=entities.total_amount or (entities.quantity or 1) * (entities.unit_price or 0),
            notes=entities.notes
        )

        self.session.add(sale)
        self.session.commit()

        return sale.id, f"Sale recorded: {entities.quantity or 1}x {entities.product or 'Unknown'} to {entities.customer or 'Unknown'} for β¬{entities.total_amount or 0}"

    def query_data(self, query: str) -> List[Dict[str, Any]]:
        """Execute a raw SQL query and return rows as dicts.

        On failure the error is reported in-band as [{"error": message}]
        rather than raised, so callers can render it directly.
        """
        try:
            result = self.session.execute(text(query))
            columns = result.keys()
            rows = result.fetchall()
            return [dict(zip(columns, row)) for row in rows]
        except Exception as e:
            return [{"error": str(e)}]

    def get_recent_transactions(self, limit: int = 10) -> Dict[str, List[Dict]]:
        """Get the most recent purchases and sales (up to *limit* of each)."""
        purchases = self.session.query(Purchase).order_by(Purchase.purchase_date.desc()).limit(limit).all()
        sales = self.session.query(Sale).order_by(Sale.sale_date.desc()).limit(limit).all()

        purchase_data = []
        for p in purchases:
            purchase_data.append({
                "id": p.id,
                "supplier": p.supplier.name if p.supplier else "Unknown",
                "product": p.product.name if p.product else "Unknown",
                "quantity": p.quantity,
                "unit_price": float(p.unit_price),
                "total_cost": float(p.total_cost),
                "date": p.purchase_date.isoformat(),
                "type": "purchase"
            })

        sale_data = []
        for s in sales:
            sale_data.append({
                "id": s.id,
                "customer": s.customer.name if s.customer else "Unknown",
                "product": s.product.name if s.product else "Unknown",
                "quantity": s.quantity,
                "unit_price": float(s.unit_price),
                "total_amount": float(s.total_amount),
                "date": s.sale_date.isoformat(),
                "type": "sale"
            })

        return {"purchases": purchase_data, "sales": sale_data}

    def search_transactions(self, search_term: str) -> List[Dict[str, Any]]:
        """Search transactions by supplier, customer, product, or notes substring.

        Returns purchases and sales merged, sorted newest first.
        """
        results = []

        # Search purchases (outer joins so rows with missing refs still match on notes)
        purchases = self.session.query(Purchase).join(Supplier, Purchase.supplier_id == Supplier.id, isouter=True)\
            .join(Product, Purchase.product_id == Product.id, isouter=True)\
            .filter(
                (Supplier.name.contains(search_term)) |
                (Product.name.contains(search_term)) |
                (Purchase.notes.contains(search_term))
            ).all()

        for p in purchases:
            results.append({
                "id": p.id,
                "type": "purchase",
                "supplier": p.supplier.name if p.supplier else "Unknown",
                "product": p.product.name if p.product else "Unknown",
                "quantity": p.quantity,
                "unit_price": float(p.unit_price),
                "total": float(p.total_cost),
                "date": p.purchase_date.isoformat()
            })

        # Search sales
        sales = self.session.query(Sale).join(Customer, Sale.customer_id == Customer.id, isouter=True)\
            .join(Product, Sale.product_id == Product.id, isouter=True)\
            .filter(
                (Customer.name.contains(search_term)) |
                (Product.name.contains(search_term)) |
                (Sale.notes.contains(search_term))
            ).all()

        for s in sales:
            results.append({
                "id": s.id,
                "type": "sale",
                "customer": s.customer.name if s.customer else "Unknown",
                "product": s.product.name if s.product else "Unknown",
                "quantity": s.quantity,
                "unit_price": float(s.unit_price),
                "total": float(s.total_amount),
                "date": s.sale_date.isoformat()
            })

        return sorted(results, key=lambda x: x["date"], reverse=True)

    def get_transaction_by_id(self, transaction_id: int, transaction_type: str) -> Optional[Dict[str, Any]]:
        """Retrieve a specific transaction as a dict by ID and type.

        Args:
            transaction_id: Primary key of the purchase or sale row.
            transaction_type: "purchase" or "sale".

        Returns:
            A dict of the row's fields, or None if not found / on error.
        """
        try:
            if transaction_type == "purchase":
                transaction = self.session.query(Purchase).filter_by(id=transaction_id).first()
                if transaction:
                    return {
                        "id": transaction.id,
                        "type": "purchase",
                        "supplier_id": transaction.supplier_id,
                        "product_id": transaction.product_id,
                        "quantity": transaction.quantity,
                        "unit_price": transaction.unit_price,
                        "total_cost": transaction.total_cost,
                        "purchase_date": transaction.purchase_date.isoformat() if transaction.purchase_date else None,
                        "notes": transaction.notes
                    }
            elif transaction_type == "sale":
                transaction = self.session.query(Sale).filter_by(id=transaction_id).first()
                if transaction:
                    return {
                        "id": transaction.id,
                        "type": "sale",
                        "customer_id": transaction.customer_id,
                        "product_id": transaction.product_id,
                        "quantity": transaction.quantity,
                        "unit_price": transaction.unit_price,
                        "total_amount": transaction.total_amount,
                        "sale_date": transaction.sale_date.isoformat() if transaction.sale_date else None,
                        "notes": transaction.notes
                    }

            return None
        except Exception as e:
            print(f"Error retrieving transaction by ID: {e}")
            return None

    def close(self):
        """Close database connection (the SQLAlchemy session)."""
        self.session.close()
|
src/entity_extractor.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import spacy
|
| 3 |
+
from typing import Optional, Dict, Any
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
from dateutil import parser as date_parser
|
| 6 |
+
from models import EntityExtraction
|
| 7 |
+
|
| 8 |
+
class EntityExtractor:
    """Rule-based entity extractor for transaction descriptions.

    Uses regex patterns (with an optional spaCy NER fallback) to pull the
    product, quantity, unit, counterparty and prices out of free text and
    package them as an EntityExtraction.
    """

    def __init__(self):
        # spaCy is optional: without the model we fall back to regex-only extraction.
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            print("Warning: spaCy model not found. Install with: python -m spacy download en_core_web_sm")
            self.nlp = None

    def extract_entities(self, text: str) -> EntityExtraction:
        """Extract entities from user input text.

        Returns:
            EntityExtraction with whatever fields could be recognized; the
            original text is preserved in .notes.
        """
        text_lower = text.lower()

        # The transaction type decides whether we look for a supplier
        # (purchase) or a customer (sale).
        transaction_type = self._detect_transaction_type(text_lower)

        product = self._extract_product(text)
        quantity = self._extract_quantity(text)
        unit = self._extract_unit(text)
        supplier = self._extract_supplier(text) if transaction_type == "purchase" else None
        customer = self._extract_customer(text) if transaction_type == "sale" else None
        unit_price = self._extract_unit_price(text)
        total_amount = self._calculate_total(quantity, unit_price)

        return EntityExtraction(
            product=product,
            quantity=quantity,
            unit=unit,
            supplier=supplier,
            customer=customer,
            unit_price=unit_price,
            total_amount=total_amount,
            transaction_type=transaction_type,
            notes=text
        )

    def _detect_transaction_type(self, text: str) -> str:
        """Classify lowercase text as 'purchase' or 'sale' by keyword counts (ties -> purchase)."""
        purchase_keywords = ["purchase", "buy", "bought", "order", "from", "supplier"]
        sale_keywords = ["sale", "sell", "sold", "to", "customer", "client"]

        purchase_score = sum(1 for keyword in purchase_keywords if keyword in text)
        sale_score = sum(1 for keyword in sale_keywords if keyword in text)

        return "purchase" if purchase_score >= sale_score else "sale"

    def _extract_product(self, text: str) -> Optional[str]:
        """Extract product name from text via ordered regex patterns, then spaCy NER."""
        # Enhanced product patterns to handle various formats
        product_patterns = [
            # Pattern for "X units of Y" format (e.g., "20 tons of Apples")
            r"(?:\d+)\s*(?:tons?|kg|kilograms?|pounds?|lbs?|pieces?|units?|items?|boxes?)\s+of\s+([a-zA-Z\s]+?)(?:\s+from|\s+at|\s+for|\s*β¬|\s*\$|$)",

            # Pattern for "bought/purchased X Y" format
            r"(?:bought|purchased|buy|purchase|sold|sale|sell)\s+(?:\d+\s*(?:tons?|kg|pieces?|units?)?\s+)?(?:of\s+)?([a-zA-Z\s]+?)(?:\s+from|\s+to|\s+at|\s+for|\s*β¬|\s*\$)",

            # Pattern for quantity followed by product
            r"(?:\d+)\s*(?:x\s+)?([a-zA-Z\s]+?)(?:\s+from|\s+at|\s+for|\s*β¬|\s*\$)",

            # Pattern for standalone capitalized product names
            r"\b([A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*)\b(?!\s+(?:from|at|for|β¬|\$))",
        ]

        for pattern in product_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                product = match.group(1).strip()
                # Filter out common non-product words
                if product.lower() not in ['from', 'at', 'for', 'to', 'we', 'i', 'you', 'the', 'a', 'an', 'and', 'or']:
                    return product

        # Use spaCy for named entity recognition if available
        if self.nlp:
            doc = self.nlp(text)
            for ent in doc.ents:
                if ent.label_ in ["PRODUCT", "ORG"] and len(ent.text) > 2:
                    return ent.text

        return None

    def _extract_quantity(self, text: str) -> Optional[int]:
        """Extract quantity from text; decimal quantities are rounded to int."""
        # Enhanced quantity patterns to handle various units
        quantity_patterns = [
            # Numbers with explicit units
            r"(\d+(?:\.\d+)?)\s*(?:tons?|kg|kilograms?|pounds?|lbs?|pieces?|units?|items?|boxes?)",
            # Numbers followed by "of" or "x"
            r"(\d+(?:\.\d+)?)\s*(?:of|x)\s+",
            # Numbers in transaction context
            r"(?:bought|purchased|buy|purchase|sold|sale|sell)\s+(?:of\s+)?(\d+(?:\.\d+)?)",
            # Standalone numbers at start
            r"^(\d+(?:\.\d+)?)\s+",
        ]

        for pattern in quantity_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                try:
                    # Convert to int, handling decimal quantities
                    quantity = float(match.group(1))
                    return int(quantity) if quantity.is_integer() else int(round(quantity))
                except (ValueError, AttributeError):
                    continue

        return None

    def _extract_unit(self, text: str) -> Optional[str]:
        """Extract and normalize the unit (tons, kg, pieces, ...) following a number."""
        # Common unit patterns
        unit_patterns = [
            r"\d+(?:\.\d+)?\s*(tons?|kg|kilograms?|pounds?|lbs?|pieces?|units?|items?|boxes?|liters?|gallons?)",
        ]

        for pattern in unit_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                unit = match.group(1).lower()
                # Normalize singular/variant spellings to a canonical plural form.
                unit_mapping = {
                    'ton': 'tons', 'kg': 'kg', 'kilogram': 'kg', 'kilograms': 'kg',
                    'pound': 'lbs', 'pounds': 'lbs', 'lb': 'lbs', 'lbs': 'lbs',
                    'piece': 'pieces', 'pieces': 'pieces',
                    'unit': 'units', 'units': 'units',
                    'item': 'items', 'items': 'items',
                    'box': 'boxes', 'boxes': 'boxes',
                    'liter': 'liters', 'liters': 'liters',
                    'gallon': 'gallons', 'gallons': 'gallons'
                }
                return unit_mapping.get(unit, unit)

        return None

    def _extract_supplier(self, text: str) -> Optional[str]:
        """Extract supplier name from "from X" / "supplier X" phrasings, then spaCy ORG."""
        supplier_patterns = [
            r"from\s+([A-Za-z\s]+?)(?:\s+at|\s+for|\s*β¬|\s*\$|$)",
            r"supplier\s+([A-Za-z\s]+?)(?:\s+at|\s+for|\s*β¬|\s*\$|$)",
        ]

        for pattern in supplier_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                return match.group(1).strip()

        # Use spaCy for organization detection
        if self.nlp:
            doc = self.nlp(text)
            for ent in doc.ents:
                if ent.label_ == "ORG":
                    return ent.text

        return None

    def _extract_customer(self, text: str) -> Optional[str]:
        """Extract customer name from "to X" / "customer X" phrasings, then spaCy PERSON."""
        customer_patterns = [
            r"to\s+([A-Za-z\s]+?)(?:\s+at|\s+for|\s*β¬|\s*\$|$)",
            r"customer\s+([A-Za-z\s]+?)(?:\s+at|\s+for|\s*β¬|\s*\$|$)",
        ]

        for pattern in customer_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                return match.group(1).strip()

        # Use spaCy for person detection
        if self.nlp:
            doc = self.nlp(text)
            for ent in doc.ents:
                if ent.label_ == "PERSON":
                    return ent.text

        return None

    def _extract_unit_price(self, text: str) -> Optional[float]:
        """Extract unit price from text.

        Fix: the original patterns used a bare '$' inside the currency
        alternations; unescaped, '$' is the end-of-string anchor, so prices
        written with a dollar sign were never matched (and the last pattern
        spuriously matched bare trailing numbers). It is now escaped as '\\$'.
        """
        price_patterns = [
            r"(?:at|for|β¬|\$)\s*(\d+(?:\.\d{2})?)\s*(?:each|per|unit)?",
            r"(\d+(?:\.\d{2})?)\s*(?:β¬|\$)\s*(?:each|per|unit)",
            r"(?:price|cost)?\s*(?:of)?\s*(\d+(?:\.\d{2})?)\s*(?:β¬|\$)",
        ]

        for pattern in price_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                return float(match.group(1))

        return None

    def _calculate_total(self, quantity: Optional[int], unit_price: Optional[float]) -> Optional[float]:
        """Calculate total amount; None unless both quantity and price are known and nonzero."""
        if quantity and unit_price:
            return quantity * unit_price
        return None
|
src/intent_classifier.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import openai
|
| 2 |
+
import os
|
| 3 |
+
import dirtyjson as json
|
| 4 |
+
from typing import Dict, Any, Optional, Tuple
|
| 5 |
+
from enum import Enum
|
| 6 |
+
from pydantic import BaseModel
|
| 7 |
+
|
| 8 |
+
class IntentType(str, Enum):
    """Routing categories produced by the intent classifier.

    Subclassing ``str`` lets members compare/serialize as their plain string
    values (e.g. in JSON payloads and pydantic models).
    """
    TRANSACTION = "transaction"          # record a purchase or sale
    QUERY = "query"                      # structured lookup against SQL tables
    SEMANTIC_SEARCH = "semantic_search"  # contextual / vector-store search
    GENERAL_INFO = "general_info"        # store a note or reminder
|
| 13 |
+
|
| 14 |
+
class IntentResult(BaseModel):
    """Outcome of a single intent classification."""
    intent: IntentType
    confidence: float  # 0.0-1.0, as reported by the model (or the fallback heuristic)
    reasoning: str  # brief justification for the chosen intent
    entities_hint: Optional[str] = None  # key entities spotted (transaction intent only)
|
| 19 |
+
|
| 20 |
+
class IntentClassifier:
    """Classifies user messages into IntentType categories via the OpenAI API,
    with a keyword-based fallback when the API call or JSON parsing fails."""

    def __init__(self, api_key: Optional[str] = None):
        """Initialize OpenAI client for intent classification"""
        # Falls back to the OPENAI_API_KEY environment variable when no
        # explicit key is provided.
        self.client = openai.OpenAI(
            api_key=api_key or os.getenv('OPENAI_API_KEY')
        )

    def classify_intent(self, user_message: str) -> IntentResult:
        """
        Classify user intent using OpenAI API
        Returns: IntentResult with intent type, confidence, and reasoning
        """

        system_prompt = """You are an expert intent classifier for a business chatbot that handles sales, purchases, and general information storage.

Given a user message, classify it into one of these intents:

1. **QUERY**: User wants to retrieve or analyze STRUCTURED data from SQL database tables
- Examples: "How many USB drives did we buy?" (counts from purchases table)
- Examples: "What's the total value of all sales?" (sum from sales table)
- Examples: "Show me recent transactions" (list from transactions table)
- Examples: "List all customers" (data from customers table)
- Key indicators: Asking for counts, totals, lists, recent data from business transactions
- Must be answerable from structured database tables (purchases, sales, customers, suppliers, products)

2. **SEMANTIC_SEARCH**: User wants to find contextual information, tasks, or unstructured data
- Examples: "What does Mark need to do?" (searching for task/context info)
- Examples: "Find events related to supplier meetings" (contextual search)
- Examples: "When do I have the meeting with George?" (calendar/scheduling info)
- Examples: "Show me similar purchases to this one" (similarity search)
- Examples: "What did we discuss in the last meeting?" (meeting notes/context)
- Key indicators: Questions about tasks, meetings, discussions, or contextual information
- Information that would NOT be in structured database tables

3. **TRANSACTION**: User wants to record a business transaction (purchase or sale)
- Examples: "Add a purchase of 20 USB drives from TechMart at β¬5 each"
- Examples: "Sold 10 laptops to John Smith at β¬800 each"
- Contains: product names, quantities, suppliers/customers, prices
- Action: Recording new business data

4. **GENERAL_INFO**: User wants to store general business information or notes
- It cannot be a question.
- Examples: "Meeting with new supplier scheduled for next week"
- Examples: "Remember to check inventory levels before next order"
- Examples: "Mark needs to call the supplier tomorrow"
- Contains: notes, reminders, general business information, task assignments

Return your response in this exact JSON format:
{
"intent": "transaction|query|semantic_search|general_info",
"confidence": 0.0-1.0,
"reasoning": "Brief explanation of why you chose this intent",
"entities_hint": "Optional: Key entities you detected (for transaction intent)"
}

Be precise and consider context carefully. If unsure, choose the most likely intent and indicate lower confidence."""

        user_prompt = f'Classify the intent of this user message: "{user_message}"'

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                # Low temperature for deterministic-ish classification output.
                temperature=0.1,
                max_tokens=300
            )

            response_text = response.choices[0].message.content.strip()

            # Clean JSON response more carefully
            # Strip optional markdown code fences (```json ... ``` or ``` ... ```)
            # that the model sometimes wraps around the JSON payload.
            if response_text.startswith("```json"):
                response_text = response_text[7:]
            if response_text.startswith("```"):
                response_text = response_text[3:]
            if response_text.endswith("```"):
                response_text = response_text[:-3]

            response_text = response_text.strip()

            # Parse JSON response
            # NOTE: `json` here is the dirtyjson package (see file imports),
            # which tolerates minor JSON malformations from the LLM.
            try:
                result_dict = json.loads(response_text)

                # Validate intent value against the known IntentType values;
                # anything unexpected routes to the keyword fallback.
                intent_value = result_dict.get("intent", "").lower()
                if intent_value not in [e.value for e in IntentType]:
                    print(f"Invalid intent value: {intent_value}")
                    return self._fallback_classification(user_message, f"Invalid intent: {intent_value}")

                return IntentResult(
                    intent=IntentType(intent_value),
                    confidence=float(result_dict.get("confidence", 0.5)),
                    reasoning=result_dict.get("reasoning", "No reasoning provided"),
                    entities_hint=result_dict.get("entities_hint")
                )
            except Exception as e:
                # Fallback if JSON parsing fails
                print(f"JSON parsing error: {e}")
                print(f"Raw response: {response_text}")
                return self._fallback_classification(user_message, f"JSON parsing failed: {str(e)}")

        except Exception as e:
            # API/network failure: degrade gracefully to keyword matching.
            print(f"Error in intent classification: {e}")
            return self._fallback_classification(user_message, str(e))

    def _fallback_classification(self, user_message: str, error_info: str) -> IntentResult:
        """Fallback classification when OpenAI API fails"""
        message_lower = user_message.lower()

        # Simple keyword-based fallback
        # NOTE(review): matching is substring-based and order-dependent
        # (transaction keywords are checked first), so e.g. any message
        # containing "to" classifies as TRANSACTION — confirm intended.
        transaction_keywords = ["purchase", "buy", "sold", "sale", "from", "to", "β¬", "$"]
        query_keywords = ["how many", "total", "list all", "recent transactions", "count"]
        search_keywords = ["similar", "like", "related", "about", "need to do", "meeting", "discuss", "task"]

        if any(keyword in message_lower for keyword in transaction_keywords):
            intent = IntentType.TRANSACTION
            confidence = 0.6
        elif any(keyword in message_lower for keyword in query_keywords):
            intent = IntentType.QUERY
            confidence = 0.6
        elif any(keyword in message_lower for keyword in search_keywords):
            intent = IntentType.SEMANTIC_SEARCH
            confidence = 0.6
        else:
            intent = IntentType.GENERAL_INFO
            confidence = 0.5

        return IntentResult(
            intent=intent,
            confidence=confidence,
            # error_info is truncated to keep the reasoning string short.
            reasoning=f"Fallback classification due to API error: {error_info[:100]}",
            entities_hint=None
        )

    def get_intent_description(self, intent: IntentType) -> str:
        """Get human-readable description of intent type"""
        descriptions = {
            IntentType.TRANSACTION: "Recording a business transaction (purchase or sale)",
            IntentType.QUERY: "Retrieving or analyzing data from the database",
            IntentType.SEMANTIC_SEARCH: "Finding similar events or information",
            IntentType.GENERAL_INFO: "Storing general business information or notes"
        }
        return descriptions.get(intent, "Unknown intent type")

    def batch_classify(self, messages: list[str]) -> list[IntentResult]:
        """Classify multiple messages efficiently"""
        # NOTE: sequential one-call-per-message; each message incurs a
        # separate API round trip.
        results = []
        for message in messages:
            result = self.classify_intent(message)
            results.append(result)
        return results
|
src/models.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
from typing import Optional, List
|
| 3 |
+
from pydantic import BaseModel, Field
|
| 4 |
+
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, Numeric, Text, create_engine
|
| 5 |
+
from sqlalchemy.ext.declarative import declarative_base
|
| 6 |
+
from sqlalchemy.orm import relationship, sessionmaker
|
| 7 |
+
|
| 8 |
+
Base = declarative_base()
|
| 9 |
+
|
| 10 |
+
class Supplier(Base):
    """SQLAlchemy model for a purchasing supplier (table ``suppliers``)."""
    __tablename__ = "suppliers"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String(255), nullable=False, unique=True)  # unique business name
    contact_info = Column(Text)
    created_at = Column(DateTime, default=datetime.utcnow)  # naive UTC timestamp

    # One-to-many: all purchases sourced from this supplier.
    purchases = relationship("Purchase", back_populates="supplier")
|
| 19 |
+
|
| 20 |
+
class Customer(Base):
    """SQLAlchemy model for a sales customer (table ``customers``)."""
    __tablename__ = "customers"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String(255), nullable=False)  # not unique, unlike Supplier.name
    email = Column(String(255))
    phone = Column(String(50))
    address = Column(Text)
    created_at = Column(DateTime, default=datetime.utcnow)  # naive UTC timestamp

    # One-to-many: all sales made to this customer.
    sales = relationship("Sale", back_populates="customer")
|
| 31 |
+
|
| 32 |
+
class Product(Base):
    """SQLAlchemy model for a traded product (table ``products``)."""
    __tablename__ = "products"

    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String(255), nullable=False)
    description = Column(Text)
    category = Column(String(100))
    created_at = Column(DateTime, default=datetime.utcnow)  # naive UTC timestamp

    # A product appears in both purchase and sale transactions.
    purchases = relationship("Purchase", back_populates="product")
    sales = relationship("Sale", back_populates="product")
|
| 43 |
+
|
| 44 |
+
class Purchase(Base):
    """SQLAlchemy model for a stock purchase from a supplier (table ``purchases``)."""
    __tablename__ = "purchases"

    id = Column(Integer, primary_key=True, autoincrement=True)
    supplier_id = Column(Integer, ForeignKey("suppliers.id"))
    product_id = Column(Integer, ForeignKey("products.id"))
    quantity = Column(Integer, nullable=False)
    unit_price = Column(Numeric(10, 2), nullable=False)  # price per unit
    total_cost = Column(Numeric(10, 2), nullable=False)  # stored, not computed
    purchase_date = Column(DateTime, default=datetime.utcnow)  # naive UTC timestamp
    notes = Column(Text)

    supplier = relationship("Supplier", back_populates="purchases")
    product = relationship("Product", back_populates="purchases")
|
| 58 |
+
|
| 59 |
+
class Sale(Base):
    """SQLAlchemy model for a sale to a customer (table ``sales``)."""
    __tablename__ = "sales"

    id = Column(Integer, primary_key=True, autoincrement=True)
    customer_id = Column(Integer, ForeignKey("customers.id"))
    product_id = Column(Integer, ForeignKey("products.id"))
    quantity = Column(Integer, nullable=False)
    unit_price = Column(Numeric(10, 2), nullable=False)  # price per unit
    total_amount = Column(Numeric(10, 2), nullable=False)  # stored, not computed
    sale_date = Column(DateTime, default=datetime.utcnow)  # naive UTC timestamp
    notes = Column(Text)

    customer = relationship("Customer", back_populates="sales")
    product = relationship("Product", back_populates="sales")
|
| 73 |
+
|
| 74 |
+
# Pydantic models for API
|
| 75 |
+
class EntityExtraction(BaseModel):
    """Entities extracted from a free-text transaction message.

    All fields except ``transaction_type`` are optional because extraction
    may be partial; missing fields trigger a clarification flow.
    """
    product: Optional[str] = None
    quantity: Optional[int] = None
    unit: Optional[str] = None  # e.g., "tons", "pieces", "kg"
    supplier: Optional[str] = None  # set for purchases
    customer: Optional[str] = None  # set for sales
    unit_price: Optional[float] = None
    total_amount: Optional[float] = None
    transaction_type: str = Field(..., description="'purchase' or 'sale'")
    notes: Optional[str] = None
|
| 85 |
+
|
| 86 |
+
class ChatbotRequest(BaseModel):
    """Incoming chatbot message, optionally tied to a conversation session."""
    message: str
    session_id: Optional[str] = None  # used to correlate multi-turn clarification
|
| 89 |
+
|
| 90 |
+
class PendingTransaction(BaseModel):
    """A partially extracted transaction awaiting user clarification."""
    entities: EntityExtraction  # what has been extracted so far
    missing_fields: List[str]  # field names still required before committing
    session_id: str
    original_message: str
    # Pydantic deep-copies mutable defaults per instance, so the shared-list
    # pitfall of plain Python defaults does not apply here.
    clarification_responses: List[str] = []
|
| 96 |
+
|
| 97 |
+
class ChatbotResponse(BaseModel):
    """Structured chatbot reply with debugging/telemetry metadata."""
    response: str  # user-facing text
    sql_executed: Optional[str] = None  # populated for QUERY intents
    entities_extracted: Optional[EntityExtraction] = None  # populated for TRANSACTION intents
    vector_stored: bool = False  # True when the message was written to the vector store
    intent_detected: Optional[str] = None
    intent_confidence: Optional[float] = None
    awaiting_clarification: bool = False  # True when more input is needed
|
src/nl_to_sql.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import openai
|
| 2 |
+
import os
|
| 3 |
+
from typing import Dict, Any, Optional, Tuple
|
| 4 |
+
import re
|
| 5 |
+
import json
|
| 6 |
+
|
| 7 |
+
class NaturalLanguageToSQL:
    """Converts natural-language business questions into read-only SQLite
    queries via the OpenAI API, with basic validation and explanations."""

    def __init__(self, api_key: Optional[str] = None):
        """Initialize OpenAI client for natural language to SQL conversion"""
        # Falls back to the OPENAI_API_KEY environment variable.
        self.client = openai.OpenAI(
            api_key=api_key or os.getenv('OPENAI_API_KEY')
        )

        # Database schema description for the LLM
        # Kept in sync by hand with the SQLAlchemy models in src/models.py.
        self.schema_description = """
Database Schema:

Table: suppliers
- id (INTEGER PRIMARY KEY)
- name (VARCHAR(255)) - Supplier company name
- contact_info (TEXT) - Contact information
- created_at (TIMESTAMP)

Table: customers
- id (INTEGER PRIMARY KEY)
- name (VARCHAR(255)) - Customer name
- email (VARCHAR(255))
- phone (VARCHAR(50))
- address (TEXT)
- created_at (TIMESTAMP)

Table: products
- id (INTEGER PRIMARY KEY)
- name (VARCHAR(255)) - Product name
- description (TEXT)
- category (VARCHAR(100)) - Product category
- created_at (TIMESTAMP)

Table: purchases
- id (INTEGER PRIMARY KEY)
- supplier_id (INTEGER) - Foreign key to suppliers table
- product_id (INTEGER) - Foreign key to products table
- quantity (INTEGER) - Number of items purchased
- unit_price (DECIMAL(10,2)) - Price per unit
- total_cost (DECIMAL(10,2)) - Total purchase cost
- purchase_date (TIMESTAMP) - When purchase was made
- notes (TEXT) - Additional notes

Table: sales
- id (INTEGER PRIMARY KEY)
- customer_id (INTEGER) - Foreign key to customers table
- product_id (INTEGER) - Foreign key to products table
- quantity (INTEGER) - Number of items sold
- unit_price (DECIMAL(10,2)) - Price per unit
- total_amount (DECIMAL(10,2)) - Total sale amount
- sale_date (TIMESTAMP) - When sale was made
- notes (TEXT) - Additional notes

Relationships:
- purchases.supplier_id β suppliers.id
- purchases.product_id β products.id
- sales.customer_id β customers.id
- sales.product_id β products.id
"""

    def convert_to_sql(self, natural_language_query: str) -> Tuple[str, str]:
        """
        Convert natural language query to SQL
        Returns: (sql_query, explanation)

        On API failure the first element is a SQL comment beginning with
        "-- Error generating SQL:" rather than a runnable query.
        """

        system_prompt = f"""You are an expert SQL query generator. Given a natural language question about a business database, generate the appropriate SQL query.

{self.schema_description}

Guidelines:
1. Generate valid SQLite syntax
2. Use JOINs when accessing related data across tables
3. Use appropriate WHERE clauses for filtering
4. Use aggregate functions (COUNT, SUM, AVG) when appropriate
5. Use ORDER BY for sorting results
6. Use LIMIT for restricting result count when reasonable
7. Always use proper table aliases for clarity
8. Handle date ranges using DATE() function for SQLite
9. Use LIKE with % wildcards for text searches
10. Return only the SQL query, no explanations unless specifically requested

Example queries:
- "Show all USB drives purchased" β SELECT p.name, pu.quantity, pu.unit_price, s.name as supplier FROM purchases pu JOIN products p ON pu.product_id = p.id JOIN suppliers s ON pu.supplier_id = s.id WHERE p.name LIKE '%USB%'
- "Total sales this month" β SELECT SUM(total_amount) FROM sales WHERE DATE(sale_date) >= DATE('now', 'start of month')
- "Top 5 customers by sales" β SELECT c.name, SUM(s.total_amount) as total FROM sales s JOIN customers c ON s.customer_id = c.id GROUP BY c.id, c.name ORDER BY total DESC LIMIT 5
"""

        user_prompt = f"""Convert this natural language query to SQL:

"{natural_language_query}"

Return ONLY the SQL query, nothing else."""

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                # Low temperature: SQL generation should be deterministic-ish.
                temperature=0.1,
                max_tokens=500
            )

            sql_query = response.choices[0].message.content.strip()

            # Clean up the SQL query (remove markdown formatting if present)
            sql_query = re.sub(r'^```sql\s*', '', sql_query)
            sql_query = re.sub(r'\s*```$', '', sql_query)
            sql_query = sql_query.strip()

            # Generate explanation
            explanation = self._generate_explanation(natural_language_query, sql_query)

            return sql_query, explanation

        except Exception as e:
            return f"-- Error generating SQL: {str(e)}", f"Failed to convert query: {str(e)}"

    def _generate_explanation(self, nl_query: str, sql_query: str) -> str:
        """Generate a human-readable explanation of what the SQL query does"""

        system_prompt = """You are a helpful assistant that explains SQL queries in simple terms.
Given a natural language question and the corresponding SQL query, provide a brief explanation of what the SQL query does."""

        user_prompt = f"""Natural language query: "{nl_query}"

SQL query: {sql_query}

Provide a brief explanation of what this SQL query does:"""

        try:
            response = self.client.chat.completions.create(
                # Cheaper model is sufficient for a short explanation.
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.3,
                max_tokens=200
            )

            return response.choices[0].message.content.strip()

        except Exception as e:
            # Best-effort: a generic explanation is better than an error here.
            return f"Generated SQL query for: {nl_query}"

    def validate_sql(self, sql_query: str) -> Tuple[bool, str]:
        """
        Basic validation of SQL query structure
        Returns: (is_valid, error_message)

        NOTE(review): keyword checks are substring-based, so e.g. a column
        named "created_at" contains "create"; this only bites for non-SELECT
        queries, which are rejected anyway. The "--" injection pattern also
        rejects any legitimate query containing a SQL comment — confirm
        that is the intended trade-off.
        """

        # Basic checks
        sql_lower = sql_query.lower().strip()

        # Check for dangerous operations
        dangerous_keywords = ['drop', 'delete', 'truncate', 'alter', 'create', 'insert', 'update']
        for keyword in dangerous_keywords:
            if keyword in sql_lower and not sql_lower.startswith('select'):
                return False, f"Query contains potentially dangerous keyword: {keyword}"

        # Check if it starts with SELECT (read-only queries only)
        if not sql_lower.startswith('select'):
            return False, "Only SELECT queries are allowed for security"

        # Basic syntax checks
        if sql_query.count('(') != sql_query.count(')'):
            return False, "Unmatched parentheses in query"

        # Check for basic SQL injection patterns
        injection_patterns = [r";\s*(drop|delete|insert|update)", r"--", r"/\*.*\*/"]
        for pattern in injection_patterns:
            if re.search(pattern, sql_lower):
                return False, f"Query contains potentially unsafe pattern: {pattern}"

        return True, "Query appears valid"

    def suggest_corrections(self, natural_language_query: str, error_message: str) -> str:
        """Suggest how to rephrase the query if it fails"""

        # Keyed on substrings expected in typical SQL error messages.
        suggestions = {
            "table": "Make sure you're asking about purchases, sales, customers, suppliers, or products",
            "column": "Try using terms like 'name', 'quantity', 'price', 'date', 'total'",
            "syntax": "Try rephrasing your question more simply",
            "ambiguous": "Be more specific about what data you want to see"
        }

        error_lower = error_message.lower()

        for key, suggestion in suggestions.items():
            if key in error_lower:
                return f"Suggestion: {suggestion}"

        return "Try rephrasing your question or ask for help with available data"
|
src/rag_handler.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import openai
|
| 2 |
+
import os
|
| 3 |
+
from typing import List, Dict, Any, Optional
|
| 4 |
+
import json
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
|
| 7 |
+
class RAGHandler:
|
| 8 |
+
    def __init__(self, api_key: Optional[str] = None):
        """Initialize OpenAI client for RAG responses"""
        # Falls back to the OPENAI_API_KEY environment variable when no
        # explicit key is given.
        self.client = openai.OpenAI(
            api_key=api_key or os.getenv('OPENAI_API_KEY')
        )
|
| 13 |
+
|
| 14 |
+
    def generate_rag_response(self, user_query: str, retrieved_documents: List[Dict[str, Any]]) -> str:
        """
        Generate a response using RAG (Retrieval-Augmented Generation)

        Args:
            user_query: The user's original query
            retrieved_documents: List of documents from vector store with similarity scores

        Returns:
            Generated response based on retrieved context

        On API failure, returns an error message plus a plain-text listing of
        the retrieved records (see _format_fallback_response).
        """

        if not retrieved_documents:
            return "I couldn't find any relevant information to answer your query."

        # Format retrieved documents for context
        context = self._format_context(retrieved_documents)

        system_prompt = """You are a helpful business assistant with access to a company's transaction history and business information.

Your role is to answer user questions based on the provided context from the company's records.

Guidelines:
1. Answer based ONLY on the provided context
2. If the context doesn't contain enough information, say so clearly
3. Be specific and cite relevant details from the context
4. Maintain a professional, helpful tone
5. If asked about specific dates, transactions, or events, reference the exact information from context
6. If the context contains multiple relevant items, summarize them appropriately
7. Don't make up information not present in the context

Context format: Each document has a 'document' field with the actual content and 'metadata' with additional details like timestamps."""

        user_prompt = f"""Based on the following business records, please answer this question: "{user_query}"

Context from company records:
{context}

Please provide a comprehensive answer based on the available information."""

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                # Slightly higher temperature than classification: answers
                # should read naturally while staying grounded in context.
                temperature=0.3,
                max_tokens=800
            )

            return response.choices[0].message.content.strip()

        except Exception as e:
            return f"I encountered an error while processing your query: {str(e)}\n\nHowever, I found these relevant records:\n{self._format_fallback_response(retrieved_documents)}"
|
| 69 |
+
|
| 70 |
+
def _format_context(self, documents: List[Dict[str, Any]]) -> str:
|
| 71 |
+
"""Format retrieved documents as context for the LLM"""
|
| 72 |
+
if not documents:
|
| 73 |
+
return "No relevant documents found."
|
| 74 |
+
|
| 75 |
+
context_parts = []
|
| 76 |
+
|
| 77 |
+
for i, doc in enumerate(documents, 1):
|
| 78 |
+
doc_content = doc.get('document', 'No content available')
|
| 79 |
+
metadata = doc.get('metadata', {})
|
| 80 |
+
distance = doc.get('distance', 'Unknown')
|
| 81 |
+
|
| 82 |
+
# Format document entry
|
| 83 |
+
context_entry = f"Document {i}:\n"
|
| 84 |
+
context_entry += f"Content: {doc_content}\n"
|
| 85 |
+
|
| 86 |
+
# Add metadata if available
|
| 87 |
+
if metadata:
|
| 88 |
+
if 'timestamp' in metadata:
|
| 89 |
+
try:
|
| 90 |
+
# Format timestamp nicely
|
| 91 |
+
timestamp = metadata['timestamp']
|
| 92 |
+
if isinstance(timestamp, str):
|
| 93 |
+
date_part = timestamp[:10] if len(timestamp) >= 10 else timestamp
|
| 94 |
+
context_entry += f"Date: {date_part}\n"
|
| 95 |
+
except:
|
| 96 |
+
pass
|
| 97 |
+
|
| 98 |
+
if 'type' in metadata:
|
| 99 |
+
context_entry += f"Type: {metadata['type']}\n"
|
| 100 |
+
|
| 101 |
+
# Add transaction data if available
|
| 102 |
+
if 'data' in metadata:
|
| 103 |
+
try:
|
| 104 |
+
data = json.loads(metadata['data']) if isinstance(metadata['data'], str) else metadata['data']
|
| 105 |
+
if isinstance(data, dict):
|
| 106 |
+
relevant_fields = ['product', 'quantity', 'supplier', 'customer', 'total', 'unit_price']
|
| 107 |
+
data_parts = []
|
| 108 |
+
for field in relevant_fields:
|
| 109 |
+
if field in data and data[field] is not None:
|
| 110 |
+
data_parts.append(f"{field}: {data[field]}")
|
| 111 |
+
if data_parts:
|
| 112 |
+
context_entry += f"Details: {', '.join(data_parts)}\n"
|
| 113 |
+
except:
|
| 114 |
+
pass
|
| 115 |
+
|
| 116 |
+
# Add similarity score
|
| 117 |
+
if distance is not None and distance != 'Unknown':
|
| 118 |
+
try:
|
| 119 |
+
similarity = 1 - float(distance) # Convert distance to similarity
|
| 120 |
+
context_entry += f"Relevance: {similarity:.2f}\n"
|
| 121 |
+
except:
|
| 122 |
+
pass
|
| 123 |
+
|
| 124 |
+
context_parts.append(context_entry)
|
| 125 |
+
|
| 126 |
+
return "\n" + "-" * 50 + "\n".join(context_parts)
|
| 127 |
+
|
| 128 |
+
def _format_fallback_response(self, documents: List[Dict[str, Any]]) -> str:
|
| 129 |
+
"""Create a fallback response when LLM fails"""
|
| 130 |
+
if not documents:
|
| 131 |
+
return "No relevant information found."
|
| 132 |
+
|
| 133 |
+
response_parts = []
|
| 134 |
+
|
| 135 |
+
for i, doc in enumerate(documents, 1):
|
| 136 |
+
doc_content = doc.get('document', 'No content available')
|
| 137 |
+
metadata = doc.get('metadata', {})
|
| 138 |
+
|
| 139 |
+
entry = f"{i}. {doc_content}"
|
| 140 |
+
|
| 141 |
+
if metadata.get('timestamp'):
|
| 142 |
+
try:
|
| 143 |
+
date_part = metadata['timestamp'][:10]
|
| 144 |
+
entry += f" (Date: {date_part})"
|
| 145 |
+
except:
|
| 146 |
+
pass
|
| 147 |
+
|
| 148 |
+
response_parts.append(entry)
|
| 149 |
+
|
| 150 |
+
return "\n".join(response_parts)
|
| 151 |
+
|
| 152 |
+
def enhance_search_query(self, user_query: str) -> str:
    """
    Enhance the user's search query for better vector retrieval.

    Args:
        user_query: Original user query

    Returns:
        Enhanced query for better semantic search; falls back to the
        original query when the LLM call fails or returns nothing usable.
    """

    # NOTE: the example arrows below were mojibake ("β") in the original
    # source; restored to the intended "→" so the prompt reads correctly.
    system_prompt = """You are an expert at reformulating search queries for business records retrieval.

Given a user's question, create an enhanced search query that will better match relevant business documents in a vector database.

Guidelines:
1. Extract key business concepts (products, suppliers, customers, dates, amounts)
2. Add relevant synonyms and related terms
3. Focus on business transaction terminology
4. Keep it concise but comprehensive
5. Don't change the core intent of the original query

Examples:
- "When is my meeting with George?" → "meeting George supplier customer appointment scheduled"
- "Show me laptop purchases" → "laptop computer purchase buy bought supplier transaction"
- "Similar sales to John" → "John customer sale sold transaction similar"

Return only the enhanced query, nothing else."""

    user_prompt = f'Enhance this search query for better business records retrieval: "{user_query}"'

    try:
        response = self.client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.2,  # low temperature keeps the rewrite close to the original intent
            max_tokens=100
        )

        enhanced_query = response.choices[0].message.content.strip()

        # Fall back to the original if enhancement produced an empty/degenerate result.
        if not enhanced_query or len(enhanced_query) < 3:
            return user_query

        return enhanced_query

    except Exception as e:
        # Best-effort feature: any API failure degrades gracefully to the
        # user's original query instead of breaking the search pipeline.
        print(f"Query enhancement failed: {e}")
        return user_query
|
src/transaction_clarifier.py
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import openai
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from typing import Dict, Any, Optional, List, Tuple
|
| 5 |
+
from enum import Enum
|
| 6 |
+
from pydantic import BaseModel
|
| 7 |
+
from models import EntityExtraction
|
| 8 |
+
|
| 9 |
+
class ClarificationStatus(str, Enum):
    """Outcome of a transaction-completeness check.

    Inherits from ``str`` so members compare equal to (and serialize as)
    their plain string values.
    """

    COMPLETE = "complete"                        # all required fields present
    NEEDS_CLARIFICATION = "needs_clarification"  # must ask the user for more info
    CANCELLED = "cancelled"                      # user abandoned the transaction
|
| 13 |
+
|
| 14 |
+
class ClarificationRequest(BaseModel):
    """Questions to ask the user about a transaction's missing fields."""

    # Names of the required fields that could not be extracted.
    missing_fields: List[str]
    # Natural-language questions, one (or more) per missing field.
    questions: List[str]
    # Optional field -> suggested value hints to show alongside the questions.
    suggested_values: Dict[str, Any] = {}
    # Short user-facing explanation of why the information is needed.
    explanation: str
|
| 19 |
+
|
| 20 |
+
class TransactionClarifier:
    """LLM-assisted helper for completing purchase/sale transactions.

    Checks whether extracted entities contain every required field, generates
    follow-up questions for anything missing, and folds the user's answers
    back into the entity model.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Initialize OpenAI client for transaction clarification.

        Falls back to the OPENAI_API_KEY environment variable when no key
        is passed explicitly.
        """
        self.client = openai.OpenAI(
            api_key=api_key or os.getenv('OPENAI_API_KEY')
        )

    @staticmethod
    def _parse_llm_json(response_text: str) -> Dict[str, Any]:
        """Parse JSON from an LLM reply, tolerating Markdown code fences.

        Models frequently wrap JSON in ```json ... ``` fences; stripping them
        first avoids needlessly falling back to the non-LLM code paths.

        Raises:
            json.JSONDecodeError: if the payload still isn't valid JSON.
        """
        text = response_text.strip()
        if text.startswith("```"):
            lines = text.splitlines()
            # Drop the opening fence line (possibly "```json") ...
            lines = lines[1:]
            # ... and a closing fence line, when present.
            if lines and lines[-1].strip() == "```":
                lines = lines[:-1]
            text = "\n".join(lines).strip()
        return json.loads(text)

    def analyze_transaction_completeness(self, entities: EntityExtraction) -> Tuple[ClarificationStatus, Optional[ClarificationRequest]]:
        """
        Analyze if a transaction has all necessary information.

        Args:
            entities: Extracted entities from user input

        Returns:
            Tuple of (status, clarification_request); the request is None
            when the transaction is already complete.
        """

        # Required/optional fields depend on the transaction type.
        if entities.transaction_type == "purchase":
            required_fields = ["product", "quantity", "supplier", "unit_price"]
            optional_fields = ["total_amount"]  # derivable from quantity * unit_price
        elif entities.transaction_type == "sale":
            required_fields = ["product", "quantity", "customer", "unit_price"]
            optional_fields = ["total_amount"]
        else:
            # Non-transactional input: nothing to clarify.
            return ClarificationStatus.COMPLETE, None

        # Collect required fields that are absent or falsy.
        missing_fields = []
        entity_dict = entities.dict()

        for field in required_fields:
            if not entity_dict.get(field):
                missing_fields.append(field)

        # All required fields present -> transaction is complete.
        if not missing_fields:
            return ClarificationStatus.COMPLETE, None

        # Otherwise build intelligent follow-up questions.
        clarification = self._generate_clarification_request(entities, missing_fields)

        return ClarificationStatus.NEEDS_CLARIFICATION, clarification

    def _generate_clarification_request(self, entities: EntityExtraction, missing_fields: List[str]) -> ClarificationRequest:
        """Generate intelligent questions for missing information via the LLM.

        Falls back to template questions when the API call or JSON parsing fails.
        """

        # Context for the prompt: everything we already know (except free-form notes).
        known_info = {}
        entity_dict = entities.dict()

        for field, value in entity_dict.items():
            if value is not None and field != "notes":
                known_info[field] = value

        system_prompt = f"""You are a helpful business assistant helping complete a {entities.transaction_type} transaction.

Generate natural, conversational questions to gather missing information. The user should be able to:
1. Provide the missing information
2. Say "N/A" or "skip" if the information is not available/applicable
3. Ask for suggestions if they're unsure

Create personalized questions based on the context of what we already know.

Return your response in this exact JSON format:
{{
    "questions": ["question1", "question2", ...],
    "suggested_values": {{"field": "suggested_value", ...}},
    "explanation": "Brief explanation of why we need this information"
}}

Missing fields to ask about: {missing_fields}
Transaction type: {entities.transaction_type}
"""

        user_prompt = f"""We're processing a {entities.transaction_type} transaction and need to gather some missing information.

What we already know:
{json.dumps(known_info, indent=2)}

Missing fields: {missing_fields}

Generate friendly, specific questions to gather the missing information. Make suggestions when appropriate."""

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.3,
                max_tokens=400
            )

            response_text = response.choices[0].message.content.strip()

            try:
                # Tolerate Markdown-fenced JSON replies.
                result_dict = self._parse_llm_json(response_text)
                return ClarificationRequest(
                    missing_fields=missing_fields,
                    questions=result_dict.get("questions", []),
                    suggested_values=result_dict.get("suggested_values", {}),
                    explanation=result_dict.get("explanation", "I need some additional information to complete this transaction.")
                )
            except (json.JSONDecodeError, KeyError):
                # Malformed LLM output -> deterministic template questions.
                return self._generate_fallback_questions(entities, missing_fields)

        except Exception as e:
            print(f"Error generating clarification: {e}")
            return self._generate_fallback_questions(entities, missing_fields)

    def _generate_fallback_questions(self, entities: EntityExtraction, missing_fields: List[str]) -> ClarificationRequest:
        """Generate deterministic template questions when the LLM fails."""

        question_templates = {
            "product": "What product or item is involved in this transaction?",
            "quantity": f"How many units {'were purchased' if entities.transaction_type == 'purchase' else 'were sold'}?",
            "supplier": "Which supplier or vendor is this purchase from?",
            "customer": "Who is the customer for this sale?",
            "unit_price": "What is the price per unit?",
            "total_amount": "What is the total amount for this transaction?"
        }

        questions = []
        for field in missing_fields:
            # Generic phrasing for any field without a dedicated template.
            questions.append(question_templates.get(field, f"What is the {field.replace('_', ' ')}?"))

        return ClarificationRequest(
            missing_fields=missing_fields,
            questions=questions,
            suggested_values={},
            explanation="I need some additional information to complete this transaction."
        )

    def process_clarification_response(self, original_entities: EntityExtraction,
                                       missing_fields: List[str],
                                       user_response: str) -> Tuple[EntityExtraction, bool]:
        """
        Process user's response to clarification questions.

        Args:
            original_entities: Original extracted entities
            missing_fields: Fields we asked about
            user_response: User's response to our questions

        Returns:
            Tuple of (updated_entities, is_complete). On any failure the
            original entities are returned unchanged with is_complete=False.
        """

        system_prompt = f"""You are processing a user's response to clarification questions about a {original_entities.transaction_type} transaction.

Extract the missing information from the user's response. The user may:
1. Provide specific values for the missing fields
2. Say "N/A", "skip", "not applicable", or similar to indicate the field should be null
3. Ask for help or say they don't know

Missing fields we asked about: {missing_fields}

Return a JSON object with the extracted values. Use null for fields that are N/A or skipped.

Example response format:
{{
    "product": "extracted product name",
    "quantity": 10,
    "supplier": null,
    "unit_price": 5.99,
    "interpretation": "Brief explanation of what you extracted"
}}"""

        user_prompt = f"""Original transaction: {original_entities.transaction_type}
Missing fields: {missing_fields}
User's response: "{user_response}"

Extract the values for the missing fields from the user's response."""

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.1,  # near-deterministic extraction
                max_tokens=300
            )

            response_text = response.choices[0].message.content.strip()

            try:
                # Tolerate Markdown-fenced JSON replies.
                extracted_values = self._parse_llm_json(response_text)

                # Merge extracted values into the original entities.
                updated_entities = self._update_entities(original_entities, extracted_values, missing_fields)

                # Re-run the completeness check on the merged result.
                status, _ = self.analyze_transaction_completeness(updated_entities)
                is_complete = (status == ClarificationStatus.COMPLETE)

                return updated_entities, is_complete

            except (json.JSONDecodeError, KeyError) as e:
                print(f"Error parsing clarification response: {e}")
                return original_entities, False

        except Exception as e:
            print(f"Error processing clarification: {e}")
            return original_entities, False

    def _update_entities(self, original_entities: EntityExtraction,
                         extracted_values: Dict[str, Any],
                         missing_fields: List[str]) -> EntityExtraction:
        """Return a new EntityExtraction with clarification values merged in.

        Numeric fields are coerced (quantity -> int, prices -> float); values
        that fail coercion become None. The total is recomputed whenever both
        quantity and unit_price are known, keeping it consistent.
        """

        # Work on a plain dict for easier manipulation.
        entity_dict = original_entities.dict()

        # Only fields we actually asked about are updated.
        for field in missing_fields:
            if field in extracted_values:
                value = extracted_values[field]

                if field in ["quantity"] and value is not None:
                    try:
                        entity_dict[field] = int(value)
                    except (ValueError, TypeError):
                        entity_dict[field] = None
                elif field in ["unit_price", "total_amount"] and value is not None:
                    try:
                        entity_dict[field] = float(value)
                    except (ValueError, TypeError):
                        entity_dict[field] = None
                else:
                    entity_dict[field] = value

        # Keep the total consistent with quantity * unit_price.
        if entity_dict.get("quantity") and entity_dict.get("unit_price"):
            entity_dict["total_amount"] = entity_dict["quantity"] * entity_dict["unit_price"]

        return EntityExtraction(**entity_dict)

    def format_clarification_message(self, clarification: ClarificationRequest) -> str:
        """Format a clarification request as a user-friendly chat message.

        NOTE: the emoji below were mojibake in the original source and have
        been restored to the intended characters.
        """

        message = f"📋 {clarification.explanation}\n\n"

        for i, question in enumerate(clarification.questions, 1):
            message += f"{i}. {question}\n"

        # Show suggested values, if the LLM produced any.
        if clarification.suggested_values:
            message += "\n💡 Suggestions:\n"
            for field, suggestion in clarification.suggested_values.items():
                message += f"  • {field.replace('_', ' ').title()}: {suggestion}\n"

        message += "\n✨ You can say 'N/A' or 'skip' for any information that's not available."
        message += "\n📝 Please provide the missing information in your next message."

        return message
|
src/vector_store.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import chromadb
|
| 2 |
+
from sentence_transformers import SentenceTransformer
|
| 3 |
+
from typing import List, Dict, Any, Optional
|
| 4 |
+
import json
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
|
| 7 |
+
class VectorStore:
    """ChromaDB-backed semantic store for business transactions and events.

    Embeddings are computed locally with a SentenceTransformer model; if the
    model cannot be loaded, write/search operations degrade to no-ops.
    """

    def __init__(self, collection_name: str = "chatbot_events"):
        # Persist the vector data on disk next to the application.
        self.client = chromadb.PersistentClient(path="./chroma_db")
        self.collection = self.client.get_or_create_collection(name=collection_name)
        try:
            self.model = SentenceTransformer('all-MiniLM-L6-v2')
        except Exception as e:
            # Missing model/network: keep running with embeddings disabled.
            print(f"Warning: Could not load sentence transformer model: {e}")
            self.model = None

    def add_transaction_event(self, transaction_data: Dict[str, Any], user_query: str, sql_transaction_id: Optional[int] = None) -> bool:
        """Add a transaction event to the vector store.

        Args:
            transaction_data: Parsed transaction fields (type, product, ...).
            user_query: The original user message, stored for context.
            sql_transaction_id: Optional row id in the SQL store, used to
                link the vector entry back to the relational record.

        Returns:
            True on success, False when embeddings are unavailable or the
            write failed.
        """
        if not self.model:
            return False

        try:
            # Create a semantic summary of the event to embed and store.
            summary = self._create_event_summary(transaction_data, user_query)

            embedding = self.model.encode(summary).tolist()

            # Document ID includes the SQL id (when known) for easier linking.
            doc_id = f"transaction_{sql_transaction_id or 'unknown'}_{datetime.now().isoformat()}_{hash(summary) % 10000}"

            metadata = {
                "type": "transaction",
                "transaction_type": transaction_data.get("type", "unknown"),
                "timestamp": datetime.now().isoformat(),
                "user_query": user_query,
                # Full payload serialized for later retrieval.
                "data": json.dumps(transaction_data)
            }

            # Cross-reference to the relational row, when available.
            if sql_transaction_id is not None:
                metadata["sql_transaction_id"] = sql_transaction_id
                metadata["sql_table"] = f"{transaction_data.get('type', 'unknown')}s"  # purchases or sales

            self.collection.add(
                documents=[summary],
                embeddings=[embedding],
                metadatas=[metadata],
                ids=[doc_id]
            )

            return True
        except Exception as e:
            print(f"Error adding transaction event: {e}")
            return False

    def get_transaction_by_sql_id(self, sql_transaction_id: int, transaction_type: str) -> Optional[Dict[str, Any]]:
        """Retrieve the vector entry linked to a specific SQL transaction id.

        Returns a dict with 'id', 'document' and 'metadata', or None when no
        match exists or the lookup fails.
        """
        try:
            # Chroma requires an explicit $and operator when filtering on
            # more than one metadata field (a flat multi-key dict is rejected
            # by current versions).
            results = self.collection.get(
                where={"$and": [
                    {"sql_transaction_id": sql_transaction_id},
                    {"transaction_type": transaction_type},
                ]},
                limit=1
            )

            if results and results['documents']:
                return {
                    "id": results['ids'][0],
                    "document": results['documents'][0],
                    "metadata": results['metadatas'][0]
                }

            return None
        except Exception as e:
            print(f"Error retrieving transaction by SQL ID: {e}")
            return None

    def add_general_event(self, event_text: str, event_type: str = "general") -> bool:
        """Add a free-form event or note to the vector store.

        Returns True on success, False when embeddings are unavailable or
        the write failed.
        """
        if not self.model:
            return False

        try:
            embedding = self.model.encode(event_text).tolist()

            doc_id = f"event_{datetime.now().isoformat()}_{hash(event_text) % 10000}"

            self.collection.add(
                documents=[event_text],
                embeddings=[embedding],
                metadatas=[{
                    "type": event_type,
                    "timestamp": datetime.now().isoformat()
                }],
                ids=[doc_id]
            )

            return True
        except Exception as e:
            print(f"Error adding general event: {e}")
            return False

    def search_similar_events(self, query: str, n_results: int = 5) -> List[Dict[str, Any]]:
        """Search for events semantically similar to the query string.

        Returns a list of dicts with 'document', 'distance' (smaller is more
        similar) and 'metadata'; empty on failure or when embeddings are
        unavailable.
        """
        if not self.model:
            return []

        try:
            query_embedding = self.model.encode(query).tolist()

            results = self.collection.query(
                query_embeddings=[query_embedding],
                n_results=n_results
            )

            # Chroma returns parallel lists nested per-query; flatten for query 0.
            formatted_results = []
            if results['documents'] and results['documents'][0]:
                for i, doc in enumerate(results['documents'][0]):
                    result = {
                        "document": doc,
                        "distance": results['distances'][0][i] if results['distances'] else None,
                        "metadata": results['metadatas'][0][i] if results['metadatas'] else {}
                    }
                    formatted_results.append(result)

            return formatted_results
        except Exception as e:
            print(f"Error searching events: {e}")
            return []

    def get_recent_events(self, n_results: int = 10) -> List[Dict[str, Any]]:
        """Fetch up to n_results events, newest first (by stored timestamp)."""
        try:
            results = self.collection.get(
                limit=n_results,
                include=["documents", "metadatas"]
            )

            formatted_results = []
            if results['documents']:
                for i, doc in enumerate(results['documents']):
                    result = {
                        "document": doc,
                        "metadata": results['metadatas'][i] if results['metadatas'] else {}
                    }
                    formatted_results.append(result)

            # Chroma's get() has no ordering; sort here by ISO timestamp,
            # which sorts correctly as a string.
            formatted_results.sort(
                key=lambda x: x.get('metadata', {}).get('timestamp', ''),
                reverse=True
            )

            return formatted_results
        except Exception as e:
            print(f"Error getting recent events: {e}")
            return []

    def _create_event_summary(self, transaction_data: Dict[str, Any], user_query: str) -> str:
        """Build the human-readable summary string that gets embedded.

        Pipe-separated fields keep the summary compact while preserving the
        key facts (type, product, parties, amount) plus the original request.
        """
        summary_parts = []

        trans_type = transaction_data.get("type", "transaction")
        summary_parts.append(f"Business {trans_type} event:")

        if "product" in transaction_data:
            summary_parts.append(f"Product: {transaction_data['product']}")

        if "quantity" in transaction_data:
            summary_parts.append(f"Quantity: {transaction_data['quantity']}")

        if "supplier" in transaction_data:
            summary_parts.append(f"Supplier: {transaction_data['supplier']}")

        if "customer" in transaction_data:
            summary_parts.append(f"Customer: {transaction_data['customer']}")

        if "total" in transaction_data:
            # "€" restored here (was mojibake in the original source).
            summary_parts.append(f"Total amount: €{transaction_data['total']}")

        # Keep the raw user phrasing for extra semantic signal.
        summary_parts.append(f"Original request: {user_query}")

        return " | ".join(summary_parts)

    def delete_collection(self):
        """Delete the entire collection (use with caution). Returns success flag."""
        try:
            self.client.delete_collection(name=self.collection.name)
            return True
        except Exception as e:
            print(f"Error deleting collection: {e}")
            return False

    def get_collection_count(self) -> int:
        """Return the number of documents in the collection (0 on failure)."""
        try:
            return self.collection.count()
        except Exception as e:
            print(f"Error getting collection count: {e}")
            return 0
|
tests/test_chatbot.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 6 |
+
|
| 7 |
+
from chatbot import Chatbot
|
| 8 |
+
from models import ChatbotRequest
|
| 9 |
+
|
| 10 |
+
def test_chatbot():
    """End-to-end smoke test: feed representative messages through the chatbot.

    NOTE: the emoji and the euro sign in the strings below were mojibake in
    the original source and have been restored to the intended characters.
    """
    print("🧪 Testing Chatbot System")
    print("=" * 50)

    chatbot = Chatbot()

    # A mix of transactions, queries, searches and general info messages.
    test_cases = [
        "Add a purchase of 20 USB drives from TechMart at €5 each",
        "Sold 10 laptops to John Smith at €800 each",
        "Purchase 5 office chairs from Office Supplies Co at €150 per chair",
        "Show recent transactions",
        "Find USB drives",
        "Search TechMart",
        "Meeting with new supplier scheduled for next week"
    ]

    for i, test_message in enumerate(test_cases, 1):
        print(f"\n📝 Test {i}: {test_message}")
        print("-" * 50)

        request = ChatbotRequest(message=test_message)
        response = chatbot.process_message(request)

        print(f"Response: {response.response}")

        if response.entities_extracted:
            entities = response.entities_extracted
            print(f"Entities: {entities.transaction_type} - {entities.product} ({entities.quantity}x) - €{entities.total_amount}")

        if response.vector_stored:
            print("✅ Stored in vector database")

        print()

    chatbot.close()
    print("✅ All tests completed!")

if __name__ == "__main__":
    test_chatbot()
|
tests/test_intent_classifier.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 6 |
+
|
| 7 |
+
from intent_classifier import IntentClassifier, IntentType
|
| 8 |
+
|
| 9 |
+
def test_intent_classification():
|
| 10 |
+
print("π§ͺ Testing OpenAI Intent Classification")
|
| 11 |
+
print("="*60)
|
| 12 |
+
print("Note: Make sure to set OPENAI_API_KEY environment variable")
|
| 13 |
+
print("="*60)
|
| 14 |
+
|
| 15 |
+
classifier = IntentClassifier()
|
| 16 |
+
|
| 17 |
+
# Test cases with expected intents
|
| 18 |
+
test_cases = [
|
| 19 |
+
# Transaction intents
|
| 20 |
+
("Add a purchase of 20 USB drives from TechMart at β¬5 each", IntentType.TRANSACTION),
|
| 21 |
+
("Sold 10 laptops to John Smith at β¬800 each", IntentType.TRANSACTION),
|
| 22 |
+
("Purchase 5 office chairs from Office Supplies Co at β¬150 per chair", IntentType.TRANSACTION),
|
| 23 |
+
("We bought 100 pens from Staples for $2 each", IntentType.TRANSACTION),
|
| 24 |
+
|
| 25 |
+
# Query intents
|
| 26 |
+
("How many USB drives did we purchase?", IntentType.QUERY),
|
| 27 |
+
("What's the total value of all purchases?", IntentType.QUERY),
|
| 28 |
+
("Show me all sales to John Smith", IntentType.QUERY),
|
| 29 |
+
("List recent transactions", IntentType.QUERY),
|
| 30 |
+
("What's our total spending on electronics?", IntentType.QUERY),
|
| 31 |
+
|
| 32 |
+
# Semantic search intents
|
| 33 |
+
("Show me similar purchases to this one", IntentType.SEMANTIC_SEARCH),
|
| 34 |
+
("Find events related to supplier meetings", IntentType.SEMANTIC_SEARCH),
|
| 35 |
+
("What's similar to our last laptop purchase?", IntentType.SEMANTIC_SEARCH),
|
| 36 |
+
("Show me related transactions", IntentType.SEMANTIC_SEARCH),
|
| 37 |
+
|
| 38 |
+
# General info intents
|
| 39 |
+
("Meeting with new supplier scheduled for next week", IntentType.GENERAL_INFO),
|
| 40 |
+
("Remember to check inventory levels before next order", IntentType.GENERAL_INFO),
|
| 41 |
+
("The conference call went well today", IntentType.GENERAL_INFO),
|
| 42 |
+
("Don't forget to update the quarterly report", IntentType.GENERAL_INFO),
|
| 43 |
+
|
| 44 |
+
# Edge cases
|
| 45 |
+
("Hello", IntentType.GENERAL_INFO),
|
| 46 |
+
("What's the weather like?", IntentType.GENERAL_INFO),
|
| 47 |
+
("Can you help me?", IntentType.GENERAL_INFO),
|
| 48 |
+
]
|
| 49 |
+
|
| 50 |
+
correct_predictions = 0
|
| 51 |
+
total_predictions = len(test_cases)
|
| 52 |
+
|
| 53 |
+
for i, (message, expected_intent) in enumerate(test_cases, 1):
|
| 54 |
+
print(f"\nπ Test {i}: {message}")
|
| 55 |
+
print("-" * 60)
|
| 56 |
+
|
| 57 |
+
result = classifier.classify_intent(message)
|
| 58 |
+
|
| 59 |
+
print(f"Expected: {expected_intent.value}")
|
| 60 |
+
print(f"Predicted: {result.intent.value}")
|
| 61 |
+
print(f"Confidence: {result.confidence:.2f}")
|
| 62 |
+
print(f"Reasoning: {result.reasoning}")
|
| 63 |
+
|
| 64 |
+
if result.entities_hint:
|
| 65 |
+
print(f"Entities: {result.entities_hint}")
|
| 66 |
+
|
| 67 |
+
is_correct = result.intent == expected_intent
|
| 68 |
+
if is_correct:
|
| 69 |
+
print("β
CORRECT")
|
| 70 |
+
correct_predictions += 1
|
| 71 |
+
else:
|
| 72 |
+
print("β INCORRECT")
|
| 73 |
+
|
| 74 |
+
print()
|
| 75 |
+
|
| 76 |
+
# Summary
|
| 77 |
+
accuracy = correct_predictions / total_predictions
|
| 78 |
+
print("="*60)
|
| 79 |
+
print(f"π Results Summary:")
|
| 80 |
+
print(f"Correct predictions: {correct_predictions}/{total_predictions}")
|
| 81 |
+
print(f"Accuracy: {accuracy:.2%}")
|
| 82 |
+
print("="*60)
|
| 83 |
+
|
| 84 |
+
if accuracy >= 0.8:
|
| 85 |
+
print("π Excellent accuracy! Intent classification is working well.")
|
| 86 |
+
elif accuracy >= 0.6:
|
| 87 |
+
print("π Good accuracy. Consider refining prompts for better results.")
|
| 88 |
+
else:
|
| 89 |
+
print("β οΈ Low accuracy. Review and improve the classification prompts.")
|
| 90 |
+
|
| 91 |
+
if __name__ == "__main__":
|
| 92 |
+
if not os.getenv('OPENAI_API_KEY'):
|
| 93 |
+
print("β Error: OPENAI_API_KEY environment variable not set")
|
| 94 |
+
print("Please set your OpenAI API key:")
|
| 95 |
+
print("export OPENAI_API_KEY='your-key-here'")
|
| 96 |
+
sys.exit(1)
|
| 97 |
+
|
| 98 |
+
test_intent_classification()
|
tests/test_interactive_transactions.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 6 |
+
|
| 7 |
+
from chatbot import Chatbot
|
| 8 |
+
from models import ChatbotRequest
|
| 9 |
+
|
| 10 |
+
def test_interactive_transactions():
|
| 11 |
+
print("π§ͺ Testing Interactive Transaction Completion")
|
| 12 |
+
print("="*70)
|
| 13 |
+
print("Note: Make sure to set OPENAI_API_KEY environment variable")
|
| 14 |
+
print("="*70)
|
| 15 |
+
|
| 16 |
+
chatbot = Chatbot()
|
| 17 |
+
|
| 18 |
+
test_scenarios = [
|
| 19 |
+
{
|
| 20 |
+
"name": "Complete Purchase Transaction",
|
| 21 |
+
"initial": "I bought 20 USB drives from TechMart at β¬5 each",
|
| 22 |
+
"expected_complete": True,
|
| 23 |
+
"description": "Should be complete with all required fields"
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"name": "Incomplete Purchase - Missing Supplier",
|
| 27 |
+
"initial": "I bought 10 laptops at β¬800 each",
|
| 28 |
+
"clarifications": ["Electronics Plus"],
|
| 29 |
+
"expected_questions": ["supplier"],
|
| 30 |
+
"description": "Should ask for supplier information"
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"name": "Incomplete Purchase - Missing Multiple Fields",
|
| 34 |
+
"initial": "I bought some office chairs",
|
| 35 |
+
"clarifications": ["15 chairs", "Office Supplies Co", "β¬150 per chair"],
|
| 36 |
+
"expected_questions": ["quantity", "supplier", "unit_price"],
|
| 37 |
+
"description": "Should ask for quantity, supplier, and price"
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"name": "Sale with Missing Customer",
|
| 41 |
+
"initial": "Sold 5 laptops at β¬900 each",
|
| 42 |
+
"clarifications": ["ABC Corporation"],
|
| 43 |
+
"expected_questions": ["customer"],
|
| 44 |
+
"description": "Should ask for customer information"
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"name": "Transaction with N/A Fields",
|
| 48 |
+
"initial": "Bought 100 pens",
|
| 49 |
+
"clarifications": ["Staples", "$2 each", "N/A"],
|
| 50 |
+
"expected_questions": ["supplier", "unit_price"],
|
| 51 |
+
"description": "Should handle N/A responses gracefully"
|
| 52 |
+
}
|
| 53 |
+
]
|
| 54 |
+
|
| 55 |
+
for i, scenario in enumerate(test_scenarios, 1):
|
| 56 |
+
print(f"\nπ Test Scenario {i}: {scenario['name']}")
|
| 57 |
+
print("-" * 60)
|
| 58 |
+
print(f"Description: {scenario['description']}")
|
| 59 |
+
print(f"Initial input: {scenario['initial']}")
|
| 60 |
+
|
| 61 |
+
# Test initial transaction request
|
| 62 |
+
session_id = f"test_session_{i}"
|
| 63 |
+
request = ChatbotRequest(message=scenario['initial'], session_id=session_id)
|
| 64 |
+
response = chatbot.process_message(request)
|
| 65 |
+
|
| 66 |
+
print(f"\nπ€ Initial Response:")
|
| 67 |
+
print(response.response)
|
| 68 |
+
|
| 69 |
+
if response.awaiting_clarification:
|
| 70 |
+
print(f"β
Correctly identified as incomplete transaction")
|
| 71 |
+
|
| 72 |
+
# Process clarifications if provided
|
| 73 |
+
if "clarifications" in scenario:
|
| 74 |
+
print(f"\nπ Providing clarifications...")
|
| 75 |
+
|
| 76 |
+
for j, clarification in enumerate(scenario["clarifications"], 1):
|
| 77 |
+
print(f"\n Clarification {j}: {clarification}")
|
| 78 |
+
|
| 79 |
+
clarification_request = ChatbotRequest(
|
| 80 |
+
message=clarification,
|
| 81 |
+
session_id=session_id
|
| 82 |
+
)
|
| 83 |
+
clarification_response = chatbot.process_message(clarification_request)
|
| 84 |
+
|
| 85 |
+
print(f" π€ Response: {clarification_response.response[:100]}{'...' if len(clarification_response.response) > 100 else ''}")
|
| 86 |
+
|
| 87 |
+
if not clarification_response.awaiting_clarification:
|
| 88 |
+
print(f" β
Transaction completed!")
|
| 89 |
+
break
|
| 90 |
+
else:
|
| 91 |
+
print(f" β³ Still waiting for more information...")
|
| 92 |
+
else:
|
| 93 |
+
if scenario.get("expected_complete", False):
|
| 94 |
+
print(f"β
Correctly completed transaction without clarification")
|
| 95 |
+
else:
|
| 96 |
+
print(f"β Expected clarification but transaction was completed")
|
| 97 |
+
|
| 98 |
+
print(f"\nIntent detected: {response.intent_detected}")
|
| 99 |
+
if response.entities_extracted:
|
| 100 |
+
entities = response.entities_extracted
|
| 101 |
+
print(f"Entities: {entities.transaction_type} - {entities.product} ({entities.quantity}x) - β¬{entities.total_amount}")
|
| 102 |
+
|
| 103 |
+
print("\n" + "="*60)
|
| 104 |
+
|
| 105 |
+
print("\nπ§ͺ Testing Edge Cases")
|
| 106 |
+
print("-" * 40)
|
| 107 |
+
|
| 108 |
+
# Test cancellation
|
| 109 |
+
print("\nπ Testing Transaction Cancellation")
|
| 110 |
+
request = ChatbotRequest(message="I bought some items", session_id="cancel_test")
|
| 111 |
+
response = chatbot.process_message(request)
|
| 112 |
+
|
| 113 |
+
if response.awaiting_clarification:
|
| 114 |
+
print("β
Transaction requires clarification")
|
| 115 |
+
cancel_request = ChatbotRequest(message="cancel", session_id="cancel_test")
|
| 116 |
+
cancel_response = chatbot.process_message(cancel_request)
|
| 117 |
+
print(f"π€ Cancel response: {cancel_response.response}")
|
| 118 |
+
|
| 119 |
+
if not cancel_response.awaiting_clarification:
|
| 120 |
+
print("β
Transaction successfully cancelled")
|
| 121 |
+
else:
|
| 122 |
+
print("β Transaction not properly cancelled")
|
| 123 |
+
|
| 124 |
+
# Test invalid session
|
| 125 |
+
print("\nπ Testing Invalid Session Response")
|
| 126 |
+
invalid_request = ChatbotRequest(message="More information here", session_id="nonexistent")
|
| 127 |
+
invalid_response = chatbot.process_message(invalid_request)
|
| 128 |
+
print(f"π€ Invalid session response: {invalid_response.response}")
|
| 129 |
+
|
| 130 |
+
chatbot.close()
|
| 131 |
+
print("\nβ
Interactive transaction tests completed!")
|
| 132 |
+
|
| 133 |
+
def test_clarification_quality():
|
| 134 |
+
print("\n㪠Testing Clarification Question Quality")
|
| 135 |
+
print("-" * 50)
|
| 136 |
+
|
| 137 |
+
chatbot = Chatbot()
|
| 138 |
+
|
| 139 |
+
# Test various incomplete scenarios to see question quality
|
| 140 |
+
incomplete_scenarios = [
|
| 141 |
+
"I bought something expensive",
|
| 142 |
+
"Purchase from TechMart",
|
| 143 |
+
"Sold items to a customer",
|
| 144 |
+
"β¬1000 transaction yesterday",
|
| 145 |
+
"Bought 50 units"
|
| 146 |
+
]
|
| 147 |
+
|
| 148 |
+
for i, scenario in enumerate(incomplete_scenarios, 1):
|
| 149 |
+
print(f"\nπ Scenario {i}: {scenario}")
|
| 150 |
+
print("-" * 30)
|
| 151 |
+
|
| 152 |
+
request = ChatbotRequest(message=scenario, session_id=f"quality_test_{i}")
|
| 153 |
+
response = chatbot.process_message(request)
|
| 154 |
+
|
| 155 |
+
if response.awaiting_clarification:
|
| 156 |
+
print("π Clarification questions generated:")
|
| 157 |
+
# Extract questions from response for analysis
|
| 158 |
+
lines = response.response.split('\n')
|
| 159 |
+
questions = [line.strip() for line in lines if line.strip() and any(char.isdigit() and line.strip().startswith(char) for char in '123456789')]
|
| 160 |
+
|
| 161 |
+
for q in questions[:3]: # Show first 3 questions
|
| 162 |
+
print(f" β’ {q}")
|
| 163 |
+
|
| 164 |
+
print(f"β
Generated {len(questions)} clarification questions")
|
| 165 |
+
else:
|
| 166 |
+
print("β No clarification requested (unexpected)")
|
| 167 |
+
|
| 168 |
+
chatbot.close()
|
| 169 |
+
print("\nβ
Clarification quality tests completed!")
|
| 170 |
+
|
| 171 |
+
if __name__ == "__main__":
|
| 172 |
+
if not os.getenv('OPENAI_API_KEY'):
|
| 173 |
+
print("β Error: OPENAI_API_KEY environment variable not set")
|
| 174 |
+
print("Please set your OpenAI API key:")
|
| 175 |
+
print("export OPENAI_API_KEY='your-key-here'")
|
| 176 |
+
sys.exit(1)
|
| 177 |
+
|
| 178 |
+
test_interactive_transactions()
|
| 179 |
+
test_clarification_quality()
|
tests/test_nl_search.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 6 |
+
|
| 7 |
+
from chatbot import Chatbot
|
| 8 |
+
from models import ChatbotRequest
|
| 9 |
+
|
| 10 |
+
def test_natural_language_search():
|
| 11 |
+
print("π§ͺ Testing Natural Language to SQL Search")
|
| 12 |
+
print("="*60)
|
| 13 |
+
print("Note: Make sure to set OPENAI_API_KEY environment variable")
|
| 14 |
+
print("="*60)
|
| 15 |
+
|
| 16 |
+
chatbot = Chatbot()
|
| 17 |
+
|
| 18 |
+
# First add some test data
|
| 19 |
+
setup_queries = [
|
| 20 |
+
"Add a purchase of 20 USB drives from TechMart at β¬5 each",
|
| 21 |
+
"Add a purchase of 10 laptops from Electronics Plus at β¬800 each",
|
| 22 |
+
"Sold 5 USB drives to John Smith at β¬7 each",
|
| 23 |
+
"Sold 2 laptops to ABC Corp at β¬900 each"
|
| 24 |
+
]
|
| 25 |
+
|
| 26 |
+
print("π Setting up test data...")
|
| 27 |
+
for query in setup_queries:
|
| 28 |
+
request = ChatbotRequest(message=query)
|
| 29 |
+
response = chatbot.process_message(request)
|
| 30 |
+
print(f"β {query}")
|
| 31 |
+
|
| 32 |
+
print("\nπ Testing Natural Language Queries...")
|
| 33 |
+
print("-" * 60)
|
| 34 |
+
|
| 35 |
+
# Test natural language search queries
|
| 36 |
+
test_queries = [
|
| 37 |
+
"How many USB drives did we purchase?",
|
| 38 |
+
"What's the total value of all purchases?",
|
| 39 |
+
"Show me all sales to John Smith",
|
| 40 |
+
"Which suppliers have we bought from?",
|
| 41 |
+
"What products did we sell this month?",
|
| 42 |
+
"Show me the most expensive purchases",
|
| 43 |
+
"How much revenue did we generate from laptop sales?",
|
| 44 |
+
"List all transactions with TechMart",
|
| 45 |
+
"What's our total spending on electronics?",
|
| 46 |
+
"Show me customers who bought laptops"
|
| 47 |
+
]
|
| 48 |
+
|
| 49 |
+
for i, query in enumerate(test_queries, 1):
|
| 50 |
+
print(f"\nπ Test {i}: {query}")
|
| 51 |
+
print("-" * 50)
|
| 52 |
+
|
| 53 |
+
request = ChatbotRequest(message=query)
|
| 54 |
+
response = chatbot.process_message(request)
|
| 55 |
+
|
| 56 |
+
print(f"Response: {response.response}")
|
| 57 |
+
|
| 58 |
+
if response.sql_executed:
|
| 59 |
+
print(f"Generated SQL: {response.sql_executed}")
|
| 60 |
+
|
| 61 |
+
if response.intent_detected:
|
| 62 |
+
print(f"Intent: {response.intent_detected} (confidence: {response.intent_confidence:.2f})")
|
| 63 |
+
|
| 64 |
+
print()
|
| 65 |
+
|
| 66 |
+
chatbot.close()
|
| 67 |
+
print("β
Natural language search tests completed!")
|
| 68 |
+
|
| 69 |
+
if __name__ == "__main__":
|
| 70 |
+
if not os.getenv('OPENAI_API_KEY'):
|
| 71 |
+
print("β Error: OPENAI_API_KEY environment variable not set")
|
| 72 |
+
print("Please set your OpenAI API key:")
|
| 73 |
+
print("export OPENAI_API_KEY='your-key-here'")
|
| 74 |
+
sys.exit(1)
|
| 75 |
+
|
| 76 |
+
test_natural_language_search()
|
tests/test_rag_search.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
|
| 6 |
+
|
| 7 |
+
from chatbot import Chatbot
|
| 8 |
+
from models import ChatbotRequest
|
| 9 |
+
|
| 10 |
+
def test_rag_functionality():
|
| 11 |
+
print("π§ͺ Testing RAG (Retrieval-Augmented Generation) Functionality")
|
| 12 |
+
print("="*70)
|
| 13 |
+
print("Note: Make sure to set OPENAI_API_KEY environment variable")
|
| 14 |
+
print("="*70)
|
| 15 |
+
|
| 16 |
+
chatbot = Chatbot()
|
| 17 |
+
|
| 18 |
+
# First, populate the system with diverse data
|
| 19 |
+
setup_data = [
|
| 20 |
+
# Transaction data
|
| 21 |
+
"Add a purchase of 20 USB drives from TechMart at β¬5 each",
|
| 22 |
+
"Add a purchase of 10 laptops from Electronics Plus at β¬800 each",
|
| 23 |
+
"Sold 5 USB drives to John Smith at β¬7 each",
|
| 24 |
+
"Sold 2 laptops to ABC Corp at β¬900 each",
|
| 25 |
+
"Purchase 15 office chairs from Office Supplies Co at β¬150 per chair",
|
| 26 |
+
|
| 27 |
+
# Business events and meetings
|
| 28 |
+
"Meeting with George scheduled for next Tuesday at 2 PM to discuss new laptop supplier contract",
|
| 29 |
+
"Conference call with TechMart went well - they agreed to bulk discounts for USB drives",
|
| 30 |
+
"Quarterly review meeting completed - need to increase laptop inventory before Q4",
|
| 31 |
+
"Supplier evaluation: Electronics Plus provides excellent laptops but delivery times are slow",
|
| 32 |
+
"Team meeting notes: Focus on ergonomic office furniture for the new office space",
|
| 33 |
+
"Customer feedback: John Smith very satisfied with USB drive quality and pricing",
|
| 34 |
+
"Important reminder: Check inventory levels before placing next electronics order",
|
| 35 |
+
"Budget planning: Allocate β¬50,000 for office equipment in next quarter"
|
| 36 |
+
]
|
| 37 |
+
|
| 38 |
+
print("π Setting up test data...")
|
| 39 |
+
for i, data in enumerate(setup_data, 1):
|
| 40 |
+
request = ChatbotRequest(message=data)
|
| 41 |
+
response = chatbot.process_message(request)
|
| 42 |
+
print(f"β {i:2d}. {data[:60]}{'...' if len(data) > 60 else ''}")
|
| 43 |
+
|
| 44 |
+
print(f"\nβ
Setup complete! Added {len(setup_data)} records.")
|
| 45 |
+
print("\nπ Testing RAG-powered semantic search...")
|
| 46 |
+
print("-" * 70)
|
| 47 |
+
|
| 48 |
+
# Test various types of semantic search queries
|
| 49 |
+
test_queries = [
|
| 50 |
+
# Meeting and event queries
|
| 51 |
+
"When is my meeting with George?",
|
| 52 |
+
"What was discussed in the TechMart meeting?",
|
| 53 |
+
"Tell me about recent meetings and discussions",
|
| 54 |
+
|
| 55 |
+
# Product and supplier queries
|
| 56 |
+
"What do we know about TechMart as a supplier?",
|
| 57 |
+
"Show me information about laptop purchases and suppliers",
|
| 58 |
+
"What feedback have we received about our products?",
|
| 59 |
+
|
| 60 |
+
# Business planning queries
|
| 61 |
+
"What are our budget plans for next quarter?",
|
| 62 |
+
"What inventory considerations should I be aware of?",
|
| 63 |
+
"Tell me about office equipment and furniture plans",
|
| 64 |
+
|
| 65 |
+
# Customer information
|
| 66 |
+
"What do we know about John Smith?",
|
| 67 |
+
"Show me customer feedback and satisfaction information",
|
| 68 |
+
|
| 69 |
+
# Operational queries
|
| 70 |
+
"What reminders and important notes do I have?",
|
| 71 |
+
"Tell me about supplier evaluations and performance",
|
| 72 |
+
"What are the key business insights from recent records?"
|
| 73 |
+
]
|
| 74 |
+
|
| 75 |
+
for i, query in enumerate(test_queries, 1):
|
| 76 |
+
print(f"\nπ Test {i}: {query}")
|
| 77 |
+
print("-" * 50)
|
| 78 |
+
|
| 79 |
+
request = ChatbotRequest(message=query)
|
| 80 |
+
response = chatbot.process_message(request)
|
| 81 |
+
|
| 82 |
+
print(f"π€ Response: {response.response}")
|
| 83 |
+
|
| 84 |
+
if response.intent_detected:
|
| 85 |
+
print(f"π― Intent: {response.intent_detected} (confidence: {response.intent_confidence:.2f})")
|
| 86 |
+
|
| 87 |
+
print()
|
| 88 |
+
|
| 89 |
+
chatbot.close()
|
| 90 |
+
print("β
RAG functionality tests completed!")
|
| 91 |
+
print("\nπ Expected Behavior:")
|
| 92 |
+
print("- RAG should provide contextual, specific answers based on stored information")
|
| 93 |
+
print("- Responses should cite relevant details from business records")
|
| 94 |
+
print("- Should handle queries about meetings, suppliers, customers, and business plans")
|
| 95 |
+
print("- Should indicate when information is not available in the records")
|
| 96 |
+
|
| 97 |
+
def test_rag_edge_cases():
|
| 98 |
+
print("\n㪠Testing RAG Edge Cases")
|
| 99 |
+
print("-" * 40)
|
| 100 |
+
|
| 101 |
+
chatbot = Chatbot()
|
| 102 |
+
|
| 103 |
+
edge_case_queries = [
|
| 104 |
+
"Tell me about suppliers we've never worked with",
|
| 105 |
+
"What happened in 1995?",
|
| 106 |
+
"Show me information about flying cars",
|
| 107 |
+
"What's the weather like today?",
|
| 108 |
+
"Tell me about George's favorite color"
|
| 109 |
+
]
|
| 110 |
+
|
| 111 |
+
for i, query in enumerate(edge_case_queries, 1):
|
| 112 |
+
print(f"\nπ Edge Case {i}: {query}")
|
| 113 |
+
print("-" * 30)
|
| 114 |
+
|
| 115 |
+
request = ChatbotRequest(message=query)
|
| 116 |
+
response = chatbot.process_message(request)
|
| 117 |
+
|
| 118 |
+
print(f"π€ Response: {response.response}")
|
| 119 |
+
print()
|
| 120 |
+
|
| 121 |
+
chatbot.close()
|
| 122 |
+
print("β
Edge case testing completed!")
|
| 123 |
+
|
| 124 |
+
if __name__ == "__main__":
|
| 125 |
+
if not os.getenv('OPENAI_API_KEY'):
|
| 126 |
+
print("β Error: OPENAI_API_KEY environment variable not set")
|
| 127 |
+
print("Please set your OpenAI API key:")
|
| 128 |
+
print("export OPENAI_API_KEY='your-key-here'")
|
| 129 |
+
sys.exit(1)
|
| 130 |
+
|
| 131 |
+
test_rag_functionality()
|
| 132 |
+
test_rag_edge_cases()
|