Ancastal commited on
Commit
401b16c
Β·
verified Β·
1 Parent(s): c4f9694

Upload folder using huggingface_hub

Browse files
.env.example ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # OpenAI API Configuration
2
+ OPENAI_API_KEY=your_openai_api_key_here
3
+
4
+ # Optional: Change the model used for NL to SQL conversion
5
+ # OPENAI_MODEL=gpt-3.5-turbo
.github/workflows/deploy.yml ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Builds the project, runs the optional test suite, and publishes the app to
# Hugging Face Spaces (and optionally Railway) on pushes to main.
#
# Security notes:
#   * Secrets are handed to steps through `env:` only. They are never spliced
#     into script text and never written into files inside the workspace,
#     because the whole workspace is later uploaded to the Space.
#   * A failed Space creation or upload fails the job instead of being printed
#     and ignored.
name: Deploy Gradio App

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Cache pip dependencies
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Download spaCy model
        run: python -m spacy download en_core_web_sm

      # The API key is deliberately NOT written into this file: config.yaml is
      # part of the folder uploaded to the Space below, so a key stored here
      # would be published. Steps that need the key receive OPENAI_API_KEY
      # through their environment instead.
      # NOTE(review): this overwrites the config.yaml committed in the
      # repository, which uses a different schema - confirm that is intended.
      - name: Create config file
        run: |
          cat > config.yaml << EOF
          openai:
            model: "gpt-3.5-turbo"
            max_tokens: 1500
            temperature: 0.7

          database:
            url: "sqlite:///chatbot.db"

          vector_store:
            persist_directory: "./chroma_db"
            collection_name: "business_transactions"

          intent_classifier:
            confidence_threshold: 0.7

          entity_extraction:
            spacy_model: "en_core_web_sm"
          EOF

      - name: Initialize database
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: python -c "from src.database_manager import DatabaseManager; db = DatabaseManager(); db.create_tables()"

      # Best-effort: test failures are reported but do not block the deploy.
      - name: Run tests (if available)
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          if [ -d "tests" ] && [ -n "$(ls -A tests/*.py 2>/dev/null)" ]; then
            python -m pytest tests/ -v
          else
            echo "No tests found, skipping test step"
          fi
        continue-on-error: true

      - name: Deploy to Hugging Face Spaces
        if: github.ref == 'refs/heads/main'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          # Passed as data so the repository owner is never interpolated into
          # the Python source below.
          SPACE_ID: ${{ github.repository_owner }}/llm-chatbot
        run: |
          # Install huggingface_hub
          pip install huggingface_hub

          # Create a simple app.py for HF Spaces
          cat > app.py << 'EOF'
          #!/usr/bin/env python3
          import os
          import sys
          from pathlib import Path

          # Add gui directory to path
          gui_dir = Path(__file__).parent / "gui"
          sys.path.append(str(gui_dir))

          if __name__ == "__main__":
              from gradio_interface import GradioInterface

              gui = GradioInterface()
              gui.launch(
                  server_name="0.0.0.0",
                  server_port=7860,
                  share=False,
                  debug=False
              )
          EOF

          # Create requirements.txt for HF Spaces
          cp requirements.txt requirements_hf.txt

          # Upload to Hugging Face Spaces. Exceptions are left uncaught on
          # purpose: a non-zero exit marks the step (and the deploy) as failed.
          python -c "
          import os
          from huggingface_hub import HfApi, upload_folder

          space_id = os.environ['SPACE_ID']
          api = HfApi(token=os.environ['HF_TOKEN'])

          # Create or update the space
          api.create_repo(
              repo_id=space_id,
              repo_type='space',
              space_sdk='gradio',
              exist_ok=True
          )
          print('Space created/updated successfully')

          # Upload files
          upload_folder(
              folder_path='.',
              repo_id=space_id,
              repo_type='space',
              token=os.environ['HF_TOKEN'],
              ignore_patterns=['.git*', '__pycache__', '*.pyc', 'chroma_db', '*.db']
          )
          print('Files uploaded successfully')
          "

      # NOTE(review): GitHub's documented pattern for secret-dependent
      # conditions defines the variable at job level; confirm that this
      # step-level env is visible to the `if` expression.
      - name: Deploy to Railway (Alternative)
        if: github.ref == 'refs/heads/main' && env.RAILWAY_TOKEN != ''
        env:
          RAILWAY_TOKEN: ${{ secrets.RAILWAY_TOKEN }}
        run: |
          # Install Railway CLI
          npm install -g @railway/cli

          # Create Procfile for Railway
          echo "web: python run_gui.py --host 0.0.0.0 --port \$PORT" > Procfile

          # Deploy to Railway
          railway login --token "$RAILWAY_TOKEN"
          railway up
        continue-on-error: true

      - name: Deploy Summary
        env:
          # Read from the environment so the secret is not embedded in the
          # generated shell script.
          RAILWAY_TOKEN: ${{ secrets.RAILWAY_TOKEN }}
        run: |
          echo "πŸš€ Deployment completed!"
          echo "πŸ“± Your Gradio app should be available at:"
          echo " - Hugging Face Spaces: https://huggingface.co/spaces/${{ github.repository_owner }}/llm-chatbot"
          if [ -n "$RAILWAY_TOKEN" ]; then
            echo " - Railway: Check Railway dashboard for URL"
          fi
          echo "πŸ”§ Make sure to set the required secrets in your repository:"
          echo " - OPENAI_API_KEY: Your OpenAI API key"
          echo " - HF_TOKEN: Your Hugging Face token"
          echo " - RAILWAY_TOKEN: Your Railway token (optional)"
.github/workflows/update_space.yml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Deploys the Gradio app to Hugging Face Spaces with `gradio deploy` on every
# push to main.
name: Run Python script

on:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      # Action major versions match the ones used in deploy.yml.
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      - name: Install Gradio
        run: python -m pip install gradio

      # The token is read from the environment rather than interpolated into
      # the command text, so it never becomes part of the script source.
      - name: Log in to Hugging Face
        env:
          HF_TOKEN: ${{ secrets.hf_token }}
        run: python -c 'import os, huggingface_hub; huggingface_hub.login(token=os.environ["HF_TOKEN"])'

      - name: Deploy to Spaces
        run: gradio deploy
.gitignore ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.sqlite
2
+ *.db
3
+ *.json
4
+ *.sql
5
+ *.bin
6
+ chroma_db/*
7
+
8
+
9
+ # Byte-compiled / optimized / DLL files
10
+ __pycache__/
11
+ *.py[codz]
12
+ *$py.class
13
+
14
+ # C extensions
15
+ *.so
16
+
17
+ # Distribution / packaging
18
+ .Python
19
+ build/
20
+ develop-eggs/
21
+ dist/
22
+ downloads/
23
+ eggs/
24
+ .eggs/
25
+ lib/
26
+ lib64/
27
+ parts/
28
+ sdist/
29
+ var/
30
+ wheels/
31
+ share/python-wheels/
32
+ *.egg-info/
33
+ .installed.cfg
34
+ *.egg
35
+ MANIFEST
36
+
37
+ # PyInstaller
38
+ # Usually these files are written by a python script from a template
39
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
40
+ *.manifest
41
+ *.spec
42
+
43
+ # Installer logs
44
+ pip-log.txt
45
+ pip-delete-this-directory.txt
46
+
47
+ # Unit test / coverage reports
48
+ htmlcov/
49
+ .tox/
50
+ .nox/
51
+ .coverage
52
+ .coverage.*
53
+ .cache
54
+ nosetests.xml
55
+ coverage.xml
56
+ *.cover
57
+ *.py.cover
58
+ .hypothesis/
59
+ .pytest_cache/
60
+ cover/
61
+
62
+ # Translations
63
+ *.mo
64
+ *.pot
65
+
66
+ # Django stuff:
67
+ *.log
68
+ local_settings.py
69
+ db.sqlite3
70
+ db.sqlite3-journal
71
+
72
+ # Flask stuff:
73
+ instance/
74
+ .webassets-cache
75
+
76
+ # Scrapy stuff:
77
+ .scrapy
78
+
79
+ # Sphinx documentation
80
+ docs/_build/
81
+
82
+ # PyBuilder
83
+ .pybuilder/
84
+ target/
85
+
86
+ # Jupyter Notebook
87
+ .ipynb_checkpoints
88
+
89
+ # IPython
90
+ profile_default/
91
+ ipython_config.py
92
+
93
+ # pyenv
94
+ # For a library or package, you might want to ignore these files since the code is
95
+ # intended to run in multiple environments; otherwise, check them in:
96
+ # .python-version
97
+
98
+ # pipenv
99
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
100
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
101
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
102
+ # install all needed dependencies.
103
+ #Pipfile.lock
104
+
105
+ # UV
106
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
107
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
108
+ # commonly ignored for libraries.
109
+ #uv.lock
110
+
111
+ # poetry
112
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
113
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
114
+ # commonly ignored for libraries.
115
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
116
+ #poetry.lock
117
+ #poetry.toml
118
+
119
+ # pdm
120
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
121
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
122
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
123
+ #pdm.lock
124
+ #pdm.toml
125
+ .pdm-python
126
+ .pdm-build/
127
+
128
+ # pixi
129
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
130
+ #pixi.lock
131
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
132
+ # in the .venv directory. It is recommended not to include this directory in version control.
133
+ .pixi
134
+
135
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
136
+ __pypackages__/
137
+
138
+ # Celery stuff
139
+ celerybeat-schedule
140
+ celerybeat.pid
141
+
142
+ # SageMath parsed files
143
+ *.sage.py
144
+
145
+ # Environments
146
+ .env
147
+ .envrc
148
+ .venv
149
+ env/
150
+ venv/
151
+ ENV/
152
+ env.bak/
153
+ venv.bak/
154
+
155
+ # Spyder project settings
156
+ .spyderproject
157
+ .spyproject
158
+
159
+ # Rope project settings
160
+ .ropeproject
161
+
162
+ # mkdocs documentation
163
+ /site
164
+
165
+ # mypy
166
+ .mypy_cache/
167
+ .dmypy.json
168
+ dmypy.json
169
+
170
+ # Pyre type checker
171
+ .pyre/
172
+
173
+ # pytype static type analyzer
174
+ .pytype/
175
+
176
+ # Cython debug symbols
177
+ cython_debug/
178
+
179
+ # PyCharm
180
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
181
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
182
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
183
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
184
+ #.idea/
185
+
186
+ # Abstra
187
+ # Abstra is an AI-powered process automation framework.
188
+ # Ignore directories containing user credentials, local state, and settings.
189
+ # Learn more at https://abstra.io/docs
190
+ .abstra/
191
+
192
+ # Visual Studio Code
193
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
194
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
195
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
196
+ # you could uncomment the following to ignore the entire vscode folder
197
+ # .vscode/
198
+
199
+ # Ruff stuff:
200
+ .ruff_cache/
201
+
202
+ # PyPI configuration file
203
+ .pypirc
204
+
205
+ # Cursor
206
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
207
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
208
+ # refer to https://docs.cursor.com/context/ignore-files
209
+ .cursorignore
210
+ .cursorindexingignore
211
+
212
+ # Marimo
213
+ marimo/_static/
214
+ marimo/_lsp/
215
+ __marimo__/
README.md CHANGED
@@ -1,12 +1,186 @@
1
  ---
2
- title: Business Chatbot
3
- emoji: πŸ“ˆ
4
- colorFrom: blue
5
- colorTo: green
6
  sdk: gradio
7
  sdk_version: 5.34.2
8
- app_file: app.py
9
- pinned: false
10
  ---
 
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Business_Chatbot
3
+ app_file: main.py
 
 
4
  sdk: gradio
5
  sdk_version: 5.34.2
 
 
6
  ---
7
+ # LLM Chatbot with SQL Database and Vector Store
8
 
9
+ A research-grade chatbot system that processes user queries to extract entities, store transactions in a SQL database, and maintain semantic search capabilities through vector storage.
10
+
11
+ ## Features
12
+
13
+ - **Entity Extraction**: Automatically extracts products, quantities, suppliers, customers, and prices from natural language
14
+ - **Interactive Transaction Completion**: LLM-powered clarification for missing information
15
+ - **SQL Database**: Stores structured sales and purchase data with relationships
16
+ - **Natural Language to SQL**: Uses OpenAI GPT to convert plain English queries to SQL
17
+ - **RAG (Retrieval-Augmented Generation)**: Intelligent responses using LLM with retrieved context
18
+ - **Vector Store**: Enables semantic search of events and transactions
19
+ - **Query Validation**: Ensures generated SQL queries are safe and valid
20
+
21
+ ## Architecture
22
+
23
+ ```
24
+ User Input: "Add a purchase of 20 USB drives from TechMart at €5 each"
25
+ ↓
26
+ 1. Entity Extraction → product: USB drives, quantity: 20, supplier: TechMart, unit price: €5
27
+ ↓
28
+ 2. SQL Generation → INSERT INTO purchases (supplier_id, product_id, quantity, unit_price, total_cost)
29
+ ↓
30
+ 3. Vector Storage → Embed and store semantic summary of the event
31
+ ```
32
+
33
+ ## Setup
34
+
35
+ 1. Install dependencies:
36
+ ```bash
37
+ pip install -r requirements.txt
38
+ ```
39
+
40
+ 2. Download spaCy model:
41
+ ```bash
42
+ python -m spacy download en_core_web_sm
43
+ ```
44
+
45
+ 3. Set up OpenAI API key:
46
+ ```bash
47
+ export OPENAI_API_KEY='your-openai-api-key-here'
48
+ ```
49
+
50
+ 4. Run the chatbot:
51
+ ```bash
52
+ python main.py
53
+ ```
54
+
55
+ 5. Test the system:
56
+ ```bash
57
+ python test_chatbot.py
58
+ python test_nl_search.py # Test natural language search
59
+ python test_intent_classifier.py # Test intent classification
60
+ python test_rag_search.py # Test RAG functionality
61
+ python test_interactive_transactions.py # Test interactive transaction completion
62
+ ```
63
+
64
+ ## Usage Examples
65
+
66
+ ### Adding Transactions (Interactive)
67
+ - `"Add a purchase of 20 USB drives from TechMart at €5 each"` (Complete)
68
+ - `"I bought some laptops"` (Will ask for: quantity, supplier, price)
69
+ - `"Sold items to a customer"` (Will ask for: product, quantity, customer, price)
70
+ - User can respond with specific details or say "N/A" for optional fields
71
+
72
+ ### Querying Data (Natural Language to SQL)
73
+ - `"How many USB drives did we purchase?"`
74
+ - `"What's the total value of all purchases?"`
75
+ - `"Show me all sales to John Smith"`
76
+ - `"Which suppliers have we bought from?"`
77
+ - `"What's our total spending on electronics?"`
78
+ - `"Show me the most expensive purchases"`
79
+
80
+ ### Semantic Search (RAG-powered)
81
+ - `"When is my meeting with George?"`
82
+ - `"What do we know about TechMart as a supplier?"`
83
+ - `"Tell me about recent meetings and discussions"`
84
+ - `"Show me customer feedback and satisfaction information"`
85
+
86
+ ### General Information
87
+ - `"Meeting with new supplier scheduled for next week"`
88
+ - `"Important: Check inventory levels before next order"`
89
+
90
+ ## Database Schema
91
+
92
+ - **suppliers**: Company information
93
+ - **customers**: Customer details
94
+ - **products**: Product catalog
95
+ - **purchases**: Purchase transactions
96
+ - **sales**: Sales transactions
97
+
98
+ ## Vector Store
99
+
100
+ Uses ChromaDB with sentence transformers for semantic similarity search of:
101
+ - Transaction summaries
102
+ - General business events
103
+ - Meeting notes and reminders
104
+
105
+ ## File Structure
106
+
107
+ ```
108
+ ├── src/
109
+ │   ├── models.py # Data models and schemas
110
+ │   ├── entity_extractor.py # NLP entity extraction
111
+ │   ├── database_manager.py # SQL database operations
112
+ │   ├── vector_store.py # Semantic search functionality
113
+ │   ├── nl_to_sql.py # OpenAI-powered natural language to SQL
114
+ │   ├── intent_classifier.py # OpenAI-powered intent classification
115
+ │   ├── rag_handler.py # RAG (Retrieval-Augmented Generation)
116
+ │   ├── transaction_clarifier.py # Interactive transaction completion
117
+ │   └── chatbot.py # Main chatbot logic
118
+ ├── database/
119
+ │   └── schema.sql # Database schema
120
+ ├── main.py # Interactive chatbot interface
121
+ ├── test_chatbot.py # Test suite
122
+ ├── test_nl_search.py # Natural language search tests
123
+ ├── test_intent_classifier.py # Intent classification tests
124
+ ├── test_rag_search.py # RAG functionality tests
125
+ ├── test_interactive_transactions.py # Interactive transaction tests
126
+ ├── .env.example # Environment variables template
127
+ └── requirements.txt # Python dependencies
128
+ ```
129
+
130
+ ## Research Applications
131
+
132
+ This system demonstrates:
133
+ - Multi-modal data storage (structured + vector)
134
+ - LLM-powered natural language to SQL conversion
135
+ - RAG (Retrieval-Augmented Generation) for intelligent responses
136
+ - Interactive transaction completion with missing information handling
137
+ - OpenAI-based intent classification
138
+ - Multi-turn conversation state management
139
+ - Semantic similarity search with embeddings
140
+ - Named entity recognition and extraction
141
+ - Query validation and SQL injection prevention
142
+ - Conversational business process automation
143
+
144
+ ## Future Work
145
+
146
+ ### Intent Classification Improvements
147
+
148
+ The current system uses OpenAI API for intent classification, which provides excellent accuracy but has some limitations:
149
+
150
+ **Current Limitations:**
151
+ - Requires internet connectivity and API calls for each message
152
+ - Dependent on OpenAI service availability and costs
153
+ - May have latency for real-time applications
154
+ - Limited customization for domain-specific intents
155
+
156
+ **Potential Improvements:**
157
+
158
+ 1. **Fine-tuned Classification Models**
159
+ - Train a smaller, specialized model on business transaction data
160
+ - Use frameworks like Hugging Face Transformers with custom datasets
161
+ - Deploy locally for faster inference and offline capability
162
+ - Examples: DistilBERT, RoBERTa fine-tuned on business intent data
163
+
164
+ 2. **Local LLM Integration**
165
+ - Replace OpenAI API with local models (Llama, Mistral, etc.)
166
+ - Use frameworks like Ollama, LangChain, or vLLM for local deployment
167
+ - Maintain privacy while reducing external dependencies
168
+ - Cost-effective for high-volume applications
169
+
170
+ 3. **Intent Embedding Approaches**
171
+ - Create vector embeddings for known intent patterns
172
+ - Use similarity search instead of generative classification
173
+ - Combine with few-shot learning for new intent types
174
+ - More efficient for simple intent detection scenarios
175
+
176
+ 4. **Hybrid Approaches**
177
+ - Combine rule-based filtering with LLM classification
178
+ - Use confidence thresholds to decide when to query LLM
179
+ - Cache common patterns to reduce API calls
180
+ - Implement progressive enhancement from simple to complex classification
181
+
182
+ 5. **Domain-Specific Enhancements**
183
+ - Add business context and domain knowledge
184
+ - Implement multi-intent detection for complex queries
185
+ - Add conversation history context for better classification
186
+ - Support for industry-specific terminology and patterns
add_sample_data.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Add sample data to the chatbot database for testing the dashboard.
5
+ This script adds realistic business transactions to populate the dashboard.
6
+ """
7
+
8
+ import sys
9
+ import os
10
+ sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
11
+
12
+ from chatbot import Chatbot
13
+ from models import ChatbotRequest
14
+
15
def add_sample_data():
    """Feed sample purchase and sale sentences through the chatbot.

    Each sentence is wrapped in a ChatbotRequest and passed to
    Chatbot.process_message. A failure on one sentence is printed and does not
    stop the remaining ones. The chatbot is closed even if the run is
    interrupted, and the final message reports failures instead of claiming
    success unconditionally.
    """
    print("πŸ”„ Adding sample data to the database...")

    chatbot = Chatbot()

    # Sample purchases
    purchases = [
        "Add a purchase of 10 USB drives from TechMart at €5 each",
        "Add a purchase of 5 laptops from Electronics Plus at €800 each",
        "Add a purchase of 20 keyboards from Office Supplies Co at €25 each",
        "Add a purchase of 8 monitors from TechMart at €200 each",
        "Add a purchase of 15 webcams from Electronics Plus at €45 each"
    ]

    # Sample sales
    sales = [
        "Sold 8 USB drives to ABC Corp at €12 each",
        "Sold 3 laptops to XYZ Ltd at €1200 each",
        "Sold 12 keyboards to StartupTech at €40 each",
        "Sold 5 monitors to Creative Agency at €350 each",
        "Sold 10 webcams to Remote Work Solutions at €75 each",
        "Sold 6 USB drives to Local Business at €15 each",
        "Sold 2 laptops to Consulting Firm at €1100 each"
    ]

    failures = 0
    try:
        # Purchases are submitted before sales, matching the order of the two
        # batches above.
        for heading, messages in (
            ("πŸ“¦ Adding purchase transactions...", purchases),
            ("πŸ’° Adding sales transactions...", sales),
        ):
            print(heading)
            for message in messages:
                try:
                    chatbot.process_message(ChatbotRequest(message=message))
                    print(f" βœ… {message}")
                except Exception as e:
                    failures += 1
                    print(f" ❌ Failed: {message} - {e}")
    finally:
        # Release the chatbot's resources even if the run is interrupted.
        chatbot.close()

    if failures:
        print(f"❌ Sample data added with {failures} failed message(s); see the errors above.")
    else:
        print("βœ… Sample data added successfully!")
    print("πŸš€ You can now launch the GUI to see the populated dashboard:")
    print(" python run_gui.py")


if __name__ == "__main__":
    add_sample_data()
config.yaml ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LLM Chatbot Configuration
2
+ # This file contains all configurable settings for the chatbot application
3
+
4
+ # Database Configuration
5
+ database:
6
+ path: "chatbot.db"
7
+ connection_string: "sqlite:///{path}"
8
+ default_suppliers:
9
+ - "TechMart"
10
+ - "Office Supplies Co"
11
+ - "Electronics Plus"
12
+ default_products:
13
+ - name: "USB drives"
14
+ category: "Electronics"
15
+ - name: "Office chairs"
16
+ category: "Furniture"
17
+ - name: "Laptops"
18
+ category: "Electronics"
19
+ - name: "Monitors"
20
+ category: "Electronics"
21
+ - name: "Keyboards"
22
+ category: "Electronics"
23
+
24
+ # OpenAI API Configuration
25
+ openai:
26
+ # Intent Classification
27
+ intent_classifier:
28
+ model: "gpt-4o-mini"
29
+ temperature: 0.1
30
+ max_tokens: 300
31
+
32
+ # Natural Language to SQL
33
+ nl_to_sql:
34
+ model: "gpt-4o-mini"
35
+ temperature: 0.1
36
+ max_tokens: 500
37
+
38
+ # SQL Explanation
39
+ sql_explanation:
40
+ model: "gpt-3.5-turbo"
41
+ temperature: 0.3
42
+ max_tokens: 200
43
+
44
+ # RAG Handler
45
+ rag_handler:
46
+ model: "gpt-4o-mini"
47
+ temperature: 0.3
48
+ max_tokens: 800
49
+
50
+ # Query Enhancement
51
+ query_enhancement:
52
+ model: "gpt-4o-mini"
53
+ temperature: 0.2
54
+ max_tokens: 100
55
+
56
+ # Transaction Clarifier
57
+ transaction_clarifier:
58
+ model: "gpt-4o-mini"
59
+ temperature: 0.3
60
+ max_tokens: 400
61
+
62
+ # Transaction Validation
63
+ transaction_validation:
64
+ model: "gpt-4o-mini"
65
+ temperature: 0.1
66
+ max_tokens: 300
67
+
68
+ # Vector Store Configuration
69
+ vector_store:
70
+ collection_name: "chatbot_events"
71
+ persistence_path: "./chroma_db"
72
+ embedding_model: "all-MiniLM-L6-v2"
73
+
74
+ # Search and Query Configuration
75
+ search:
76
+ # Default number of vector search results
77
+ vector_search_results: 8
78
+
79
+ # Default number of recent events to return
80
+ recent_events_limit: 10
81
+
82
+ # Default limit for database queries
83
+ default_query_limit: 10
84
+
85
+ # Maximum SQL results to display
86
+ max_sql_results_display: 20
87
+
88
+ # Recent transactions display limit
89
+ recent_transactions_limit: 10
90
+
91
+ # Entity Extraction Configuration
92
+ entity_extraction:
93
+ spacy_model: "en_core_web_sm"
94
+
95
+ # Fallback classification keywords
96
+ purchase_keywords:
97
+ - "buy"
98
+ - "purchase"
99
+ - "acquire"
100
+ - "order"
101
+ - "procure"
102
+
103
+ sale_keywords:
104
+ - "sell"
105
+ - "sale"
106
+ - "sold"
107
+ - "revenue"
108
+ - "income"
109
+
110
+ # Business Logic Configuration
111
+ business_logic:
112
+ # Required fields for transaction types
113
+ required_fields:
114
+ purchase:
115
+ - "product"
116
+ - "quantity"
117
+ - "supplier"
118
+ - "unit_price"
119
+ sale:
120
+ - "product"
121
+ - "quantity"
122
+ - "customer"
123
+ - "unit_price"
124
+
125
+ # Cancellation keywords
126
+ cancellation_keywords:
127
+ - "cancel"
128
+ - "quit"
129
+ - "stop"
130
+ - "abort"
131
+
132
+ # Dangerous SQL keywords (for security)
133
+ dangerous_sql_keywords:
134
+ - "drop"
135
+ - "delete"
136
+ - "truncate"
137
+ - "alter"
138
+ - "create"
139
+ - "insert"
140
+ - "update"
141
+
142
+ # Application Settings
143
+ app:
144
+ # Enable/disable features
145
+ features:
146
+ vector_storage: true
147
+ intent_classification: true
148
+ entity_extraction: true
149
+ transaction_clarification: true
150
+ rag_search: true
151
+
152
+ # Logging configuration
153
+ logging:
154
+ level: "INFO"
155
+ format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
gui/gradio_interface.py ADDED
@@ -0,0 +1,627 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import gradio as gr
4
+ import sys
5
+ import os
6
+ from typing import List, Tuple
7
+ from sqlalchemy import text
8
+
9
+ # Add the src directory to the path to import existing modules
10
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src'))
11
+
12
+ from chatbot import Chatbot
13
+ from models import ChatbotRequest
14
+
15
+ class GradioInterface:
16
+ """Gradio GUI interface for the LLM Chatbot."""
17
+
18
    def __init__(self) -> None:
        """Create the Chatbot backend and an empty conversation history list."""
        # Single Chatbot instance; process_message and get_dashboard_data both go through it.
        self.chatbot = Chatbot()
        # Starts empty; not read by the methods visible in this part of the file.
        self.conversation_history = []
22
+
23
+ def process_message(self, message: str, history: List[Tuple[str, str]]) -> Tuple[str, List[Tuple[str, str]]]:
24
+ """
25
+ Process a user message and return the response with updated history.
26
+
27
+ Args:
28
+ message: User input message
29
+ history: Chat history as list of (user_msg, bot_response) tuples
30
+
31
+ Returns:
32
+ Tuple of (empty_string_for_input, updated_history)
33
+ """
34
+ if not message.strip():
35
+ return "", history
36
+
37
+ # Handle quit/exit commands
38
+ if message.lower().strip() in ['quit', 'exit', 'bye']:
39
+ bot_response = "πŸ‘‹ Goodbye! Refresh the page to start a new session."
40
+ history.append((message, bot_response))
41
+ return "", history
42
+
43
+ try:
44
+ # Process the message using the existing chatbot
45
+ request = ChatbotRequest(message=message)
46
+ response = chatbot_response = self.chatbot.process_message(request)
47
+
48
+ # Build the response with additional information
49
+ response_text = f"πŸ€– {response.response}"
50
+
51
+ # Add extracted entities information
52
+ if response.entities_extracted:
53
+ entities_info = (
54
+ f"\n\nπŸ“Š **Extracted Information:**\n"
55
+ f"- Type: {response.entities_extracted.transaction_type}\n"
56
+ f"- Product: {response.entities_extracted.product}\n"
57
+ f"- Quantity: {response.entities_extracted.quantity}\n"
58
+ f"- Total Amount: €{response.entities_extracted.total_amount}"
59
+ )
60
+ response_text += entities_info
61
+
62
+ # Add vector storage confirmation
63
+ if response.vector_stored:
64
+ response_text += "\n\nπŸ’Ύ Information stored in vector database for future semantic search"
65
+
66
+ # Add intent detection information
67
+ if response.intent_detected:
68
+ response_text += f"\n\n🎯 **Intent Detected:** {response.intent_detected} (confidence: {response.intent_confidence:.2f})"
69
+
70
+ # Add clarification prompt
71
+ if response.awaiting_clarification:
72
+ response_text += "\n\n⏳ **Waiting for your response to complete the transaction...**"
73
+
74
+ # Update history
75
+ history.append((message, response_text))
76
+
77
+ except Exception as e:
78
+ error_response = f"❌ Error processing message: {str(e)}"
79
+ history.append((message, error_response))
80
+
81
+ return "", history
82
+
83
+ def clear_chat(self) -> Tuple[str, List]:
84
+ """Clear the chat history and reset the conversation."""
85
+ return "", []
86
+
87
def get_dashboard_data(self):
    """Collect the figures shown on the dashboard using direct SQL queries.

    Returns:
        dict with the keys ``total_purchases``, ``total_sales``,
        ``total_revenue``, ``total_expenses``, ``profit``,
        ``recent_transactions`` (up to 5 rows of
        type/product/quantity/amount/partner/date) and ``top_products``
        (up to 5 rows of product/total quantity/transaction count).

        If anything goes wrong the error is logged and a zeroed structure
        with empty row lists is returned, so the dashboard can still render.
    """
    # Local import: the file-level import block is not visible from this method.
    import logging

    try:
        # Access the database manager's session directly
        session = self.chatbot.db_manager.session

        def scalar(sql):
            """Run a single-value query; a NULL result (empty table) counts as 0."""
            return session.execute(text(sql)).scalar() or 0

        # Basic statistics
        total_purchases = scalar("SELECT COUNT(*) FROM purchases")
        total_sales = scalar("SELECT COUNT(*) FROM sales")
        total_revenue = scalar("SELECT SUM(total_amount) FROM sales")
        total_expenses = scalar("SELECT SUM(total_cost) FROM purchases")

        # Recent transactions (last 5) - purchases and sales merged into one feed
        recent_transactions = session.execute(
            text("""
                SELECT 'purchase' as transaction_type, p.name as product, pu.quantity,
                pu.total_cost as total_amount, s.name as partner, pu.purchase_date as created_at
                FROM purchases pu
                LEFT JOIN products p ON pu.product_id = p.id
                LEFT JOIN suppliers s ON pu.supplier_id = s.id
                UNION ALL
                SELECT 'sale' as transaction_type, p.name as product, sa.quantity,
                sa.total_amount, c.name as partner, sa.sale_date as created_at
                FROM sales sa
                LEFT JOIN products p ON sa.product_id = p.id
                LEFT JOIN customers c ON sa.customer_id = c.id
                ORDER BY created_at DESC
                LIMIT 5
            """)
        ).fetchall()

        # Top products - quantities summed over both tables
        top_products = session.execute(
            text("""
                SELECT p.name as product, SUM(combined.quantity) as total_qty, COUNT(*) as transaction_count
                FROM (
                SELECT product_id, quantity FROM purchases
                UNION ALL
                SELECT product_id, quantity FROM sales
                ) combined
                LEFT JOIN products p ON combined.product_id = p.id
                GROUP BY p.name
                ORDER BY total_qty DESC
                LIMIT 5
            """)
        ).fetchall()

        return {
            'total_purchases': total_purchases,
            'total_sales': total_sales,
            'total_revenue': round(total_revenue, 2),
            'total_expenses': round(total_expenses, 2),
            'profit': round(total_revenue - total_expenses, 2),
            'recent_transactions': recent_transactions,
            'top_products': top_products
        }

    except Exception:
        # Keep the best-effort fallback, but leave a trace: without it a
        # broken database is indistinguishable from an empty one.
        logging.getLogger(__name__).exception(
            "Failed to load dashboard data; showing an empty dashboard"
        )
        return {
            'total_purchases': 0,
            'total_sales': 0,
            'total_revenue': 0.0,
            'total_expenses': 0.0,
            'profit': 0.0,
            'recent_transactions': [],
            'top_products': []
        }
165
+
166
def create_revenue_chart(self, data):
    """Build the grouped bar chart comparing revenue, expenses and profit.

    Args:
        data: mapping providing ``total_revenue``, ``total_expenses`` and
            ``profit``.

    Returns:
        A plotly Figure with one bar per measure.
    """
    import plotly.graph_objects as go

    # (legend label, key in ``data``, bar colour)
    measures = (
        ('Revenue', 'total_revenue', 'green'),
        ('Expenses', 'total_expenses', 'red'),
        ('Profit', 'profit', 'blue'),
    )
    bars = [
        go.Bar(name=label, x=['Financial Summary'], y=[data[key]], marker_color=colour)
        for label, key, colour in measures
    ]

    chart = go.Figure(data=bars)
    chart.update_layout(title='Financial Overview', barmode='group', height=300)
    return chart
183
+
184
def create_transaction_chart(self, data):
    """Build the pie chart showing how many purchases vs. sales exist.

    Args:
        data: mapping providing ``total_purchases`` and ``total_sales``.

    Returns:
        A plotly Figure containing a single pie trace.
    """
    import plotly.graph_objects as go

    counts = [data['total_purchases'], data['total_sales']]
    pie = go.Pie(
        labels=['Purchases', 'Sales'],
        values=counts,
        marker_colors=['lightcoral', 'lightgreen']
    )

    chart = go.Figure(data=[pie])
    chart.update_layout(title='Transaction Distribution', height=300)
    return chart
200
+
201
def create_top_products_chart(self, data):
    """Build the bar chart of the best-moving products.

    Args:
        data: mapping whose ``top_products`` entry is a sequence of rows with
            the product name at index 0 and the total quantity at index 1.

    Returns:
        A plotly Figure: a bar chart, or an empty figure carrying a
        "No product data available" annotation when there are no rows.
    """
    import plotly.graph_objects as go

    if data['top_products']:
        names = []
        totals = []
        for entry in data['top_products']:
            names.append(entry[0])
            totals.append(entry[1])

        chart = go.Figure(data=[
            go.Bar(x=names, y=totals, marker_color='skyblue')
        ])
        chart.update_layout(
            title='Top Products by Quantity',
            xaxis_title='Products',
            yaxis_title='Total Quantity',
            height=300
        )
        return chart

    # Nothing to plot: show a centred note instead of an empty axis.
    placeholder = go.Figure()
    placeholder.add_annotation(
        text="No product data available",
        xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False
    )
    placeholder.update_layout(title='Top Products', height=300)
    return placeholder
227
+
228
def structured_purchase(self, product, quantity, supplier, unit_price):
    """Handle a purchase submitted through the structured form.

    The form values are turned into a natural-language sentence and sent
    through the chatbot, exactly like a typed chat message.

    Args:
        product: product name from the form.
        quantity: number of units.
        supplier: supplier name.
        unit_price: price per unit.

    Returns:
        A 3-tuple ``(product_box_value, transaction_log, status_text)``.
        The product box is cleared on validation failure and on success, and
        keeps the entered value when the purchase could not be completed.
        ``status_text`` only announces success when the chatbot did not fail
        and is not waiting for further clarification.
    """
    if not all([product, quantity, supplier, unit_price]):
        return "", [("System", "⚠️ Please fill in all fields for the purchase.")], ""

    message = f"Add a purchase of {quantity} {product} from {supplier} at €{unit_price} each"

    try:
        request = ChatbotRequest(message=message)
        response = self.chatbot.process_message(request)
    except Exception as e:
        # Mirror the chat tab: report the failure in the log instead of
        # letting the exception escape into the UI, and never claim success.
        failure_log = [("Purchase Entry", message), ("System", f"❌ Error processing message: {str(e)}")]
        return product, failure_log, ""

    if getattr(response, "awaiting_clarification", False):
        # The chatbot asked a follow-up question, so nothing was stored yet.
        pending_log = [("Purchase Entry", message), ("System", response.response)]
        return product, pending_log, "Purchase not recorded yet - more information is required."

    history = [("Purchase Entry", message), ("System", f"βœ… {response.response}")]
    return "", history, "Purchase recorded successfully!"
239
+
240
def structured_sale(self, product, quantity, customer, unit_price):
    """Handle a sale submitted through the structured form.

    The form values are turned into a natural-language sentence and sent
    through the chatbot, exactly like a typed chat message.

    Args:
        product: product name from the form.
        quantity: number of units.
        customer: customer name.
        unit_price: price per unit.

    Returns:
        A 3-tuple ``(product_box_value, transaction_log, status_text)``.
        The product box is cleared on validation failure and on success, and
        keeps the entered value when the sale could not be completed.
        ``status_text`` only announces success when the chatbot did not fail
        and is not waiting for further clarification.
    """
    if not all([product, quantity, customer, unit_price]):
        return "", [("System", "⚠️ Please fill in all fields for the sale.")], ""

    message = f"Sold {quantity} {product} to {customer} at €{unit_price} each"

    try:
        request = ChatbotRequest(message=message)
        response = self.chatbot.process_message(request)
    except Exception as e:
        # Mirror the chat tab: report the failure in the log instead of
        # letting the exception escape into the UI, and never claim success.
        failure_log = [("Sale Entry", message), ("System", f"❌ Error processing message: {str(e)}")]
        return product, failure_log, ""

    if getattr(response, "awaiting_clarification", False):
        # The chatbot asked a follow-up question, so nothing was stored yet.
        pending_log = [("Sale Entry", message), ("System", response.response)]
        return product, pending_log, "Sale not recorded yet - more information is required."

    history = [("Sale Entry", message), ("System", f"βœ… {response.response}")]
    return "", history, "Sale recorded successfully!"
251
+
252
def search_records(self, search_query, search_type):
    """Run a search from the structured search form.

    Args:
        search_query: free text entered by the user.
        search_type: selected category; "Products", "Suppliers" and
            "Customers" get a dedicated phrasing, anything else a generic one.

    Returns:
        A list of ``(speaker, text)`` pairs for the results panel: either a
        single warning when the query is empty, or the generated query
        followed by the chatbot's answer.
    """
    if not search_query:
        return [("System", "⚠️ Please enter a search query.")]

    # Category -> verb phrase placed in front of the query text.
    phrasing = (
        ("Products", "Find"),
        ("Suppliers", "Search supplier"),
        ("Customers", "Search customer"),
    )
    message = f"Search {search_query}"
    for category, prefix in phrasing:
        if search_type == category:
            message = f"{prefix} {search_query}"
            break

    answer = self.chatbot.process_message(ChatbotRequest(message=message))
    return [("Search Query", message), ("Results", answer.response)]
270
+
271
def create_interface(self) -> gr.Blocks:
    """Create and configure the Gradio interface.

    Builds a ``gr.Blocks`` app with five tabs (Dashboard, AI Chat,
    Transactions, Search & Reports, Help & Settings) and wires every
    button/textbox to the corresponding handler on this object.

    Returns:
        The assembled ``gr.Blocks`` instance (not yet launched).
    """

    with gr.Blocks(
        title="Business AI Assistant",
        theme=gr.themes.Default()
    ) as interface:

        # Header
        gr.Markdown("# πŸ’Ό Business AI Assistant")
        gr.Markdown("**Intelligent transaction management and business intelligence platform**")

        # Main tabbed interface. Every tab gets an explicit id so the
        # dashboard shortcut buttons can select it programmatically.
        with gr.Tabs() as tabs:

            # Dashboard Tab
            with gr.Tab("πŸ“Š Dashboard", id="dashboard"):
                # Key Metrics Row
                with gr.Row():
                    metrics_purchases = gr.Number(label="Total Purchases", interactive=False)
                    metrics_sales = gr.Number(label="Total Sales", interactive=False)
                    metrics_revenue = gr.Number(label="Revenue (€)", interactive=False)
                    metrics_profit = gr.Number(label="Profit (€)", interactive=False)

                # Charts Row
                with gr.Row():
                    with gr.Column():
                        financial_chart = gr.Plot(label="Financial Overview")
                    with gr.Column():
                        transaction_chart = gr.Plot(label="Transaction Distribution")

                with gr.Row():
                    with gr.Column():
                        products_chart = gr.Plot(label="Top Products")
                    with gr.Column():
                        # Recent Transactions Table
                        recent_table = gr.Dataframe(
                            headers=["Type", "Product", "Qty", "Amount (€)", "Partner"],
                            datatype=["str", "str", "number", "number", "str"],
                            label="Recent Transactions",
                        )

                # Action Buttons
                with gr.Row():
                    refresh_dashboard = gr.Button("πŸ”„ Refresh Data", variant="secondary")
                    dash_new_purchase = gr.Button("βž• New Purchase", variant="primary")
                    dash_new_sale = gr.Button("πŸ’° New Sale", variant="primary")
                    dash_search = gr.Button("πŸ” Search Records", variant="outline")

            # Chat Tab
            with gr.Tab("πŸ’¬ AI Chat", id="chat"):
                gr.Markdown("### Conversational Business Assistant")
                gr.Markdown("*Ask questions, add transactions, search records, or get insights in natural language*")

                chatbot_ui = gr.Chatbot(
                    value=[],
                    height=500,
                    label="Conversation",
                    show_label=False,
                    container=True,
                    show_copy_button=True
                )

                with gr.Row():
                    msg_input = gr.Textbox(
                        placeholder="Ask me anything about your business... (e.g., 'Show recent sales', 'Add 10 laptops from TechMart')",
                        label="Message",
                        lines=2,
                        max_lines=4,
                        scale=5
                    )
                    send_btn = gr.Button("Send", variant="primary", scale=1)

                with gr.Row():
                    clear_chat_btn = gr.Button("Clear Chat", variant="secondary")

                    # Example prompts
                    example_1 = gr.Button("πŸ’‘ Example: Add Purchase", variant="outline", size="sm")
                    example_2 = gr.Button("πŸ’‘ Example: Search Products", variant="outline", size="sm")
                    example_3 = gr.Button("πŸ’‘ Example: View Transactions", variant="outline", size="sm")

            # Transactions Tab
            with gr.Tab("πŸ“ Transactions", id="transactions"):
                with gr.Row():
                    # Purchase Form
                    with gr.Column():
                        gr.Markdown("### βž• Add Purchase")
                        purchase_product = gr.Textbox(label="Product", placeholder="e.g., Laptops")
                        purchase_quantity = gr.Number(label="Quantity", value=1, minimum=1)
                        purchase_supplier = gr.Textbox(label="Supplier", placeholder="e.g., TechMart")
                        purchase_price = gr.Number(label="Unit Price (€)", value=0.00, minimum=0)
                        purchase_btn = gr.Button("Add Purchase", variant="primary")
                        purchase_status = gr.Markdown("")

                    # Sale Form
                    with gr.Column():
                        gr.Markdown("### πŸ’° Add Sale")
                        sale_product = gr.Textbox(label="Product", placeholder="e.g., USB Drives")
                        sale_quantity = gr.Number(label="Quantity", value=1, minimum=1)
                        sale_customer = gr.Textbox(label="Customer", placeholder="e.g., ABC Corp")
                        sale_price = gr.Number(label="Unit Price (€)", value=0.00, minimum=0)
                        sale_btn = gr.Button("Add Sale", variant="primary")
                        sale_status = gr.Markdown("")

                # Transaction Results
                gr.Markdown("### Transaction Results")
                transaction_results = gr.Chatbot(
                    value=[],
                    height=300,
                    label="Transaction Log",
                    show_copy_button=True
                )

            # Search & Reports Tab
            with gr.Tab("πŸ” Search & Reports", id="search"):
                gr.Markdown("### Advanced Search")

                with gr.Row():
                    search_query = gr.Textbox(
                        label="Search Query",
                        placeholder="Enter product name, supplier, customer, or keywords...",
                        scale=3
                    )
                    search_type = gr.Dropdown(
                        choices=["All Records", "Products", "Suppliers", "Customers", "Transactions"],
                        value="All Records",
                        label="Search Type",
                        scale=1
                    )
                    search_btn = gr.Button("Search", variant="primary", scale=1)

                # Search Results
                search_results = gr.Chatbot(
                    value=[],
                    height=400,
                    label="Search Results",
                    show_copy_button=True
                )

                # Quick Search Buttons
                with gr.Row():
                    gr.Markdown("### Quick Searches")
                with gr.Row():
                    recent_purchases = gr.Button("Recent Purchases", variant="outline")
                    recent_sales = gr.Button("Recent Sales", variant="outline")
                    top_products = gr.Button("Top Products", variant="outline")
                    supplier_summary = gr.Button("Supplier Summary", variant="outline")

            # Help & Settings Tab
            with gr.Tab("❓ Help & Settings", id="help"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("""
                        ### πŸ“– User Guide

                        **🎯 Getting Started**
                        - Use the **Dashboard** for quick overview and actions
                        - **AI Chat** for natural language interactions
                        - **Transactions** for structured data entry
                        - **Search & Reports** for finding information

                        **πŸ’¬ Chat Examples**
                        - "Add a purchase of 20 USB drives from TechMart at €5 each"
                        - "Show me recent sales to ABC Corp"
                        - "Find all laptop transactions"
                        - "What's my total revenue this month?"

                        **πŸ“Š Features**
                        - Smart entity extraction from natural language
                        - Intelligent search across all records
                        - Transaction categorization and analysis
                        - Export capabilities for reports
                        """)

                    with gr.Column():
                        gr.Markdown("""
                        ### βš™οΈ System Information

                        **Status**: 🟒 Online and Ready

                        **Capabilities**:
                        - βœ… Natural language processing
                        - βœ… Transaction management
                        - βœ… Intelligent search
                        - βœ… Data export
                        - βœ… Real-time analytics

                        **Supported Operations**:
                        - Purchase tracking
                        - Sales recording
                        - Inventory searches
                        - Supplier management
                        - Customer records
                        - Financial reporting

                        **Data Security**: πŸ”’ All data processed locally
                        """)

        gr.Markdown("---")
        gr.Markdown("*Business AI Assistant v1.0 β€’ Built with Gradio β€’ Powered by OpenAI*")

        # Event Handlers

        # Dashboard events
        def load_dashboard():
            """Fetch fresh dashboard data and shape it for the eight dashboard widgets."""
            data = self.get_dashboard_data()

            # Create charts
            financial_fig = self.create_revenue_chart(data)
            transaction_fig = self.create_transaction_chart(data)
            products_fig = self.create_top_products_chart(data)

            # Prepare recent transactions table
            recent_data = []
            for row in data['recent_transactions']:
                recent_data.append([
                    row[0].title(),  # transaction_type
                    row[1] or "N/A",  # product (NULL when the product row is missing)
                    row[2],  # quantity
                    # The column is declared "number" and its header already
                    # carries the currency, so pass a plain rounded float;
                    # a NULL amount is shown as 0 instead of crashing.
                    round(float(row[3] or 0), 2),  # total_amount
                    row[4] or "N/A"  # partner (supplier/customer)
                ])

            return (
                data['total_purchases'],
                data['total_sales'],
                data['total_revenue'],
                data['profit'],
                financial_fig,
                transaction_fig,
                products_fig,
                recent_data
            )

        dashboard_outputs = [
            metrics_purchases, metrics_sales, metrics_revenue, metrics_profit,
            financial_chart, transaction_chart, products_chart, recent_table
        ]

        refresh_dashboard.click(fn=load_dashboard, outputs=dashboard_outputs)

        # Chat events
        msg_input.submit(
            fn=self.process_message,
            inputs=[msg_input, chatbot_ui],
            outputs=[msg_input, chatbot_ui]
        )

        send_btn.click(
            fn=self.process_message,
            inputs=[msg_input, chatbot_ui],
            outputs=[msg_input, chatbot_ui]
        )

        clear_chat_btn.click(
            fn=self.clear_chat,
            outputs=[msg_input, chatbot_ui]
        )

        # Example prompts (each one pre-fills the message box and starts a fresh conversation)
        example_1.click(
            fn=lambda: ("Add a purchase of 10 laptops from TechMart at €800 each", []),
            outputs=[msg_input, chatbot_ui]
        )

        example_2.click(
            fn=lambda: ("Find all USB drive transactions", []),
            outputs=[msg_input, chatbot_ui]
        )

        example_3.click(
            fn=lambda: ("Show recent transactions", []),
            outputs=[msg_input, chatbot_ui]
        )

        # Transaction events
        purchase_btn.click(
            fn=self.structured_purchase,
            inputs=[purchase_product, purchase_quantity, purchase_supplier, purchase_price],
            outputs=[purchase_product, transaction_results, purchase_status]
        )

        sale_btn.click(
            fn=self.structured_sale,
            inputs=[sale_product, sale_quantity, sale_customer, sale_price],
            outputs=[sale_product, transaction_results, sale_status]
        )

        # Search events
        search_btn.click(
            fn=self.search_records,
            inputs=[search_query, search_type],
            outputs=[search_results]
        )

        # Quick search events
        recent_purchases.click(
            fn=lambda: self.search_records("recent purchases", "Transactions"),
            outputs=[search_results]
        )

        recent_sales.click(
            fn=lambda: self.search_records("recent sales", "Transactions"),
            outputs=[search_results]
        )

        # These two buttons previously had no handler and did nothing when clicked.
        top_products.click(
            fn=lambda: self.search_records("top products", "Transactions"),
            outputs=[search_results]
        )

        supplier_summary.click(
            fn=lambda: self.search_records("supplier summary", "Transactions"),
            outputs=[search_results]
        )

        # Dashboard navigation events.
        # Gradio 4 removed the ``Component.update`` classmethods; a tab is
        # selected by returning ``gr.Tabs(selected=<tab id>)`` to the Tabs
        # container, which therefore has to be listed as the output.
        dash_new_purchase.click(fn=lambda: gr.Tabs(selected="transactions"), outputs=[tabs])
        dash_new_sale.click(fn=lambda: gr.Tabs(selected="transactions"), outputs=[tabs])
        dash_search.click(fn=lambda: gr.Tabs(selected="search"), outputs=[tabs])

        # Load initial dashboard data
        interface.load(fn=load_dashboard, outputs=dashboard_outputs)

    return interface
593
+
594
def launch(self, **kwargs):
    """Build the interface and serve it until the server stops.

    Args:
        **kwargs: options forwarded to the interface's ``launch``; they
            override the defaults defined below.

    The chatbot is always closed when serving ends, whether normally or
    through an exception.
    """
    app = self.create_interface()

    # Defaults first, caller-supplied options win.
    settings = dict(
        server_name='0.0.0.0',
        server_port=7860,
        share=False,
        debug=False,
        show_error=True,
        quiet=False,
    )
    settings.update(kwargs)

    port = settings['server_port']
    print("πŸš€ Starting Gradio GUI for Business Chatbot...")
    print(f"πŸ“± Access the interface at: http://localhost:{port}")
    print("πŸ’‘ Press Ctrl+C to stop the server")

    try:
        app.launch(**settings)
    finally:
        # Release chatbot resources no matter how the server ended.
        self.chatbot.close()
620
+
621
def main():
    """Entry point: build the GUI wrapper and start serving it with default settings."""
    GradioInterface().launch()
625
+
626
+ if __name__ == "__main__":
627
+ main()
main.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import sys
4
+ import os
5
+ sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
6
+
7
+ from chatbot import Chatbot
8
+ from models import ChatbotRequest
9
+
10
def main():
    """Run the interactive command-line chat loop.

    Prints a usage banner, then reads messages from stdin and prints the
    chatbot's answer together with any extracted entities, vector-store
    confirmation, detected intent and clarification prompt.

    The loop ends on 'quit', 'exit' or 'bye', on Ctrl+C, or when stdin is
    exhausted (Ctrl+D / end of piped input). The chatbot is always closed
    on the way out.
    """
    print("πŸ€– Business Chatbot with SQL Database and Vector Store")
    print("="*60)
    print("I can help you with:")
    print("β€’ Adding purchases: 'Add a purchase of 20 USB drives from TechMart at €5 each'")
    print("β€’ Adding sales: 'Sold 10 laptops to John Smith at €800 each'")
    print("β€’ Viewing recent transactions: 'Show recent transactions'")
    print("β€’ Searching: 'Find USB drives' or 'Search TechMart'")
    print("β€’ Storing general info: 'Meeting with supplier scheduled for next week'")
    print("β€’ Type 'quit' to exit")
    print("="*60)

    chatbot = Chatbot()

    try:
        while True:
            try:
                user_input = input("\nπŸ’¬ You: ").strip()
            except EOFError:
                # stdin was closed: leave cleanly, as if the user typed 'quit',
                # instead of ending with a traceback.
                print("\nπŸ‘‹ Goodbye!")
                break

            if user_input.lower() in ['quit', 'exit', 'bye']:
                print("πŸ‘‹ Goodbye!")
                break

            if not user_input:
                continue

            # Process the message
            request = ChatbotRequest(message=user_input)
            response = chatbot.process_message(request)

            print(f"\nπŸ€– Bot: {response.response}")

            # Show additional info if available
            if response.entities_extracted:
                print(f"πŸ“Š Extracted: {response.entities_extracted.transaction_type} - {response.entities_extracted.product} ({response.entities_extracted.quantity}x) - €{response.entities_extracted.total_amount}")

            if response.vector_stored:
                print("πŸ’Ύ Information stored in vector database for future semantic search")

            if response.intent_detected:
                print(f"🎯 Intent: {response.intent_detected} (confidence: {response.intent_confidence:.2f})")

            if response.awaiting_clarification:
                print("⏳ Waiting for your response to complete the transaction...")

    except KeyboardInterrupt:
        print("\nπŸ‘‹ Goodbye!")

    finally:
        chatbot.close()
59
+
60
+ if __name__ == "__main__":
61
+ main()
populate_sample_data.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Sample data population script for the LLM Chatbot database.
4
+ This script adds realistic sample transactions to help test the dashboard.
5
+ """
6
+
7
+ import sys
8
+ import os
9
+ sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
10
+
11
+ from chatbot import Chatbot
12
+ from models import ChatbotRequest
13
+
14
def populate_sample_data():
    """Feed a fixed set of sample purchases and sales through the chatbot.

    Each sentence is processed like a normal chat message. A transaction
    counts as successful when the reply contains the word "recorded"
    (case-insensitive); any other reply or an exception counts as failed.
    A summary is printed at the end and the chatbot is always closed.
    """

    print("πŸ”§ Populating database with sample transactions...")

    purchase_messages = [
        "Add a purchase of 100 wireless mice from TechMart at €25 each",
        "Add a purchase of 50 laptop stands from Office Supplies Co at €35 each",
        "Add a purchase of 30 webcams from Electronics Plus at €80 each",
        "Add a purchase of 75 desk lamps from Office Supplies Co at €40 each",
        "Add a purchase of 20 printers from TechMart at €200 each",
        "Add a purchase of 60 surge protectors from Electronics Plus at €15 each",
        "Add a purchase of 40 ethernet cables from TechMart at €12 each",
        "Add a purchase of 15 projectors from Electronics Plus at €450 each",
    ]
    sale_messages = [
        "Sold 80 wireless mice to StartupTech Corp at €35 each",
        "Sold 30 laptop stands to Creative Agency Ltd at €50 each",
        "Sold 25 webcams to Remote Work Solutions at €120 each",
        "Sold 50 desk lamps to Modern Office Inc at €55 each",
        "Sold 12 printers to Small Business Hub at €280 each",
        "Sold 45 surge protectors to Tech Solutions Ltd at €25 each",
        "Sold 35 ethernet cables to Network Systems Corp at €18 each",
        "Sold 10 projectors to Conference Center Co at €650 each",
        "Sold 5 laptops to Freelance Collective at €1400 each",
        "Sold 25 monitors to Design Studio Ltd at €380 each",
    ]
    # Purchases first, then sales - same order as they are processed.
    sample_transactions = purchase_messages + sale_messages

    chatbot = Chatbot()

    try:
        tally = {"ok": 0, "failed": 0}

        for sentence in sample_transactions:
            try:
                print(f"πŸ“ Processing: {sentence}")
                reply = chatbot.process_message(ChatbotRequest(message=sentence))

                if "recorded" not in reply.response.lower():
                    tally["failed"] += 1
                    print(f"⚠️ Warning: {reply.response}")
                else:
                    tally["ok"] += 1
                    print(f"βœ… Success: {reply.response}")

            except Exception as e:
                tally["failed"] += 1
                print(f"❌ Error processing transaction: {e}")

        successful_transactions = tally["ok"]
        failed_transactions = tally["failed"]

        print("\nπŸ“Š Summary:")
        print(f"βœ… Successful transactions: {successful_transactions}")
        print(f"❌ Failed transactions: {failed_transactions}")
        print(f"🎯 Total attempted: {len(sample_transactions)}")

        if successful_transactions > 0:
            print(f"\nπŸŽ‰ Database populated with {successful_transactions} sample transactions!")
            print("πŸ’‘ You can now run the dashboard to see meaningful data.")
            print("πŸš€ Run 'python run_gui.py' to launch the Gradio interface.")

    finally:
        chatbot.close()
79
+
80
+ if __name__ == "__main__":
81
+ populate_sample_data()
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ openai>=1.0.0
2
+ sqlalchemy
3
+ db-sqlite3
4
+ pandas>=2.0.0
5
+ numpy>=1.24.0
6
+ sentence-transformers>=2.2.0
7
+ chromadb>=0.4.0
8
+ spacy>=3.6.0
9
+ python-dateutil>=2.8.0
10
+ pydantic>=2.0.0
11
+ fastapi>=0.100.0
12
+ uvicorn>=0.23.0
13
+ gradio>=4.0.0
14
+ pyyaml>=6.0
15
+ plotly>=5.0.0
reset_database.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Database reset script for the LLM Chatbot.
4
+ This script clears all transaction data while keeping the basic structure intact.
5
+ """
6
+
7
+ import sqlite3
8
+ import os
9
+
10
def reset_database():
    """Reset the database by clearing all transaction data.

    Operates on ``chatbot.db`` in the current working directory. Deletes
    every row from ``sales``, ``purchases`` and ``customers``, resets the
    AUTOINCREMENT counters of ``sales`` and ``purchases`` when SQLite keeps
    such counters, commits, and prints the remaining row counts.
    Suppliers and products are left untouched.

    Errors are reported on stdout rather than raised; nothing is committed
    if a statement fails before the commit.
    """

    db_path = "chatbot.db"

    if not os.path.exists(db_path):
        print(f"❌ Database file '{db_path}' not found.")
        return

    # Bound up front so the finally clause is safe even if connect() fails.
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        print("πŸ—‘οΈ Clearing transaction data...")

        # Clear all transaction data
        cursor.execute("DELETE FROM sales")
        cursor.execute("DELETE FROM purchases")

        # Reset auto-increment counters. SQLite only creates sqlite_sequence
        # once some table is declared AUTOINCREMENT; touching it when it does
        # not exist raises and would abort the whole reset before the commit.
        cursor.execute(
            "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'sqlite_sequence'"
        )
        if cursor.fetchone() is not None:
            cursor.execute("DELETE FROM sqlite_sequence WHERE name IN ('sales', 'purchases')")

        # Clear all customers (they are created on demand while recording sales)
        cursor.execute("DELETE FROM customers")

        # Suppliers and products are deliberately kept.

        conn.commit()

        # Check results
        cursor.execute("SELECT COUNT(*) FROM purchases")
        purchases_count = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM sales")
        sales_count = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM customers")
        customers_count = cursor.fetchone()[0]

        print(f"βœ… Database reset complete!")
        print(f" - Purchases: {purchases_count}")
        print(f" - Sales: {sales_count}")
        print(f" - Customers: {customers_count}")
        print(f"πŸ’‘ You can now add new sample data using 'python populate_sample_data.py'")

    except Exception as e:
        print(f"❌ Error resetting database: {e}")
    finally:
        if conn is not None:
            conn.close()
60
+
61
+ if __name__ == "__main__":
62
+ response = input("⚠️ This will delete all transaction data. Continue? (y/N): ")
63
+ if response.lower() in ['y', 'yes']:
64
+ reset_database()
65
+ else:
66
+ print("🚫 Operation cancelled.")
run_gui.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ GUI Launcher for LLM Business Chatbot
5
+
6
+ This script launches the Gradio web interface for the chatbot application.
7
+ It provides a web-based GUI that wraps around the existing CLI chatbot
8
+ without modifying any of the original code.
9
+
10
+ Usage:
11
+ python run_gui.py # Launch with default settings
12
+ python run_gui.py --port 8080 # Launch on custom port
13
+ python run_gui.py --share # Create public sharing link
14
+ python run_gui.py --debug # Enable debug mode
15
+ """
16
+
17
+ import sys
18
+ import os
19
+ import argparse
20
+ from pathlib import Path
21
+
22
+ # Add gui directory to path
23
+ gui_dir = Path(__file__).parent / "gui"
24
+ sys.path.append(str(gui_dir))
25
+
26
def main():
    """Parse the command-line options and start the Gradio GUI.

    Recognised options: ``--host``, ``--port``, ``--share``, ``--debug`` and
    ``--quiet``. Exits with status 1 when the GUI module cannot be imported
    or launching fails, and with status 0 on Ctrl+C.
    """
    cli = argparse.ArgumentParser(
        description="Launch Gradio GUI for LLM Business Chatbot",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
 python run_gui.py # Default: localhost:7860
 python run_gui.py --port 8080 # Custom port
 python run_gui.py --share # Public sharing link
 python run_gui.py --host 0.0.0.0 # Accept external connections
 python run_gui.py --debug # Enable debug mode
 """
    )

    # (flag, keyword arguments for add_argument), registered in this order
    option_specs = (
        ("--host", dict(default="0.0.0.0", help="Host address to bind to (default: 0.0.0.0)")),
        ("--port", dict(type=int, default=7860, help="Port number to run the server on (default: 7860)")),
        ("--share", dict(action="store_true", help="Create a public sharing link via Gradio")),
        ("--debug", dict(action="store_true", help="Enable debug mode")),
        ("--quiet", dict(action="store_true", help="Suppress startup messages")),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    args = cli.parse_args()
    verbose = not args.quiet

    # Print startup banner
    if verbose:
        heavy_rule = "=" * 70
        share_label = 'Yes' if args.share else 'No'
        debug_label = 'Yes' if args.debug else 'No'
        banner = (
            heavy_rule,
            "πŸ€– LLM Business Chatbot - Gradio GUI",
            heavy_rule,
            "🌐 Starting web interface...",
            f"πŸ“ Host: {args.host}",
            f"πŸ”Œ Port: {args.port}",
            f"πŸ”— Share: {share_label}",
            f"πŸ› Debug: {debug_label}",
            "-" * 70,
        )
        for line in banner:
            print(line)

    try:
        # Imported lazily so a missing dependency is reported nicely below.
        from gradio_interface import GradioInterface

        GradioInterface().launch(
            server_name=args.host,
            server_port=args.port,
            share=args.share,
            debug=args.debug,
            quiet=args.quiet,
            show_error=True
        )

    except ImportError as e:
        print("❌ Error: Missing dependencies. Please install requirements:")
        print(" pip install -r requirements.txt")
        print(f" Error details: {e}")
        sys.exit(1)

    except KeyboardInterrupt:
        if verbose:
            print("\nπŸ‘‹ Shutting down Gradio interface...")
        sys.exit(0)

    except Exception as e:
        print(f"❌ Error launching GUI: {e}")
        if args.debug:
            import traceback
            traceback.print_exc()
        sys.exit(1)
+
118
+ if __name__ == "__main__":
119
+ main()
src/chatbot.py ADDED
@@ -0,0 +1,402 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, Optional
2
+ from entity_extractor import EntityExtractor
3
+ from database_manager import DatabaseManager
4
+ from vector_store import VectorStore
5
+ from nl_to_sql import NaturalLanguageToSQL
6
+ from intent_classifier import IntentClassifier, IntentType
7
+ from rag_handler import RAGHandler
8
+ from transaction_clarifier import TransactionClarifier, ClarificationStatus
9
+ from models import ChatbotRequest, ChatbotResponse, PendingTransaction
10
+
11
class Chatbot:
    """Routes each user message to the matching handler and builds the reply.

    A message is first checked against any transaction still awaiting
    clarification for its session. Otherwise its intent is classified and the
    message is dispatched to the transaction, query, semantic-search, or
    general-information handler.
    """

    # Replies that abort a pending clarification dialogue.
    _CANCEL_WORDS = ("cancel", "quit", "stop", "abort")

    def __init__(self):
        """Instantiate every collaborator the chatbot delegates to."""
        self.entity_extractor = EntityExtractor()
        self.db_manager = DatabaseManager()
        self.vector_store = VectorStore()
        self.nl_to_sql = NaturalLanguageToSQL()
        self.intent_classifier = IntentClassifier()
        self.rag_handler = RAGHandler()
        self.transaction_clarifier = TransactionClarifier()

        # Transactions awaiting clarification, keyed by session_id.
        self.pending_transactions: Dict[str, PendingTransaction] = {}

    def process_message(self, request: ChatbotRequest) -> ChatbotResponse:
        """Process a user message and return the appropriate response.

        Args:
            request: Incoming request; ``message`` is stripped and a missing
                ``session_id`` falls back to ``"default"``.

        Returns:
            The handler's response. For freshly classified messages the
            detected intent and its confidence are attached; replies to a
            pending clarification are returned without intent information.
        """
        message = request.message.strip()
        session_id = request.session_id or "default"

        # A pending transaction takes precedence over intent classification.
        if session_id in self.pending_transactions:
            print("A transaction is pending...")
            return self._handle_transaction_clarification(message, session_id)

        intent_result = self.intent_classifier.classify_intent(message)

        print(f"🎯 Intent: {intent_result.intent.value} (confidence: {intent_result.confidence:.2f})")
        print(f"📝 Reasoning: {intent_result.reasoning}")

        # Route to the handler for the classified intent.
        if intent_result.intent == IntentType.TRANSACTION:
            response = self._handle_transaction_request(message, session_id)
        elif intent_result.intent == IntentType.QUERY:
            response = self._handle_query_request(message)
        elif intent_result.intent == IntentType.SEMANTIC_SEARCH:
            response = self._handle_search_request(message)
        else:  # GENERAL_INFO
            response = self._handle_general_information(message)

        response.intent_detected = intent_result.intent.value
        response.intent_confidence = intent_result.confidence

        return response

    def _handle_transaction_request(self, message: str, session_id: str) -> ChatbotResponse:
        """Handle a purchase/sale message, asking for clarification if needed.

        Complete transactions are recorded immediately. Incomplete ones are
        parked in ``pending_transactions`` under ``session_id`` and a
        clarification question is returned. Any other status is reported as
        cancelled. Exceptions are converted into an error response.
        """
        try:
            entities = self.entity_extractor.extract_entities(message)

            status, clarification = self.transaction_clarifier.analyze_transaction_completeness(entities)

            if status == ClarificationStatus.COMPLETE:
                return self._complete_transaction(entities, message)

            if status == ClarificationStatus.NEEDS_CLARIFICATION:
                # Park the transaction so the next message from this session
                # is interpreted as the answer to the clarification question.
                self.pending_transactions[session_id] = PendingTransaction(
                    entities=entities,
                    missing_fields=clarification.missing_fields,
                    session_id=session_id,
                    original_message=message
                )

                clarification_message = self.transaction_clarifier.format_clarification_message(clarification)

                return ChatbotResponse(
                    response=clarification_message,
                    entities_extracted=entities,
                    awaiting_clarification=True
                )

            return ChatbotResponse(
                response="Transaction cancelled.",
                entities_extracted=entities
            )

        except Exception as e:
            return ChatbotResponse(
                response=f"Error processing transaction: {str(e)}",
                sql_executed=None,
                entities_extracted=None,
                vector_stored=False
            )

    def _complete_transaction(self, entities, original_message: str) -> ChatbotResponse:
        """Persist a fully specified transaction and index it for search.

        Args:
            entities: Extracted transaction entities with all required fields.
            original_message: Text stored alongside the vector-store event.

        Returns:
            A success response carrying the database message, or a failure
            response when the database layer did not record the transaction.
        """
        try:
            transaction_id, result_message = self.db_manager.process_transaction(entities)

            if transaction_id is None:
                # The database layer signals failure as (None, message). Do not
                # index a transaction that was never persisted, and do not
                # report it as successfully processed.
                return ChatbotResponse(
                    response=result_message,
                    entities_extracted=entities
                )

            # Mirror the transaction into the vector store, linked by SQL id.
            transaction_data = {
                "type": entities.transaction_type,
                "product": entities.product,
                "quantity": entities.quantity,
                "supplier": entities.supplier,
                "customer": entities.customer,
                "unit_price": entities.unit_price,
                "total": entities.total_amount
            }

            vector_stored = self.vector_store.add_transaction_event(
                transaction_data,
                original_message,
                sql_transaction_id=transaction_id
            )

            return ChatbotResponse(
                response=result_message,
                sql_executed="Transaction processed successfully",
                entities_extracted=entities,
                vector_stored=vector_stored
            )

        except Exception as e:
            return ChatbotResponse(
                response=f"Error completing transaction: {str(e)}",
                entities_extracted=entities
            )

    @staticmethod
    def _build_full_context(pending) -> str:
        """Join the original message with every clarification reply, in order."""
        clarifications = "\n".join(
            f"Clarification {index}: {reply}"
            for index, reply in enumerate(pending.clarification_responses, start=1)
        )
        return f"{pending.original_message}\n\n{clarifications}"

    def _handle_transaction_clarification(self, message: str, session_id: str) -> ChatbotResponse:
        """Handle the user's reply to a transaction clarification question.

        The reply is merged into the pending transaction. The transaction is
        then completed, kept pending with a follow-up question, or dropped
        when the user cancels or the clarifier reports a non-completable
        status. On any exception the pending entry is discarded and an error
        response is returned.
        """
        try:
            pending = self.pending_transactions.get(session_id)
            if not pending:
                return ChatbotResponse(
                    response="No pending transaction found. Please start a new transaction."
                )

            # Surrounding whitespace must not defeat the cancel keywords.
            if message.strip().lower() in self._CANCEL_WORDS:
                del self.pending_transactions[session_id]
                return ChatbotResponse(
                    response="Transaction cancelled. You can start a new one anytime."
                )

            # Keep every reply so the stored context reflects the whole dialogue.
            pending.clarification_responses.append(message)

            updated_entities, is_complete = self.transaction_clarifier.process_clarification_response(
                pending.entities,
                pending.missing_fields,
                message
            )

            if not is_complete:
                status, clarification = self.transaction_clarifier.analyze_transaction_completeness(updated_entities)

                if status == ClarificationStatus.NEEDS_CLARIFICATION:
                    pending.entities = updated_entities
                    pending.missing_fields = clarification.missing_fields

                    clarification_message = self.transaction_clarifier.format_clarification_message(clarification)

                    return ChatbotResponse(
                        response=f"Thank you! I still need a bit more information:\n\n{clarification_message}",
                        entities_extracted=updated_entities,
                        awaiting_clarification=True
                    )

                if status != ClarificationStatus.COMPLETE:
                    # Same outcome as _handle_transaction_request for a status
                    # that is neither complete nor awaiting clarification:
                    # drop the transaction instead of recording it.
                    del self.pending_transactions[session_id]
                    return ChatbotResponse(
                        response="Transaction cancelled.",
                        entities_extracted=updated_entities
                    )

            full_context = self._build_full_context(pending)
            del self.pending_transactions[session_id]
            return self._complete_transaction(updated_entities, full_context)

        except Exception as e:
            # Clean up so the session is not stuck in clarification mode.
            self.pending_transactions.pop(session_id, None)

            return ChatbotResponse(
                response=f"Error processing your response: {str(e)}. Please start a new transaction."
            )

    def _handle_query_request(self, message: str) -> ChatbotResponse:
        """Translate the message to SQL, validate it, run it and format the rows."""
        try:
            sql_query, explanation = self.nl_to_sql.convert_to_sql(message)

            is_valid, validation_message = self.nl_to_sql.validate_sql(sql_query)

            if not is_valid:
                suggestion = self.nl_to_sql.suggest_corrections(message, validation_message)
                return ChatbotResponse(
                    response=f"I couldn't process that query: {validation_message}\n\n{suggestion}",
                    sql_executed=sql_query
                )

            results = self.db_manager.query_data(sql_query)

            if not results:
                return ChatbotResponse(
                    response="No results found for your query.",
                    sql_executed=sql_query
                )

            # query_data reports failures as a single {"error": ...} row.
            if len(results) == 1 and "error" in results[0]:
                return ChatbotResponse(
                    response=f"Query execution error: {results[0]['error']}\n\nGenerated SQL: {sql_query}",
                    sql_executed=sql_query
                )

            formatted_response = self._format_sql_results(results, explanation)

            return ChatbotResponse(
                response=formatted_response,
                sql_executed=sql_query
            )

        except Exception as e:
            return ChatbotResponse(response=f"Error processing query: {str(e)}")

    def _handle_search_request(self, message: str) -> ChatbotResponse:
        """Answer a semantic-search request from events found in the vector store."""
        try:
            enhanced_query = self.rag_handler.enhance_search_query(message)
            print(f"🔍 Enhanced query: {enhanced_query}")

            # Retrieve up to 8 similar events for the enhanced query.
            results = self.vector_store.search_similar_events(enhanced_query, 8)

            if not results:
                return ChatbotResponse(response="I couldn't find any relevant information to answer your query.")

            rag_response = self.rag_handler.generate_rag_response(message, results)

            return ChatbotResponse(
                response=rag_response,
                vector_stored=False
            )

        except Exception as e:
            return ChatbotResponse(response=f"Error processing your search: {str(e)}")

    def _handle_general_information(self, message: str) -> ChatbotResponse:
        """Store a free-form message in the vector store as general information."""
        try:
            stored = self.vector_store.add_general_event(message, "general_info")

            if stored:
                return ChatbotResponse(
                    response="Information stored successfully. I can help you find similar information later.",
                    vector_stored=True
                )

            return ChatbotResponse(
                response="Information noted, but vector storage is not available.",
                vector_stored=False
            )

        except Exception as e:
            return ChatbotResponse(response=f"Error storing information: {str(e)}")

    def _format_recent_transactions(self, data: Dict[str, list]) -> str:
        """Format recent transactions for display.

        Args:
            data: Mapping with optional ``"purchases"`` and ``"sales"`` lists
                of transaction dicts.

        Returns:
            One line per transaction, newest first, limited to ten entries,
            or a fixed message when there are none.
        """
        all_transactions = list(data.get("purchases", [])) + list(data.get("sales", []))

        if not all_transactions:
            return "No recent transactions found."

        # A missing or None date sorts as the empty string instead of raising.
        all_transactions.sort(key=lambda x: x.get("date") or "", reverse=True)

        response = "Recent Transactions:\n\n"
        for transaction in all_transactions[:10]:  # Show top 10
            trans_type = transaction.get("type", "unknown").upper()
            date = (transaction.get("date") or "")[:10]  # Just the date part

            if trans_type == "PURCHASE":
                response += f"🛒 {date} - PURCHASE: {transaction.get('quantity', 0)}x {transaction.get('product', 'Unknown')} from {transaction.get('supplier', 'Unknown')} - €{transaction.get('total_cost', 0)}\n"
            else:
                response += f"💰 {date} - SALE: {transaction.get('quantity', 0)}x {transaction.get('product', 'Unknown')} to {transaction.get('customer', 'Unknown')} - €{transaction.get('total_amount', 0)}\n"

        return response

    def _format_search_results(self, results: list, search_term: str) -> str:
        """Format transaction search results for display, one line per match."""
        if not results:
            return f"No transactions found for '{search_term}'."

        response = f"Found {len(results)} transaction(s) for '{search_term}':\n\n"

        for transaction in results:
            trans_type = transaction.get("type", "unknown").upper()
            date = (transaction.get("date") or "")[:10]

            if trans_type == "PURCHASE":
                response += f"🛒 {date} - {transaction.get('quantity', 0)}x {transaction.get('product', 'Unknown')} from {transaction.get('supplier', 'Unknown')} - €{transaction.get('total', 0)}\n"
            else:
                response += f"💰 {date} - {transaction.get('quantity', 0)}x {transaction.get('product', 'Unknown')} to {transaction.get('customer', 'Unknown')} - €{transaction.get('total', 0)}\n"

        return response

    def _format_sql_results(self, results: list, explanation: str) -> str:
        """Format SQL query results for display.

        A single row with a single column is rendered as ``**Name:** value``.
        Anything else becomes a fixed-width table inside a fenced block,
        limited to the first 20 rows.
        """
        response = f"📊 Query Results:\n{explanation}\n\n"

        if not results:
            return response + "No data found."

        # Single value results (like COUNT, SUM).
        if len(results) == 1 and len(results[0]) == 1:
            key, value = next(iter(results[0].items()))
            return response + f"**{key.replace('_', ' ').title()}:** {value}"

        response += "```\n"

        # Header row taken from the first result's column names.
        headers = list(results[0].keys())
        response += " | ".join(f"{header.replace('_', ' ').title():<15}" for header in headers) + "\n"
        response += "-" * (len(headers) * 17) + "\n"

        for row in results[:20]:  # Limit to first 20 rows
            formatted_row = []
            for value in row.values():
                if value is None:
                    formatted_row.append("N/A".ljust(15))
                elif isinstance(value, float):
                    formatted_row.append(f"{value:.2f}".ljust(15))
                else:
                    formatted_row.append(str(value)[:15].ljust(15))
            response += " | ".join(formatted_row) + "\n"

        if len(results) > 20:
            response += f"\n... and {len(results) - 20} more rows\n"

        response += "```"

        return response

    def get_linked_transaction_data(self, sql_transaction_id: int, transaction_type: str) -> Optional[Dict[str, Any]]:
        """Retrieve one transaction from both the SQL store and the vector store.

        Returns:
            A dict with ``sql_data``, ``vector_data`` and a ``linked`` flag
            (True when a vector entry exists), or None when the SQL row is
            missing or a lookup raises.
        """
        try:
            sql_data = self.db_manager.get_transaction_by_id(sql_transaction_id, transaction_type)
            vector_data = self.vector_store.get_transaction_by_sql_id(sql_transaction_id, transaction_type)

            if not sql_data:
                return None

            return {
                "sql_data": sql_data,
                "vector_data": vector_data,
                "linked": vector_data is not None
            }
        except Exception as e:
            print(f"Error retrieving linked transaction data: {e}")
            return None

    def close(self):
        """Clean up resources by closing the database manager."""
        self.db_manager.close()
src/config_manager.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import yaml
4
+ import os
5
+ from typing import Dict, Any, List
6
+ from pathlib import Path
7
+
8
class ConfigManager:
    """Manages configuration loading and access for the chatbot application."""

    def __init__(self, config_path: str = None):
        """
        Initialize the configuration manager.

        Args:
            config_path: Path to the configuration file. Defaults to config.yaml
                in the directory one level above this module's directory.

        Raises:
            FileNotFoundError: If the configuration file does not exist.
            ValueError: If the file is not valid YAML.
        """
        if config_path is None:
            # Default to config.yaml in the project root
            project_root = Path(__file__).parent.parent
            config_path = project_root / "config.yaml"

        self.config_path = Path(config_path)
        self._config = self._load_config()

    def _load_config(self) -> Dict[str, Any]:
        """Load configuration from the YAML file; an empty file yields {}."""
        try:
            with open(self.config_path, 'r', encoding='utf-8') as file:
                config = yaml.safe_load(file)
        except FileNotFoundError as e:
            raise FileNotFoundError(f"Configuration file not found: {self.config_path}") from e
        except yaml.YAMLError as e:
            # Chain the parser error so the failing line/column stays visible.
            raise ValueError(f"Error parsing configuration file: {e}") from e
        return config or {}

    def get(self, key_path: str, default: Any = None) -> Any:
        """
        Get a configuration value using dot notation.

        Args:
            key_path: Dot-separated path to the configuration value (e.g., 'database.url')
            default: Default value to return if key is not found

        Returns:
            The configuration value, or default if any segment of the path is
            missing or an intermediate value cannot be indexed by that segment
        """
        value = self._config

        try:
            for key in key_path.split('.'):
                value = value[key]
        except (KeyError, TypeError):
            return default
        return value

    def get_database_config(self) -> Dict[str, Any]:
        """Get database configuration."""
        return self.get('database', {})

    def get_openai_config(self, component: str = None) -> Dict[str, Any]:
        """
        Get OpenAI configuration.

        Args:
            component: Specific component configuration (e.g., 'intent_classifier')

        Returns:
            The 'openai' section, or its 'openai.<component>' sub-section
            when a component is given
        """
        if component:
            return self.get(f'openai.{component}', {})
        return self.get('openai', {})

    def get_vector_store_config(self) -> Dict[str, Any]:
        """Get vector store configuration."""
        return self.get('vector_store', {})

    def get_search_config(self) -> Dict[str, Any]:
        """Get search configuration."""
        return self.get('search', {})

    def get_entity_extraction_config(self) -> Dict[str, Any]:
        """Get entity extraction configuration."""
        return self.get('entity_extraction', {})

    def get_business_logic_config(self) -> Dict[str, Any]:
        """Get business logic configuration."""
        return self.get('business_logic', {})

    def get_app_config(self) -> Dict[str, Any]:
        """Get application configuration."""
        return self.get('app', {})

    def is_feature_enabled(self, feature_name: str) -> bool:
        """
        Check if a feature is enabled.

        Args:
            feature_name: Name of the feature to check

        Returns:
            True if the feature's 'app.features.<name>' value is truthy or the
            key is absent, False otherwise
        """
        # Coerce so callers always get a real bool, as the signature promises,
        # even when the YAML value is null, a number or a string.
        return bool(self.get(f'app.features.{feature_name}', True))
107
+
108
# Module-wide ConfigManager shared by all callers; created on first request.
_config_manager = None

def get_config() -> ConfigManager:
    """Return the shared ConfigManager, constructing it on the first call."""
    global _config_manager
    if _config_manager is not None:
        return _config_manager
    _config_manager = ConfigManager()
    return _config_manager
117
+
118
def reload_config():
    """Replace the shared ConfigManager with one freshly loaded from file."""
    global _config_manager
    refreshed = ConfigManager()
    _config_manager = refreshed
src/database_manager.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlite3
2
+ from typing import Optional, List, Dict, Any
3
+ from sqlalchemy import create_engine, text
4
+ from sqlalchemy.orm import sessionmaker
5
+ from models import Base, Supplier, Customer, Product, Purchase, Sale, EntityExtraction
6
+
7
class DatabaseManager:
    """Persists suppliers, customers, products, purchases and sales via SQLAlchemy.

    A single session is opened against a SQLite file at construction time and
    reused for every operation until close() is called.
    """

    def __init__(self, db_path: str = "chatbot.db"):
        """Open (and create if needed) the SQLite database at ``db_path``.

        Creates all tables declared on ``Base`` and seeds default suppliers
        and products.
        """
        self.db_path = db_path
        self.engine = create_engine(f"sqlite:///{db_path}")
        Base.metadata.create_all(self.engine)
        Session = sessionmaker(bind=self.engine)
        self.session = Session()
        self._initialize_data()

    def _initialize_data(self):
        """Initialize database with sample data, skipping rows that already exist."""
        suppliers = ["TechMart", "Office Supplies Co", "Electronics Plus"]
        for supplier_name in suppliers:
            existing = self.session.query(Supplier).filter_by(name=supplier_name).first()
            if not existing:
                self.session.add(Supplier(name=supplier_name))

        products = [
            ("USB drives", "Electronics"),
            ("Office chairs", "Furniture"),
            ("Laptops", "Electronics"),
            ("Monitors", "Electronics"),
            ("Keyboards", "Electronics")
        ]
        for product_name, category in products:
            existing = self.session.query(Product).filter_by(name=product_name).first()
            if not existing:
                self.session.add(Product(name=product_name, category=category))

        self.session.commit()

    def _get_or_create(self, model, name):
        """Return the ``model`` row called ``name``, inserting it when absent.

        Returns None when ``name`` is empty or None so callers can store a
        null foreign key.
        """
        if not name:
            return None
        instance = self.session.query(model).filter_by(name=name).first()
        if not instance:
            instance = model(name=name)
            self.session.add(instance)
            # Flush so the new row has its id before it is referenced.
            self.session.flush()
        return instance

    def process_transaction(self, entities: EntityExtraction) -> tuple:
        """Process a transaction based on extracted entities.

        Returns:
            ``(transaction_id, message)`` on success. On failure or when the
            transaction type is neither "purchase" nor "sale", the id is None
            and the message describes the problem; the session is rolled
            back after an exception.
        """
        try:
            if entities.transaction_type == "purchase":
                return self._process_purchase(entities)
            elif entities.transaction_type == "sale":
                return self._process_sale(entities)
            else:
                return None, "Could not determine transaction type"
        except Exception as e:
            self.session.rollback()
            return None, f"Error processing transaction: {str(e)}"

    def _process_purchase(self, entities: EntityExtraction) -> tuple:
        """Record a purchase and return ``(purchase_id, confirmation_message)``.

        Missing quantity defaults to 1 and missing unit price to 0. When no
        total is supplied it is computed as quantity * unit price, and that
        same figure is both stored and shown in the message.
        """
        supplier = self._get_or_create(Supplier, entities.supplier)
        product = self._get_or_create(Product, entities.product)

        quantity = entities.quantity or 1
        unit_price = entities.unit_price or 0
        total_cost = entities.total_amount or quantity * unit_price

        purchase = Purchase(
            supplier_id=supplier.id if supplier else None,
            product_id=product.id if product else None,
            quantity=quantity,
            unit_price=unit_price,
            total_cost=total_cost,
            notes=entities.notes
        )

        self.session.add(purchase)
        self.session.commit()

        return purchase.id, f"Purchase recorded: {quantity}x {entities.product or 'Unknown'} from {entities.supplier or 'Unknown'} for €{total_cost}"

    def _process_sale(self, entities: EntityExtraction) -> tuple:
        """Record a sale and return ``(sale_id, confirmation_message)``.

        Missing quantity defaults to 1 and missing unit price to 0. When no
        total is supplied it is computed as quantity * unit price, and that
        same figure is both stored and shown in the message.
        """
        customer = self._get_or_create(Customer, entities.customer)
        product = self._get_or_create(Product, entities.product)

        quantity = entities.quantity or 1
        unit_price = entities.unit_price or 0
        total_amount = entities.total_amount or quantity * unit_price

        sale = Sale(
            customer_id=customer.id if customer else None,
            product_id=product.id if product else None,
            quantity=quantity,
            unit_price=unit_price,
            total_amount=total_amount,
            notes=entities.notes
        )

        self.session.add(sale)
        self.session.commit()

        return sale.id, f"Sale recorded: {quantity}x {entities.product or 'Unknown'} to {entities.customer or 'Unknown'} for €{total_amount}"

    def query_data(self, query: str) -> List[Dict[str, Any]]:
        """Execute a SQL query and return its rows as dictionaries.

        Returns:
            One dict per row keyed by column name, or a single
            ``{"error": message}`` entry when execution fails.
        """
        try:
            result = self.session.execute(text(query))
            columns = result.keys()
            rows = result.fetchall()
            return [dict(zip(columns, row)) for row in rows]
        except Exception as e:
            # Reset the session after a failed statement, as
            # process_transaction does, so later calls start clean.
            self.session.rollback()
            return [{"error": str(e)}]

    def get_recent_transactions(self, limit: int = 10) -> Dict[str, List[Dict]]:
        """Get the most recent purchases and sales, up to ``limit`` of each."""
        purchases = self.session.query(Purchase).order_by(Purchase.purchase_date.desc()).limit(limit).all()
        sales = self.session.query(Sale).order_by(Sale.sale_date.desc()).limit(limit).all()

        purchase_data = [
            {
                "id": p.id,
                "supplier": p.supplier.name if p.supplier else "Unknown",
                "product": p.product.name if p.product else "Unknown",
                "quantity": p.quantity,
                "unit_price": float(p.unit_price),
                "total_cost": float(p.total_cost),
                "date": p.purchase_date.isoformat(),
                "type": "purchase"
            }
            for p in purchases
        ]

        sale_data = [
            {
                "id": s.id,
                "customer": s.customer.name if s.customer else "Unknown",
                "product": s.product.name if s.product else "Unknown",
                "quantity": s.quantity,
                "unit_price": float(s.unit_price),
                "total_amount": float(s.total_amount),
                "date": s.sale_date.isoformat(),
                "type": "sale"
            }
            for s in sales
        ]

        return {"purchases": purchase_data, "sales": sale_data}

    def search_transactions(self, search_term: str) -> List[Dict[str, Any]]:
        """Search transactions by supplier, customer, product name or notes.

        Returns:
            Matching purchases and sales as dicts, newest first.
        """
        results = []

        # Outer joins keep purchases whose supplier or product is unset.
        purchases = (
            self.session.query(Purchase)
            .join(Supplier, Purchase.supplier_id == Supplier.id, isouter=True)
            .join(Product, Purchase.product_id == Product.id, isouter=True)
            .filter(
                (Supplier.name.contains(search_term)) |
                (Product.name.contains(search_term)) |
                (Purchase.notes.contains(search_term))
            )
            .all()
        )

        for p in purchases:
            results.append({
                "id": p.id,
                "type": "purchase",
                "supplier": p.supplier.name if p.supplier else "Unknown",
                "product": p.product.name if p.product else "Unknown",
                "quantity": p.quantity,
                "unit_price": float(p.unit_price),
                "total": float(p.total_cost),
                "date": p.purchase_date.isoformat()
            })

        sales = (
            self.session.query(Sale)
            .join(Customer, Sale.customer_id == Customer.id, isouter=True)
            .join(Product, Sale.product_id == Product.id, isouter=True)
            .filter(
                (Customer.name.contains(search_term)) |
                (Product.name.contains(search_term)) |
                (Sale.notes.contains(search_term))
            )
            .all()
        )

        for s in sales:
            results.append({
                "id": s.id,
                "type": "sale",
                "customer": s.customer.name if s.customer else "Unknown",
                "product": s.product.name if s.product else "Unknown",
                "quantity": s.quantity,
                "unit_price": float(s.unit_price),
                "total": float(s.total_amount),
                "date": s.sale_date.isoformat()
            })

        return sorted(results, key=lambda x: x["date"], reverse=True)

    def get_transaction_by_id(self, transaction_id: int, transaction_type: str) -> Optional[Dict[str, Any]]:
        """Retrieve a specific transaction by ID and type.

        Returns:
            The row as a dict, or None when the type is unknown, no row
            matches, or the lookup raises (the error is printed).
        """
        try:
            if transaction_type == "purchase":
                transaction = self.session.query(Purchase).filter_by(id=transaction_id).first()
                if transaction:
                    return {
                        "id": transaction.id,
                        "type": "purchase",
                        "supplier_id": transaction.supplier_id,
                        "product_id": transaction.product_id,
                        "quantity": transaction.quantity,
                        "unit_price": transaction.unit_price,
                        "total_cost": transaction.total_cost,
                        "purchase_date": transaction.purchase_date.isoformat() if transaction.purchase_date else None,
                        "notes": transaction.notes
                    }
            elif transaction_type == "sale":
                transaction = self.session.query(Sale).filter_by(id=transaction_id).first()
                if transaction:
                    return {
                        "id": transaction.id,
                        "type": "sale",
                        "customer_id": transaction.customer_id,
                        "product_id": transaction.product_id,
                        "quantity": transaction.quantity,
                        "unit_price": transaction.unit_price,
                        "total_amount": transaction.total_amount,
                        "sale_date": transaction.sale_date.isoformat() if transaction.sale_date else None,
                        "notes": transaction.notes
                    }

            return None
        except Exception as e:
            print(f"Error retrieving transaction by ID: {e}")
            return None

    def close(self):
        """Close the database session."""
        self.session.close()
src/entity_extractor.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import spacy
3
+ from typing import Optional, Dict, Any
4
+ from datetime import datetime
5
+ from dateutil import parser as date_parser
6
+ from models import EntityExtraction
7
+
8
class EntityExtractor:
    """Rule-based extractor that pulls transaction fields out of free text.

    Regular expressions are tried first; when a spaCy pipeline was loaded in
    __init__, its named entities are used as a fallback for product,
    supplier and customer names.
    """

    # Unit words recognised after a number. "box(?:es)?" is used instead of
    # "boxes?" so that the singular "box" is matched as well.
    _UNITS = (
        r"tons?|kg|kilograms?|pounds?|lbs?|pieces?|units?|items?|"
        r"box(?:es)?|liters?|gallons?"
    )
    # Integer or decimal number with any number of fractional digits.
    _NUMBER = r"\d+(?:\.\d+)?"
    # Verbs that introduce a transaction.
    _VERBS = r"bought|purchased|buy|purchase|sold|sale|sell"
    # Words a product pattern may capture that are never a product name.
    _NON_PRODUCT_WORDS = frozenset(
        ['from', 'at', 'for', 'to', 'we', 'i', 'you', 'the', 'a', 'an', 'and', 'or']
    )
    # Canonical spelling for every unit variant the patterns can capture.
    _UNIT_NAMES = {
        'ton': 'tons', 'kg': 'kg', 'kilogram': 'kg', 'kilograms': 'kg',
        'pound': 'lbs', 'pounds': 'lbs', 'lb': 'lbs', 'lbs': 'lbs',
        'piece': 'pieces', 'pieces': 'pieces',
        'unit': 'units', 'units': 'units',
        'item': 'items', 'items': 'items',
        'box': 'boxes', 'boxes': 'boxes',
        'liter': 'liters', 'liters': 'liters',
        'gallon': 'gallons', 'gallons': 'gallons'
    }
    # One whole-word pattern per keyword of the purchase / sale vocabularies.
    _PURCHASE_CUES = (
        r"purchas(?:e|es|ed|ing)", r"buy(?:s|ing)?", r"bought",
        r"order(?:s|ed|ing)?", r"from", r"suppliers?",
    )
    _SALE_CUES = (
        r"sales?", r"sell(?:s|ing)?", r"sold", r"to", r"customers?", r"clients?",
    )

    def __init__(self):
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            print("Warning: spaCy model not found. Install with: python -m spacy download en_core_web_sm")
            self.nlp = None

    def extract_entities(self, text: str) -> "EntityExtraction":
        """Extract every supported entity from a user message.

        Args:
            text: Raw user message.

        Returns:
            An EntityExtraction whose fields are None where nothing was
            found; ``notes`` always carries the original message.
        """
        transaction_type = self._detect_transaction_type(text.lower())

        quantity = self._extract_quantity(text)
        unit_price = self._extract_unit_price(text)

        return EntityExtraction(
            product=self._extract_product(text),
            quantity=quantity,
            unit=self._extract_unit(text),
            # The counterparty depends on the direction of the transaction.
            supplier=self._extract_supplier(text) if transaction_type == "purchase" else None,
            customer=self._extract_customer(text) if transaction_type == "sale" else None,
            unit_price=unit_price,
            total_amount=self._calculate_total(quantity, unit_price),
            transaction_type=transaction_type,
            notes=text
        )

    def _detect_transaction_type(self, text: str) -> str:
        """Decide between "purchase" and "sale" by counting keyword hits.

        Keywords are matched as whole words so that, for example, "to" is
        not found inside "tons", "potatoes" or "customer". Each keyword
        counts at most once; ties go to "purchase".
        """
        def score(cues):
            return sum(
                1 for cue in cues
                if re.search(rf"\b(?:{cue})\b", text, re.IGNORECASE)
            )

        return "purchase" if score(self._PURCHASE_CUES) >= score(self._SALE_CUES) else "sale"

    def _extract_product(self, text: str) -> Optional[str]:
        """Extract the product name, or None when no pattern applies."""
        units = self._UNITS
        # (pattern, flags) pairs, tried in order of decreasing specificity.
        product_patterns = [
            # "X units of Y" (e.g. "20 tons of Apples")
            (rf"(?:\d+)\s*(?:{units})\s+of\s+([a-zA-Z\s]+?)(?:\s+from|\s+at|\s+for|\s*\u20ac|\s*\$|$)",
             re.IGNORECASE),
            # "bought/purchased [X [units]] [of] Y"
            (rf"(?:{self._VERBS})\s+(?:\d+\s*(?:{units})?\s+)?(?:of\s+)?([a-zA-Z\s]+?)(?:\s+from|\s+to|\s+at|\s+for|\s*\u20ac|\s*\$)",
             re.IGNORECASE),
            # Quantity directly followed by the product
            (r"(?:\d+)\s*(?:x\s+)?([a-zA-Z\s]+?)(?:\s+from|\s+at|\s+for|\s*\u20ac|\s*\$)",
             re.IGNORECASE),
            # Standalone capitalized names. Deliberately case-sensitive:
            # with IGNORECASE, "[A-Z]" would accept any word at all.
            (r"\b([A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*)\b(?!\s+(?:from|at|for|\u20ac|\$))",
             0),
        ]

        for pattern, flags in product_patterns:
            match = re.search(pattern, text, flags)
            if match:
                product = match.group(1).strip()
                # A filtered word does not end the search; try the next pattern.
                if product.lower() not in self._NON_PRODUCT_WORDS:
                    return product

        # Fall back to spaCy named entities when the model is available.
        if self.nlp:
            for ent in self.nlp(text).ents:
                if ent.label_ in ["PRODUCT", "ORG"] and len(ent.text) > 2:
                    return ent.text

        return None

    def _extract_quantity(self, text: str) -> Optional[int]:
        """Extract the quantity as an int (decimals are rounded), or None."""
        number = self._NUMBER
        quantity_patterns = [
            # Number with an explicit unit
            rf"({number})\s*(?:{self._UNITS})",
            # Number followed by "of" or "x"
            rf"({number})\s*(?:of|x)\s+",
            # Number right after a transaction verb
            rf"(?:{self._VERBS})\s+(?:of\s+)?({number})",
            # Number at the very start of the message
            rf"^({number})\s+",
        ]

        for pattern in quantity_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                return int(round(float(match.group(1))))

        return None

    def _extract_unit(self, text: str) -> Optional[str]:
        """Extract and normalise the unit that follows a number, or None."""
        match = re.search(rf"{self._NUMBER}\s*({self._UNITS})", text, re.IGNORECASE)
        if not match:
            return None
        unit = match.group(1).lower()
        return self._UNIT_NAMES.get(unit, unit)

    def _extract_party(self, text: str, cue_words, spacy_label: str) -> Optional[str]:
        """Find the name following one of ``cue_words``, else a spaCy entity.

        The cue must be a whole word: without the boundary, "to" would match
        the end of "potato" and capture whatever follows it.
        """
        for cue in cue_words:
            pattern = rf"\b{cue}\s+([A-Za-z\s]+?)(?:\s+at|\s+for|\s*\u20ac|\s*\$|$)"
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                return match.group(1).strip()

        if self.nlp:
            for ent in self.nlp(text).ents:
                if ent.label_ == spacy_label:
                    return ent.text

        return None

    def _extract_supplier(self, text: str) -> Optional[str]:
        """Extract the supplier name ("from X" / "supplier X", then spaCy ORG)."""
        return self._extract_party(text, ("from", "supplier"), "ORG")

    def _extract_customer(self, text: str) -> Optional[str]:
        """Extract the customer name ("to X" / "customer X", then spaCy PERSON)."""
        return self._extract_party(text, ("to", "customer"), "PERSON")

    def _extract_unit_price(self, text: str) -> Optional[float]:
        """Extract the unit price, or None when no price is present.

        Currency symbols are matched literally: the dollar sign is escaped
        (a bare "$" in a regex is the end-of-string anchor) and the euro
        sign is written as \\u20ac so the pattern does not depend on the
        file's encoding. Any number of decimal digits is accepted.
        """
        number = self._NUMBER
        price_patterns = [
            # "at 5", "for 3.50", "€5", "$800"
            rf"(?:\b(?:at|for)\b|\u20ac|\$)\s*({number})\s*(?:each|per|unit)?",
            # "5€ each", "5 $ per"
            rf"({number})\s*(?:\u20ac|\$)\s*(?:each|per|unit)",
            # "5€", "12.5 $"
            rf"({number})\s*(?:\u20ac|\$)",
            # A number closing the message (e.g. a bare "5" typed as a
            # reply); the previous end-anchored pattern accepted this too.
            rf"({number})\s*$",
        ]

        for pattern in price_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                return float(match.group(1))

        return None

    def _calculate_total(self, quantity: Optional[int], unit_price: Optional[float]) -> Optional[float]:
        """Return quantity * unit_price, or None when either is missing or zero."""
        if quantity and unit_price:
            return quantity * unit_price
        return None
src/intent_classifier.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import os
3
+ import dirtyjson as json
4
+ from typing import Dict, Any, Optional, Tuple
5
+ from enum import Enum
6
+ from pydantic import BaseModel
7
+
8
class IntentType(str, Enum):
    """Intent categories a user message can be classified into.

    Members are also ``str`` instances; their values are the lowercase
    labels the classifier prompt asks the model to return.
    """
    # Record a purchase or a sale
    TRANSACTION = "transaction"
    # Retrieve or analyze structured data from the SQL tables
    QUERY = "query"
    # Look up contextual or unstructured information
    SEMANTIC_SEARCH = "semantic_search"
    # Store a general note or reminder
    GENERAL_INFO = "general_info"
13
+
14
class IntentResult(BaseModel):
    """Outcome of classifying one user message."""
    # Chosen intent category
    intent: IntentType
    # Confidence reported for the classification
    confidence: float
    # Short explanation of why the intent was chosen
    reasoning: str
    # Optional free-text hint about detected entities
    entities_hint: Optional[str] = None
19
+
20
class IntentClassifier:
    """Classifies user messages into an IntentType.

    The primary path asks an OpenAI chat model for a JSON verdict; when the
    call or the parsing fails, a keyword heuristic is used instead.
    """

    # Instructions sent as the system message of every classification call.
    _SYSTEM_PROMPT = """You are an expert intent classifier for a business chatbot that handles sales, purchases, and general information storage.

Given a user message, classify it into one of these intents:

1. **QUERY**: User wants to retrieve or analyze STRUCTURED data from SQL database tables
- Examples: "How many USB drives did we buy?" (counts from purchases table)
- Examples: "What's the total value of all sales?" (sum from sales table)
- Examples: "Show me recent transactions" (list from transactions table)
- Examples: "List all customers" (data from customers table)
- Key indicators: Asking for counts, totals, lists, recent data from business transactions
- Must be answerable from structured database tables (purchases, sales, customers, suppliers, products)

2. **SEMANTIC_SEARCH**: User wants to find contextual information, tasks, or unstructured data
- Examples: "What does Mark need to do?" (searching for task/context info)
- Examples: "Find events related to supplier meetings" (contextual search)
- Examples: "When do I have the meeting with George?" (calendar/scheduling info)
- Examples: "Show me similar purchases to this one" (similarity search)
- Examples: "What did we discuss in the last meeting?" (meeting notes/context)
- Key indicators: Questions about tasks, meetings, discussions, or contextual information
- Information that would NOT be in structured database tables

3. **TRANSACTION**: User wants to record a business transaction (purchase or sale)
- Examples: "Add a purchase of 20 USB drives from TechMart at €5 each"
- Examples: "Sold 10 laptops to John Smith at €800 each"
- Contains: product names, quantities, suppliers/customers, prices
- Action: Recording new business data

4. **GENERAL_INFO**: User wants to store general business information or notes
- It cannot be a question.
- Examples: "Meeting with new supplier scheduled for next week"
- Examples: "Remember to check inventory levels before next order"
- Examples: "Mark needs to call the supplier tomorrow"
- Contains: notes, reminders, general business information, task assignments

Return your response in this exact JSON format:
{
"intent": "transaction|query|semantic_search|general_info",
"confidence": 0.0-1.0,
"reasoning": "Brief explanation of why you chose this intent",
"entities_hint": "Optional: Key entities you detected (for transaction intent)"
}

Be precise and consider context carefully. If unsure, choose the most likely intent and indicate lower confidence."""

    def __init__(self, api_key: Optional[str] = None):
        """Initialize OpenAI client for intent classification.

        Args:
            api_key: OpenAI key; falls back to the OPENAI_API_KEY
                environment variable when omitted.
        """
        self.client = openai.OpenAI(
            api_key=api_key or os.getenv('OPENAI_API_KEY')
        )

    @staticmethod
    def _strip_code_fence(response_text: str) -> str:
        """Remove a surrounding Markdown code fence from a model reply."""
        if response_text.startswith("```json"):
            response_text = response_text[7:]
        if response_text.startswith("```"):
            response_text = response_text[3:]
        if response_text.endswith("```"):
            response_text = response_text[:-3]
        return response_text.strip()

    def classify_intent(self, user_message: str) -> IntentResult:
        """Classify user intent using the OpenAI API.

        Args:
            user_message: The raw message to classify.

        Returns:
            IntentResult with intent type, confidence, and reasoning. Any
            failure (API error, unparsable reply, unknown intent label) is
            answered by the keyword fallback instead of raising.
        """
        user_prompt = f'Classify the intent of this user message: "{user_message}"'

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": self._SYSTEM_PROMPT},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.1,
                max_tokens=300
            )

            response_text = self._strip_code_fence(
                response.choices[0].message.content.strip()
            )

            try:
                result_dict = json.loads(response_text)

                # Only labels that map onto IntentType are accepted.
                intent_value = result_dict.get("intent", "").lower()
                if intent_value not in [e.value for e in IntentType]:
                    print(f"Invalid intent value: {intent_value}")
                    return self._fallback_classification(user_message, f"Invalid intent: {intent_value}")

                return IntentResult(
                    intent=IntentType(intent_value),
                    confidence=float(result_dict.get("confidence", 0.5)),
                    reasoning=result_dict.get("reasoning", "No reasoning provided"),
                    entities_hint=result_dict.get("entities_hint")
                )
            except Exception as e:
                # The reply was not usable JSON; fall back to keywords.
                print(f"JSON parsing error: {e}")
                print(f"Raw response: {response_text}")
                return self._fallback_classification(user_message, f"JSON parsing failed: {str(e)}")

        except Exception as e:
            print(f"Error in intent classification: {e}")
            return self._fallback_classification(user_message, str(e))

    def _fallback_classification(self, user_message: str, error_info: str) -> IntentResult:
        """Keyword-based classification used when the OpenAI path fails.

        Keywords are matched as whole words. Plain substring tests made
        nearly every message a TRANSACTION, because "to" occurs inside
        "total", "tomorrow", "customer" and so on. The prepositions
        "from"/"to" are only treated as transaction cues when the message
        also contains a digit, since on their own they appear in ordinary
        notes such as "Remember to check inventory".

        Args:
            user_message: The message to classify.
            error_info: Description of the failure; its first 100 characters
                are embedded in the returned reasoning.
        """
        import re  # local import: this module does not import re at file level

        message_lower = user_message.lower()

        def mentions(pattern: str) -> bool:
            return re.search(pattern, message_lower) is not None

        is_transaction = (
            mentions(r"\b(?:purchas\w*|buy\w*|bought|sold|sales?)\b")
            or "€" in message_lower
            or "$" in message_lower
            or (mentions(r"\b(?:from|to)\b") and mentions(r"\d"))
        )

        if is_transaction:
            intent = IntentType.TRANSACTION
            confidence = 0.6
        elif mentions(r"\b(?:how many|totals?|list all|recent transactions|count(?:s|ing)?)\b"):
            intent = IntentType.QUERY
            confidence = 0.6
        elif mentions(r"\b(?:similar|like|related|about|needs? to do|meetings?|discuss\w*|tasks?)\b"):
            intent = IntentType.SEMANTIC_SEARCH
            confidence = 0.6
        else:
            intent = IntentType.GENERAL_INFO
            confidence = 0.5

        return IntentResult(
            intent=intent,
            confidence=confidence,
            reasoning=f"Fallback classification due to API error: {error_info[:100]}",
            entities_hint=None
        )

    def get_intent_description(self, intent: IntentType) -> str:
        """Get human-readable description of intent type."""
        descriptions = {
            IntentType.TRANSACTION: "Recording a business transaction (purchase or sale)",
            IntentType.QUERY: "Retrieving or analyzing data from the database",
            IntentType.SEMANTIC_SEARCH: "Finding similar events or information",
            IntentType.GENERAL_INFO: "Storing general business information or notes"
        }
        return descriptions.get(intent, "Unknown intent type")

    def batch_classify(self, messages: list[str]) -> list[IntentResult]:
        """Classify several messages, one API call per message, in order."""
        return [self.classify_intent(message) for message in messages]
src/models.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ from typing import Optional, List
3
+ from pydantic import BaseModel, Field
4
+ from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, Numeric, Text, create_engine
5
+ from sqlalchemy.ext.declarative import declarative_base
6
+ from sqlalchemy.orm import relationship, sessionmaker
7
+
8
# Declarative base class that every ORM model below inherits from.
Base = declarative_base()
9
+
10
class Supplier(Base):
    """ORM model for the "suppliers" table."""
    __tablename__ = "suppliers"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Required, and unique across suppliers
    name = Column(String(255), nullable=False, unique=True)
    contact_info = Column(Text)
    # Default value is produced by datetime.utcnow
    created_at = Column(DateTime, default=datetime.utcnow)

    # Paired with Purchase.supplier
    purchases = relationship("Purchase", back_populates="supplier")
19
+
20
class Customer(Base):
    """ORM model for the "customers" table."""
    __tablename__ = "customers"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Required; unlike Supplier.name there is no uniqueness constraint
    name = Column(String(255), nullable=False)
    email = Column(String(255))
    phone = Column(String(50))
    address = Column(Text)
    # Default value is produced by datetime.utcnow
    created_at = Column(DateTime, default=datetime.utcnow)

    # Paired with Sale.customer
    sales = relationship("Sale", back_populates="customer")
31
+
32
class Product(Base):
    """ORM model for the "products" table."""
    __tablename__ = "products"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Required; no uniqueness constraint is declared
    name = Column(String(255), nullable=False)
    description = Column(Text)
    category = Column(String(100))
    # Default value is produced by datetime.utcnow
    created_at = Column(DateTime, default=datetime.utcnow)

    # A product is referenced from both purchases and sales
    purchases = relationship("Purchase", back_populates="product")
    sales = relationship("Sale", back_populates="product")
43
+
44
class Purchase(Base):
    """ORM model for the "purchases" table: one purchase of a product from a supplier."""
    __tablename__ = "purchases"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Foreign keys are declared without nullable=False
    supplier_id = Column(Integer, ForeignKey("suppliers.id"))
    product_id = Column(Integer, ForeignKey("products.id"))
    quantity = Column(Integer, nullable=False)
    # Monetary columns: Numeric with precision 10 and scale 2
    unit_price = Column(Numeric(10, 2), nullable=False)
    total_cost = Column(Numeric(10, 2), nullable=False)
    # Default value is produced by datetime.utcnow
    purchase_date = Column(DateTime, default=datetime.utcnow)
    notes = Column(Text)

    # Paired with Supplier.purchases and Product.purchases
    supplier = relationship("Supplier", back_populates="purchases")
    product = relationship("Product", back_populates="purchases")
58
+
59
class Sale(Base):
    """ORM model for the "sales" table: one sale of a product to a customer."""
    __tablename__ = "sales"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Foreign keys are declared without nullable=False
    customer_id = Column(Integer, ForeignKey("customers.id"))
    product_id = Column(Integer, ForeignKey("products.id"))
    quantity = Column(Integer, nullable=False)
    # Monetary columns: Numeric with precision 10 and scale 2
    unit_price = Column(Numeric(10, 2), nullable=False)
    total_amount = Column(Numeric(10, 2), nullable=False)
    # Default value is produced by datetime.utcnow
    sale_date = Column(DateTime, default=datetime.utcnow)
    notes = Column(Text)

    # Paired with Customer.sales and Product.sales
    customer = relationship("Customer", back_populates="sales")
    product = relationship("Product", back_populates="sales")
73
+
74
+ # Pydantic models for API
75
class EntityExtraction(BaseModel):
    """Entities pulled out of a user message describing a transaction.

    Every field except ``transaction_type`` is optional and defaults to None.
    """
    product: Optional[str] = None
    quantity: Optional[int] = None
    unit: Optional[str] = None  # e.g., "tons", "pieces", "kg"
    supplier: Optional[str] = None
    customer: Optional[str] = None
    unit_price: Optional[float] = None
    total_amount: Optional[float] = None
    # Required field (no default)
    transaction_type: str = Field(..., description="'purchase' or 'sale'")
    notes: Optional[str] = None
85
+
86
class ChatbotRequest(BaseModel):
    """Incoming chat message, optionally tagged with a session id."""
    message: str
    session_id: Optional[str] = None
89
+
90
class PendingTransaction(BaseModel):
    """A transaction that is missing fields, together with the context it came from."""
    # Entities extracted so far
    entities: EntityExtraction
    # Names of the fields still missing
    missing_fields: List[str]
    session_id: str
    original_message: str
    # Defaults to an empty list
    clarification_responses: List[str] = []
96
+
97
class ChatbotResponse(BaseModel):
    """Reply returned by the chatbot; only ``response`` is required."""
    # Text of the reply
    response: str
    sql_executed: Optional[str] = None
    entities_extracted: Optional[EntityExtraction] = None
    vector_stored: bool = False
    intent_detected: Optional[str] = None
    intent_confidence: Optional[float] = None
    awaiting_clarification: bool = False
src/nl_to_sql.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import os
3
+ from typing import Dict, Any, Optional, Tuple
4
+ import re
5
+ import json
6
+
7
class NaturalLanguageToSQL:
    """Turns natural-language questions into read-only SQLite queries via OpenAI."""

    def __init__(self, api_key: Optional[str] = None, model: Optional[str] = None):
        """Initialize OpenAI client for natural language to SQL conversion.

        Args:
            api_key: OpenAI key; falls back to the OPENAI_API_KEY
                environment variable when omitted.
            model: Chat model to use; falls back to the OPENAI_MODEL
                environment variable (the setting documented in
                .env.example) and then to "gpt-4o-mini".
        """
        self.client = openai.OpenAI(
            api_key=api_key or os.getenv('OPENAI_API_KEY')
        )
        self.model = model or os.getenv('OPENAI_MODEL') or "gpt-4o-mini"

        # Database schema description for the LLM
        self.schema_description = """
Database Schema:

Table: suppliers
- id (INTEGER PRIMARY KEY)
- name (VARCHAR(255)) - Supplier company name
- contact_info (TEXT) - Contact information
- created_at (TIMESTAMP)

Table: customers
- id (INTEGER PRIMARY KEY)
- name (VARCHAR(255)) - Customer name
- email (VARCHAR(255))
- phone (VARCHAR(50))
- address (TEXT)
- created_at (TIMESTAMP)

Table: products
- id (INTEGER PRIMARY KEY)
- name (VARCHAR(255)) - Product name
- description (TEXT)
- category (VARCHAR(100)) - Product category
- created_at (TIMESTAMP)

Table: purchases
- id (INTEGER PRIMARY KEY)
- supplier_id (INTEGER) - Foreign key to suppliers table
- product_id (INTEGER) - Foreign key to products table
- quantity (INTEGER) - Number of items purchased
- unit_price (DECIMAL(10,2)) - Price per unit
- total_cost (DECIMAL(10,2)) - Total purchase cost
- purchase_date (TIMESTAMP) - When purchase was made
- notes (TEXT) - Additional notes

Table: sales
- id (INTEGER PRIMARY KEY)
- customer_id (INTEGER) - Foreign key to customers table
- product_id (INTEGER) - Foreign key to products table
- quantity (INTEGER) - Number of items sold
- unit_price (DECIMAL(10,2)) - Price per unit
- total_amount (DECIMAL(10,2)) - Total sale amount
- sale_date (TIMESTAMP) - When sale was made
- notes (TEXT) - Additional notes

Relationships:
- purchases.supplier_id → suppliers.id
- purchases.product_id → products.id
- sales.customer_id → customers.id
- sales.product_id → products.id
"""

    def _chat_model(self) -> str:
        """Return the configured model, tolerating instances built without __init__."""
        return getattr(self, "model", None) or "gpt-4o-mini"

    def convert_to_sql(self, natural_language_query: str) -> Tuple[str, str]:
        """Convert natural language query to SQL.

        Args:
            natural_language_query: The user's question.

        Returns:
            (sql_query, explanation). On failure the first element is an
            SQL comment starting with "-- Error generating SQL:", which
            validate_sql rejects.
        """
        system_prompt = f"""You are an expert SQL query generator. Given a natural language question about a business database, generate the appropriate SQL query.

{self.schema_description}

Guidelines:
1. Generate valid SQLite syntax
2. Use JOINs when accessing related data across tables
3. Use appropriate WHERE clauses for filtering
4. Use aggregate functions (COUNT, SUM, AVG) when appropriate
5. Use ORDER BY for sorting results
6. Use LIMIT for restricting result count when reasonable
7. Always use proper table aliases for clarity
8. Handle date ranges using DATE() function for SQLite
9. Use LIKE with % wildcards for text searches
10. Return only the SQL query, no explanations unless specifically requested

Example queries:
- "Show all USB drives purchased" → SELECT p.name, pu.quantity, pu.unit_price, s.name as supplier FROM purchases pu JOIN products p ON pu.product_id = p.id JOIN suppliers s ON pu.supplier_id = s.id WHERE p.name LIKE '%USB%'
- "Total sales this month" → SELECT SUM(total_amount) FROM sales WHERE DATE(sale_date) >= DATE('now', 'start of month')
- "Top 5 customers by sales" → SELECT c.name, SUM(s.total_amount) as total FROM sales s JOIN customers c ON s.customer_id = c.id GROUP BY c.id, c.name ORDER BY total DESC LIMIT 5
"""

        user_prompt = f"""Convert this natural language query to SQL:

"{natural_language_query}"

Return ONLY the SQL query, nothing else."""

        try:
            response = self.client.chat.completions.create(
                model=self._chat_model(),
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.1,
                max_tokens=500
            )

            sql_query = response.choices[0].message.content.strip()

            # Strip a Markdown fence, whether or not it is tagged "sql".
            sql_query = re.sub(r'^```(?:sql)?\s*', '', sql_query, flags=re.IGNORECASE)
            sql_query = re.sub(r'\s*```$', '', sql_query)
            sql_query = sql_query.strip()

            explanation = self._generate_explanation(natural_language_query, sql_query)

            return sql_query, explanation

        except Exception as e:
            return f"-- Error generating SQL: {str(e)}", f"Failed to convert query: {str(e)}"

    def _generate_explanation(self, nl_query: str, sql_query: str) -> str:
        """Generate a human-readable explanation of what the SQL query does.

        Falls back to a generic sentence when the API call fails.
        """
        system_prompt = """You are a helpful assistant that explains SQL queries in simple terms.
Given a natural language question and the corresponding SQL query, provide a brief explanation of what the SQL query does."""

        user_prompt = f"""Natural language query: "{nl_query}"

SQL query: {sql_query}

Provide a brief explanation of what this SQL query does:"""

        try:
            response = self.client.chat.completions.create(
                model=self._chat_model(),
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.3,
                max_tokens=200
            )

            return response.choices[0].message.content.strip()

        except Exception:
            # The explanation is optional; never fail the conversion over it.
            return f"Generated SQL query for: {nl_query}"

    def validate_sql(self, sql_query: str) -> Tuple[bool, str]:
        """Check that a query is a single, read-only SELECT statement.

        The checks are textual heuristics, not a SQL parser. String literals
        are blanked out first so that keywords or punctuation inside quoted
        values do not trigger false positives.

        Args:
            sql_query: The SQL text to check.

        Returns:
            (is_valid, error_message)
        """
        sql_lower = sql_query.lower().strip()
        scrubbed = re.sub(r"'(?:[^']|'')*'", "''", sql_lower)

        # Data- or schema-changing keywords are rejected wherever they occur,
        # including after a leading SELECT (e.g. "select 1; alter table ...").
        dangerous_keywords = ['drop', 'delete', 'truncate', 'alter', 'create', 'insert', 'update']
        for keyword in dangerous_keywords:
            if re.search(rf"\b{keyword}\b", scrubbed):
                return False, f"Query contains potentially dangerous keyword: {keyword}"

        # Check if it starts with SELECT (read-only queries only)
        if not sql_lower.startswith('select'):
            return False, "Only SELECT queries are allowed for security"

        # Basic syntax checks
        if scrubbed.count('(') != scrubbed.count(')'):
            return False, "Unmatched parentheses in query"

        # A second statement after ";" (of any kind) and SQL comments are
        # refused; a single trailing ";" is still accepted.
        injection_patterns = [r";\s*\S", r"--", r"/\*.*\*/"]
        for pattern in injection_patterns:
            if re.search(pattern, scrubbed, re.DOTALL):
                return False, f"Query contains potentially unsafe pattern: {pattern}"

        return True, "Query appears valid"

    def suggest_corrections(self, natural_language_query: str, error_message: str) -> str:
        """Suggest how to rephrase the query if it fails.

        The first suggestion whose key word occurs in ``error_message``
        (case-insensitive) wins; a generic hint is returned otherwise.
        """
        suggestions = {
            "table": "Make sure you're asking about purchases, sales, customers, suppliers, or products",
            "column": "Try using terms like 'name', 'quantity', 'price', 'date', 'total'",
            "syntax": "Try rephrasing your question more simply",
            "ambiguous": "Be more specific about what data you want to see"
        }

        error_lower = error_message.lower()

        for key, suggestion in suggestions.items():
            if key in error_lower:
                return f"Suggestion: {suggestion}"

        return "Try rephrasing your question or ask for help with available data"
src/rag_handler.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import os
3
+ from typing import List, Dict, Any, Optional
4
+ import json
5
+ from datetime import datetime
6
+
7
class RAGHandler:
    """Answers questions from retrieved records and rewrites search queries via OpenAI."""

    def __init__(self, api_key: Optional[str] = None):
        """Initialize OpenAI client for RAG responses.

        Args:
            api_key: OpenAI key; falls back to the OPENAI_API_KEY
                environment variable when omitted.
        """
        self.client = openai.OpenAI(
            api_key=api_key or os.getenv('OPENAI_API_KEY')
        )

    def generate_rag_response(self, user_query: str, retrieved_documents: List[Dict[str, Any]]) -> str:
        """Generate a response using RAG (Retrieval-Augmented Generation).

        Args:
            user_query: The user's original query
            retrieved_documents: List of documents from vector store with similarity scores

        Returns:
            Generated response based on retrieved context. With no
            documents a fixed "nothing found" sentence is returned without
            calling the API; if the API call fails, the error text is
            returned followed by a plain listing of the documents.
        """
        if not retrieved_documents:
            return "I couldn't find any relevant information to answer your query."

        # Format retrieved documents for context
        context = self._format_context(retrieved_documents)

        system_prompt = """You are a helpful business assistant with access to a company's transaction history and business information.

Your role is to answer user questions based on the provided context from the company's records.

Guidelines:
1. Answer based ONLY on the provided context
2. If the context doesn't contain enough information, say so clearly
3. Be specific and cite relevant details from the context
4. Maintain a professional, helpful tone
5. If asked about specific dates, transactions, or events, reference the exact information from context
6. If the context contains multiple relevant items, summarize them appropriately
7. Don't make up information not present in the context

Context format: Each document has a 'document' field with the actual content and 'metadata' with additional details like timestamps."""

        user_prompt = f"""Based on the following business records, please answer this question: "{user_query}"

Context from company records:
{context}

Please provide a comprehensive answer based on the available information."""

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.3,
                max_tokens=800
            )

            return response.choices[0].message.content.strip()

        except Exception as e:
            return f"I encountered an error while processing your query: {str(e)}\n\nHowever, I found these relevant records:\n{self._format_fallback_response(retrieved_documents)}"

    @staticmethod
    def _describe_transaction_data(raw_data: Any) -> Optional[str]:
        """Render the known transaction fields of a metadata 'data' value.

        ``raw_data`` may be a dict or a JSON string encoding one. Returns
        None when it cannot be decoded or holds none of the known fields.
        """
        try:
            data = json.loads(raw_data) if isinstance(raw_data, str) else raw_data
        except (TypeError, ValueError):
            return None
        if not isinstance(data, dict):
            return None

        relevant_fields = ['product', 'quantity', 'supplier', 'customer', 'total', 'unit_price']
        data_parts = [
            f"{field}: {data[field]}"
            for field in relevant_fields
            if data.get(field) is not None
        ]
        return ', '.join(data_parts) if data_parts else None

    def _format_context(self, documents: List[Dict[str, Any]]) -> str:
        """Format retrieved documents as context for the LLM.

        Each document becomes a small text section; sections are separated
        by a line of 50 dashes.
        """
        if not documents:
            return "No relevant documents found."

        context_parts = []

        for i, doc in enumerate(documents, 1):
            doc_content = doc.get('document', 'No content available')
            # The key may be present with a None value; treat that as "no metadata".
            metadata = doc.get('metadata') or {}
            distance = doc.get('distance', 'Unknown')

            context_entry = f"Document {i}:\n"
            context_entry += f"Content: {doc_content}\n"

            if metadata:
                timestamp = metadata.get('timestamp')
                if isinstance(timestamp, str):
                    # Keep only the first 10 characters (the date part of an ISO timestamp).
                    context_entry += f"Date: {timestamp[:10]}\n"

                if 'type' in metadata:
                    context_entry += f"Type: {metadata['type']}\n"

                if 'data' in metadata:
                    details = self._describe_transaction_data(metadata['data'])
                    if details:
                        context_entry += f"Details: {details}\n"

            # Report relevance as 1 - distance when the distance is numeric.
            if distance is not None and distance != 'Unknown':
                try:
                    similarity = 1 - float(distance)
                    context_entry += f"Relevance: {similarity:.2f}\n"
                except (TypeError, ValueError):
                    pass

            context_parts.append(context_entry)

        # The divider is the join separator. Previously operator precedence
        # made it a one-off prefix glued to the first document.
        separator = "\n" + "-" * 50 + "\n"
        return separator.join(context_parts)

    def _format_fallback_response(self, documents: List[Dict[str, Any]]) -> str:
        """Create a fallback response when LLM fails: a numbered list of the documents."""
        if not documents:
            return "No relevant information found."

        response_parts = []

        for i, doc in enumerate(documents, 1):
            doc_content = doc.get('document', 'No content available')
            # Guard against an explicit None, which has no .get().
            metadata = doc.get('metadata') or {}

            entry = f"{i}. {doc_content}"

            timestamp = metadata.get('timestamp')
            if timestamp:
                try:
                    entry += f" (Date: {timestamp[:10]})"
                except TypeError:
                    # Timestamp is not sliceable; omit the date.
                    pass

            response_parts.append(entry)

        return "\n".join(response_parts)

    def enhance_search_query(self, user_query: str) -> str:
        """Enhance the user's search query for better vector retrieval.

        Args:
            user_query: Original user query

        Returns:
            Enhanced query for better semantic search; the original query
            when the API call fails or returns fewer than 3 characters.
        """
        system_prompt = """You are an expert at reformulating search queries for business records retrieval.

Given a user's question, create an enhanced search query that will better match relevant business documents in a vector database.

Guidelines:
1. Extract key business concepts (products, suppliers, customers, dates, amounts)
2. Add relevant synonyms and related terms
3. Focus on business transaction terminology
4. Keep it concise but comprehensive
5. Don't change the core intent of the original query

Examples:
- "When is my meeting with George?" → "meeting George supplier customer appointment scheduled"
- "Show me laptop purchases" → "laptop computer purchase buy bought supplier transaction"
- "Similar sales to John" → "John customer sale sold transaction similar"

Return only the enhanced query, nothing else."""

        user_prompt = f'Enhance this search query for better business records retrieval: "{user_query}"'

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.2,
                max_tokens=100
            )

            enhanced_query = response.choices[0].message.content.strip()

            # Fallback to original if enhancement fails
            if not enhanced_query or len(enhanced_query) < 3:
                return user_query

            return enhanced_query

        except Exception as e:
            print(f"Query enhancement failed: {e}")
            return user_query
src/transaction_clarifier.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import os
3
+ import json
4
+ from typing import Dict, Any, Optional, List, Tuple
5
+ from enum import Enum
6
+ from pydantic import BaseModel
7
+ from models import EntityExtraction
8
+
9
class ClarificationStatus(str, Enum):
    """Result of checking whether a transaction carries every required detail.

    The ``str`` mix-in lets each member compare equal to its plain string value.
    """

    # No required field is missing (or the transaction type has no requirements).
    COMPLETE = "complete"

    # At least one required field is absent and the user has to be asked for it.
    NEEDS_CLARIFICATION = "needs_clarification"

    # NOTE(review): not produced by the code in this module; presumably set by
    # the caller when the user aborts the transaction - confirm.
    CANCELLED = "cancelled"
13
+
14
class ClarificationRequest(BaseModel):
    """Everything needed to ask the user for the details a transaction lacks."""

    # Names of the required fields that were not found on the transaction.
    missing_fields: List[str]

    # Questions to show to the user.
    questions: List[str]

    # Optional proposals keyed by field name; empty when there are none.
    suggested_values: Dict[str, Any] = {}

    # Short text telling the user why the extra information is needed.
    explanation: str
19
+
20
class TransactionClarifier:
    """Detects missing transaction details and collects them via follow-up questions.

    Typical flow: ``analyze_transaction_completeness`` decides whether anything
    is missing and builds a ``ClarificationRequest``;
    ``format_clarification_message`` renders it for the user;
    ``process_clarification_response`` merges the user's answer back into the
    entities and re-checks completeness.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Initialize OpenAI client for transaction clarification.

        Args:
            api_key: Explicit API key; when omitted, the ``OPENAI_API_KEY``
                environment variable is used.
        """
        self.client = openai.OpenAI(
            api_key=api_key or os.getenv('OPENAI_API_KEY')
        )

    @staticmethod
    def _parse_json_object(response_text: Optional[str]) -> Dict[str, Any]:
        """Parse a JSON object from model output, tolerating Markdown code fences.

        Args:
            response_text: Raw text returned by the model.

        Returns:
            The decoded JSON object.

        Raises:
            json.JSONDecodeError: If the text is not valid JSON or does not
                decode to a JSON object.
        """
        text = (response_text or "").strip()

        if text.startswith("```"):
            # Drop the opening fence line (``` or ```json) ...
            first_newline = text.find("\n")
            text = text[first_newline + 1:] if first_newline != -1 else ""
            # ... and the closing fence, if present.
            text = text.rstrip()
            if text.endswith("```"):
                text = text[:-3]

        parsed = json.loads(text)
        if not isinstance(parsed, dict):
            raise json.JSONDecodeError("Expected a JSON object", text, 0)
        return parsed

    def analyze_transaction_completeness(self, entities: EntityExtraction) -> Tuple[ClarificationStatus, Optional[ClarificationRequest]]:
        """
        Analyze if a transaction has all necessary information

        Args:
            entities: Extracted entities from user input

        Returns:
            Tuple of (status, clarification_request). The request is ``None``
            when the status is ``COMPLETE``.
        """

        # Define required fields based on transaction type
        # ("total_amount" is optional for both types and therefore never asked for).
        if entities.transaction_type == "purchase":
            required_fields = ["product", "quantity", "supplier", "unit_price"]
        elif entities.transaction_type == "sale":
            required_fields = ["product", "quantity", "customer", "unit_price"]
        else:
            # Other transaction types have no required fields.
            return ClarificationStatus.COMPLETE, None

        # Check for missing required fields. Any falsy value (None, "", 0)
        # counts as missing.
        entity_dict = entities.dict()
        missing_fields = [field for field in required_fields if not entity_dict.get(field)]

        # If all required fields are present, transaction is complete
        if not missing_fields:
            return ClarificationStatus.COMPLETE, None

        # Generate intelligent clarification request
        clarification = self._generate_clarification_request(entities, missing_fields)

        return ClarificationStatus.NEEDS_CLARIFICATION, clarification

    def _generate_clarification_request(self, entities: EntityExtraction, missing_fields: List[str]) -> ClarificationRequest:
        """Generate intelligent questions for missing information.

        Asks the model for questions, suggestions and an explanation in JSON.
        Falls back to the fixed templates of ``_generate_fallback_questions``
        whenever the call fails, the answer cannot be parsed, or it contains
        no questions.
        """

        # Prepare context about what we already know
        known_info = {
            field: value
            for field, value in entities.dict().items()
            if value is not None and field != "notes"
        }

        system_prompt = f"""You are a helpful business assistant helping complete a {entities.transaction_type} transaction.

Generate natural, conversational questions to gather missing information. The user should be able to:
1. Provide the missing information
2. Say "N/A" or "skip" if the information is not available/applicable
3. Ask for suggestions if they're unsure

Create personalized questions based on the context of what we already know.

Return your response in this exact JSON format:
{{
    "questions": ["question1", "question2", ...],
    "suggested_values": {{"field": "suggested_value", ...}},
    "explanation": "Brief explanation of why we need this information"
}}

Missing fields to ask about: {missing_fields}
Transaction type: {entities.transaction_type}
"""

        # default=str keeps prompt construction from raising on values that are
        # not JSON-native (dates, decimals, ...); this runs outside the try below.
        user_prompt = f"""We're processing a {entities.transaction_type} transaction and need to gather some missing information.

What we already know:
{json.dumps(known_info, indent=2, default=str)}

Missing fields: {missing_fields}

Generate friendly, specific questions to gather the missing information. Make suggestions when appropriate."""

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.3,
                max_tokens=400
            )

            response_text = response.choices[0].message.content

            try:
                result_dict = self._parse_json_object(response_text)
            except (json.JSONDecodeError, KeyError) as e:
                # Fallback to simple questions
                print(f"Error parsing clarification questions: {e}")
                return self._generate_fallback_questions(entities, missing_fields)

            questions = result_dict.get("questions") or []
            if not questions:
                # A request without questions would leave the user with nothing to answer.
                return self._generate_fallback_questions(entities, missing_fields)

            return ClarificationRequest(
                missing_fields=missing_fields,
                questions=questions,
                suggested_values=result_dict.get("suggested_values") or {},
                explanation=result_dict.get("explanation") or "I need some additional information to complete this transaction."
            )

        except Exception as e:
            print(f"Error generating clarification: {e}")
            return self._generate_fallback_questions(entities, missing_fields)

    def _generate_fallback_questions(self, entities: EntityExtraction, missing_fields: List[str]) -> ClarificationRequest:
        """Generate fallback questions when LLM fails.

        Uses one fixed question per known field and a generic
        "What is the <field>?" for any other field name.
        """

        question_templates = {
            "product": "What product or item is involved in this transaction?",
            "quantity": f"How many units {'were purchased' if entities.transaction_type == 'purchase' else 'were sold'}?",
            "supplier": "Which supplier or vendor is this purchase from?",
            "customer": "Who is the customer for this sale?",
            "unit_price": "What is the price per unit?",
            "total_amount": "What is the total amount for this transaction?"
        }

        questions = [
            question_templates.get(field, f"What is the {field.replace('_', ' ')}?")
            for field in missing_fields
        ]

        return ClarificationRequest(
            missing_fields=missing_fields,
            questions=questions,
            suggested_values={},
            explanation="I need some additional information to complete this transaction."
        )

    def process_clarification_response(self, original_entities: EntityExtraction,
                                       missing_fields: List[str],
                                       user_response: str) -> Tuple[EntityExtraction, bool]:
        """
        Process user's response to clarification questions

        Args:
            original_entities: Original extracted entities
            missing_fields: Fields we asked about
            user_response: User's response to our questions

        Returns:
            Tuple of (updated_entities, is_complete). On any API or parsing
            error the original entities are returned with ``is_complete=False``.
        """

        system_prompt = f"""You are processing a user's response to clarification questions about a {original_entities.transaction_type} transaction.

Extract the missing information from the user's response. The user may:
1. Provide specific values for the missing fields
2. Say "N/A", "skip", "not applicable", or similar to indicate the field should be null
3. Ask for help or say they don't know

Missing fields we asked about: {missing_fields}

Return a JSON object with the extracted values. Use null for fields that are N/A or skipped.

Example response format:
{{
    "product": "extracted product name",
    "quantity": 10,
    "supplier": null,
    "unit_price": 5.99,
    "interpretation": "Brief explanation of what you extracted"
}}"""

        user_prompt = f"""Original transaction: {original_entities.transaction_type}
Missing fields: {missing_fields}
User's response: "{user_response}"

Extract the values for the missing fields from the user's response."""

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.1,
                max_tokens=300
            )

            response_text = response.choices[0].message.content

            try:
                extracted_values = self._parse_json_object(response_text)

                # Update original entities with extracted values
                updated_entities = self._update_entities(original_entities, extracted_values, missing_fields)

                # Check if transaction is now complete.
                # NOTE(review): a field the user skipped ("N/A") is stored as None,
                # so this check still reports it as missing - confirm that the
                # caller handles skipped required fields.
                status, _ = self.analyze_transaction_completeness(updated_entities)
                is_complete = (status == ClarificationStatus.COMPLETE)

                return updated_entities, is_complete

            except (json.JSONDecodeError, KeyError) as e:
                print(f"Error parsing clarification response: {e}")
                return original_entities, False

        except Exception as e:
            print(f"Error processing clarification: {e}")
            return original_entities, False

    def _update_entities(self, original_entities: EntityExtraction,
                         extracted_values: Dict[str, Any],
                         missing_fields: List[str]) -> EntityExtraction:
        """Update entities with extracted clarification values.

        Only the fields listed in ``missing_fields`` are taken from
        ``extracted_values``. Numeric fields are coerced (``quantity`` to int,
        prices to float) and set to ``None`` when coercion fails.
        """

        # Convert to dict for easier manipulation
        entity_dict = original_entities.dict()

        # Update with extracted values
        for field in missing_fields:
            if field not in extracted_values:
                continue

            value = extracted_values[field]

            # Handle type conversions
            if field in ["quantity"] and value is not None:
                try:
                    entity_dict[field] = int(value)
                except (ValueError, TypeError):
                    entity_dict[field] = None
            elif field in ["unit_price", "total_amount"] and value is not None:
                try:
                    entity_dict[field] = float(value)
                except (ValueError, TypeError):
                    entity_dict[field] = None
            else:
                entity_dict[field] = value

        # Recalculate total if we have quantity and unit_price
        if entity_dict.get("quantity") and entity_dict.get("unit_price"):
            entity_dict["total_amount"] = entity_dict["quantity"] * entity_dict["unit_price"]

        return EntityExtraction(**entity_dict)

    def format_clarification_message(self, clarification: ClarificationRequest) -> str:
        """Format clarification request as a user-friendly message.

        Layout: the explanation, the numbered questions, an optional list of
        suggestions, and a closing hint on how to answer.
        """

        message = f"📝 {clarification.explanation}\n\n"

        for i, question in enumerate(clarification.questions, 1):
            message += f"{i}. {question}\n"

        # Add suggestions if available
        if clarification.suggested_values:
            message += "\n💡 Suggestions:\n"
            for field, suggestion in clarification.suggested_values.items():
                message += f"  • {field.replace('_', ' ').title()}: {suggestion}\n"

        message += "\n✨ You can say 'N/A' or 'skip' for any information that's not available."
        message += "\n📞 Please provide the missing information in your next message."

        return message
src/vector_store.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+ from sentence_transformers import SentenceTransformer
3
+ from typing import List, Dict, Any, Optional
4
+ import json
5
+ from datetime import datetime
6
+
7
class VectorStore:
    """Chroma-backed store for transaction and general events.

    Documents are embedded with the ``all-MiniLM-L6-v2`` sentence-transformer.
    If that model cannot be loaded, methods that need embeddings do nothing
    and return ``False`` or an empty list.
    """

    def __init__(self, collection_name: str = "chatbot_events", persist_directory: str = "./chroma_db"):
        """Open (or create) the persistent collection and load the embedding model.

        Args:
            collection_name: Name of the Chroma collection to use.
            persist_directory: Directory in which Chroma keeps its data.
        """
        self.client = chromadb.PersistentClient(path=persist_directory)
        self.collection = self.client.get_or_create_collection(name=collection_name)
        try:
            self.model = SentenceTransformer('all-MiniLM-L6-v2')
        except Exception as e:
            print(f"Warning: Could not load sentence transformer model: {e}")
            self.model = None

    def add_transaction_event(self, transaction_data: Dict[str, Any], user_query: str, sql_transaction_id: Optional[int] = None) -> bool:
        """Add a transaction event to the vector store.

        Args:
            transaction_data: Transaction fields; ``type``, ``product``,
                ``quantity``, ``supplier``, ``customer`` and ``total`` are read
                for the summary, and the whole dict is stored as JSON metadata.
            user_query: The user's original message.
            sql_transaction_id: Optional ID of the matching SQL row, stored in
                the metadata for later lookup.

        Returns:
            True when the event was stored, False when no embedding model is
            available or storing failed.
        """
        if not self.model:
            return False

        try:
            # Create a semantic summary of the event
            summary = self._create_event_summary(transaction_data, user_query)

            # Generate embedding
            embedding = self.model.encode(summary).tolist()

            # One timestamp for both the ID and the metadata.
            timestamp = datetime.now().isoformat()
            # A missing or None type falls back to "unknown".
            transaction_type = transaction_data.get("type") or "unknown"

            # Create document ID - include SQL ID if available for better linking
            doc_id = f"transaction_{sql_transaction_id or 'unknown'}_{timestamp}_{hash(summary) % 10000}"

            # Prepare metadata with SQL transaction linking
            metadata = {
                "type": "transaction",
                "transaction_type": transaction_type,
                "timestamp": timestamp,
                "user_query": user_query,
                # default=str keeps non-JSON-native values from aborting the insert
                "data": json.dumps(transaction_data, default=str)
            }

            # Add SQL transaction ID to metadata for linking
            if sql_transaction_id is not None:
                metadata["sql_transaction_id"] = sql_transaction_id
                metadata["sql_table"] = f"{transaction_type}s"  # purchases or sales

            # Store in vector database
            self.collection.add(
                documents=[summary],
                embeddings=[embedding],
                metadatas=[metadata],
                ids=[doc_id]
            )

            return True
        except Exception as e:
            print(f"Error adding transaction event: {e}")
            return False

    def get_transaction_by_sql_id(self, sql_transaction_id: int, transaction_type: str) -> Optional[Dict[str, Any]]:
        """Retrieve vector store entry linked to a specific SQL transaction ID.

        Returns:
            Dict with ``id``, ``document`` and ``metadata`` of the first match,
            or None when nothing matches or the lookup fails.
        """
        try:
            # Chroma accepts a single operator per `where` clause, so the two
            # conditions have to be combined explicitly with $and.
            results = self.collection.get(
                where={
                    "$and": [
                        {"sql_transaction_id": sql_transaction_id},
                        {"transaction_type": transaction_type}
                    ]
                },
                limit=1
            )

            if results and results['documents']:
                return {
                    "id": results['ids'][0],
                    "document": results['documents'][0],
                    "metadata": results['metadatas'][0]
                }

            return None
        except Exception as e:
            print(f"Error retrieving transaction by SQL ID: {e}")
            return None

    def add_general_event(self, event_text: str, event_type: str = "general") -> bool:
        """Add a general event or information to the vector store.

        Returns:
            True when stored, False when no embedding model is available or
            storing failed.
        """
        if not self.model:
            return False

        try:
            # Generate embedding
            embedding = self.model.encode(event_text).tolist()

            timestamp = datetime.now().isoformat()

            # Create document ID
            doc_id = f"event_{timestamp}_{hash(event_text) % 10000}"

            # Store in vector database
            self.collection.add(
                documents=[event_text],
                embeddings=[embedding],
                metadatas=[{
                    "type": event_type,
                    "timestamp": timestamp
                }],
                ids=[doc_id]
            )

            return True
        except Exception as e:
            print(f"Error adding general event: {e}")
            return False

    def search_similar_events(self, query: str, n_results: int = 5) -> List[Dict[str, Any]]:
        """Search for similar events based on semantic similarity.

        Returns:
            Up to ``n_results`` dicts with ``document``, ``distance`` and
            ``metadata``; an empty list when no embedding model is available
            or the search fails.
        """
        if not self.model:
            return []

        try:
            # Generate query embedding
            query_embedding = self.model.encode(query).tolist()

            # Search vector database
            results = self.collection.query(
                query_embeddings=[query_embedding],
                n_results=n_results
            )

            # Format results (Chroma returns one result list per query embedding)
            formatted_results = []
            if results['documents'] and results['documents'][0]:
                for i, doc in enumerate(results['documents'][0]):
                    formatted_results.append({
                        "document": doc,
                        "distance": results['distances'][0][i] if results['distances'] else None,
                        "metadata": results['metadatas'][0][i] if results['metadatas'] else {}
                    })

            return formatted_results
        except Exception as e:
            print(f"Error searching events: {e}")
            return []

    def get_recent_events(self, n_results: int = 10) -> List[Dict[str, Any]]:
        """Get recent events from the vector store.

        Returns:
            Up to ``n_results`` dicts with ``document`` and ``metadata``,
            newest first by the ``timestamp`` metadata; an empty list on error.
        """
        if n_results <= 0:
            return []

        try:
            # `get(limit=...)` does not order by time, so limiting before the
            # sort would return an arbitrary subset. Fetch everything, sort by
            # timestamp, then cut to the requested size.
            results = self.collection.get(
                include=["documents", "metadatas"]
            )

            formatted_results = []
            if results['documents']:
                for i, doc in enumerate(results['documents']):
                    formatted_results.append({
                        "document": doc,
                        "metadata": (results['metadatas'][i] if results['metadatas'] else None) or {}
                    })

            # Sort by timestamp if available (entries without one sort last)
            formatted_results.sort(
                key=lambda x: x['metadata'].get('timestamp', ''),
                reverse=True
            )

            return formatted_results[:n_results]
        except Exception as e:
            print(f"Error getting recent events: {e}")
            return []

    def _create_event_summary(self, transaction_data: Dict[str, Any], user_query: str) -> str:
        """Create a semantic summary of a transaction event.

        The summary is a " | "-separated line: transaction type, every known
        detail, and the original user request. Details whose value is None
        are left out.
        """
        # Add transaction type
        trans_type = transaction_data.get("type", "transaction")
        summary_parts = [f"Business {trans_type} event:"]

        # Add key details
        labelled_fields = (
            ("product", "Product: "),
            ("quantity", "Quantity: "),
            ("supplier", "Supplier: "),
            ("customer", "Customer: "),
            ("total", "Total amount: €"),
        )
        for key, label in labelled_fields:
            value = transaction_data.get(key)
            if value is not None:
                summary_parts.append(f"{label}{value}")

        # Add original user query for context
        summary_parts.append(f"Original request: {user_query}")

        return " | ".join(summary_parts)

    def delete_collection(self):
        """Delete the entire collection (use with caution).

        Returns:
            True on success, False if deletion failed.
        """
        try:
            self.client.delete_collection(name=self.collection.name)
            return True
        except Exception as e:
            print(f"Error deleting collection: {e}")
            return False

    def get_collection_count(self) -> int:
        """Get the number of documents in the collection (0 on error)."""
        try:
            return self.collection.count()
        except Exception as e:
            print(f"Error getting collection count: {e}")
            return 0
tests/test_chatbot.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import sys
4
+ import os
5
# This file lives in tests/, a sibling of src/, so step up one directory
# before adding src/ to the import path.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'src')))
6
+
7
+ from chatbot import Chatbot
8
+ from models import ChatbotRequest
9
+
10
def test_chatbot():
    """Smoke-test the chatbot by sending a fixed set of messages and printing the replies.

    Nothing is asserted; the output is meant to be read by a person. The
    chatbot is closed even when processing a message raises.
    """
    print("🧪 Testing Chatbot System")
    print("="*50)

    chatbot = Chatbot()

    # Test cases
    test_cases = [
        "Add a purchase of 20 USB drives from TechMart at €5 each",
        "Sold 10 laptops to John Smith at €800 each",
        "Purchase 5 office chairs from Office Supplies Co at €150 per chair",
        "Show recent transactions",
        "Find USB drives",
        "Search TechMart",
        "Meeting with new supplier scheduled for next week"
    ]

    try:
        for i, test_message in enumerate(test_cases, 1):
            print(f"\n🔍 Test {i}: {test_message}")
            print("-" * 50)

            request = ChatbotRequest(message=test_message)
            response = chatbot.process_message(request)

            print(f"Response: {response.response}")

            if response.entities_extracted:
                entities = response.entities_extracted
                print(f"Entities: {entities.transaction_type} - {entities.product} ({entities.quantity}x) - €{entities.total_amount}")

            if response.vector_stored:
                print("✅ Stored in vector database")

            print()
    finally:
        # Release the chatbot's resources even if a test message fails.
        chatbot.close()

    print("✅ All tests completed!")


if __name__ == "__main__":
    test_chatbot()
tests/test_intent_classifier.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import sys
4
+ import os
5
# This file lives in tests/, a sibling of src/, so step up one directory
# before adding src/ to the import path.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'src')))
6
+
7
+ from intent_classifier import IntentClassifier, IntentType
8
+
9
def test_intent_classification():
    """Run the intent classifier over labelled messages and print an accuracy report.

    Each message is classified and the prediction is compared with the
    expected intent. Nothing is asserted; the function prints per-case details
    and a final accuracy summary.
    """
    print("🧪 Testing OpenAI Intent Classification")
    print("="*60)
    print("Note: Make sure to set OPENAI_API_KEY environment variable")
    print("="*60)

    classifier = IntentClassifier()

    # Test cases with expected intents
    test_cases = [
        # Transaction intents
        ("Add a purchase of 20 USB drives from TechMart at €5 each", IntentType.TRANSACTION),
        ("Sold 10 laptops to John Smith at €800 each", IntentType.TRANSACTION),
        ("Purchase 5 office chairs from Office Supplies Co at €150 per chair", IntentType.TRANSACTION),
        ("We bought 100 pens from Staples for $2 each", IntentType.TRANSACTION),

        # Query intents
        ("How many USB drives did we purchase?", IntentType.QUERY),
        ("What's the total value of all purchases?", IntentType.QUERY),
        ("Show me all sales to John Smith", IntentType.QUERY),
        ("List recent transactions", IntentType.QUERY),
        ("What's our total spending on electronics?", IntentType.QUERY),

        # Semantic search intents
        ("Show me similar purchases to this one", IntentType.SEMANTIC_SEARCH),
        ("Find events related to supplier meetings", IntentType.SEMANTIC_SEARCH),
        ("What's similar to our last laptop purchase?", IntentType.SEMANTIC_SEARCH),
        ("Show me related transactions", IntentType.SEMANTIC_SEARCH),

        # General info intents
        ("Meeting with new supplier scheduled for next week", IntentType.GENERAL_INFO),
        ("Remember to check inventory levels before next order", IntentType.GENERAL_INFO),
        ("The conference call went well today", IntentType.GENERAL_INFO),
        ("Don't forget to update the quarterly report", IntentType.GENERAL_INFO),

        # Edge cases
        ("Hello", IntentType.GENERAL_INFO),
        ("What's the weather like?", IntentType.GENERAL_INFO),
        ("Can you help me?", IntentType.GENERAL_INFO),
    ]

    correct_predictions = 0
    total_predictions = len(test_cases)

    for i, (message, expected_intent) in enumerate(test_cases, 1):
        print(f"\n🔍 Test {i}: {message}")
        print("-" * 60)

        result = classifier.classify_intent(message)

        print(f"Expected: {expected_intent.value}")
        print(f"Predicted: {result.intent.value}")
        print(f"Confidence: {result.confidence:.2f}")
        print(f"Reasoning: {result.reasoning}")

        if result.entities_hint:
            print(f"Entities: {result.entities_hint}")

        if result.intent == expected_intent:
            print("✅ CORRECT")
            correct_predictions += 1
        else:
            print("❌ INCORRECT")

        print()

    # Summary
    accuracy = correct_predictions / total_predictions
    print("="*60)
    print("📊 Results Summary:")
    print(f"Correct predictions: {correct_predictions}/{total_predictions}")
    print(f"Accuracy: {accuracy:.2%}")
    print("="*60)

    if accuracy >= 0.8:
        print("🎉 Excellent accuracy! Intent classification is working well.")
    elif accuracy >= 0.6:
        print("👍 Good accuracy. Consider refining prompts for better results.")
    else:
        print("⚠️ Low accuracy. Review and improve the classification prompts.")
91
if __name__ == "__main__":
    # The classifier needs an OpenAI API key; stop early with instructions
    # instead of failing in the middle of the run.
    if not os.getenv('OPENAI_API_KEY'):
        print("❌ Error: OPENAI_API_KEY environment variable not set")
        print("Please set your OpenAI API key:")
        print("export OPENAI_API_KEY='your-key-here'")
        sys.exit(1)

    test_intent_classification()
tests/test_interactive_transactions.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import sys
4
+ import os
5
# This file lives in tests/, a sibling of src/, so step up one directory
# before adding src/ to the import path.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'src')))
6
+
7
+ from chatbot import Chatbot
8
+ from models import ChatbotRequest
9
+
10
def test_interactive_transactions():
    """Walk through multi-turn transaction scenarios and print how the chatbot reacts.

    Each scenario sends an initial message in its own session and, when the
    chatbot asks for clarification, feeds the scripted answers one by one
    until the transaction completes. Afterwards, cancellation and a reply in
    an unknown session are exercised. Nothing is asserted; the output is
    meant to be read by a person. The chatbot is closed even if a step raises.
    """
    print("🧪 Testing Interactive Transaction Completion")
    print("="*70)
    print("Note: Make sure to set OPENAI_API_KEY environment variable")
    print("="*70)

    chatbot = Chatbot()

    test_scenarios = [
        {
            "name": "Complete Purchase Transaction",
            "initial": "I bought 20 USB drives from TechMart at €5 each",
            "expected_complete": True,
            "description": "Should be complete with all required fields"
        },
        {
            "name": "Incomplete Purchase - Missing Supplier",
            "initial": "I bought 10 laptops at €800 each",
            "clarifications": ["Electronics Plus"],
            "expected_questions": ["supplier"],
            "description": "Should ask for supplier information"
        },
        {
            "name": "Incomplete Purchase - Missing Multiple Fields",
            "initial": "I bought some office chairs",
            "clarifications": ["15 chairs", "Office Supplies Co", "€150 per chair"],
            "expected_questions": ["quantity", "supplier", "unit_price"],
            "description": "Should ask for quantity, supplier, and price"
        },
        {
            "name": "Sale with Missing Customer",
            "initial": "Sold 5 laptops at €900 each",
            "clarifications": ["ABC Corporation"],
            "expected_questions": ["customer"],
            "description": "Should ask for customer information"
        },
        {
            "name": "Transaction with N/A Fields",
            "initial": "Bought 100 pens",
            "clarifications": ["Staples", "$2 each", "N/A"],
            "expected_questions": ["supplier", "unit_price"],
            "description": "Should handle N/A responses gracefully"
        }
    ]

    try:
        for i, scenario in enumerate(test_scenarios, 1):
            print(f"\n🔍 Test Scenario {i}: {scenario['name']}")
            print("-" * 60)
            print(f"Description: {scenario['description']}")
            print(f"Initial input: {scenario['initial']}")

            # Test initial transaction request
            session_id = f"test_session_{i}"
            request = ChatbotRequest(message=scenario['initial'], session_id=session_id)
            response = chatbot.process_message(request)

            print("\n🤖 Initial Response:")
            print(response.response)

            expected_complete = scenario.get("expected_complete", False)

            if response.awaiting_clarification:
                # A clarification request is only correct for scenarios that
                # are not expected to be complete right away.
                if expected_complete:
                    print("❌ Expected a complete transaction but clarification was requested")
                else:
                    print("✅ Correctly identified as incomplete transaction")

                # Process clarifications if provided
                if "clarifications" in scenario:
                    print("\n📝 Providing clarifications...")

                    for j, clarification in enumerate(scenario["clarifications"], 1):
                        print(f"\n   Clarification {j}: {clarification}")

                        clarification_request = ChatbotRequest(
                            message=clarification,
                            session_id=session_id
                        )
                        clarification_response = chatbot.process_message(clarification_request)

                        print(f"   🤖 Response: {clarification_response.response[:100]}{'...' if len(clarification_response.response) > 100 else ''}")

                        if not clarification_response.awaiting_clarification:
                            print("   ✅ Transaction completed!")
                            break
                        else:
                            print("   ⏳ Still waiting for more information...")
            else:
                if expected_complete:
                    print("✅ Correctly completed transaction without clarification")
                else:
                    print("❌ Expected clarification but transaction was completed")

            print(f"\nIntent detected: {response.intent_detected}")
            if response.entities_extracted:
                entities = response.entities_extracted
                print(f"Entities: {entities.transaction_type} - {entities.product} ({entities.quantity}x) - €{entities.total_amount}")

            print("\n" + "="*60)

        print("\n🧪 Testing Edge Cases")
        print("-" * 40)

        # Test cancellation
        print("\n🔍 Testing Transaction Cancellation")
        request = ChatbotRequest(message="I bought some items", session_id="cancel_test")
        response = chatbot.process_message(request)

        if response.awaiting_clarification:
            print("✅ Transaction requires clarification")
            cancel_request = ChatbotRequest(message="cancel", session_id="cancel_test")
            cancel_response = chatbot.process_message(cancel_request)
            print(f"🤖 Cancel response: {cancel_response.response}")

            if not cancel_response.awaiting_clarification:
                print("✅ Transaction successfully cancelled")
            else:
                print("❌ Transaction not properly cancelled")

        # Test invalid session
        print("\n🔍 Testing Invalid Session Response")
        invalid_request = ChatbotRequest(message="More information here", session_id="nonexistent")
        invalid_response = chatbot.process_message(invalid_request)
        print(f"🤖 Invalid session response: {invalid_response.response}")
    finally:
        # Release the chatbot's resources even if a scenario fails.
        chatbot.close()

    print("\n✅ Interactive transaction tests completed!")
132
+
133
def test_clarification_quality():
    """Print the clarification questions generated for deliberately vague messages.

    Each message is sent in its own session. When the chatbot asks for
    clarification, the numbered lines of its reply are treated as the
    questions and the first three are printed. Nothing is asserted. The
    chatbot is closed even if a step raises.
    """
    print("\n🔬 Testing Clarification Question Quality")
    print("-" * 50)

    chatbot = Chatbot()

    # Test various incomplete scenarios to see question quality
    incomplete_scenarios = [
        "I bought something expensive",
        "Purchase from TechMart",
        "Sold items to a customer",
        "€1000 transaction yesterday",
        "Bought 50 units"
    ]

    # A question line is one whose first non-blank character is a digit 1-9.
    numbered_prefixes = tuple('123456789')

    try:
        for i, scenario in enumerate(incomplete_scenarios, 1):
            print(f"\n🔍 Scenario {i}: {scenario}")
            print("-" * 30)

            request = ChatbotRequest(message=scenario, session_id=f"quality_test_{i}")
            response = chatbot.process_message(request)

            if response.awaiting_clarification:
                print("📝 Clarification questions generated:")
                # Extract questions from response for analysis
                stripped_lines = (line.strip() for line in response.response.split('\n'))
                questions = [line for line in stripped_lines if line.startswith(numbered_prefixes)]

                for q in questions[:3]:  # Show first 3 questions
                    print(f"  • {q}")

                print(f"✅ Generated {len(questions)} clarification questions")
            else:
                print("❌ No clarification requested (unexpected)")
    finally:
        # Release the chatbot's resources even if a scenario fails.
        chatbot.close()

    print("\n✅ Clarification quality tests completed!")
170
+
171
if __name__ == "__main__":
    # The chatbot needs an OpenAI API key; stop early with instructions
    # instead of failing in the middle of the run.
    if not os.getenv('OPENAI_API_KEY'):
        print("❌ Error: OPENAI_API_KEY environment variable not set")
        print("Please set your OpenAI API key:")
        print("export OPENAI_API_KEY='your-key-here'")
        sys.exit(1)

    test_interactive_transactions()
    test_clarification_quality()
tests/test_nl_search.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import sys
4
+ import os
5
+ sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
6
+
7
+ from chatbot import Chatbot
8
+ from models import ChatbotRequest
9
+
10
+ def test_natural_language_search():
11
+ print("🧪 Testing Natural Language to SQL Search")
12
+ print("="*60)
13
+ print("Note: Make sure to set OPENAI_API_KEY environment variable")
14
+ print("="*60)
15
+
16
+ chatbot = Chatbot()
17
+
18
+ # First add some test data
19
+ setup_queries = [
20
+ "Add a purchase of 20 USB drives from TechMart at €5 each",
21
+ "Add a purchase of 10 laptops from Electronics Plus at €800 each",
22
+ "Sold 5 USB drives to John Smith at €7 each",
23
+ "Sold 2 laptops to ABC Corp at €900 each"
24
+ ]
25
+
26
+ print("📝 Setting up test data...")
27
+ for query in setup_queries:
28
+ request = ChatbotRequest(message=query)
29
+ response = chatbot.process_message(request)
30
+ print(f"✓ {query}")
31
+
32
+ print("\n🔍 Testing Natural Language Queries...")
33
+ print("-" * 60)
34
+
35
+ # Test natural language search queries
36
+ test_queries = [
37
+ "How many USB drives did we purchase?",
38
+ "What's the total value of all purchases?",
39
+ "Show me all sales to John Smith",
40
+ "Which suppliers have we bought from?",
41
+ "What products did we sell this month?",
42
+ "Show me the most expensive purchases",
43
+ "How much revenue did we generate from laptop sales?",
44
+ "List all transactions with TechMart",
45
+ "What's our total spending on electronics?",
46
+ "Show me customers who bought laptops"
47
+ ]
48
+
49
+ for i, query in enumerate(test_queries, 1):
50
+ print(f"\n🔍 Test {i}: {query}")
51
+ print("-" * 50)
52
+
53
+ request = ChatbotRequest(message=query)
54
+ response = chatbot.process_message(request)
55
+
56
+ print(f"Response: {response.response}")
57
+
58
+ if response.sql_executed:
59
+ print(f"Generated SQL: {response.sql_executed}")
60
+
61
+ if response.intent_detected:
62
+ print(f"Intent: {response.intent_detected} (confidence: {response.intent_confidence:.2f})")
63
+
64
+ print()
65
+
66
+ chatbot.close()
67
+ print("✅ Natural language search tests completed!")
68
+
69
+ if __name__ == "__main__":
70
+ if not os.getenv('OPENAI_API_KEY'):
71
+ print("❌ Error: OPENAI_API_KEY environment variable not set")
72
+ print("Please set your OpenAI API key:")
73
+ print("export OPENAI_API_KEY='your-key-here'")
74
+ sys.exit(1)
75
+
76
+ test_natural_language_search()
tests/test_rag_search.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import sys
4
+ import os
5
+ sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))
6
+
7
+ from chatbot import Chatbot
8
+ from models import ChatbotRequest
9
+
10
+ def test_rag_functionality():
11
+ print("🧪 Testing RAG (Retrieval-Augmented Generation) Functionality")
12
+ print("="*70)
13
+ print("Note: Make sure to set OPENAI_API_KEY environment variable")
14
+ print("="*70)
15
+
16
+ chatbot = Chatbot()
17
+
18
+ # First, populate the system with diverse data
19
+ setup_data = [
20
+ # Transaction data
21
+ "Add a purchase of 20 USB drives from TechMart at €5 each",
22
+ "Add a purchase of 10 laptops from Electronics Plus at €800 each",
23
+ "Sold 5 USB drives to John Smith at €7 each",
24
+ "Sold 2 laptops to ABC Corp at €900 each",
25
+ "Purchase 15 office chairs from Office Supplies Co at €150 per chair",
26
+
27
+ # Business events and meetings
28
+ "Meeting with George scheduled for next Tuesday at 2 PM to discuss new laptop supplier contract",
29
+ "Conference call with TechMart went well - they agreed to bulk discounts for USB drives",
30
+ "Quarterly review meeting completed - need to increase laptop inventory before Q4",
31
+ "Supplier evaluation: Electronics Plus provides excellent laptops but delivery times are slow",
32
+ "Team meeting notes: Focus on ergonomic office furniture for the new office space",
33
+ "Customer feedback: John Smith very satisfied with USB drive quality and pricing",
34
+ "Important reminder: Check inventory levels before placing next electronics order",
35
+ "Budget planning: Allocate €50,000 for office equipment in next quarter"
36
+ ]
37
+
38
+ print("📝 Setting up test data...")
39
+ for i, data in enumerate(setup_data, 1):
40
+ request = ChatbotRequest(message=data)
41
+ response = chatbot.process_message(request)
42
+ print(f"✓ {i:2d}. {data[:60]}{'...' if len(data) > 60 else ''}")
43
+
44
+ print(f"\n✅ Setup complete! Added {len(setup_data)} records.")
45
+ print("\n🔍 Testing RAG-powered semantic search...")
46
+ print("-" * 70)
47
+
48
+ # Test various types of semantic search queries
49
+ test_queries = [
50
+ # Meeting and event queries
51
+ "When is my meeting with George?",
52
+ "What was discussed in the TechMart meeting?",
53
+ "Tell me about recent meetings and discussions",
54
+
55
+ # Product and supplier queries
56
+ "What do we know about TechMart as a supplier?",
57
+ "Show me information about laptop purchases and suppliers",
58
+ "What feedback have we received about our products?",
59
+
60
+ # Business planning queries
61
+ "What are our budget plans for next quarter?",
62
+ "What inventory considerations should I be aware of?",
63
+ "Tell me about office equipment and furniture plans",
64
+
65
+ # Customer information
66
+ "What do we know about John Smith?",
67
+ "Show me customer feedback and satisfaction information",
68
+
69
+ # Operational queries
70
+ "What reminders and important notes do I have?",
71
+ "Tell me about supplier evaluations and performance",
72
+ "What are the key business insights from recent records?"
73
+ ]
74
+
75
+ for i, query in enumerate(test_queries, 1):
76
+ print(f"\n🔍 Test {i}: {query}")
77
+ print("-" * 50)
78
+
79
+ request = ChatbotRequest(message=query)
80
+ response = chatbot.process_message(request)
81
+
82
+ print(f"🤖 Response: {response.response}")
83
+
84
+ if response.intent_detected:
85
+ print(f"🎯 Intent: {response.intent_detected} (confidence: {response.intent_confidence:.2f})")
86
+
87
+ print()
88
+
89
+ chatbot.close()
90
+ print("✅ RAG functionality tests completed!")
91
+ print("\n📊 Expected Behavior:")
92
+ print("- RAG should provide contextual, specific answers based on stored information")
93
+ print("- Responses should cite relevant details from business records")
94
+ print("- Should handle queries about meetings, suppliers, customers, and business plans")
95
+ print("- Should indicate when information is not available in the records")
96
+
97
+ def test_rag_edge_cases():
98
+ print("\n🔬 Testing RAG Edge Cases")
99
+ print("-" * 40)
100
+
101
+ chatbot = Chatbot()
102
+
103
+ edge_case_queries = [
104
+ "Tell me about suppliers we've never worked with",
105
+ "What happened in 1995?",
106
+ "Show me information about flying cars",
107
+ "What's the weather like today?",
108
+ "Tell me about George's favorite color"
109
+ ]
110
+
111
+ for i, query in enumerate(edge_case_queries, 1):
112
+ print(f"\n🔍 Edge Case {i}: {query}")
113
+ print("-" * 30)
114
+
115
+ request = ChatbotRequest(message=query)
116
+ response = chatbot.process_message(request)
117
+
118
+ print(f"🤖 Response: {response.response}")
119
+ print()
120
+
121
+ chatbot.close()
122
+ print("✅ Edge case testing completed!")
123
+
124
+ if __name__ == "__main__":
125
+ if not os.getenv('OPENAI_API_KEY'):
126
+ print("❌ Error: OPENAI_API_KEY environment variable not set")
127
+ print("Please set your OpenAI API key:")
128
+ print("export OPENAI_API_KEY='your-key-here'")
129
+ sys.exit(1)
130
+
131
+ test_rag_functionality()
132
+ test_rag_edge_cases()