Spaces:
Sleeping
Sleeping
Commit
·
adf2969
1
Parent(s):
06e5979
added project files
Browse files- .gitignore +160 -0
- .streamlit/config.toml +18 -0
- LICENSE +21 -0
- agent.ipynb +413 -0
- app.py +96 -0
- ecomm.db +0 -0
- fakedatagenerator.ipynb +680 -0
- few_shots.py +182 -0
- langchain_helper.py +181 -0
- project_prompts.py +24 -0
- requirements.txt +15 -0
.gitignore
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py,cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
cover/
|
| 53 |
+
|
| 54 |
+
# Translations
|
| 55 |
+
*.mo
|
| 56 |
+
*.pot
|
| 57 |
+
|
| 58 |
+
# Django stuff:
|
| 59 |
+
*.log
|
| 60 |
+
local_settings.py
|
| 61 |
+
db.sqlite3
|
| 62 |
+
db.sqlite3-journal
|
| 63 |
+
|
| 64 |
+
# Flask stuff:
|
| 65 |
+
instance/
|
| 66 |
+
.webassets-cache
|
| 67 |
+
|
| 68 |
+
# Scrapy stuff:
|
| 69 |
+
.scrapy
|
| 70 |
+
|
| 71 |
+
# Sphinx documentation
|
| 72 |
+
docs/_build/
|
| 73 |
+
|
| 74 |
+
# PyBuilder
|
| 75 |
+
.pybuilder/
|
| 76 |
+
target/
|
| 77 |
+
|
| 78 |
+
# Jupyter Notebook
|
| 79 |
+
.ipynb_checkpoints
|
| 80 |
+
|
| 81 |
+
# IPython
|
| 82 |
+
profile_default/
|
| 83 |
+
ipython_config.py
|
| 84 |
+
|
| 85 |
+
# pyenv
|
| 86 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
+
# .python-version
|
| 89 |
+
|
| 90 |
+
# pipenv
|
| 91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
+
# install all needed dependencies.
|
| 95 |
+
#Pipfile.lock
|
| 96 |
+
|
| 97 |
+
# poetry
|
| 98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 100 |
+
# commonly ignored for libraries.
|
| 101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 102 |
+
#poetry.lock
|
| 103 |
+
|
| 104 |
+
# pdm
|
| 105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 106 |
+
#pdm.lock
|
| 107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 108 |
+
# in version control.
|
| 109 |
+
# https://pdm.fming.dev/#use-with-ide
|
| 110 |
+
.pdm.toml
|
| 111 |
+
|
| 112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 113 |
+
__pypackages__/
|
| 114 |
+
|
| 115 |
+
# Celery stuff
|
| 116 |
+
celerybeat-schedule
|
| 117 |
+
celerybeat.pid
|
| 118 |
+
|
| 119 |
+
# SageMath parsed files
|
| 120 |
+
*.sage.py
|
| 121 |
+
|
| 122 |
+
# Environments
|
| 123 |
+
.env
|
| 124 |
+
.venv
|
| 125 |
+
env/
|
| 126 |
+
venv/
|
| 127 |
+
ENV/
|
| 128 |
+
env.bak/
|
| 129 |
+
venv.bak/
|
| 130 |
+
|
| 131 |
+
# Spyder project settings
|
| 132 |
+
.spyderproject
|
| 133 |
+
.spyproject
|
| 134 |
+
|
| 135 |
+
# Rope project settings
|
| 136 |
+
.ropeproject
|
| 137 |
+
|
| 138 |
+
# mkdocs documentation
|
| 139 |
+
/site
|
| 140 |
+
|
| 141 |
+
# mypy
|
| 142 |
+
.mypy_cache/
|
| 143 |
+
.dmypy.json
|
| 144 |
+
dmypy.json
|
| 145 |
+
|
| 146 |
+
# Pyre type checker
|
| 147 |
+
.pyre/
|
| 148 |
+
|
| 149 |
+
# pytype static type analyzer
|
| 150 |
+
.pytype/
|
| 151 |
+
|
| 152 |
+
# Cython debug symbols
|
| 153 |
+
cython_debug/
|
| 154 |
+
|
| 155 |
+
# PyCharm
|
| 156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 157 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 158 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 159 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 160 |
+
#.idea/
|
.streamlit/config.toml
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[theme]
|
| 2 |
+
|
| 3 |
+
# Primary accent for interactive elements
|
| 4 |
+
primaryColor = '#FFFF00'
|
| 5 |
+
|
| 6 |
+
# Background color for the main content area
|
| 7 |
+
backgroundColor = '#00172B'
|
| 8 |
+
|
| 9 |
+
# Background color for sidebar and most interactive widgets
|
| 10 |
+
secondaryBackgroundColor = '#000000'
|
| 11 |
+
|
| 12 |
+
# Color used for almost all text
|
| 13 |
+
textColor = '#FFFFFF'
|
| 14 |
+
|
| 15 |
+
# Font family for all text in the app, except code blocks
|
| 16 |
+
# Accepted values (serif | sans serif | monospace)
|
| 17 |
+
# Default: "sans serif"
|
| 18 |
+
font = "sans serif"
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2024 binaychandra
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
agent.ipynb
ADDED
|
@@ -0,0 +1,413 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 4,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"from langchain.agents.agent_types import AgentType\n",
|
| 10 |
+
"from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent\n",
|
| 11 |
+
"from langchain_openai import AzureOpenAI"
|
| 12 |
+
]
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"execution_count": 2,
|
| 17 |
+
"metadata": {},
|
| 18 |
+
"outputs": [
|
| 19 |
+
{
|
| 20 |
+
"data": {
|
| 21 |
+
"text/plain": [
|
| 22 |
+
"True"
|
| 23 |
+
]
|
| 24 |
+
},
|
| 25 |
+
"execution_count": 2,
|
| 26 |
+
"metadata": {},
|
| 27 |
+
"output_type": "execute_result"
|
| 28 |
+
}
|
| 29 |
+
],
|
| 30 |
+
"source": [
|
| 31 |
+
"from dotenv import load_dotenv\n",
|
| 32 |
+
"load_dotenv()"
|
| 33 |
+
]
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"cell_type": "code",
|
| 37 |
+
"execution_count": 5,
|
| 38 |
+
"metadata": {},
|
| 39 |
+
"outputs": [],
|
| 40 |
+
"source": [
|
| 41 |
+
"llm = AzureOpenAI(deployment_name=\"gpt-35-turbo-instruct\", temperature=0.6)"
|
| 42 |
+
]
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"cell_type": "code",
|
| 46 |
+
"execution_count": 6,
|
| 47 |
+
"metadata": {},
|
| 48 |
+
"outputs": [
|
| 49 |
+
{
|
| 50 |
+
"data": {
|
| 51 |
+
"text/html": [
|
| 52 |
+
"<div>\n",
|
| 53 |
+
"<style scoped>\n",
|
| 54 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 55 |
+
" vertical-align: middle;\n",
|
| 56 |
+
" }\n",
|
| 57 |
+
"\n",
|
| 58 |
+
" .dataframe tbody tr th {\n",
|
| 59 |
+
" vertical-align: top;\n",
|
| 60 |
+
" }\n",
|
| 61 |
+
"\n",
|
| 62 |
+
" .dataframe thead th {\n",
|
| 63 |
+
" text-align: right;\n",
|
| 64 |
+
" }\n",
|
| 65 |
+
"</style>\n",
|
| 66 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 67 |
+
" <thead>\n",
|
| 68 |
+
" <tr style=\"text-align: right;\">\n",
|
| 69 |
+
" <th></th>\n",
|
| 70 |
+
" <th>PassengerId</th>\n",
|
| 71 |
+
" <th>Survived</th>\n",
|
| 72 |
+
" <th>Pclass</th>\n",
|
| 73 |
+
" <th>Name</th>\n",
|
| 74 |
+
" <th>Sex</th>\n",
|
| 75 |
+
" <th>Age</th>\n",
|
| 76 |
+
" <th>SibSp</th>\n",
|
| 77 |
+
" <th>Parch</th>\n",
|
| 78 |
+
" <th>Ticket</th>\n",
|
| 79 |
+
" <th>Fare</th>\n",
|
| 80 |
+
" <th>Cabin</th>\n",
|
| 81 |
+
" <th>Embarked</th>\n",
|
| 82 |
+
" </tr>\n",
|
| 83 |
+
" </thead>\n",
|
| 84 |
+
" <tbody>\n",
|
| 85 |
+
" <tr>\n",
|
| 86 |
+
" <th>0</th>\n",
|
| 87 |
+
" <td>1</td>\n",
|
| 88 |
+
" <td>0</td>\n",
|
| 89 |
+
" <td>3</td>\n",
|
| 90 |
+
" <td>Braund, Mr. Owen Harris</td>\n",
|
| 91 |
+
" <td>male</td>\n",
|
| 92 |
+
" <td>22.0</td>\n",
|
| 93 |
+
" <td>1</td>\n",
|
| 94 |
+
" <td>0</td>\n",
|
| 95 |
+
" <td>A/5 21171</td>\n",
|
| 96 |
+
" <td>7.2500</td>\n",
|
| 97 |
+
" <td>NaN</td>\n",
|
| 98 |
+
" <td>S</td>\n",
|
| 99 |
+
" </tr>\n",
|
| 100 |
+
" <tr>\n",
|
| 101 |
+
" <th>1</th>\n",
|
| 102 |
+
" <td>2</td>\n",
|
| 103 |
+
" <td>1</td>\n",
|
| 104 |
+
" <td>1</td>\n",
|
| 105 |
+
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
|
| 106 |
+
" <td>female</td>\n",
|
| 107 |
+
" <td>38.0</td>\n",
|
| 108 |
+
" <td>1</td>\n",
|
| 109 |
+
" <td>0</td>\n",
|
| 110 |
+
" <td>PC 17599</td>\n",
|
| 111 |
+
" <td>71.2833</td>\n",
|
| 112 |
+
" <td>C85</td>\n",
|
| 113 |
+
" <td>C</td>\n",
|
| 114 |
+
" </tr>\n",
|
| 115 |
+
" <tr>\n",
|
| 116 |
+
" <th>2</th>\n",
|
| 117 |
+
" <td>3</td>\n",
|
| 118 |
+
" <td>1</td>\n",
|
| 119 |
+
" <td>3</td>\n",
|
| 120 |
+
" <td>Heikkinen, Miss. Laina</td>\n",
|
| 121 |
+
" <td>female</td>\n",
|
| 122 |
+
" <td>26.0</td>\n",
|
| 123 |
+
" <td>0</td>\n",
|
| 124 |
+
" <td>0</td>\n",
|
| 125 |
+
" <td>STON/O2. 3101282</td>\n",
|
| 126 |
+
" <td>7.9250</td>\n",
|
| 127 |
+
" <td>NaN</td>\n",
|
| 128 |
+
" <td>S</td>\n",
|
| 129 |
+
" </tr>\n",
|
| 130 |
+
" <tr>\n",
|
| 131 |
+
" <th>3</th>\n",
|
| 132 |
+
" <td>4</td>\n",
|
| 133 |
+
" <td>1</td>\n",
|
| 134 |
+
" <td>1</td>\n",
|
| 135 |
+
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
|
| 136 |
+
" <td>female</td>\n",
|
| 137 |
+
" <td>35.0</td>\n",
|
| 138 |
+
" <td>1</td>\n",
|
| 139 |
+
" <td>0</td>\n",
|
| 140 |
+
" <td>113803</td>\n",
|
| 141 |
+
" <td>53.1000</td>\n",
|
| 142 |
+
" <td>C123</td>\n",
|
| 143 |
+
" <td>S</td>\n",
|
| 144 |
+
" </tr>\n",
|
| 145 |
+
" <tr>\n",
|
| 146 |
+
" <th>4</th>\n",
|
| 147 |
+
" <td>5</td>\n",
|
| 148 |
+
" <td>0</td>\n",
|
| 149 |
+
" <td>3</td>\n",
|
| 150 |
+
" <td>Allen, Mr. William Henry</td>\n",
|
| 151 |
+
" <td>male</td>\n",
|
| 152 |
+
" <td>35.0</td>\n",
|
| 153 |
+
" <td>0</td>\n",
|
| 154 |
+
" <td>0</td>\n",
|
| 155 |
+
" <td>373450</td>\n",
|
| 156 |
+
" <td>8.0500</td>\n",
|
| 157 |
+
" <td>NaN</td>\n",
|
| 158 |
+
" <td>S</td>\n",
|
| 159 |
+
" </tr>\n",
|
| 160 |
+
" </tbody>\n",
|
| 161 |
+
"</table>\n",
|
| 162 |
+
"</div>"
|
| 163 |
+
],
|
| 164 |
+
"text/plain": [
|
| 165 |
+
" PassengerId Survived Pclass \\\n",
|
| 166 |
+
"0 1 0 3 \n",
|
| 167 |
+
"1 2 1 1 \n",
|
| 168 |
+
"2 3 1 3 \n",
|
| 169 |
+
"3 4 1 1 \n",
|
| 170 |
+
"4 5 0 3 \n",
|
| 171 |
+
"\n",
|
| 172 |
+
" Name Sex Age SibSp \\\n",
|
| 173 |
+
"0 Braund, Mr. Owen Harris male 22.0 1 \n",
|
| 174 |
+
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
|
| 175 |
+
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
|
| 176 |
+
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
|
| 177 |
+
"4 Allen, Mr. William Henry male 35.0 0 \n",
|
| 178 |
+
"\n",
|
| 179 |
+
" Parch Ticket Fare Cabin Embarked \n",
|
| 180 |
+
"0 0 A/5 21171 7.2500 NaN S \n",
|
| 181 |
+
"1 0 PC 17599 71.2833 C85 C \n",
|
| 182 |
+
"2 0 STON/O2. 3101282 7.9250 NaN S \n",
|
| 183 |
+
"3 0 113803 53.1000 C123 S \n",
|
| 184 |
+
"4 0 373450 8.0500 NaN S "
|
| 185 |
+
]
|
| 186 |
+
},
|
| 187 |
+
"execution_count": 6,
|
| 188 |
+
"metadata": {},
|
| 189 |
+
"output_type": "execute_result"
|
| 190 |
+
}
|
| 191 |
+
],
|
| 192 |
+
"source": [
|
| 193 |
+
"import pandas as pd\n",
|
| 194 |
+
"\n",
|
| 195 |
+
"df = pd.read_csv(\n",
|
| 196 |
+
" \"https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/titanic.csv\"\n",
|
| 197 |
+
")\n",
|
| 198 |
+
"df.head()"
|
| 199 |
+
]
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"cell_type": "code",
|
| 203 |
+
"execution_count": 14,
|
| 204 |
+
"metadata": {},
|
| 205 |
+
"outputs": [],
|
| 206 |
+
"source": [
|
| 207 |
+
"agent = create_pandas_dataframe_agent(\n",
|
| 208 |
+
" llm,\n",
|
| 209 |
+
" df,\n",
|
| 210 |
+
" verbose=True,\n",
|
| 211 |
+
" agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n",
|
| 212 |
+
" return_intermediate_steps=True\n",
|
| 213 |
+
")"
|
| 214 |
+
]
|
| 215 |
+
},
|
| 216 |
+
{
|
| 217 |
+
"cell_type": "code",
|
| 218 |
+
"execution_count": 17,
|
| 219 |
+
"metadata": {},
|
| 220 |
+
"outputs": [
|
| 221 |
+
{
|
| 222 |
+
"name": "stdout",
|
| 223 |
+
"output_type": "stream",
|
| 224 |
+
"text": [
|
| 225 |
+
"\n",
|
| 226 |
+
"\n",
|
| 227 |
+
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
| 228 |
+
"\u001b[32;1m\u001b[1;3mThought: We need to filter the dataframe for rows where the \"Sex\" column is equal to \"female\" and then count the number of rows.\n",
|
| 229 |
+
"Action: python_repl_ast\n",
|
| 230 |
+
"Action Input: df[df[\"Sex\"] == \"female\"].count()\u001b[0m\u001b[36;1m\u001b[1;3mPassengerId 314\n",
|
| 231 |
+
"Survived 314\n",
|
| 232 |
+
"Pclass 314\n",
|
| 233 |
+
"Name 314\n",
|
| 234 |
+
"Sex 314\n",
|
| 235 |
+
"Age 261\n",
|
| 236 |
+
"SibSp 314\n",
|
| 237 |
+
"Parch 314\n",
|
| 238 |
+
"Ticket 314\n",
|
| 239 |
+
"Fare 314\n",
|
| 240 |
+
"Cabin 97\n",
|
| 241 |
+
"Embarked 312\n",
|
| 242 |
+
"dtype: int64\u001b[0m\u001b[32;1m\u001b[1;3m314 is the number of females in the dataframe, but we need to specify which column we want to count.\n",
|
| 243 |
+
"Action: python_repl_ast\n",
|
| 244 |
+
"Action Input: df[df[\"Sex\"] == \"female\"][\"Sex\"].count()\u001b[0m\u001b[36;1m\u001b[1;3m314\u001b[0m\u001b[32;1m\u001b[1;3m314 is the final answer to the original input question\n",
|
| 245 |
+
"Final Answer: There are 314 females in the dataframe.\u001b[0m\n",
|
| 246 |
+
"\n",
|
| 247 |
+
"\u001b[1m> Finished chain.\u001b[0m\n"
|
| 248 |
+
]
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"data": {
|
| 252 |
+
"text/plain": [
|
| 253 |
+
"{'input': 'how many females are there ',\n",
|
| 254 |
+
" 'output': 'There are 314 females in the dataframe.',\n",
|
| 255 |
+
" 'intermediate_steps': [(AgentAction(tool='python_repl_ast', tool_input='df[df[\"Sex\"] == \"female\"].count()', log='Thought: We need to filter the dataframe for rows where the \"Sex\" column is equal to \"female\" and then count the number of rows.\\nAction: python_repl_ast\\nAction Input: df[df[\"Sex\"] == \"female\"].count()'),\n",
|
| 256 |
+
" PassengerId 314\n",
|
| 257 |
+
" Survived 314\n",
|
| 258 |
+
" Pclass 314\n",
|
| 259 |
+
" Name 314\n",
|
| 260 |
+
" Sex 314\n",
|
| 261 |
+
" Age 261\n",
|
| 262 |
+
" SibSp 314\n",
|
| 263 |
+
" Parch 314\n",
|
| 264 |
+
" Ticket 314\n",
|
| 265 |
+
" Fare 314\n",
|
| 266 |
+
" Cabin 97\n",
|
| 267 |
+
" Embarked 312\n",
|
| 268 |
+
" dtype: int64),\n",
|
| 269 |
+
" (AgentAction(tool='python_repl_ast', tool_input='df[df[\"Sex\"] == \"female\"][\"Sex\"].count()', log='314 is the number of females in the dataframe, but we need to specify which column we want to count.\\nAction: python_repl_ast\\nAction Input: df[df[\"Sex\"] == \"female\"][\"Sex\"].count()'),\n",
|
| 270 |
+
" 314)]}"
|
| 271 |
+
]
|
| 272 |
+
},
|
| 273 |
+
"execution_count": 17,
|
| 274 |
+
"metadata": {},
|
| 275 |
+
"output_type": "execute_result"
|
| 276 |
+
}
|
| 277 |
+
],
|
| 278 |
+
"source": [
|
| 279 |
+
"outres = agent.invoke('how many females are there ')\n",
|
| 280 |
+
"outres"
|
| 281 |
+
]
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"cell_type": "code",
|
| 285 |
+
"execution_count": 32,
|
| 286 |
+
"metadata": {},
|
| 287 |
+
"outputs": [
|
| 288 |
+
{
|
| 289 |
+
"data": {
|
| 290 |
+
"text/plain": [
|
| 291 |
+
"'df[df[\"Sex\"] == \"female\"].count()'"
|
| 292 |
+
]
|
| 293 |
+
},
|
| 294 |
+
"execution_count": 32,
|
| 295 |
+
"metadata": {},
|
| 296 |
+
"output_type": "execute_result"
|
| 297 |
+
}
|
| 298 |
+
],
|
| 299 |
+
"source": [
|
| 300 |
+
"outres['intermediate_steps'][0][0].tool_input"
|
| 301 |
+
]
|
| 302 |
+
},
|
| 303 |
+
{
|
| 304 |
+
"cell_type": "code",
|
| 305 |
+
"execution_count": 33,
|
| 306 |
+
"metadata": {},
|
| 307 |
+
"outputs": [
|
| 308 |
+
{
|
| 309 |
+
"ename": "SyntaxError",
|
| 310 |
+
"evalue": "invalid syntax (3216326457.py, line 1)",
|
| 311 |
+
"output_type": "error",
|
| 312 |
+
"traceback": [
|
| 313 |
+
"\u001b[1;36m Cell \u001b[1;32mIn[33], line 1\u001b[1;36m\u001b[0m\n\u001b[1;33m 'fig = px.line('x', 'y', param=skdfl);'\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
|
| 314 |
+
]
|
| 315 |
+
}
|
| 316 |
+
],
|
| 317 |
+
"source": [
|
| 318 |
+
"'fig = px.line('x', 'y', param=skdfl);'\n",
|
| 319 |
+
"fig'"
|
| 320 |
+
]
|
| 321 |
+
},
|
| 322 |
+
{
|
| 323 |
+
"cell_type": "code",
|
| 324 |
+
"execution_count": 34,
|
| 325 |
+
"metadata": {},
|
| 326 |
+
"outputs": [],
|
| 327 |
+
"source": [
|
| 328 |
+
"import plotly.express as px\n",
|
| 329 |
+
"data_canada = px.data.gapminder().query(\"country == 'Canada'\")\n"
|
| 330 |
+
]
|
| 331 |
+
},
|
| 332 |
+
{
|
| 333 |
+
"cell_type": "code",
|
| 334 |
+
"execution_count": 38,
|
| 335 |
+
"metadata": {},
|
| 336 |
+
"outputs": [],
|
| 337 |
+
"source": [
|
| 338 |
+
"exec(\"x = 'abc'; y =4\")"
|
| 339 |
+
]
|
| 340 |
+
},
|
| 341 |
+
{
|
| 342 |
+
"cell_type": "code",
|
| 343 |
+
"execution_count": 40,
|
| 344 |
+
"metadata": {},
|
| 345 |
+
"outputs": [
|
| 346 |
+
{
|
| 347 |
+
"data": {
|
| 348 |
+
"text/plain": [
|
| 349 |
+
"('abc', 4)"
|
| 350 |
+
]
|
| 351 |
+
},
|
| 352 |
+
"execution_count": 40,
|
| 353 |
+
"metadata": {},
|
| 354 |
+
"output_type": "execute_result"
|
| 355 |
+
}
|
| 356 |
+
],
|
| 357 |
+
"source": [
|
| 358 |
+
"x, y"
|
| 359 |
+
]
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"cell_type": "code",
|
| 363 |
+
"execution_count": 36,
|
| 364 |
+
"metadata": {},
|
| 365 |
+
"outputs": [
|
| 366 |
+
{
|
| 367 |
+
"ename": "ValueError",
|
| 368 |
+
"evalue": "Mime type rendering requires nbformat>=4.2.0 but it is not installed",
|
| 369 |
+
"output_type": "error",
|
| 370 |
+
"traceback": [
|
| 371 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
| 372 |
+
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
| 373 |
+
"Cell \u001b[1;32mIn[36], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshow\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
|
| 374 |
+
"File \u001b[1;32mc:\\Users\\PD817AE\\OneDrive - EY\\Desktop\\DataSc\\pepsico_chat\\.venv\\lib\\site-packages\\plotly\\basedatatypes.py:3410\u001b[0m, in \u001b[0;36mBaseFigure.show\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 3377\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 3378\u001b[0m \u001b[38;5;124;03mShow a figure using either the default renderer(s) or the renderer(s)\u001b[39;00m\n\u001b[0;32m 3379\u001b[0m \u001b[38;5;124;03mspecified by the renderer argument\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 3406\u001b[0m \u001b[38;5;124;03mNone\u001b[39;00m\n\u001b[0;32m 3407\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 3408\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mplotly\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mio\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpio\u001b[39;00m\n\u001b[1;32m-> 3410\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m pio\u001b[38;5;241m.\u001b[39mshow(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
|
| 375 |
+
"File \u001b[1;32mc:\\Users\\PD817AE\\OneDrive - EY\\Desktop\\DataSc\\pepsico_chat\\.venv\\lib\\site-packages\\plotly\\io\\_renderers.py:394\u001b[0m, in \u001b[0;36mshow\u001b[1;34m(fig, renderer, validate, **kwargs)\u001b[0m\n\u001b[0;32m 389\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 390\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMime type rendering requires ipython but it is not installed\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 391\u001b[0m )\n\u001b[0;32m 393\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m nbformat \u001b[38;5;129;01mor\u001b[39;00m Version(nbformat\u001b[38;5;241m.\u001b[39m__version__) \u001b[38;5;241m<\u001b[39m Version(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m4.2.0\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m--> 394\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 395\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMime type rendering requires nbformat>=4.2.0 but it is not installed\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 396\u001b[0m )\n\u001b[0;32m 398\u001b[0m ipython_display\u001b[38;5;241m.\u001b[39mdisplay(bundle, raw\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 400\u001b[0m \u001b[38;5;66;03m# external renderers\u001b[39;00m\n",
|
| 376 |
+
"\u001b[1;31mValueError\u001b[0m: Mime type rendering requires nbformat>=4.2.0 but it is not installed"
|
| 377 |
+
]
|
| 378 |
+
}
|
| 379 |
+
],
|
| 380 |
+
"source": [
|
| 381 |
+
"fig.show()"
|
| 382 |
+
]
|
| 383 |
+
},
|
| 384 |
+
{
|
| 385 |
+
"cell_type": "code",
|
| 386 |
+
"execution_count": null,
|
| 387 |
+
"metadata": {},
|
| 388 |
+
"outputs": [],
|
| 389 |
+
"source": []
|
| 390 |
+
}
|
| 391 |
+
],
|
| 392 |
+
"metadata": {
|
| 393 |
+
"kernelspec": {
|
| 394 |
+
"display_name": ".venv",
|
| 395 |
+
"language": "python",
|
| 396 |
+
"name": "python3"
|
| 397 |
+
},
|
| 398 |
+
"language_info": {
|
| 399 |
+
"codemirror_mode": {
|
| 400 |
+
"name": "ipython",
|
| 401 |
+
"version": 3
|
| 402 |
+
},
|
| 403 |
+
"file_extension": ".py",
|
| 404 |
+
"mimetype": "text/x-python",
|
| 405 |
+
"name": "python",
|
| 406 |
+
"nbconvert_exporter": "python",
|
| 407 |
+
"pygments_lexer": "ipython3",
|
| 408 |
+
"version": "3.9.13"
|
| 409 |
+
}
|
| 410 |
+
},
|
| 411 |
+
"nbformat": 4,
|
| 412 |
+
"nbformat_minor": 2
|
| 413 |
+
}
|
app.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Created on Thu Apr 25 18:00:03 2024
|
| 4 |
+
|
| 5 |
+
@author: MK529XT
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import streamlit as st
|
| 9 |
+
import string
|
| 10 |
+
import random
|
| 11 |
+
from langchain_helper import get_few_shot_db_chain
|
| 12 |
+
import plotly.figure_factory as ff
|
| 13 |
+
import numpy as np
|
| 14 |
+
|
| 15 |
+
#st.set_page_config(layout="wide")
|
| 16 |
+
|
| 17 |
+
# CSS for styling
|
| 18 |
+
st.markdown("""
|
| 19 |
+
<style>
|
| 20 |
+
.title {
|
| 21 |
+
text-align: center;
|
| 22 |
+
outline: solid yellow;
|
| 23 |
+
font-size: 20px;
|
| 24 |
+
font-family: Arial, Helvetica, sans-serif;
|
| 25 |
+
color: #FFFFFF;
|
| 26 |
+
padding-top: 5px;
|
| 27 |
+
padding-bottom: 5px;
|
| 28 |
+
#border-bottom: 2px solid #FFFF00;
|
| 29 |
+
background-color: #050201;
|
| 30 |
+
}
|
| 31 |
+
</style>
|
| 32 |
+
""", unsafe_allow_html=True)
|
| 33 |
+
|
| 34 |
+
# Title section
|
| 35 |
+
st.markdown("<h1 class='title'>Manufacturing Process Analysis</h1>", unsafe_allow_html=True)
|
| 36 |
+
|
| 37 |
+
with st.chat_message("assistant"):
|
| 38 |
+
st.write("Hello 👋 How can I help you today?")
|
| 39 |
+
|
| 40 |
+
def random_string() -> dict:
|
| 41 |
+
try:
|
| 42 |
+
response_dict = get_few_shot_db_chain(st.session_state["chat_input"])
|
| 43 |
+
except Exception as e:
|
| 44 |
+
response_dict = {
|
| 45 |
+
"result_df" : None,
|
| 46 |
+
"sql_command" : None,
|
| 47 |
+
"response" : f"LLM ran into issues : {str(e)}",
|
| 48 |
+
"input" : st.session_state["chat_input"],
|
| 49 |
+
"graph_data" : None
|
| 50 |
+
}
|
| 51 |
+
return response_dict
|
| 52 |
+
|
| 53 |
+
def chat_actions():
|
| 54 |
+
st.session_state["chat_history"].append(
|
| 55 |
+
{
|
| 56 |
+
"role": "user",
|
| 57 |
+
"content": st.session_state["chat_input"],
|
| 58 |
+
}
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
st.session_state["chat_history"].append(
|
| 62 |
+
{
|
| 63 |
+
"role": "assistant",
|
| 64 |
+
"content": random_string(),
|
| 65 |
+
},
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
if "chat_history" not in st.session_state:
|
| 70 |
+
st.session_state["chat_history"] = []
|
| 71 |
+
|
| 72 |
+
st.chat_input("Enter your question", on_submit=chat_actions, key="chat_input")
|
| 73 |
+
|
| 74 |
+
for i in st.session_state["chat_history"]:
|
| 75 |
+
with st.chat_message(name=i["role"]):
|
| 76 |
+
print(type(i["content"]))
|
| 77 |
+
if isinstance(i["content"], str):
|
| 78 |
+
st.write(i["content"])
|
| 79 |
+
|
| 80 |
+
# When this is llm or bot response #
|
| 81 |
+
elif isinstance(i["content"], dict):
|
| 82 |
+
#st.info(i["content"]["sql_command"])
|
| 83 |
+
st.write(i["content"]["response"])
|
| 84 |
+
result_df = i["content"]["result_df"]
|
| 85 |
+
if i['content']["graph_data"] is not None:
|
| 86 |
+
st.plotly_chart(i['content']["graph_data"], use_container_width=True)
|
| 87 |
+
elif (result_df is not None) and ((result_df.shape[0] > 1) and (result_df.shape[1] > 1)) :
|
| 88 |
+
st.plotly_chart(ff.create_table(result_df), use_container_width=True)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
|
ecomm.db
ADDED
|
Binary file (983 kB). View file
|
|
|
fakedatagenerator.ipynb
ADDED
|
@@ -0,0 +1,680 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 43,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import faker\n",
|
| 10 |
+
"import pandas as pd\n",
|
| 11 |
+
"import random\n",
|
| 12 |
+
"import sqlite3"
|
| 13 |
+
]
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"cell_type": "code",
|
| 17 |
+
"execution_count": 18,
|
| 18 |
+
"metadata": {},
|
| 19 |
+
"outputs": [],
|
| 20 |
+
"source": [
|
| 21 |
+
"fake = faker.Faker()"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"cell_type": "code",
|
| 26 |
+
"execution_count": 28,
|
| 27 |
+
"metadata": {},
|
| 28 |
+
"outputs": [],
|
| 29 |
+
"source": [
|
| 30 |
+
"num_records = 250"
|
| 31 |
+
]
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"cell_type": "code",
|
| 35 |
+
"execution_count": 29,
|
| 36 |
+
"metadata": {},
|
| 37 |
+
"outputs": [],
|
| 38 |
+
"source": [
|
| 39 |
+
"# Generate dataset\n",
|
| 40 |
+
"customers_data = {\n",
|
| 41 |
+
" \"customer_id\": range(1, num_records + 1),\n",
|
| 42 |
+
" \"first_name\": [fake.first_name() for _ in range(num_records)],\n",
|
| 43 |
+
" \"last_name\": [fake.last_name() for _ in range(num_records)],\n",
|
| 44 |
+
" \"email\": [fake.email() for _ in range(num_records)],\n",
|
| 45 |
+
" \"phone_number\": [fake.phone_number() for _ in range(num_records)],\n",
|
| 46 |
+
" \"address\": [fake.street_address() for _ in range(num_records)],\n",
|
| 47 |
+
" \"city\": [fake.city() for _ in range(num_records)],\n",
|
| 48 |
+
" \"state\": [fake.state() for _ in range(num_records)],\n",
|
| 49 |
+
" \"zip_code\": [fake.zipcode() for _ in range(num_records)],\n",
|
| 50 |
+
" \"country\": [fake.country() for _ in range(num_records)],\n",
|
| 51 |
+
" \"date_of_birth\": [fake.date_of_birth().strftime(\"%Y-%m-%d\") for _ in range(num_records)],\n",
|
| 52 |
+
" \"gender\": [random.choice([\"Male\", \"Female\", \"Other\"]) for _ in range(num_records)]\n",
|
| 53 |
+
"}\n",
|
| 54 |
+
"\n",
|
| 55 |
+
"customers_df = pd.DataFrame(customers_data)"
|
| 56 |
+
]
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"cell_type": "code",
|
| 60 |
+
"execution_count": 31,
|
| 61 |
+
"metadata": {},
|
| 62 |
+
"outputs": [
|
| 63 |
+
{
|
| 64 |
+
"data": {
|
| 65 |
+
"text/html": [
|
| 66 |
+
"<div>\n",
|
| 67 |
+
"<style scoped>\n",
|
| 68 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 69 |
+
" vertical-align: middle;\n",
|
| 70 |
+
" }\n",
|
| 71 |
+
"\n",
|
| 72 |
+
" .dataframe tbody tr th {\n",
|
| 73 |
+
" vertical-align: top;\n",
|
| 74 |
+
" }\n",
|
| 75 |
+
"\n",
|
| 76 |
+
" .dataframe thead th {\n",
|
| 77 |
+
" text-align: right;\n",
|
| 78 |
+
" }\n",
|
| 79 |
+
"</style>\n",
|
| 80 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 81 |
+
" <thead>\n",
|
| 82 |
+
" <tr style=\"text-align: right;\">\n",
|
| 83 |
+
" <th></th>\n",
|
| 84 |
+
" <th>customer_id</th>\n",
|
| 85 |
+
" <th>first_name</th>\n",
|
| 86 |
+
" <th>last_name</th>\n",
|
| 87 |
+
" <th>email</th>\n",
|
| 88 |
+
" <th>phone_number</th>\n",
|
| 89 |
+
" <th>address</th>\n",
|
| 90 |
+
" <th>city</th>\n",
|
| 91 |
+
" <th>state</th>\n",
|
| 92 |
+
" <th>zip_code</th>\n",
|
| 93 |
+
" <th>country</th>\n",
|
| 94 |
+
" <th>date_of_birth</th>\n",
|
| 95 |
+
" <th>gender</th>\n",
|
| 96 |
+
" </tr>\n",
|
| 97 |
+
" </thead>\n",
|
| 98 |
+
" <tbody>\n",
|
| 99 |
+
" <tr>\n",
|
| 100 |
+
" <th>0</th>\n",
|
| 101 |
+
" <td>1</td>\n",
|
| 102 |
+
" <td>Daniel</td>\n",
|
| 103 |
+
" <td>Day</td>\n",
|
| 104 |
+
" <td>hvalencia@example.net</td>\n",
|
| 105 |
+
" <td>(671)991-3668</td>\n",
|
| 106 |
+
" <td>2712 Matthew Course Apt. 519</td>\n",
|
| 107 |
+
" <td>Reginashire</td>\n",
|
| 108 |
+
" <td>Virginia</td>\n",
|
| 109 |
+
" <td>43739</td>\n",
|
| 110 |
+
" <td>Portugal</td>\n",
|
| 111 |
+
" <td>1955-02-11</td>\n",
|
| 112 |
+
" <td>Male</td>\n",
|
| 113 |
+
" </tr>\n",
|
| 114 |
+
" <tr>\n",
|
| 115 |
+
" <th>1</th>\n",
|
| 116 |
+
" <td>2</td>\n",
|
| 117 |
+
" <td>Lucas</td>\n",
|
| 118 |
+
" <td>Jimenez</td>\n",
|
| 119 |
+
" <td>jennifer95@example.org</td>\n",
|
| 120 |
+
" <td>694.215.1833</td>\n",
|
| 121 |
+
" <td>560 Victoria Shoals Apt. 465</td>\n",
|
| 122 |
+
" <td>Marshallmouth</td>\n",
|
| 123 |
+
" <td>Oklahoma</td>\n",
|
| 124 |
+
" <td>90653</td>\n",
|
| 125 |
+
" <td>Albania</td>\n",
|
| 126 |
+
" <td>1909-06-06</td>\n",
|
| 127 |
+
" <td>Female</td>\n",
|
| 128 |
+
" </tr>\n",
|
| 129 |
+
" <tr>\n",
|
| 130 |
+
" <th>2</th>\n",
|
| 131 |
+
" <td>3</td>\n",
|
| 132 |
+
" <td>Victoria</td>\n",
|
| 133 |
+
" <td>Willis</td>\n",
|
| 134 |
+
" <td>millersean@example.org</td>\n",
|
| 135 |
+
" <td>769-267-3445</td>\n",
|
| 136 |
+
" <td>58325 Buck Road Suite 830</td>\n",
|
| 137 |
+
" <td>South Pamelaborough</td>\n",
|
| 138 |
+
" <td>Oregon</td>\n",
|
| 139 |
+
" <td>73729</td>\n",
|
| 140 |
+
" <td>Lithuania</td>\n",
|
| 141 |
+
" <td>1925-09-12</td>\n",
|
| 142 |
+
" <td>Other</td>\n",
|
| 143 |
+
" </tr>\n",
|
| 144 |
+
" <tr>\n",
|
| 145 |
+
" <th>3</th>\n",
|
| 146 |
+
" <td>4</td>\n",
|
| 147 |
+
" <td>Austin</td>\n",
|
| 148 |
+
" <td>Carr</td>\n",
|
| 149 |
+
" <td>arnoldjennifer@example.com</td>\n",
|
| 150 |
+
" <td>874-821-2653x36986</td>\n",
|
| 151 |
+
" <td>01855 Peterson View Apt. 956</td>\n",
|
| 152 |
+
" <td>Potterton</td>\n",
|
| 153 |
+
" <td>Wyoming</td>\n",
|
| 154 |
+
" <td>80500</td>\n",
|
| 155 |
+
" <td>Dominica</td>\n",
|
| 156 |
+
" <td>1920-06-23</td>\n",
|
| 157 |
+
" <td>Other</td>\n",
|
| 158 |
+
" </tr>\n",
|
| 159 |
+
" <tr>\n",
|
| 160 |
+
" <th>4</th>\n",
|
| 161 |
+
" <td>5</td>\n",
|
| 162 |
+
" <td>Ethan</td>\n",
|
| 163 |
+
" <td>Martin</td>\n",
|
| 164 |
+
" <td>mark46@example.org</td>\n",
|
| 165 |
+
" <td>875-454-9228</td>\n",
|
| 166 |
+
" <td>617 Clayton Tunnel</td>\n",
|
| 167 |
+
" <td>Adamsport</td>\n",
|
| 168 |
+
" <td>Michigan</td>\n",
|
| 169 |
+
" <td>38936</td>\n",
|
| 170 |
+
" <td>Yemen</td>\n",
|
| 171 |
+
" <td>1985-03-13</td>\n",
|
| 172 |
+
" <td>Female</td>\n",
|
| 173 |
+
" </tr>\n",
|
| 174 |
+
" </tbody>\n",
|
| 175 |
+
"</table>\n",
|
| 176 |
+
"</div>"
|
| 177 |
+
],
|
| 178 |
+
"text/plain": [
|
| 179 |
+
" customer_id first_name last_name email \\\n",
|
| 180 |
+
"0 1 Daniel Day hvalencia@example.net \n",
|
| 181 |
+
"1 2 Lucas Jimenez jennifer95@example.org \n",
|
| 182 |
+
"2 3 Victoria Willis millersean@example.org \n",
|
| 183 |
+
"3 4 Austin Carr arnoldjennifer@example.com \n",
|
| 184 |
+
"4 5 Ethan Martin mark46@example.org \n",
|
| 185 |
+
"\n",
|
| 186 |
+
" phone_number address city \\\n",
|
| 187 |
+
"0 (671)991-3668 2712 Matthew Course Apt. 519 Reginashire \n",
|
| 188 |
+
"1 694.215.1833 560 Victoria Shoals Apt. 465 Marshallmouth \n",
|
| 189 |
+
"2 769-267-3445 58325 Buck Road Suite 830 South Pamelaborough \n",
|
| 190 |
+
"3 874-821-2653x36986 01855 Peterson View Apt. 956 Potterton \n",
|
| 191 |
+
"4 875-454-9228 617 Clayton Tunnel Adamsport \n",
|
| 192 |
+
"\n",
|
| 193 |
+
" state zip_code country date_of_birth gender \n",
|
| 194 |
+
"0 Virginia 43739 Portugal 1955-02-11 Male \n",
|
| 195 |
+
"1 Oklahoma 90653 Albania 1909-06-06 Female \n",
|
| 196 |
+
"2 Oregon 73729 Lithuania 1925-09-12 Other \n",
|
| 197 |
+
"3 Wyoming 80500 Dominica 1920-06-23 Other \n",
|
| 198 |
+
"4 Michigan 38936 Yemen 1985-03-13 Female "
|
| 199 |
+
]
|
| 200 |
+
},
|
| 201 |
+
"execution_count": 31,
|
| 202 |
+
"metadata": {},
|
| 203 |
+
"output_type": "execute_result"
|
| 204 |
+
}
|
| 205 |
+
],
|
| 206 |
+
"source": [
|
| 207 |
+
"customers_df.head()"
|
| 208 |
+
]
|
| 209 |
+
},
|
| 210 |
+
{
|
| 211 |
+
"cell_type": "code",
|
| 212 |
+
"execution_count": 25,
|
| 213 |
+
"metadata": {},
|
| 214 |
+
"outputs": [],
|
| 215 |
+
"source": [
|
| 216 |
+
"# Set seed for reproducibility\n",
|
| 217 |
+
"random.seed(42)\n",
|
| 218 |
+
"\n",
|
| 219 |
+
"# Define number of records\n",
|
| 220 |
+
"num_records = 1000\n",
|
| 221 |
+
"\n",
|
| 222 |
+
"# Generate dataset\n",
|
| 223 |
+
"items_data = {\n",
|
| 224 |
+
" \"id\": range(1, num_records + 1),\n",
|
| 225 |
+
" \"product_name\": [fake.catch_phrase() for _ in range(num_records)],\n",
|
| 226 |
+
" \"description\": [fake.paragraph(nb_sentences=3) for _ in range(num_records)],\n",
|
| 227 |
+
" \"price\": [round(random.uniform(10.0, 100.0), 2) for _ in range(num_records)],\n",
|
| 228 |
+
" \"category\": [random.choice([\"Electronics\", \"Fashion\", \"Home Goods\", \"Sports\", \"Toys\"]) for _ in range(num_records)],\n",
|
| 229 |
+
" \"sub_category\": [\n",
|
| 230 |
+
" random.choice([\n",
|
| 231 |
+
" \"Smartphones\", \"Laptops\", \"Tablets\",\n",
|
| 232 |
+
" \"Women's Clothing\", \"Men's Clothing\", \"Kids' Clothing\",\n",
|
| 233 |
+
" \"Kitchen Appliances\", \"Home Decor\", \"Furniture\",\n",
|
| 234 |
+
" \"Fitness Equipment\", \"Outdoor Gear\", \"Toys & Games\"\n",
|
| 235 |
+
" ]) for _ in range(num_records)\n",
|
| 236 |
+
" ],\n",
|
| 237 |
+
" \"brand\": [fake.company() for _ in range(num_records)],\n",
|
| 238 |
+
" \"rating\": [round(random.uniform(1.0, 5.0), 1) for _ in range(num_records)],\n",
|
| 239 |
+
" \"num_reviews\": [random.randint(1, 100) for _ in range(num_records)],\n",
|
| 240 |
+
" \"stock_quantity\": [random.randint(1, 100) for _ in range(num_records)],\n",
|
| 241 |
+
" \"seller_name\": [fake.name() for _ in range(num_records)],\n",
|
| 242 |
+
" \"shipping_weight\": [round(random.uniform(1.0, 10.0), 2) for _ in range(num_records)],\n",
|
| 243 |
+
" \"shipping_dimension\": [\n",
|
| 244 |
+
" f\"{random.randint(6, 20)} x {random.randint(4, 12)} x {random.randint(2, 8)}\"\n",
|
| 245 |
+
" for _ in range(num_records)\n",
|
| 246 |
+
" ]\n",
|
| 247 |
+
"}\n",
|
| 248 |
+
"\n",
|
| 249 |
+
"items_df = pd.DataFrame(items_data)"
|
| 250 |
+
]
|
| 251 |
+
},
|
| 252 |
+
{
|
| 253 |
+
"cell_type": "code",
|
| 254 |
+
"execution_count": 26,
|
| 255 |
+
"metadata": {},
|
| 256 |
+
"outputs": [
|
| 257 |
+
{
|
| 258 |
+
"data": {
|
| 259 |
+
"text/html": [
|
| 260 |
+
"<div>\n",
|
| 261 |
+
"<style scoped>\n",
|
| 262 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 263 |
+
" vertical-align: middle;\n",
|
| 264 |
+
" }\n",
|
| 265 |
+
"\n",
|
| 266 |
+
" .dataframe tbody tr th {\n",
|
| 267 |
+
" vertical-align: top;\n",
|
| 268 |
+
" }\n",
|
| 269 |
+
"\n",
|
| 270 |
+
" .dataframe thead th {\n",
|
| 271 |
+
" text-align: right;\n",
|
| 272 |
+
" }\n",
|
| 273 |
+
"</style>\n",
|
| 274 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 275 |
+
" <thead>\n",
|
| 276 |
+
" <tr style=\"text-align: right;\">\n",
|
| 277 |
+
" <th></th>\n",
|
| 278 |
+
" <th>id</th>\n",
|
| 279 |
+
" <th>product_name</th>\n",
|
| 280 |
+
" <th>description</th>\n",
|
| 281 |
+
" <th>price</th>\n",
|
| 282 |
+
" <th>category</th>\n",
|
| 283 |
+
" <th>sub_category</th>\n",
|
| 284 |
+
" <th>brand</th>\n",
|
| 285 |
+
" <th>rating</th>\n",
|
| 286 |
+
" <th>num_reviews</th>\n",
|
| 287 |
+
" <th>stock_quantity</th>\n",
|
| 288 |
+
" <th>seller_name</th>\n",
|
| 289 |
+
" <th>shipping_weight</th>\n",
|
| 290 |
+
" <th>shipping_dimension</th>\n",
|
| 291 |
+
" </tr>\n",
|
| 292 |
+
" </thead>\n",
|
| 293 |
+
" <tbody>\n",
|
| 294 |
+
" <tr>\n",
|
| 295 |
+
" <th>0</th>\n",
|
| 296 |
+
" <td>1</td>\n",
|
| 297 |
+
" <td>Ergonomic bottom-line framework</td>\n",
|
| 298 |
+
" <td>Kind stay kid song dream. Yourself would scene...</td>\n",
|
| 299 |
+
" <td>67.55</td>\n",
|
| 300 |
+
" <td>Electronics</td>\n",
|
| 301 |
+
" <td>Men's Clothing</td>\n",
|
| 302 |
+
" <td>Gonzalez, Jones and Hanson</td>\n",
|
| 303 |
+
" <td>3.2</td>\n",
|
| 304 |
+
" <td>52</td>\n",
|
| 305 |
+
" <td>9</td>\n",
|
| 306 |
+
" <td>Kathryn Hansen</td>\n",
|
| 307 |
+
" <td>2.31</td>\n",
|
| 308 |
+
" <td>18 x 6 x 6</td>\n",
|
| 309 |
+
" </tr>\n",
|
| 310 |
+
" <tr>\n",
|
| 311 |
+
" <th>1</th>\n",
|
| 312 |
+
" <td>2</td>\n",
|
| 313 |
+
" <td>Reduced high-level customer loyalty</td>\n",
|
| 314 |
+
" <td>Nothing free around expert decade. Great view ...</td>\n",
|
| 315 |
+
" <td>12.25</td>\n",
|
| 316 |
+
" <td>Home Goods</td>\n",
|
| 317 |
+
" <td>Toys & Games</td>\n",
|
| 318 |
+
" <td>Walker-Love</td>\n",
|
| 319 |
+
" <td>1.7</td>\n",
|
| 320 |
+
" <td>52</td>\n",
|
| 321 |
+
" <td>34</td>\n",
|
| 322 |
+
" <td>Breanna Allison</td>\n",
|
| 323 |
+
" <td>1.40</td>\n",
|
| 324 |
+
" <td>14 x 7 x 2</td>\n",
|
| 325 |
+
" </tr>\n",
|
| 326 |
+
" <tr>\n",
|
| 327 |
+
" <th>2</th>\n",
|
| 328 |
+
" <td>3</td>\n",
|
| 329 |
+
" <td>Phased holistic capacity</td>\n",
|
| 330 |
+
" <td>Fire usually high manage tend available.</td>\n",
|
| 331 |
+
" <td>34.75</td>\n",
|
| 332 |
+
" <td>Toys</td>\n",
|
| 333 |
+
" <td>Laptops</td>\n",
|
| 334 |
+
" <td>Nelson-Morrison</td>\n",
|
| 335 |
+
" <td>2.8</td>\n",
|
| 336 |
+
" <td>59</td>\n",
|
| 337 |
+
" <td>29</td>\n",
|
| 338 |
+
" <td>Allen Hernandez</td>\n",
|
| 339 |
+
" <td>8.36</td>\n",
|
| 340 |
+
" <td>12 x 12 x 5</td>\n",
|
| 341 |
+
" </tr>\n",
|
| 342 |
+
" <tr>\n",
|
| 343 |
+
" <th>3</th>\n",
|
| 344 |
+
" <td>4</td>\n",
|
| 345 |
+
" <td>Quality-focused 6thgeneration matrix</td>\n",
|
| 346 |
+
" <td>Capital onto into eat unit church take ground....</td>\n",
|
| 347 |
+
" <td>30.09</td>\n",
|
| 348 |
+
" <td>Home Goods</td>\n",
|
| 349 |
+
" <td>Kids' Clothing</td>\n",
|
| 350 |
+
" <td>Sullivan, Clark and Larson</td>\n",
|
| 351 |
+
" <td>4.0</td>\n",
|
| 352 |
+
" <td>35</td>\n",
|
| 353 |
+
" <td>48</td>\n",
|
| 354 |
+
" <td>Joseph Hayden</td>\n",
|
| 355 |
+
" <td>2.80</td>\n",
|
| 356 |
+
" <td>19 x 7 x 6</td>\n",
|
| 357 |
+
" </tr>\n",
|
| 358 |
+
" <tr>\n",
|
| 359 |
+
" <th>4</th>\n",
|
| 360 |
+
" <td>5</td>\n",
|
| 361 |
+
" <td>Visionary systemic array</td>\n",
|
| 362 |
+
" <td>Woman former wind bill red authority. Police s...</td>\n",
|
| 363 |
+
" <td>76.28</td>\n",
|
| 364 |
+
" <td>Electronics</td>\n",
|
| 365 |
+
" <td>Home Decor</td>\n",
|
| 366 |
+
" <td>Evans PLC</td>\n",
|
| 367 |
+
" <td>4.1</td>\n",
|
| 368 |
+
" <td>50</td>\n",
|
| 369 |
+
" <td>11</td>\n",
|
| 370 |
+
" <td>John Mcdowell</td>\n",
|
| 371 |
+
" <td>4.36</td>\n",
|
| 372 |
+
" <td>13 x 11 x 4</td>\n",
|
| 373 |
+
" </tr>\n",
|
| 374 |
+
" </tbody>\n",
|
| 375 |
+
"</table>\n",
|
| 376 |
+
"</div>"
|
| 377 |
+
],
|
| 378 |
+
"text/plain": [
|
| 379 |
+
" id product_name \\\n",
|
| 380 |
+
"0 1 Ergonomic bottom-line framework \n",
|
| 381 |
+
"1 2 Reduced high-level customer loyalty \n",
|
| 382 |
+
"2 3 Phased holistic capacity \n",
|
| 383 |
+
"3 4 Quality-focused 6thgeneration matrix \n",
|
| 384 |
+
"4 5 Visionary systemic array \n",
|
| 385 |
+
"\n",
|
| 386 |
+
" description price category \\\n",
|
| 387 |
+
"0 Kind stay kid song dream. Yourself would scene... 67.55 Electronics \n",
|
| 388 |
+
"1 Nothing free around expert decade. Great view ... 12.25 Home Goods \n",
|
| 389 |
+
"2 Fire usually high manage tend available. 34.75 Toys \n",
|
| 390 |
+
"3 Capital onto into eat unit church take ground.... 30.09 Home Goods \n",
|
| 391 |
+
"4 Woman former wind bill red authority. Police s... 76.28 Electronics \n",
|
| 392 |
+
"\n",
|
| 393 |
+
" sub_category brand rating num_reviews \\\n",
|
| 394 |
+
"0 Men's Clothing Gonzalez, Jones and Hanson 3.2 52 \n",
|
| 395 |
+
"1 Toys & Games Walker-Love 1.7 52 \n",
|
| 396 |
+
"2 Laptops Nelson-Morrison 2.8 59 \n",
|
| 397 |
+
"3 Kids' Clothing Sullivan, Clark and Larson 4.0 35 \n",
|
| 398 |
+
"4 Home Decor Evans PLC 4.1 50 \n",
|
| 399 |
+
"\n",
|
| 400 |
+
" stock_quantity seller_name shipping_weight shipping_dimension \n",
|
| 401 |
+
"0 9 Kathryn Hansen 2.31 18 x 6 x 6 \n",
|
| 402 |
+
"1 34 Breanna Allison 1.40 14 x 7 x 2 \n",
|
| 403 |
+
"2 29 Allen Hernandez 8.36 12 x 12 x 5 \n",
|
| 404 |
+
"3 48 Joseph Hayden 2.80 19 x 7 x 6 \n",
|
| 405 |
+
"4 11 John Mcdowell 4.36 13 x 11 x 4 "
|
| 406 |
+
]
|
| 407 |
+
},
|
| 408 |
+
"execution_count": 26,
|
| 409 |
+
"metadata": {},
|
| 410 |
+
"output_type": "execute_result"
|
| 411 |
+
}
|
| 412 |
+
],
|
| 413 |
+
"source": [
|
| 414 |
+
"items_df.head()"
|
| 415 |
+
]
|
| 416 |
+
},
|
| 417 |
+
{
|
| 418 |
+
"cell_type": "code",
|
| 419 |
+
"execution_count": 32,
|
| 420 |
+
"metadata": {},
|
| 421 |
+
"outputs": [],
|
| 422 |
+
"source": [
|
| 423 |
+
"# Define number of orders\n",
|
| 424 |
+
"num_orders = 5000\n",
|
| 425 |
+
"\n",
|
| 426 |
+
"# Generate orders dataset\n",
|
| 427 |
+
"data = {\n",
|
| 428 |
+
" \"order_id\": range(1, num_orders + 1),\n",
|
| 429 |
+
" \"customer_id\": [random.choice(customers_df[\"customer_id\"]) for _ in range(num_orders)],\n",
|
| 430 |
+
" \"product_id\": [random.choice(items_df[\"id\"]) for _ in range(num_orders)],\n",
|
| 431 |
+
" \"order_date\": [fake.date_time_between(start_date=\"-2y\", end_date=\"now\").strftime(\"%Y-%m-%d %H:%M:%S\") for _ in range(num_orders)],\n",
|
| 432 |
+
" \"order_status\": [random.choice([\"Pending\", \"Shipped\", \"Delivered\", \"Cancelled\"]) for _ in range(num_orders)],\n",
|
| 433 |
+
" \"payment_method\": [random.choice([\"Credit Card\", \"PayPal\", \"Bank Transfer\"]) for _ in range(num_orders)],\n",
|
| 434 |
+
" \"total_amount\": [round(random.uniform(10.0, 100.0), 2) for _ in range(num_orders)],\n",
|
| 435 |
+
" \"shipping_address\": [fake.street_address() for _ in range(num_orders)],\n",
|
| 436 |
+
" \"shipping_city\": [fake.city() for _ in range(num_orders)],\n",
|
| 437 |
+
" \"shipping_state\": [fake.state() for _ in range(num_orders)],\n",
|
| 438 |
+
" \"shipping_zip\": [fake.zipcode() for _ in range(num_orders)],\n",
|
| 439 |
+
" \"shipping_country\": [fake.country() for _ in range(num_orders)]\n",
|
| 440 |
+
"}\n",
|
| 441 |
+
"\n",
|
| 442 |
+
"orders_df = pd.DataFrame(data)"
|
| 443 |
+
]
|
| 444 |
+
},
|
| 445 |
+
{
|
| 446 |
+
"cell_type": "code",
|
| 447 |
+
"execution_count": 33,
|
| 448 |
+
"metadata": {},
|
| 449 |
+
"outputs": [
|
| 450 |
+
{
|
| 451 |
+
"data": {
|
| 452 |
+
"text/html": [
|
| 453 |
+
"<div>\n",
|
| 454 |
+
"<style scoped>\n",
|
| 455 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 456 |
+
" vertical-align: middle;\n",
|
| 457 |
+
" }\n",
|
| 458 |
+
"\n",
|
| 459 |
+
" .dataframe tbody tr th {\n",
|
| 460 |
+
" vertical-align: top;\n",
|
| 461 |
+
" }\n",
|
| 462 |
+
"\n",
|
| 463 |
+
" .dataframe thead th {\n",
|
| 464 |
+
" text-align: right;\n",
|
| 465 |
+
" }\n",
|
| 466 |
+
"</style>\n",
|
| 467 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 468 |
+
" <thead>\n",
|
| 469 |
+
" <tr style=\"text-align: right;\">\n",
|
| 470 |
+
" <th></th>\n",
|
| 471 |
+
" <th>order_id</th>\n",
|
| 472 |
+
" <th>customer_id</th>\n",
|
| 473 |
+
" <th>product_id</th>\n",
|
| 474 |
+
" <th>order_date</th>\n",
|
| 475 |
+
" <th>order_status</th>\n",
|
| 476 |
+
" <th>payment_method</th>\n",
|
| 477 |
+
" <th>total_amount</th>\n",
|
| 478 |
+
" <th>shipping_address</th>\n",
|
| 479 |
+
" <th>shipping_city</th>\n",
|
| 480 |
+
" <th>shipping_state</th>\n",
|
| 481 |
+
" <th>shipping_zip</th>\n",
|
| 482 |
+
" <th>shipping_country</th>\n",
|
| 483 |
+
" </tr>\n",
|
| 484 |
+
" </thead>\n",
|
| 485 |
+
" <tbody>\n",
|
| 486 |
+
" <tr>\n",
|
| 487 |
+
" <th>0</th>\n",
|
| 488 |
+
" <td>1</td>\n",
|
| 489 |
+
" <td>85</td>\n",
|
| 490 |
+
" <td>506</td>\n",
|
| 491 |
+
" <td>2024-07-03 08:05:03</td>\n",
|
| 492 |
+
" <td>Pending</td>\n",
|
| 493 |
+
" <td>Credit Card</td>\n",
|
| 494 |
+
" <td>54.40</td>\n",
|
| 495 |
+
" <td>140 Edwards Overpass</td>\n",
|
| 496 |
+
" <td>Kingtown</td>\n",
|
| 497 |
+
" <td>Kansas</td>\n",
|
| 498 |
+
" <td>05046</td>\n",
|
| 499 |
+
" <td>British Virgin Islands</td>\n",
|
| 500 |
+
" </tr>\n",
|
| 501 |
+
" <tr>\n",
|
| 502 |
+
" <th>1</th>\n",
|
| 503 |
+
" <td>2</td>\n",
|
| 504 |
+
" <td>88</td>\n",
|
| 505 |
+
" <td>270</td>\n",
|
| 506 |
+
" <td>2024-09-21 12:08:46</td>\n",
|
| 507 |
+
" <td>Shipped</td>\n",
|
| 508 |
+
" <td>Bank Transfer</td>\n",
|
| 509 |
+
" <td>54.55</td>\n",
|
| 510 |
+
" <td>811 Blair Glen Apt. 318</td>\n",
|
| 511 |
+
" <td>Port Andrew</td>\n",
|
| 512 |
+
" <td>New Jersey</td>\n",
|
| 513 |
+
" <td>46407</td>\n",
|
| 514 |
+
" <td>Liberia</td>\n",
|
| 515 |
+
" </tr>\n",
|
| 516 |
+
" <tr>\n",
|
| 517 |
+
" <th>2</th>\n",
|
| 518 |
+
" <td>3</td>\n",
|
| 519 |
+
" <td>63</td>\n",
|
| 520 |
+
" <td>89</td>\n",
|
| 521 |
+
" <td>2024-04-28 09:50:13</td>\n",
|
| 522 |
+
" <td>Shipped</td>\n",
|
| 523 |
+
" <td>PayPal</td>\n",
|
| 524 |
+
" <td>38.34</td>\n",
|
| 525 |
+
" <td>35571 Debra Stravenue</td>\n",
|
| 526 |
+
" <td>Warrenhaven</td>\n",
|
| 527 |
+
" <td>Louisiana</td>\n",
|
| 528 |
+
" <td>78358</td>\n",
|
| 529 |
+
" <td>Maldives</td>\n",
|
| 530 |
+
" </tr>\n",
|
| 531 |
+
" <tr>\n",
|
| 532 |
+
" <th>3</th>\n",
|
| 533 |
+
" <td>4</td>\n",
|
| 534 |
+
" <td>53</td>\n",
|
| 535 |
+
" <td>886</td>\n",
|
| 536 |
+
" <td>2024-03-03 22:47:52</td>\n",
|
| 537 |
+
" <td>Pending</td>\n",
|
| 538 |
+
" <td>Bank Transfer</td>\n",
|
| 539 |
+
" <td>46.67</td>\n",
|
| 540 |
+
" <td>45222 Karen Trace Apt. 530</td>\n",
|
| 541 |
+
" <td>Nicoleland</td>\n",
|
| 542 |
+
" <td>North Dakota</td>\n",
|
| 543 |
+
" <td>91684</td>\n",
|
| 544 |
+
" <td>United States Minor Outlying Islands</td>\n",
|
| 545 |
+
" </tr>\n",
|
| 546 |
+
" <tr>\n",
|
| 547 |
+
" <th>4</th>\n",
|
| 548 |
+
" <td>5</td>\n",
|
| 549 |
+
" <td>139</td>\n",
|
| 550 |
+
" <td>141</td>\n",
|
| 551 |
+
" <td>2024-02-06 20:16:53</td>\n",
|
| 552 |
+
" <td>Shipped</td>\n",
|
| 553 |
+
" <td>Bank Transfer</td>\n",
|
| 554 |
+
" <td>11.09</td>\n",
|
| 555 |
+
" <td>61721 Perez Walks Apt. 244</td>\n",
|
| 556 |
+
" <td>Lake Curtischester</td>\n",
|
| 557 |
+
" <td>New York</td>\n",
|
| 558 |
+
" <td>22193</td>\n",
|
| 559 |
+
" <td>Bangladesh</td>\n",
|
| 560 |
+
" </tr>\n",
|
| 561 |
+
" </tbody>\n",
|
| 562 |
+
"</table>\n",
|
| 563 |
+
"</div>"
|
| 564 |
+
],
|
| 565 |
+
"text/plain": [
|
| 566 |
+
" order_id customer_id product_id order_date order_status \\\n",
|
| 567 |
+
"0 1 85 506 2024-07-03 08:05:03 Pending \n",
|
| 568 |
+
"1 2 88 270 2024-09-21 12:08:46 Shipped \n",
|
| 569 |
+
"2 3 63 89 2024-04-28 09:50:13 Shipped \n",
|
| 570 |
+
"3 4 53 886 2024-03-03 22:47:52 Pending \n",
|
| 571 |
+
"4 5 139 141 2024-02-06 20:16:53 Shipped \n",
|
| 572 |
+
"\n",
|
| 573 |
+
" payment_method total_amount shipping_address \\\n",
|
| 574 |
+
"0 Credit Card 54.40 140 Edwards Overpass \n",
|
| 575 |
+
"1 Bank Transfer 54.55 811 Blair Glen Apt. 318 \n",
|
| 576 |
+
"2 PayPal 38.34 35571 Debra Stravenue \n",
|
| 577 |
+
"3 Bank Transfer 46.67 45222 Karen Trace Apt. 530 \n",
|
| 578 |
+
"4 Bank Transfer 11.09 61721 Perez Walks Apt. 244 \n",
|
| 579 |
+
"\n",
|
| 580 |
+
" shipping_city shipping_state shipping_zip \\\n",
|
| 581 |
+
"0 Kingtown Kansas 05046 \n",
|
| 582 |
+
"1 Port Andrew New Jersey 46407 \n",
|
| 583 |
+
"2 Warrenhaven Louisiana 78358 \n",
|
| 584 |
+
"3 Nicoleland North Dakota 91684 \n",
|
| 585 |
+
"4 Lake Curtischester New York 22193 \n",
|
| 586 |
+
"\n",
|
| 587 |
+
" shipping_country \n",
|
| 588 |
+
"0 British Virgin Islands \n",
|
| 589 |
+
"1 Liberia \n",
|
| 590 |
+
"2 Maldives \n",
|
| 591 |
+
"3 United States Minor Outlying Islands \n",
|
| 592 |
+
"4 Bangladesh "
|
| 593 |
+
]
|
| 594 |
+
},
|
| 595 |
+
"execution_count": 33,
|
| 596 |
+
"metadata": {},
|
| 597 |
+
"output_type": "execute_result"
|
| 598 |
+
}
|
| 599 |
+
],
|
| 600 |
+
"source": [
|
| 601 |
+
"orders_df.head()"
|
| 602 |
+
]
|
| 603 |
+
},
|
| 604 |
+
{
|
| 605 |
+
"cell_type": "markdown",
|
| 606 |
+
"metadata": {},
|
| 607 |
+
"source": [
|
| 608 |
+
"Save the dataframe to SQLite"
|
| 609 |
+
]
|
| 610 |
+
},
|
| 611 |
+
{
|
| 612 |
+
"cell_type": "code",
|
| 613 |
+
"execution_count": 56,
|
| 614 |
+
"metadata": {},
|
| 615 |
+
"outputs": [],
|
| 616 |
+
"source": [
|
| 617 |
+
"# Create a connection to the SQLite database\n",
|
| 618 |
+
"conn = sqlite3.connect('ecomm.db')\n",
|
| 619 |
+
"\n",
|
| 620 |
+
"# Save the DataFrame to the SQLite database\n",
|
| 621 |
+
"customers_df.to_sql('customer_details', conn, if_exists='replace', index=False)\n",
|
| 622 |
+
"items_df.to_sql('items', conn, if_exists='replace', index=False)\n",
|
| 623 |
+
"orders_df.to_sql('orders', conn, if_exists='replace', index=False)\n",
|
| 624 |
+
"\n",
|
| 625 |
+
"# Close the connection\n",
|
| 626 |
+
"conn.close()"
|
| 627 |
+
]
|
| 628 |
+
},
|
| 629 |
+
{
|
| 630 |
+
"cell_type": "markdown",
|
| 631 |
+
"metadata": {},
|
| 632 |
+
"source": [
|
| 633 |
+
"Deleting cusomers table from database"
|
| 634 |
+
]
|
| 635 |
+
},
|
| 636 |
+
{
|
| 637 |
+
"cell_type": "code",
|
| 638 |
+
"execution_count": 57,
|
| 639 |
+
"metadata": {},
|
| 640 |
+
"outputs": [],
|
| 641 |
+
"source": [
|
| 642 |
+
"# Establish a connection to the database\n",
|
| 643 |
+
"conn = sqlite3.connect('ecomm.db')\n",
|
| 644 |
+
"\n",
|
| 645 |
+
"# Create a cursor object\n",
|
| 646 |
+
"cur = conn.cursor()\n",
|
| 647 |
+
"\n",
|
| 648 |
+
"# Delete the table\n",
|
| 649 |
+
"cur.execute('DROP TABLE customers')\n",
|
| 650 |
+
"\n",
|
| 651 |
+
"# Commit the changes\n",
|
| 652 |
+
"conn.commit()\n",
|
| 653 |
+
"\n",
|
| 654 |
+
"# Close the connection\n",
|
| 655 |
+
"conn.close()"
|
| 656 |
+
]
|
| 657 |
+
}
|
| 658 |
+
],
|
| 659 |
+
"metadata": {
|
| 660 |
+
"kernelspec": {
|
| 661 |
+
"display_name": ".venv",
|
| 662 |
+
"language": "python",
|
| 663 |
+
"name": "python3"
|
| 664 |
+
},
|
| 665 |
+
"language_info": {
|
| 666 |
+
"codemirror_mode": {
|
| 667 |
+
"name": "ipython",
|
| 668 |
+
"version": 3
|
| 669 |
+
},
|
| 670 |
+
"file_extension": ".py",
|
| 671 |
+
"mimetype": "text/x-python",
|
| 672 |
+
"name": "python",
|
| 673 |
+
"nbconvert_exporter": "python",
|
| 674 |
+
"pygments_lexer": "ipython3",
|
| 675 |
+
"version": "3.9.13"
|
| 676 |
+
}
|
| 677 |
+
},
|
| 678 |
+
"nbformat": 4,
|
| 679 |
+
"nbformat_minor": 2
|
| 680 |
+
}
|
few_shots.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Few-shot examples for the text-to-SQL chain (see langchain_helper.py).
# All SQLQuery values MUST be valid SQLite: the chain runs against
# sqlite:///ecomm.db, and the project prompt instructs the model to use
# strftime() rather than MySQL-only functions.
# Fixes vs. the original examples:
#   - "average shipping time" used DATEDIFF(), which does not exist in SQLite;
#     replaced with julianday(delivery_date) - julianday(order_date).
#   - "total revenue by month" used MONTH(), which does not exist in SQLite;
#     replaced with strftime('%m', ...), matching the cancelled-orders example.
few_shots = [
    {
        'Question': "Which customers have the highest aggregated purchase amount?",
        'SQLQuery': """
SELECT c.customer_id, c.first_name, c.last_name, SUM(o.total_amount) as total_purchase
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id
ORDER BY total_purchase DESC
LIMIT 1;
""",
        'SQLResult': "(123, 'John', 'Doe', 543.21)",
        'Answer': "John Doe with customer ID 123 has the highest aggregated purchase amount of $543.21."
    },
    {
        'Question': "What is the total revenue generated by all orders?",
        'SQLQuery': """
SELECT SUM(total_amount) as total_revenue
FROM orders;
""",
        'SQLResult': "(10000.00)",
        'Answer': "The total revenue generated by all orders is $10,000.00."
    },
    {
        'Question': "Which product has been ordered the most?",
        'SQLQuery': """
SELECT p.product_name, COUNT(o.product_id) as order_count
FROM orders o
JOIN products p ON o.product_id = p.id
GROUP BY o.product_id
ORDER BY order_count DESC
LIMIT 1;
""",
        'SQLResult': "('iPhone 13', 50)",
        'Answer': "The iPhone 13 has been ordered the most, with 50 orders."
    },
    {
        'Question': "What is the average order value?",
        'SQLQuery': """
SELECT AVG(total_amount) as average_order_value
FROM orders;
""",
        'SQLResult': "(50.00)",
        'Answer': "The average order value is $50.00."
    },
    {
        'Question': "Which customer has placed the most orders?",
        'SQLQuery': """
SELECT c.customer_id, c.first_name, c.last_name, COUNT(o.order_id) as order_count
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id
ORDER BY order_count DESC
LIMIT 1;
""",
        'SQLResult': "(123, 'John', 'Doe', 10)",
        'Answer': "John Doe with customer ID 123 has placed the most orders, with 10 orders."
    },
    {
        'Question': "What is the total number of unique customers?",
        'SQLQuery': """
SELECT COUNT(DISTINCT customer_id) as unique_customers
FROM orders;
""",
        'SQLResult': "(500)",
        'Answer': "There are 500 unique customers."
    },
    {
        'Question': "What is the most popular payment method?",
        'SQLQuery': """
SELECT payment_method, COUNT(order_id) as order_count
FROM orders
GROUP BY payment_method
ORDER BY order_count DESC
LIMIT 1;
""",
        'SQLResult': "('Credit Card', 300)",
        'Answer': "The most popular payment method is Credit Card, used in 300 orders."
    },
    {
        'Question': "Which product category has the highest total revenue?",
        'SQLQuery': """
SELECT p.category, SUM(o.total_amount) as total_revenue
FROM orders o
JOIN products p ON o.product_id = p.id
GROUP BY p.category
ORDER BY total_revenue DESC
LIMIT 1;
""",
        'SQLResult': "('Electronics', 5000.00)",
        'Answer': "The Electronics category has the highest total revenue of $5,000.00."
    },
    {
        'Question': "What is the average shipping time for orders?",
        # SQLite has no DATEDIFF(); julianday() difference yields days.
        'SQLQuery': """
SELECT AVG(julianday(delivery_date) - julianday(order_date)) as average_shipping_time
FROM orders;
""",
        'SQLResult': "(3.5)",
        'Answer': "The average shipping time for orders is 3.5 days."
    },
    {
        'Question': "Which customer has the highest average order value?",
        'SQLQuery': """
SELECT c.customer_id, c.first_name, c.last_name, AVG(o.total_amount) as average_order_value
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id
ORDER BY average_order_value DESC
LIMIT 1;
""",
        'SQLResult': "(123, 'John', 'Doe', 100.00)",
        'Answer': "John Doe with customer ID 123 has the highest average order value of $100.00."
    },
    {
        'Question': "What is the total number of orders by country?",
        'SQLQuery': """
SELECT c.country, COUNT(o.order_id) as order_count
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.country;
""",
        'SQLResult': "([('USA', 200), ('Canada', 100), ('Mexico', 50)])",
        'Answer': "There are 200 orders from the USA, 100 orders from Canada, and 50 orders from Mexico."
    },
    {
        'Question': "Which product has the highest profit margin?",
        'SQLQuery': """
SELECT p.product_name, (p.price - p.cost) / p.price as profit_margin
FROM products p
ORDER BY profit_margin DESC
LIMIT 1;
""",
        'SQLResult': "('iPhone 13', 0.30)",
        'Answer': "The iPhone 13 has the highest profit margin of 30%."
    },
    {
        'Question': "What is the total revenue by month?",
        # SQLite has no MONTH(); use strftime('%m', ...) as the prompt demands.
        'SQLQuery': """
SELECT strftime('%m', o.order_date) as month, SUM(o.total_amount) as total_revenue
FROM orders o
GROUP BY month;
""",
        'SQLResult': "([('01', 1000.00), ('02', 1200.00), ('03', 1500.00)])",
        'Answer': "The total revenue for January is $1,000.00, February is $1,200.00, and March is $1,500.00."
    },
    {
        'Question': "Which customer has placed orders in the most categories?",
        'SQLQuery': """
SELECT c.customer_id, c.first_name, c.last_name, COUNT(DISTINCT p.category) as category_count
FROM customers c
JOIN orders o ON c.customer_id = o.customer_id
JOIN products p ON o.product_id = p.id
GROUP BY c.customer_id
ORDER BY category_count DESC
LIMIT 1;
""",
        'SQLResult': "(123, 'John', 'Doe', 5)",
        'Answer': "John Doe with customer ID 123 has placed orders in 5 different categories."
    },
    {
        'Question': "What is the average order value by payment method?",
        'SQLQuery': """
SELECT o.payment_method, AVG(o.total_amount) as average_order_value
FROM orders o
GROUP BY o.payment_method;
""",
        'SQLResult': "([('Credit Card', 50.00), ('PayPal', 40.00), ('Bank Transfer', 60.00)])",
        'Answer': "The average order value for Credit Card is $50.00, PayPal is $40.00, and Bank Transfer is $60.00."
    },
    {
        "Question": "how many orders were cancelled on monthly basis",
        'SQLQuery': """
SELECT strftime('%m', order_date) as month, COUNT(order_id) as cancelled_orders
FROM orders
WHERE order_status = 'Cancelled'
GROUP BY month;
""",
        "SQLResult": "[('01', 108), ('02', 94), ('03', 111), ('04', 104), ('05', 108), ('06', 90), ('07', 117), ('08', 91), ('09', 102), ('10', 90), ('11', 103), ('12', 108)]",
        "Answer": "There were 108 cancelled orders in January, 94 in February, 111 in March, 104 in April, 108 in May, 90 in June, 117 in July, 91 in August, 102 in September, 90 in October, 103 in November, and 108 in December."
    }
]
|
langchain_helper.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from langchain_openai import AzureOpenAI
|
| 3 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 4 |
+
from langchain.agents.agent_types import AgentType
|
| 5 |
+
from langchain_experimental.agents import create_pandas_dataframe_agent
|
| 6 |
+
from langchain_community.utilities import SQLDatabase
|
| 7 |
+
from langchain_experimental.sql import SQLDatabaseChain
|
| 8 |
+
from langchain.prompts import SemanticSimilarityExampleSelector
|
| 9 |
+
from langchain_openai import AzureOpenAIEmbeddings
|
| 10 |
+
from langchain_community.vectorstores import Chroma
|
| 11 |
+
from langchain.prompts import FewShotPromptTemplate
|
| 12 |
+
from langchain.prompts.prompt import PromptTemplate
|
| 13 |
+
from langchain.chains.sql_database.prompt import PROMPT_SUFFIX, _mysql_prompt
|
| 14 |
+
from sqlalchemy import create_engine
|
| 15 |
+
from project_prompts import sqlite_prompt
|
| 16 |
+
from few_shots import few_shots
|
| 17 |
+
import pandas as pd
|
| 18 |
+
import plotly
|
| 19 |
+
import plotly.express as px
|
| 20 |
+
from plotly.express import bar, line, scatter, area, pie
|
| 21 |
+
|
| 22 |
+
from dotenv import load_dotenv
|
| 23 |
+
load_dotenv()
|
| 24 |
+
|
| 25 |
+
def get_few_shot_db_chain(user_message):
    """Answer *user_message* against the ecomm SQLite DB via a few-shot SQL chain.

    Builds a semantic-similarity few-shot prompt from `few_shots`, runs a
    SQLDatabaseChain, re-executes the generated SQL with pandas, and returns a
    dict with keys: ``result_df``, ``sql_command``, ``response``, ``input`` and
    ``graph_data`` (a plotly figure or None). Returns ``None`` when no SQL
    command can be recovered from the chain's intermediate steps.
    """
    llm = AzureOpenAI(deployment_name="gpt-35-turbo-instruct", temperature=0.2)

    engine = create_engine("sqlite:///ecomm.db")
    db = SQLDatabase(engine=engine, sample_rows_in_table_info=3)

    embeddings = AzureOpenAIEmbeddings(model="text-embedding-3-small")

    # Each example is embedded as a single blob of its values for retrieval.
    to_vectorize = [" ".join(example.values()) for example in few_shots]
    # NOTE(review): the vector store is rebuilt (and examples re-embedded) on
    # every call; consider caching it at module level if latency/cost matters.
    vectorstore = Chroma.from_texts(to_vectorize, embeddings, metadatas=few_shots)
    example_selector = SemanticSimilarityExampleSelector(vectorstore=vectorstore, k=2)

    example_prompt = PromptTemplate(
        input_variables=["Question", "SQLQuery", "SQLResult", "Answer"],
        template="\nQuestion: {Question}\nSQLQuery: {SQLQuery}\nSQLResult: {SQLResult}\nAnswer: {Answer}",
    )

    few_shot_prompt = FewShotPromptTemplate(
        example_selector=example_selector,
        example_prompt=example_prompt,
        prefix=sqlite_prompt,
        suffix=PROMPT_SUFFIX,
        input_variables=["input", "table_info", "top_k"],
    )

    chain = SQLDatabaseChain.from_llm(
        llm, db, verbose=True, prompt=few_shot_prompt, return_intermediate_steps=True
    )

    response_llm = chain.invoke(user_message)

    print(f"sql query : {response_llm['intermediate_steps'][1]}")

    # Bug fix: the original assigned `intermediate_sql_query` only when
    # 'sql_cmd' was present but then used it unconditionally, raising a
    # NameError otherwise. Bail out explicitly instead.
    step = response_llm['intermediate_steps'][2]
    if 'sql_cmd' not in step:
        return None
    intermediate_sql_query = step['sql_cmd']

    # Re-run the generated SQL ourselves so the UI can show the raw table.
    result_df = pd.read_sql_query(intermediate_sql_query, engine)

    output_dict = {
        "result_df": result_df,
        "sql_command": intermediate_sql_query,
        "response": response_llm['result'],
        "input": response_llm['query'],
        # Only attempt a chart when there are at least 2 rows and 2 columns
        # (logical `or` instead of the original bitwise `|`).
        "graph_data": None if (result_df.shape[0] < 2 or result_df.shape[1] < 2)
        else get_graph_details(user_message, result_df),
    }

    return output_dict
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def get_graph_details(usermessage: str, df=None):
    """Try to produce a plotly Figure visualising *df* for the user's question.

    Strategy (as visible in this code): first let a pandas-dataframe agent
    attempt to build a figure; if no Figure appears among its intermediate
    steps, fall back to asking the LLM for a single line of plotly-express
    code and exec'ing it. Returns a plotly Figure, or None.

    NOTE(review): this function was reconstructed from a whitespace-mangled
    diff; the for/else pairing and the nesting of the exec block are the most
    plausible reading — confirm against the original file.
    """
    llm = AzureOpenAI(deployment_name="gpt-35-turbo-instruct", temperature=0.15)
    # First prompt: ask for plotly-express code with no display calls.
    # NOTE(review): whitespace inside these backslash-continued strings is part
    # of the prompt text sent to the model.
    template = ChatPromptTemplate.from_messages(
        [("system", "You are a visualisation expert and plotly developer, your task is to come up with best suitable \
chart representing user ask for the given data. please use plotly express library in python for \
charting purposes.. and provide code for generating the figure.. there should not be any displaying \
instructions..like fig.show() etc.."),
         ("human", "For the given dataframe below \
---------------------------------\
Dataframe = {dataframe} \
---------------------------------\
and user question \
---------------------------------\
user_ask = {question} \
----------------------------------\
Please provide the plotly chart which \
would be best suitable to represent the user ask graphically \
Please double check the code is not having any fig.show() or display commands"
          )]
    )

    customer_messages = template.format_messages(dataframe=df, question=usermessage)

    # Pandas-dataframe agent: may execute plotting code itself and surface a
    # Figure object in its intermediate (action, observation) steps.
    agent = create_pandas_dataframe_agent(
        llm,
        df,
        agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
        return_intermediate_steps=True
    )

    agent_response = agent.invoke(customer_messages)
    out_agent_response = agent_response['intermediate_steps']

    # Scan the agent's steps for a ready-made Figure and return the first one.
    for _, agent_code_reponse in out_agent_response:
        if isinstance(agent_code_reponse, plotly.graph_objects.Figure):
            fig = agent_code_reponse
            return fig

    else:
        # for/else: runs when the loop completes without returning a Figure
        # (the loop has no break, so this executes whenever no step held one).
        # Fallback: a few-shot chat prompt asking for exactly one line of code
        # of the form `fig = px.<chart>(...)`.
        template = ChatPromptTemplate.from_messages([
            ("system", "You are a visualisation expert and plotly developer, your task is to come up with best suitable \
chart representing user ask for the given data. please use plotly express library in python for \
charting purposes.. and provide code for generating the figure.. there should not be any displaying \
instructions..like fig.show() etc.."),
            ("human", "For the given dataframe below \
---------------------------------\
df = State Total_GDP\
0 Florida 7743.0\
1 Texas 9934.0\
2 New_York 6634.5\
3 Denver 4456.0\
4 Atlanta 993.5 \
---------------------------------\
and user question \
---------------------------------\
user_ask = What is the distribution of Total_GDP for each state? \
----------------------------------\
Please provide the code using plotly express in less than 30 words which should clearly satisfy user ask\
in terms of best representation of data. please use dataframe variable as 'df' and \
strictly output only one line of python code start your code with initializing a figure object \n\
like `fig = px.`"),
            ("ai", "bar(df, x='State', y='Total_GDP', title='Distribution of Total_GDP per State')"),
            ("human", "This is incorrect.. the required response should be \
`fig = plt.bar(df, x='Plant_Name', y='Total_Available_Days', title='Distribution of Available Days for Each Plant Name')`\
as it starts with `fig = plt.` as user specified"),
            ("ai", "Sounds good, now I will remember to start with `fig = plt.`"),
            ("human", "For the given dataframe below \
---------------------------------\
df = {dataframe} \
---------------------------------\
and user question \
---------------------------------\
user_ask = {question} \
----------------------------------\
Please provide the code using plotly express in less than 40 words which should clearly satisfy user ask\
in terms of best representation of data. please use dataframe variable as 'df' and \
strictly output only one line of python code start your code with initializing a figure object \n\
like `fig = px.`"),
        ])
        customer_messages = template.format_messages(dataframe=df, question=usermessage)
        print(f"This is the customer message : {customer_messages}")
        code_response_llm = llm.invoke(customer_messages)
        print(f"This is the code returned by LLM : {code_response_llm}")
        try:
            print("## Executing the code line generated by llm ##")

            # Only execute replies that actually assign a figure.
            # NOTE(review): exec() of LLM-generated code is a code-injection
            # risk — acceptable only for trusted/demo input.
            if "fig = " in code_response_llm:
                code_response_llm = code_response_llm.replace("AI: ", "")
                namespace = {'df': df}
                exec(code_response_llm, globals(), namespace)
                if 'fig' in namespace.keys():
                    print("fig is there returning fig>>>>>")
                    return namespace['fig']
                else:
                    return None
        except Exception as e:
            # Best-effort: any failure in generated code yields "no chart".
            print(f"Some exception occurred : {str(e)}")
            return None

    # Reached when the fallback reply contained no "fig = " assignment.
    return None
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
|
project_prompts.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
sqlite_prompt = """You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.
|
| 2 |
+
Unless the user specifies in the question a specific number of examples to obtain, query for at most 10 results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.
|
| 3 |
+
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
|
| 4 |
+
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
|
| 5 |
+
Pay attention to use date('now') function to get the current date, if the question involves "today".
|
| 6 |
+
|
| 7 |
+
Use the following format:
|
| 8 |
+
|
| 9 |
+
Question: Question here
|
| 10 |
+
SQLQuery: SQL Query to run
|
| 11 |
+
SQLResult: Result of the SQLQuery
|
| 12 |
+
Answer: Final answer here
|
| 13 |
+
|
| 14 |
+
Only use the following tables:
|
| 15 |
+
{table_info}
|
| 16 |
+
|
| 17 |
+
Question: {input}
|
| 18 |
+
|
| 19 |
+
If the final answer has a numerical value, convert it into words like 1234123 (One Million), only print whole number.
|
| 20 |
+
If the final answer has a numerical value with a decimal, print it without decimal values.
|
| 21 |
+
If the final answer has a numerical value and some units, print the number with units or metrics.
|
| 22 |
+
If the final answer has multiple decimal points reduce it into two decimal points, for example: if it is like 0.3933333333333333 then convert that into 0.39 and if it is like 161.5760959724 then convert into 161.5.
|
| 23 |
+
For month calculation from the existing table please use strftime formula NOT MONTH function.
|
| 24 |
+
"""
|
requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
langchain==0.1.16
|
| 2 |
+
langchain-community==0.0.34
|
| 3 |
+
langchain-core==0.1.45
|
| 4 |
+
langchain-experimental==0.0.57
|
| 5 |
+
langchain-openai==0.1.3
|
| 6 |
+
numpy==1.24.4
|
| 7 |
+
openai==1.23.2
|
| 8 |
+
pandas==2.0.3
|
| 9 |
+
SQLAlchemy==2.0.29
|
| 10 |
+
streamlit==1.33.0
|
| 11 |
+
python-dotenv
|
| 12 |
+
chromadb==0.3.29
|
| 13 |
+
plotly
|
| 14 |
+
tabulate
|
| 15 |
+
Faker
|