binaychandra commited on
Commit
adf2969
·
1 Parent(s): 06e5979

added project files

Browse files
Files changed (11) hide show
  1. .gitignore +160 -0
  2. .streamlit/config.toml +18 -0
  3. LICENSE +21 -0
  4. agent.ipynb +413 -0
  5. app.py +96 -0
  6. ecomm.db +0 -0
  7. fakedatagenerator.ipynb +680 -0
  8. few_shots.py +182 -0
  9. langchain_helper.py +181 -0
  10. project_prompts.py +24 -0
  11. requirements.txt +15 -0
.gitignore ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
.streamlit/config.toml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [theme]
2
+
3
+ # Primary accent for interactive elements
4
+ primaryColor = '#FFFF00'
5
+
6
+ # Background color for the main content area
7
+ backgroundColor = '#00172B'
8
+
9
+ # Background color for sidebar and most interactive widgets
10
+ secondaryBackgroundColor = '#000000'
11
+
12
+ # Color used for almost all text
13
+ textColor = '#FFFFFF'
14
+
15
+ # Font family for all text in the app, except code blocks
16
+ # Accepted values (serif | sans serif | monospace)
17
+ # Default: "sans serif"
18
+ font = "sans serif"
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 binaychandra
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
agent.ipynb ADDED
@@ -0,0 +1,413 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 4,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from langchain.agents.agent_types import AgentType\n",
10
+ "from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent\n",
11
+ "from langchain_openai import AzureOpenAI"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 2,
17
+ "metadata": {},
18
+ "outputs": [
19
+ {
20
+ "data": {
21
+ "text/plain": [
22
+ "True"
23
+ ]
24
+ },
25
+ "execution_count": 2,
26
+ "metadata": {},
27
+ "output_type": "execute_result"
28
+ }
29
+ ],
30
+ "source": [
31
+ "from dotenv import load_dotenv\n",
32
+ "load_dotenv()"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 5,
38
+ "metadata": {},
39
+ "outputs": [],
40
+ "source": [
41
+ "llm = AzureOpenAI(deployment_name=\"gpt-35-turbo-instruct\", temperature=0.6)"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": 6,
47
+ "metadata": {},
48
+ "outputs": [
49
+ {
50
+ "data": {
51
+ "text/html": [
52
+ "<div>\n",
53
+ "<style scoped>\n",
54
+ " .dataframe tbody tr th:only-of-type {\n",
55
+ " vertical-align: middle;\n",
56
+ " }\n",
57
+ "\n",
58
+ " .dataframe tbody tr th {\n",
59
+ " vertical-align: top;\n",
60
+ " }\n",
61
+ "\n",
62
+ " .dataframe thead th {\n",
63
+ " text-align: right;\n",
64
+ " }\n",
65
+ "</style>\n",
66
+ "<table border=\"1\" class=\"dataframe\">\n",
67
+ " <thead>\n",
68
+ " <tr style=\"text-align: right;\">\n",
69
+ " <th></th>\n",
70
+ " <th>PassengerId</th>\n",
71
+ " <th>Survived</th>\n",
72
+ " <th>Pclass</th>\n",
73
+ " <th>Name</th>\n",
74
+ " <th>Sex</th>\n",
75
+ " <th>Age</th>\n",
76
+ " <th>SibSp</th>\n",
77
+ " <th>Parch</th>\n",
78
+ " <th>Ticket</th>\n",
79
+ " <th>Fare</th>\n",
80
+ " <th>Cabin</th>\n",
81
+ " <th>Embarked</th>\n",
82
+ " </tr>\n",
83
+ " </thead>\n",
84
+ " <tbody>\n",
85
+ " <tr>\n",
86
+ " <th>0</th>\n",
87
+ " <td>1</td>\n",
88
+ " <td>0</td>\n",
89
+ " <td>3</td>\n",
90
+ " <td>Braund, Mr. Owen Harris</td>\n",
91
+ " <td>male</td>\n",
92
+ " <td>22.0</td>\n",
93
+ " <td>1</td>\n",
94
+ " <td>0</td>\n",
95
+ " <td>A/5 21171</td>\n",
96
+ " <td>7.2500</td>\n",
97
+ " <td>NaN</td>\n",
98
+ " <td>S</td>\n",
99
+ " </tr>\n",
100
+ " <tr>\n",
101
+ " <th>1</th>\n",
102
+ " <td>2</td>\n",
103
+ " <td>1</td>\n",
104
+ " <td>1</td>\n",
105
+ " <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
106
+ " <td>female</td>\n",
107
+ " <td>38.0</td>\n",
108
+ " <td>1</td>\n",
109
+ " <td>0</td>\n",
110
+ " <td>PC 17599</td>\n",
111
+ " <td>71.2833</td>\n",
112
+ " <td>C85</td>\n",
113
+ " <td>C</td>\n",
114
+ " </tr>\n",
115
+ " <tr>\n",
116
+ " <th>2</th>\n",
117
+ " <td>3</td>\n",
118
+ " <td>1</td>\n",
119
+ " <td>3</td>\n",
120
+ " <td>Heikkinen, Miss. Laina</td>\n",
121
+ " <td>female</td>\n",
122
+ " <td>26.0</td>\n",
123
+ " <td>0</td>\n",
124
+ " <td>0</td>\n",
125
+ " <td>STON/O2. 3101282</td>\n",
126
+ " <td>7.9250</td>\n",
127
+ " <td>NaN</td>\n",
128
+ " <td>S</td>\n",
129
+ " </tr>\n",
130
+ " <tr>\n",
131
+ " <th>3</th>\n",
132
+ " <td>4</td>\n",
133
+ " <td>1</td>\n",
134
+ " <td>1</td>\n",
135
+ " <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
136
+ " <td>female</td>\n",
137
+ " <td>35.0</td>\n",
138
+ " <td>1</td>\n",
139
+ " <td>0</td>\n",
140
+ " <td>113803</td>\n",
141
+ " <td>53.1000</td>\n",
142
+ " <td>C123</td>\n",
143
+ " <td>S</td>\n",
144
+ " </tr>\n",
145
+ " <tr>\n",
146
+ " <th>4</th>\n",
147
+ " <td>5</td>\n",
148
+ " <td>0</td>\n",
149
+ " <td>3</td>\n",
150
+ " <td>Allen, Mr. William Henry</td>\n",
151
+ " <td>male</td>\n",
152
+ " <td>35.0</td>\n",
153
+ " <td>0</td>\n",
154
+ " <td>0</td>\n",
155
+ " <td>373450</td>\n",
156
+ " <td>8.0500</td>\n",
157
+ " <td>NaN</td>\n",
158
+ " <td>S</td>\n",
159
+ " </tr>\n",
160
+ " </tbody>\n",
161
+ "</table>\n",
162
+ "</div>"
163
+ ],
164
+ "text/plain": [
165
+ " PassengerId Survived Pclass \\\n",
166
+ "0 1 0 3 \n",
167
+ "1 2 1 1 \n",
168
+ "2 3 1 3 \n",
169
+ "3 4 1 1 \n",
170
+ "4 5 0 3 \n",
171
+ "\n",
172
+ " Name Sex Age SibSp \\\n",
173
+ "0 Braund, Mr. Owen Harris male 22.0 1 \n",
174
+ "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
175
+ "2 Heikkinen, Miss. Laina female 26.0 0 \n",
176
+ "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
177
+ "4 Allen, Mr. William Henry male 35.0 0 \n",
178
+ "\n",
179
+ " Parch Ticket Fare Cabin Embarked \n",
180
+ "0 0 A/5 21171 7.2500 NaN S \n",
181
+ "1 0 PC 17599 71.2833 C85 C \n",
182
+ "2 0 STON/O2. 3101282 7.9250 NaN S \n",
183
+ "3 0 113803 53.1000 C123 S \n",
184
+ "4 0 373450 8.0500 NaN S "
185
+ ]
186
+ },
187
+ "execution_count": 6,
188
+ "metadata": {},
189
+ "output_type": "execute_result"
190
+ }
191
+ ],
192
+ "source": [
193
+ "import pandas as pd\n",
194
+ "\n",
195
+ "df = pd.read_csv(\n",
196
+ " \"https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/titanic.csv\"\n",
197
+ ")\n",
198
+ "df.head()"
199
+ ]
200
+ },
201
+ {
202
+ "cell_type": "code",
203
+ "execution_count": 14,
204
+ "metadata": {},
205
+ "outputs": [],
206
+ "source": [
207
+ "agent = create_pandas_dataframe_agent(\n",
208
+ " llm,\n",
209
+ " df,\n",
210
+ " verbose=True,\n",
211
+ " agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n",
212
+ " return_intermediate_steps=True\n",
213
+ ")"
214
+ ]
215
+ },
216
+ {
217
+ "cell_type": "code",
218
+ "execution_count": 17,
219
+ "metadata": {},
220
+ "outputs": [
221
+ {
222
+ "name": "stdout",
223
+ "output_type": "stream",
224
+ "text": [
225
+ "\n",
226
+ "\n",
227
+ "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
228
+ "\u001b[32;1m\u001b[1;3mThought: We need to filter the dataframe for rows where the \"Sex\" column is equal to \"female\" and then count the number of rows.\n",
229
+ "Action: python_repl_ast\n",
230
+ "Action Input: df[df[\"Sex\"] == \"female\"].count()\u001b[0m\u001b[36;1m\u001b[1;3mPassengerId 314\n",
231
+ "Survived 314\n",
232
+ "Pclass 314\n",
233
+ "Name 314\n",
234
+ "Sex 314\n",
235
+ "Age 261\n",
236
+ "SibSp 314\n",
237
+ "Parch 314\n",
238
+ "Ticket 314\n",
239
+ "Fare 314\n",
240
+ "Cabin 97\n",
241
+ "Embarked 312\n",
242
+ "dtype: int64\u001b[0m\u001b[32;1m\u001b[1;3m314 is the number of females in the dataframe, but we need to specify which column we want to count.\n",
243
+ "Action: python_repl_ast\n",
244
+ "Action Input: df[df[\"Sex\"] == \"female\"][\"Sex\"].count()\u001b[0m\u001b[36;1m\u001b[1;3m314\u001b[0m\u001b[32;1m\u001b[1;3m314 is the final answer to the original input question\n",
245
+ "Final Answer: There are 314 females in the dataframe.\u001b[0m\n",
246
+ "\n",
247
+ "\u001b[1m> Finished chain.\u001b[0m\n"
248
+ ]
249
+ },
250
+ {
251
+ "data": {
252
+ "text/plain": [
253
+ "{'input': 'how many females are there ',\n",
254
+ " 'output': 'There are 314 females in the dataframe.',\n",
255
+ " 'intermediate_steps': [(AgentAction(tool='python_repl_ast', tool_input='df[df[\"Sex\"] == \"female\"].count()', log='Thought: We need to filter the dataframe for rows where the \"Sex\" column is equal to \"female\" and then count the number of rows.\\nAction: python_repl_ast\\nAction Input: df[df[\"Sex\"] == \"female\"].count()'),\n",
256
+ " PassengerId 314\n",
257
+ " Survived 314\n",
258
+ " Pclass 314\n",
259
+ " Name 314\n",
260
+ " Sex 314\n",
261
+ " Age 261\n",
262
+ " SibSp 314\n",
263
+ " Parch 314\n",
264
+ " Ticket 314\n",
265
+ " Fare 314\n",
266
+ " Cabin 97\n",
267
+ " Embarked 312\n",
268
+ " dtype: int64),\n",
269
+ " (AgentAction(tool='python_repl_ast', tool_input='df[df[\"Sex\"] == \"female\"][\"Sex\"].count()', log='314 is the number of females in the dataframe, but we need to specify which column we want to count.\\nAction: python_repl_ast\\nAction Input: df[df[\"Sex\"] == \"female\"][\"Sex\"].count()'),\n",
270
+ " 314)]}"
271
+ ]
272
+ },
273
+ "execution_count": 17,
274
+ "metadata": {},
275
+ "output_type": "execute_result"
276
+ }
277
+ ],
278
+ "source": [
279
+ "outres = agent.invoke('how many females are there ')\n",
280
+ "outres"
281
+ ]
282
+ },
283
+ {
284
+ "cell_type": "code",
285
+ "execution_count": 32,
286
+ "metadata": {},
287
+ "outputs": [
288
+ {
289
+ "data": {
290
+ "text/plain": [
291
+ "'df[df[\"Sex\"] == \"female\"].count()'"
292
+ ]
293
+ },
294
+ "execution_count": 32,
295
+ "metadata": {},
296
+ "output_type": "execute_result"
297
+ }
298
+ ],
299
+ "source": [
300
+ "outres['intermediate_steps'][0][0].tool_input"
301
+ ]
302
+ },
303
+ {
304
+ "cell_type": "code",
305
+ "execution_count": 33,
306
+ "metadata": {},
307
+ "outputs": [
308
+ {
309
+ "ename": "SyntaxError",
310
+ "evalue": "invalid syntax (3216326457.py, line 1)",
311
+ "output_type": "error",
312
+ "traceback": [
313
+ "\u001b[1;36m Cell \u001b[1;32mIn[33], line 1\u001b[1;36m\u001b[0m\n\u001b[1;33m 'fig = px.line('x', 'y', param=skdfl);'\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
314
+ ]
315
+ }
316
+ ],
317
+ "source": [
318
+ "'fig = px.line('x', 'y', param=skdfl);'\n",
319
+ "fig'"
320
+ ]
321
+ },
322
+ {
323
+ "cell_type": "code",
324
+ "execution_count": 34,
325
+ "metadata": {},
326
+ "outputs": [],
327
+ "source": [
328
+ "import plotly.express as px\n",
329
+ "data_canada = px.data.gapminder().query(\"country == 'Canada'\")\n"
330
+ ]
331
+ },
332
+ {
333
+ "cell_type": "code",
334
+ "execution_count": 38,
335
+ "metadata": {},
336
+ "outputs": [],
337
+ "source": [
338
+ "exec(\"x = 'abc'; y =4\")"
339
+ ]
340
+ },
341
+ {
342
+ "cell_type": "code",
343
+ "execution_count": 40,
344
+ "metadata": {},
345
+ "outputs": [
346
+ {
347
+ "data": {
348
+ "text/plain": [
349
+ "('abc', 4)"
350
+ ]
351
+ },
352
+ "execution_count": 40,
353
+ "metadata": {},
354
+ "output_type": "execute_result"
355
+ }
356
+ ],
357
+ "source": [
358
+ "x, y"
359
+ ]
360
+ },
361
+ {
362
+ "cell_type": "code",
363
+ "execution_count": 36,
364
+ "metadata": {},
365
+ "outputs": [
366
+ {
367
+ "ename": "ValueError",
368
+ "evalue": "Mime type rendering requires nbformat>=4.2.0 but it is not installed",
369
+ "output_type": "error",
370
+ "traceback": [
371
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
372
+ "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
373
+ "Cell \u001b[1;32mIn[36], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshow\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
374
+ "File \u001b[1;32mc:\\Users\\PD817AE\\OneDrive - EY\\Desktop\\DataSc\\pepsico_chat\\.venv\\lib\\site-packages\\plotly\\basedatatypes.py:3410\u001b[0m, in \u001b[0;36mBaseFigure.show\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 3377\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 3378\u001b[0m \u001b[38;5;124;03mShow a figure using either the default renderer(s) or the renderer(s)\u001b[39;00m\n\u001b[0;32m 3379\u001b[0m \u001b[38;5;124;03mspecified by the renderer argument\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 3406\u001b[0m \u001b[38;5;124;03mNone\u001b[39;00m\n\u001b[0;32m 3407\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 3408\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mplotly\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mio\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpio\u001b[39;00m\n\u001b[1;32m-> 3410\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m pio\u001b[38;5;241m.\u001b[39mshow(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
375
+ "File \u001b[1;32mc:\\Users\\PD817AE\\OneDrive - EY\\Desktop\\DataSc\\pepsico_chat\\.venv\\lib\\site-packages\\plotly\\io\\_renderers.py:394\u001b[0m, in \u001b[0;36mshow\u001b[1;34m(fig, renderer, validate, **kwargs)\u001b[0m\n\u001b[0;32m 389\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 390\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMime type rendering requires ipython but it is not installed\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 391\u001b[0m )\n\u001b[0;32m 393\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m nbformat \u001b[38;5;129;01mor\u001b[39;00m Version(nbformat\u001b[38;5;241m.\u001b[39m__version__) \u001b[38;5;241m<\u001b[39m Version(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m4.2.0\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m--> 394\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 395\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMime type rendering requires nbformat>=4.2.0 but it is not installed\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 396\u001b[0m )\n\u001b[0;32m 398\u001b[0m ipython_display\u001b[38;5;241m.\u001b[39mdisplay(bundle, raw\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 400\u001b[0m \u001b[38;5;66;03m# external renderers\u001b[39;00m\n",
376
+ "\u001b[1;31mValueError\u001b[0m: Mime type rendering requires nbformat>=4.2.0 but it is not installed"
377
+ ]
378
+ }
379
+ ],
380
+ "source": [
381
+ "fig.show()"
382
+ ]
383
+ },
384
+ {
385
+ "cell_type": "code",
386
+ "execution_count": null,
387
+ "metadata": {},
388
+ "outputs": [],
389
+ "source": []
390
+ }
391
+ ],
392
+ "metadata": {
393
+ "kernelspec": {
394
+ "display_name": ".venv",
395
+ "language": "python",
396
+ "name": "python3"
397
+ },
398
+ "language_info": {
399
+ "codemirror_mode": {
400
+ "name": "ipython",
401
+ "version": 3
402
+ },
403
+ "file_extension": ".py",
404
+ "mimetype": "text/x-python",
405
+ "name": "python",
406
+ "nbconvert_exporter": "python",
407
+ "pygments_lexer": "ipython3",
408
+ "version": "3.9.13"
409
+ }
410
+ },
411
+ "nbformat": 4,
412
+ "nbformat_minor": 2
413
+ }
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-
"""
Streamlit chat front-end for the few-shot SQL/LLM analysis assistant.

Created on Thu Apr 25 18:00:03 2024

@author: MK529XT
"""

import streamlit as st
import string
import random
from langchain_helper import get_few_shot_db_chain
import plotly.figure_factory as ff
import numpy as np

#st.set_page_config(layout="wide")

# CSS for styling the page title banner (injected once per rerun).
st.markdown("""
<style>
.title {
    text-align: center;
    outline: solid yellow;
    font-size: 20px;
    font-family: Arial, Helvetica, sans-serif;
    color: #FFFFFF;
    padding-top: 5px;
    padding-bottom: 5px;
    #border-bottom: 2px solid #FFFF00;
    background-color: #050201;
}
</style>
""", unsafe_allow_html=True)

# Title section
st.markdown("<h1 class='title'>Manufacturing Process Analysis</h1>", unsafe_allow_html=True)

# One-time greeting bubble shown above the conversation.
with st.chat_message("assistant"):
    st.write("Hello 👋 How can I help you today?")
39
+
40
def random_string() -> dict:
    """Run the few-shot DB chain on the current chat input.

    Returns the chain's response dict. On any failure, returns a
    fallback dict of the same shape whose ``response`` field carries
    the error text, so the rendering loop never crashes.
    """
    question = st.session_state["chat_input"]
    try:
        return get_few_shot_db_chain(question)
    except Exception as err:
        # Degrade gracefully: surface the error as the bot's reply.
        return {
            "result_df": None,
            "sql_command": None,
            "response": f"LLM ran into issues : {str(err)}",
            "input": question,
            "graph_data": None,
        }
52
+
53
def chat_actions():
    """``on_submit`` callback: append the user's turn, then the bot's reply."""
    history = st.session_state["chat_history"]
    # Record the user's question exactly as typed into the chat box.
    history.append({"role": "user", "content": st.session_state["chat_input"]})
    # Record the assistant's answer (a response dict, or an error payload).
    history.append({"role": "assistant", "content": random_string()})
67
+
68
+
69
# Per-session chat history; Streamlit reruns the whole script on every
# interaction, so the list must be created exactly once.
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []

st.chat_input("Enter your question", on_submit=chat_actions, key="chat_input")

# Replay the whole conversation on every rerun.
# (Removed a leftover debug `print(type(i["content"]))` that spammed
# stdout on every rerun of the script.)
for i in st.session_state["chat_history"]:
    with st.chat_message(name=i["role"]):
        # User turns are stored as plain strings.
        if isinstance(i["content"], str):
            st.write(i["content"])

        # When this is llm or bot response #
        elif isinstance(i["content"], dict):
            #st.info(i["content"]["sql_command"])
            st.write(i["content"]["response"])
            result_df = i["content"]["result_df"]
            if i["content"]["graph_data"] is not None:
                # The chain produced a plotly figure: render it directly.
                st.plotly_chart(i["content"]["graph_data"], use_container_width=True)
            elif (result_df is not None) and ((result_df.shape[0] > 1) and (result_df.shape[1] > 1)):
                # Otherwise show multi-row/multi-column results as a table.
                st.plotly_chart(ff.create_table(result_df), use_container_width=True)
ecomm.db ADDED
Binary file (983 kB). View file
 
fakedatagenerator.ipynb ADDED
@@ -0,0 +1,680 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 43,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import faker\n",
10
+ "import pandas as pd\n",
11
+ "import random\n",
12
+ "import sqlite3"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": 18,
18
+ "metadata": {},
19
+ "outputs": [],
20
+ "source": [
21
+ "fake = faker.Faker()"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 28,
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "num_records = 250"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": 29,
36
+ "metadata": {},
37
+ "outputs": [],
38
+ "source": [
39
+ "# Generate dataset\n",
40
+ "customers_data = {\n",
41
+ " \"customer_id\": range(1, num_records + 1),\n",
42
+ " \"first_name\": [fake.first_name() for _ in range(num_records)],\n",
43
+ " \"last_name\": [fake.last_name() for _ in range(num_records)],\n",
44
+ " \"email\": [fake.email() for _ in range(num_records)],\n",
45
+ " \"phone_number\": [fake.phone_number() for _ in range(num_records)],\n",
46
+ " \"address\": [fake.street_address() for _ in range(num_records)],\n",
47
+ " \"city\": [fake.city() for _ in range(num_records)],\n",
48
+ " \"state\": [fake.state() for _ in range(num_records)],\n",
49
+ " \"zip_code\": [fake.zipcode() for _ in range(num_records)],\n",
50
+ " \"country\": [fake.country() for _ in range(num_records)],\n",
51
+ " \"date_of_birth\": [fake.date_of_birth().strftime(\"%Y-%m-%d\") for _ in range(num_records)],\n",
52
+ " \"gender\": [random.choice([\"Male\", \"Female\", \"Other\"]) for _ in range(num_records)]\n",
53
+ "}\n",
54
+ "\n",
55
+ "customers_df = pd.DataFrame(customers_data)"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": 31,
61
+ "metadata": {},
62
+ "outputs": [
63
+ {
64
+ "data": {
65
+ "text/html": [
66
+ "<div>\n",
67
+ "<style scoped>\n",
68
+ " .dataframe tbody tr th:only-of-type {\n",
69
+ " vertical-align: middle;\n",
70
+ " }\n",
71
+ "\n",
72
+ " .dataframe tbody tr th {\n",
73
+ " vertical-align: top;\n",
74
+ " }\n",
75
+ "\n",
76
+ " .dataframe thead th {\n",
77
+ " text-align: right;\n",
78
+ " }\n",
79
+ "</style>\n",
80
+ "<table border=\"1\" class=\"dataframe\">\n",
81
+ " <thead>\n",
82
+ " <tr style=\"text-align: right;\">\n",
83
+ " <th></th>\n",
84
+ " <th>customer_id</th>\n",
85
+ " <th>first_name</th>\n",
86
+ " <th>last_name</th>\n",
87
+ " <th>email</th>\n",
88
+ " <th>phone_number</th>\n",
89
+ " <th>address</th>\n",
90
+ " <th>city</th>\n",
91
+ " <th>state</th>\n",
92
+ " <th>zip_code</th>\n",
93
+ " <th>country</th>\n",
94
+ " <th>date_of_birth</th>\n",
95
+ " <th>gender</th>\n",
96
+ " </tr>\n",
97
+ " </thead>\n",
98
+ " <tbody>\n",
99
+ " <tr>\n",
100
+ " <th>0</th>\n",
101
+ " <td>1</td>\n",
102
+ " <td>Daniel</td>\n",
103
+ " <td>Day</td>\n",
104
+ " <td>hvalencia@example.net</td>\n",
105
+ " <td>(671)991-3668</td>\n",
106
+ " <td>2712 Matthew Course Apt. 519</td>\n",
107
+ " <td>Reginashire</td>\n",
108
+ " <td>Virginia</td>\n",
109
+ " <td>43739</td>\n",
110
+ " <td>Portugal</td>\n",
111
+ " <td>1955-02-11</td>\n",
112
+ " <td>Male</td>\n",
113
+ " </tr>\n",
114
+ " <tr>\n",
115
+ " <th>1</th>\n",
116
+ " <td>2</td>\n",
117
+ " <td>Lucas</td>\n",
118
+ " <td>Jimenez</td>\n",
119
+ " <td>jennifer95@example.org</td>\n",
120
+ " <td>694.215.1833</td>\n",
121
+ " <td>560 Victoria Shoals Apt. 465</td>\n",
122
+ " <td>Marshallmouth</td>\n",
123
+ " <td>Oklahoma</td>\n",
124
+ " <td>90653</td>\n",
125
+ " <td>Albania</td>\n",
126
+ " <td>1909-06-06</td>\n",
127
+ " <td>Female</td>\n",
128
+ " </tr>\n",
129
+ " <tr>\n",
130
+ " <th>2</th>\n",
131
+ " <td>3</td>\n",
132
+ " <td>Victoria</td>\n",
133
+ " <td>Willis</td>\n",
134
+ " <td>millersean@example.org</td>\n",
135
+ " <td>769-267-3445</td>\n",
136
+ " <td>58325 Buck Road Suite 830</td>\n",
137
+ " <td>South Pamelaborough</td>\n",
138
+ " <td>Oregon</td>\n",
139
+ " <td>73729</td>\n",
140
+ " <td>Lithuania</td>\n",
141
+ " <td>1925-09-12</td>\n",
142
+ " <td>Other</td>\n",
143
+ " </tr>\n",
144
+ " <tr>\n",
145
+ " <th>3</th>\n",
146
+ " <td>4</td>\n",
147
+ " <td>Austin</td>\n",
148
+ " <td>Carr</td>\n",
149
+ " <td>arnoldjennifer@example.com</td>\n",
150
+ " <td>874-821-2653x36986</td>\n",
151
+ " <td>01855 Peterson View Apt. 956</td>\n",
152
+ " <td>Potterton</td>\n",
153
+ " <td>Wyoming</td>\n",
154
+ " <td>80500</td>\n",
155
+ " <td>Dominica</td>\n",
156
+ " <td>1920-06-23</td>\n",
157
+ " <td>Other</td>\n",
158
+ " </tr>\n",
159
+ " <tr>\n",
160
+ " <th>4</th>\n",
161
+ " <td>5</td>\n",
162
+ " <td>Ethan</td>\n",
163
+ " <td>Martin</td>\n",
164
+ " <td>mark46@example.org</td>\n",
165
+ " <td>875-454-9228</td>\n",
166
+ " <td>617 Clayton Tunnel</td>\n",
167
+ " <td>Adamsport</td>\n",
168
+ " <td>Michigan</td>\n",
169
+ " <td>38936</td>\n",
170
+ " <td>Yemen</td>\n",
171
+ " <td>1985-03-13</td>\n",
172
+ " <td>Female</td>\n",
173
+ " </tr>\n",
174
+ " </tbody>\n",
175
+ "</table>\n",
176
+ "</div>"
177
+ ],
178
+ "text/plain": [
179
+ " customer_id first_name last_name email \\\n",
180
+ "0 1 Daniel Day hvalencia@example.net \n",
181
+ "1 2 Lucas Jimenez jennifer95@example.org \n",
182
+ "2 3 Victoria Willis millersean@example.org \n",
183
+ "3 4 Austin Carr arnoldjennifer@example.com \n",
184
+ "4 5 Ethan Martin mark46@example.org \n",
185
+ "\n",
186
+ " phone_number address city \\\n",
187
+ "0 (671)991-3668 2712 Matthew Course Apt. 519 Reginashire \n",
188
+ "1 694.215.1833 560 Victoria Shoals Apt. 465 Marshallmouth \n",
189
+ "2 769-267-3445 58325 Buck Road Suite 830 South Pamelaborough \n",
190
+ "3 874-821-2653x36986 01855 Peterson View Apt. 956 Potterton \n",
191
+ "4 875-454-9228 617 Clayton Tunnel Adamsport \n",
192
+ "\n",
193
+ " state zip_code country date_of_birth gender \n",
194
+ "0 Virginia 43739 Portugal 1955-02-11 Male \n",
195
+ "1 Oklahoma 90653 Albania 1909-06-06 Female \n",
196
+ "2 Oregon 73729 Lithuania 1925-09-12 Other \n",
197
+ "3 Wyoming 80500 Dominica 1920-06-23 Other \n",
198
+ "4 Michigan 38936 Yemen 1985-03-13 Female "
199
+ ]
200
+ },
201
+ "execution_count": 31,
202
+ "metadata": {},
203
+ "output_type": "execute_result"
204
+ }
205
+ ],
206
+ "source": [
207
+ "customers_df.head()"
208
+ ]
209
+ },
210
+ {
211
+ "cell_type": "code",
212
+ "execution_count": 25,
213
+ "metadata": {},
214
+ "outputs": [],
215
+ "source": [
216
+ "# Set seed for reproducibility\n",
217
+ "random.seed(42)\n",
218
+ "\n",
219
+ "# Define number of records\n",
220
+ "num_records = 1000\n",
221
+ "\n",
222
+ "# Generate dataset\n",
223
+ "items_data = {\n",
224
+ " \"id\": range(1, num_records + 1),\n",
225
+ " \"product_name\": [fake.catch_phrase() for _ in range(num_records)],\n",
226
+ " \"description\": [fake.paragraph(nb_sentences=3) for _ in range(num_records)],\n",
227
+ " \"price\": [round(random.uniform(10.0, 100.0), 2) for _ in range(num_records)],\n",
228
+ " \"category\": [random.choice([\"Electronics\", \"Fashion\", \"Home Goods\", \"Sports\", \"Toys\"]) for _ in range(num_records)],\n",
229
+ " \"sub_category\": [\n",
230
+ " random.choice([\n",
231
+ " \"Smartphones\", \"Laptops\", \"Tablets\",\n",
232
+ " \"Women's Clothing\", \"Men's Clothing\", \"Kids' Clothing\",\n",
233
+ " \"Kitchen Appliances\", \"Home Decor\", \"Furniture\",\n",
234
+ " \"Fitness Equipment\", \"Outdoor Gear\", \"Toys & Games\"\n",
235
+ " ]) for _ in range(num_records)\n",
236
+ " ],\n",
237
+ " \"brand\": [fake.company() for _ in range(num_records)],\n",
238
+ " \"rating\": [round(random.uniform(1.0, 5.0), 1) for _ in range(num_records)],\n",
239
+ " \"num_reviews\": [random.randint(1, 100) for _ in range(num_records)],\n",
240
+ " \"stock_quantity\": [random.randint(1, 100) for _ in range(num_records)],\n",
241
+ " \"seller_name\": [fake.name() for _ in range(num_records)],\n",
242
+ " \"shipping_weight\": [round(random.uniform(1.0, 10.0), 2) for _ in range(num_records)],\n",
243
+ " \"shipping_dimension\": [\n",
244
+ " f\"{random.randint(6, 20)} x {random.randint(4, 12)} x {random.randint(2, 8)}\"\n",
245
+ " for _ in range(num_records)\n",
246
+ " ]\n",
247
+ "}\n",
248
+ "\n",
249
+ "items_df = pd.DataFrame(items_data)"
250
+ ]
251
+ },
252
+ {
253
+ "cell_type": "code",
254
+ "execution_count": 26,
255
+ "metadata": {},
256
+ "outputs": [
257
+ {
258
+ "data": {
259
+ "text/html": [
260
+ "<div>\n",
261
+ "<style scoped>\n",
262
+ " .dataframe tbody tr th:only-of-type {\n",
263
+ " vertical-align: middle;\n",
264
+ " }\n",
265
+ "\n",
266
+ " .dataframe tbody tr th {\n",
267
+ " vertical-align: top;\n",
268
+ " }\n",
269
+ "\n",
270
+ " .dataframe thead th {\n",
271
+ " text-align: right;\n",
272
+ " }\n",
273
+ "</style>\n",
274
+ "<table border=\"1\" class=\"dataframe\">\n",
275
+ " <thead>\n",
276
+ " <tr style=\"text-align: right;\">\n",
277
+ " <th></th>\n",
278
+ " <th>id</th>\n",
279
+ " <th>product_name</th>\n",
280
+ " <th>description</th>\n",
281
+ " <th>price</th>\n",
282
+ " <th>category</th>\n",
283
+ " <th>sub_category</th>\n",
284
+ " <th>brand</th>\n",
285
+ " <th>rating</th>\n",
286
+ " <th>num_reviews</th>\n",
287
+ " <th>stock_quantity</th>\n",
288
+ " <th>seller_name</th>\n",
289
+ " <th>shipping_weight</th>\n",
290
+ " <th>shipping_dimension</th>\n",
291
+ " </tr>\n",
292
+ " </thead>\n",
293
+ " <tbody>\n",
294
+ " <tr>\n",
295
+ " <th>0</th>\n",
296
+ " <td>1</td>\n",
297
+ " <td>Ergonomic bottom-line framework</td>\n",
298
+ " <td>Kind stay kid song dream. Yourself would scene...</td>\n",
299
+ " <td>67.55</td>\n",
300
+ " <td>Electronics</td>\n",
301
+ " <td>Men's Clothing</td>\n",
302
+ " <td>Gonzalez, Jones and Hanson</td>\n",
303
+ " <td>3.2</td>\n",
304
+ " <td>52</td>\n",
305
+ " <td>9</td>\n",
306
+ " <td>Kathryn Hansen</td>\n",
307
+ " <td>2.31</td>\n",
308
+ " <td>18 x 6 x 6</td>\n",
309
+ " </tr>\n",
310
+ " <tr>\n",
311
+ " <th>1</th>\n",
312
+ " <td>2</td>\n",
313
+ " <td>Reduced high-level customer loyalty</td>\n",
314
+ " <td>Nothing free around expert decade. Great view ...</td>\n",
315
+ " <td>12.25</td>\n",
316
+ " <td>Home Goods</td>\n",
317
+ " <td>Toys &amp; Games</td>\n",
318
+ " <td>Walker-Love</td>\n",
319
+ " <td>1.7</td>\n",
320
+ " <td>52</td>\n",
321
+ " <td>34</td>\n",
322
+ " <td>Breanna Allison</td>\n",
323
+ " <td>1.40</td>\n",
324
+ " <td>14 x 7 x 2</td>\n",
325
+ " </tr>\n",
326
+ " <tr>\n",
327
+ " <th>2</th>\n",
328
+ " <td>3</td>\n",
329
+ " <td>Phased holistic capacity</td>\n",
330
+ " <td>Fire usually high manage tend available.</td>\n",
331
+ " <td>34.75</td>\n",
332
+ " <td>Toys</td>\n",
333
+ " <td>Laptops</td>\n",
334
+ " <td>Nelson-Morrison</td>\n",
335
+ " <td>2.8</td>\n",
336
+ " <td>59</td>\n",
337
+ " <td>29</td>\n",
338
+ " <td>Allen Hernandez</td>\n",
339
+ " <td>8.36</td>\n",
340
+ " <td>12 x 12 x 5</td>\n",
341
+ " </tr>\n",
342
+ " <tr>\n",
343
+ " <th>3</th>\n",
344
+ " <td>4</td>\n",
345
+ " <td>Quality-focused 6thgeneration matrix</td>\n",
346
+ " <td>Capital onto into eat unit church take ground....</td>\n",
347
+ " <td>30.09</td>\n",
348
+ " <td>Home Goods</td>\n",
349
+ " <td>Kids' Clothing</td>\n",
350
+ " <td>Sullivan, Clark and Larson</td>\n",
351
+ " <td>4.0</td>\n",
352
+ " <td>35</td>\n",
353
+ " <td>48</td>\n",
354
+ " <td>Joseph Hayden</td>\n",
355
+ " <td>2.80</td>\n",
356
+ " <td>19 x 7 x 6</td>\n",
357
+ " </tr>\n",
358
+ " <tr>\n",
359
+ " <th>4</th>\n",
360
+ " <td>5</td>\n",
361
+ " <td>Visionary systemic array</td>\n",
362
+ " <td>Woman former wind bill red authority. Police s...</td>\n",
363
+ " <td>76.28</td>\n",
364
+ " <td>Electronics</td>\n",
365
+ " <td>Home Decor</td>\n",
366
+ " <td>Evans PLC</td>\n",
367
+ " <td>4.1</td>\n",
368
+ " <td>50</td>\n",
369
+ " <td>11</td>\n",
370
+ " <td>John Mcdowell</td>\n",
371
+ " <td>4.36</td>\n",
372
+ " <td>13 x 11 x 4</td>\n",
373
+ " </tr>\n",
374
+ " </tbody>\n",
375
+ "</table>\n",
376
+ "</div>"
377
+ ],
378
+ "text/plain": [
379
+ " id product_name \\\n",
380
+ "0 1 Ergonomic bottom-line framework \n",
381
+ "1 2 Reduced high-level customer loyalty \n",
382
+ "2 3 Phased holistic capacity \n",
383
+ "3 4 Quality-focused 6thgeneration matrix \n",
384
+ "4 5 Visionary systemic array \n",
385
+ "\n",
386
+ " description price category \\\n",
387
+ "0 Kind stay kid song dream. Yourself would scene... 67.55 Electronics \n",
388
+ "1 Nothing free around expert decade. Great view ... 12.25 Home Goods \n",
389
+ "2 Fire usually high manage tend available. 34.75 Toys \n",
390
+ "3 Capital onto into eat unit church take ground.... 30.09 Home Goods \n",
391
+ "4 Woman former wind bill red authority. Police s... 76.28 Electronics \n",
392
+ "\n",
393
+ " sub_category brand rating num_reviews \\\n",
394
+ "0 Men's Clothing Gonzalez, Jones and Hanson 3.2 52 \n",
395
+ "1 Toys & Games Walker-Love 1.7 52 \n",
396
+ "2 Laptops Nelson-Morrison 2.8 59 \n",
397
+ "3 Kids' Clothing Sullivan, Clark and Larson 4.0 35 \n",
398
+ "4 Home Decor Evans PLC 4.1 50 \n",
399
+ "\n",
400
+ " stock_quantity seller_name shipping_weight shipping_dimension \n",
401
+ "0 9 Kathryn Hansen 2.31 18 x 6 x 6 \n",
402
+ "1 34 Breanna Allison 1.40 14 x 7 x 2 \n",
403
+ "2 29 Allen Hernandez 8.36 12 x 12 x 5 \n",
404
+ "3 48 Joseph Hayden 2.80 19 x 7 x 6 \n",
405
+ "4 11 John Mcdowell 4.36 13 x 11 x 4 "
406
+ ]
407
+ },
408
+ "execution_count": 26,
409
+ "metadata": {},
410
+ "output_type": "execute_result"
411
+ }
412
+ ],
413
+ "source": [
414
+ "items_df.head()"
415
+ ]
416
+ },
417
+ {
418
+ "cell_type": "code",
419
+ "execution_count": 32,
420
+ "metadata": {},
421
+ "outputs": [],
422
+ "source": [
423
+ "# Define number of orders\n",
424
+ "num_orders = 5000\n",
425
+ "\n",
426
+ "# Generate orders dataset\n",
427
+ "data = {\n",
428
+ " \"order_id\": range(1, num_orders + 1),\n",
429
+ " \"customer_id\": [random.choice(customers_df[\"customer_id\"]) for _ in range(num_orders)],\n",
430
+ " \"product_id\": [random.choice(items_df[\"id\"]) for _ in range(num_orders)],\n",
431
+ " \"order_date\": [fake.date_time_between(start_date=\"-2y\", end_date=\"now\").strftime(\"%Y-%m-%d %H:%M:%S\") for _ in range(num_orders)],\n",
432
+ " \"order_status\": [random.choice([\"Pending\", \"Shipped\", \"Delivered\", \"Cancelled\"]) for _ in range(num_orders)],\n",
433
+ " \"payment_method\": [random.choice([\"Credit Card\", \"PayPal\", \"Bank Transfer\"]) for _ in range(num_orders)],\n",
434
+ " \"total_amount\": [round(random.uniform(10.0, 100.0), 2) for _ in range(num_orders)],\n",
435
+ " \"shipping_address\": [fake.street_address() for _ in range(num_orders)],\n",
436
+ " \"shipping_city\": [fake.city() for _ in range(num_orders)],\n",
437
+ " \"shipping_state\": [fake.state() for _ in range(num_orders)],\n",
438
+ " \"shipping_zip\": [fake.zipcode() for _ in range(num_orders)],\n",
439
+ " \"shipping_country\": [fake.country() for _ in range(num_orders)]\n",
440
+ "}\n",
441
+ "\n",
442
+ "orders_df = pd.DataFrame(data)"
443
+ ]
444
+ },
445
+ {
446
+ "cell_type": "code",
447
+ "execution_count": 33,
448
+ "metadata": {},
449
+ "outputs": [
450
+ {
451
+ "data": {
452
+ "text/html": [
453
+ "<div>\n",
454
+ "<style scoped>\n",
455
+ " .dataframe tbody tr th:only-of-type {\n",
456
+ " vertical-align: middle;\n",
457
+ " }\n",
458
+ "\n",
459
+ " .dataframe tbody tr th {\n",
460
+ " vertical-align: top;\n",
461
+ " }\n",
462
+ "\n",
463
+ " .dataframe thead th {\n",
464
+ " text-align: right;\n",
465
+ " }\n",
466
+ "</style>\n",
467
+ "<table border=\"1\" class=\"dataframe\">\n",
468
+ " <thead>\n",
469
+ " <tr style=\"text-align: right;\">\n",
470
+ " <th></th>\n",
471
+ " <th>order_id</th>\n",
472
+ " <th>customer_id</th>\n",
473
+ " <th>product_id</th>\n",
474
+ " <th>order_date</th>\n",
475
+ " <th>order_status</th>\n",
476
+ " <th>payment_method</th>\n",
477
+ " <th>total_amount</th>\n",
478
+ " <th>shipping_address</th>\n",
479
+ " <th>shipping_city</th>\n",
480
+ " <th>shipping_state</th>\n",
481
+ " <th>shipping_zip</th>\n",
482
+ " <th>shipping_country</th>\n",
483
+ " </tr>\n",
484
+ " </thead>\n",
485
+ " <tbody>\n",
486
+ " <tr>\n",
487
+ " <th>0</th>\n",
488
+ " <td>1</td>\n",
489
+ " <td>85</td>\n",
490
+ " <td>506</td>\n",
491
+ " <td>2024-07-03 08:05:03</td>\n",
492
+ " <td>Pending</td>\n",
493
+ " <td>Credit Card</td>\n",
494
+ " <td>54.40</td>\n",
495
+ " <td>140 Edwards Overpass</td>\n",
496
+ " <td>Kingtown</td>\n",
497
+ " <td>Kansas</td>\n",
498
+ " <td>05046</td>\n",
499
+ " <td>British Virgin Islands</td>\n",
500
+ " </tr>\n",
501
+ " <tr>\n",
502
+ " <th>1</th>\n",
503
+ " <td>2</td>\n",
504
+ " <td>88</td>\n",
505
+ " <td>270</td>\n",
506
+ " <td>2024-09-21 12:08:46</td>\n",
507
+ " <td>Shipped</td>\n",
508
+ " <td>Bank Transfer</td>\n",
509
+ " <td>54.55</td>\n",
510
+ " <td>811 Blair Glen Apt. 318</td>\n",
511
+ " <td>Port Andrew</td>\n",
512
+ " <td>New Jersey</td>\n",
513
+ " <td>46407</td>\n",
514
+ " <td>Liberia</td>\n",
515
+ " </tr>\n",
516
+ " <tr>\n",
517
+ " <th>2</th>\n",
518
+ " <td>3</td>\n",
519
+ " <td>63</td>\n",
520
+ " <td>89</td>\n",
521
+ " <td>2024-04-28 09:50:13</td>\n",
522
+ " <td>Shipped</td>\n",
523
+ " <td>PayPal</td>\n",
524
+ " <td>38.34</td>\n",
525
+ " <td>35571 Debra Stravenue</td>\n",
526
+ " <td>Warrenhaven</td>\n",
527
+ " <td>Louisiana</td>\n",
528
+ " <td>78358</td>\n",
529
+ " <td>Maldives</td>\n",
530
+ " </tr>\n",
531
+ " <tr>\n",
532
+ " <th>3</th>\n",
533
+ " <td>4</td>\n",
534
+ " <td>53</td>\n",
535
+ " <td>886</td>\n",
536
+ " <td>2024-03-03 22:47:52</td>\n",
537
+ " <td>Pending</td>\n",
538
+ " <td>Bank Transfer</td>\n",
539
+ " <td>46.67</td>\n",
540
+ " <td>45222 Karen Trace Apt. 530</td>\n",
541
+ " <td>Nicoleland</td>\n",
542
+ " <td>North Dakota</td>\n",
543
+ " <td>91684</td>\n",
544
+ " <td>United States Minor Outlying Islands</td>\n",
545
+ " </tr>\n",
546
+ " <tr>\n",
547
+ " <th>4</th>\n",
548
+ " <td>5</td>\n",
549
+ " <td>139</td>\n",
550
+ " <td>141</td>\n",
551
+ " <td>2024-02-06 20:16:53</td>\n",
552
+ " <td>Shipped</td>\n",
553
+ " <td>Bank Transfer</td>\n",
554
+ " <td>11.09</td>\n",
555
+ " <td>61721 Perez Walks Apt. 244</td>\n",
556
+ " <td>Lake Curtischester</td>\n",
557
+ " <td>New York</td>\n",
558
+ " <td>22193</td>\n",
559
+ " <td>Bangladesh</td>\n",
560
+ " </tr>\n",
561
+ " </tbody>\n",
562
+ "</table>\n",
563
+ "</div>"
564
+ ],
565
+ "text/plain": [
566
+ " order_id customer_id product_id order_date order_status \\\n",
567
+ "0 1 85 506 2024-07-03 08:05:03 Pending \n",
568
+ "1 2 88 270 2024-09-21 12:08:46 Shipped \n",
569
+ "2 3 63 89 2024-04-28 09:50:13 Shipped \n",
570
+ "3 4 53 886 2024-03-03 22:47:52 Pending \n",
571
+ "4 5 139 141 2024-02-06 20:16:53 Shipped \n",
572
+ "\n",
573
+ " payment_method total_amount shipping_address \\\n",
574
+ "0 Credit Card 54.40 140 Edwards Overpass \n",
575
+ "1 Bank Transfer 54.55 811 Blair Glen Apt. 318 \n",
576
+ "2 PayPal 38.34 35571 Debra Stravenue \n",
577
+ "3 Bank Transfer 46.67 45222 Karen Trace Apt. 530 \n",
578
+ "4 Bank Transfer 11.09 61721 Perez Walks Apt. 244 \n",
579
+ "\n",
580
+ " shipping_city shipping_state shipping_zip \\\n",
581
+ "0 Kingtown Kansas 05046 \n",
582
+ "1 Port Andrew New Jersey 46407 \n",
583
+ "2 Warrenhaven Louisiana 78358 \n",
584
+ "3 Nicoleland North Dakota 91684 \n",
585
+ "4 Lake Curtischester New York 22193 \n",
586
+ "\n",
587
+ " shipping_country \n",
588
+ "0 British Virgin Islands \n",
589
+ "1 Liberia \n",
590
+ "2 Maldives \n",
591
+ "3 United States Minor Outlying Islands \n",
592
+ "4 Bangladesh "
593
+ ]
594
+ },
595
+ "execution_count": 33,
596
+ "metadata": {},
597
+ "output_type": "execute_result"
598
+ }
599
+ ],
600
+ "source": [
601
+ "orders_df.head()"
602
+ ]
603
+ },
604
+ {
605
+ "cell_type": "markdown",
606
+ "metadata": {},
607
+ "source": [
608
+ "Save the dataframe to SQLite"
609
+ ]
610
+ },
611
+ {
612
+ "cell_type": "code",
613
+ "execution_count": 56,
614
+ "metadata": {},
615
+ "outputs": [],
616
+ "source": [
617
+ "# Create a connection to the SQLite database\n",
618
+ "conn = sqlite3.connect('ecomm.db')\n",
619
+ "\n",
620
+ "# Save the DataFrame to the SQLite database\n",
621
+ "customers_df.to_sql('customer_details', conn, if_exists='replace', index=False)\n",
622
+ "items_df.to_sql('items', conn, if_exists='replace', index=False)\n",
623
+ "orders_df.to_sql('orders', conn, if_exists='replace', index=False)\n",
624
+ "\n",
625
+ "# Close the connection\n",
626
+ "conn.close()"
627
+ ]
628
+ },
629
+ {
630
+ "cell_type": "markdown",
631
+ "metadata": {},
632
+ "source": [
633
+ "Deleting customers table from database"
634
+ ]
635
+ },
636
+ {
637
+ "cell_type": "code",
638
+ "execution_count": 57,
639
+ "metadata": {},
640
+ "outputs": [],
641
+ "source": [
642
+ "# Establish a connection to the database\n",
643
+ "conn = sqlite3.connect('ecomm.db')\n",
644
+ "\n",
645
+ "# Create a cursor object\n",
646
+ "cur = conn.cursor()\n",
647
+ "\n",
648
+ "# Delete the table\n",
649
+ "cur.execute('DROP TABLE customers')\n",
650
+ "\n",
651
+ "# Commit the changes\n",
652
+ "conn.commit()\n",
653
+ "\n",
654
+ "# Close the connection\n",
655
+ "conn.close()"
656
+ ]
657
+ }
658
+ ],
659
+ "metadata": {
660
+ "kernelspec": {
661
+ "display_name": ".venv",
662
+ "language": "python",
663
+ "name": "python3"
664
+ },
665
+ "language_info": {
666
+ "codemirror_mode": {
667
+ "name": "ipython",
668
+ "version": 3
669
+ },
670
+ "file_extension": ".py",
671
+ "mimetype": "text/x-python",
672
+ "name": "python",
673
+ "nbconvert_exporter": "python",
674
+ "pygments_lexer": "ipython3",
675
+ "version": "3.9.13"
676
+ }
677
+ },
678
+ "nbformat": 4,
679
+ "nbformat_minor": 2
680
+ }
few_shots.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ few_shots = [
2
+ {
3
+ 'Question': "Which customers have the highest aggregated purchase amount?",
4
+ 'SQLQuery': """
5
+ SELECT c.customer_id, c.first_name, c.last_name, SUM(o.total_amount) as total_purchase
6
+ FROM customers c
7
+ JOIN orders o ON c.customer_id = o.customer_id
8
+ GROUP BY c.customer_id
9
+ ORDER BY total_purchase DESC
10
+ LIMIT 1;
11
+ """,
12
+ 'SQLResult': "(123, 'John', 'Doe', 543.21)",
13
+ 'Answer': "John Doe with customer ID 123 has the highest aggregated purchase amount of $543.21."
14
+ },
15
+ {
16
+ 'Question': "What is the total revenue generated by all orders?",
17
+ 'SQLQuery': """
18
+ SELECT SUM(total_amount) as total_revenue
19
+ FROM orders;
20
+ """,
21
+ 'SQLResult': "(10000.00)",
22
+ 'Answer': "The total revenue generated by all orders is $10,000.00."
23
+ },
24
+ {
25
+ 'Question': "Which product has been ordered the most?",
26
+ 'SQLQuery': """
27
+ SELECT p.product_name, COUNT(o.product_id) as order_count
28
+ FROM orders o
29
+ JOIN products p ON o.product_id = p.id
30
+ GROUP BY o.product_id
31
+ ORDER BY order_count DESC
32
+ LIMIT 1;
33
+ """,
34
+ 'SQLResult': "('iPhone 13', 50)",
35
+ 'Answer': "The iPhone 13 has been ordered the most, with 50 orders."
36
+ },
37
+ {
38
+ 'Question': "What is the average order value?",
39
+ 'SQLQuery': """
40
+ SELECT AVG(total_amount) as average_order_value
41
+ FROM orders;
42
+ """,
43
+ 'SQLResult': "(50.00)",
44
+ 'Answer': "The average order value is $50.00."
45
+ },
46
+ {
47
+ 'Question': "Which customer has placed the most orders?",
48
+ 'SQLQuery': """
49
+ SELECT c.customer_id, c.first_name, c.last_name, COUNT(o.order_id) as order_count
50
+ FROM customers c
51
+ JOIN orders o ON c.customer_id = o.customer_id
52
+ GROUP BY c.customer_id
53
+ ORDER BY order_count DESC
54
+ LIMIT 1;
55
+ """,
56
+ 'SQLResult': "(123, 'John', 'Doe', 10)",
57
+ 'Answer': "John Doe with customer ID 123 has placed the most orders, with 10 orders."
58
+ },
59
+ {
60
+ 'Question': "What is the total number of unique customers?",
61
+ 'SQLQuery': """
62
+ SELECT COUNT(DISTINCT customer_id) as unique_customers
63
+ FROM orders;
64
+ """,
65
+ 'SQLResult': "(500)",
66
+ 'Answer': "There are 500 unique customers."
67
+ },
68
+ {
69
+ 'Question': "What is the most popular payment method?",
70
+ 'SQLQuery': """
71
+ SELECT payment_method, COUNT(order_id) as order_count
72
+ FROM orders
73
+ GROUP BY payment_method
74
+ ORDER BY order_count DESC
75
+ LIMIT 1;
76
+ """,
77
+ 'SQLResult': "('Credit Card', 300)",
78
+ 'Answer': "The most popular payment method is Credit Card, used in 300 orders."
79
+ },
80
+ {
81
+ 'Question': "Which product category has the highest total revenue?",
82
+ 'SQLQuery': """
83
+ SELECT p.category, SUM(o.total_amount) as total_revenue
84
+ FROM orders o
85
+ JOIN products p ON o.product_id = p.id
86
+ GROUP BY p.category
87
+ ORDER BY total_revenue DESC
88
+ LIMIT 1;
89
+ """,
90
+ 'SQLResult': "('Electronics', 5000.00)",
91
+ 'Answer': "The Electronics category has the highest total revenue of $5,000.00."
92
+ },
93
+ {
94
+ 'Question': "What is the average shipping time for orders?",
95
+ 'SQLQuery': """
96
+ SELECT AVG(DATEDIFF(delivery_date, order_date)) as average_shipping_time
97
+ FROM orders;
98
+ """,
99
+ 'SQLResult': "(3.5)",
100
+ 'Answer': "The average shipping time for orders is 3.5 days."
101
+ },
102
+ {
103
+ 'Question': "Which customer has the highest average order value?",
104
+ 'SQLQuery': """
105
+ SELECT c.customer_id, c.first_name, c.last_name, AVG(o.total_amount) as average_order_value
106
+ FROM customers c
107
+ JOIN orders o ON c.customer_id = o.customer_id
108
+ GROUP BY c.customer_id
109
+ ORDER BY average_order_value DESC
110
+ LIMIT 1;
111
+ """,
112
+ 'SQLResult': "(123, 'John', 'Doe', 100.00)",
113
+ 'Answer': "John Doe with customer ID 123 has the highest average order value of $100.00."
114
+ },
115
+ {
116
+ 'Question': "What is the total number of orders by country?",
117
+ 'SQLQuery': """
118
+ SELECT c.country, COUNT(o.order_id) as order_count
119
+ FROM customers c
120
+ JOIN orders o ON c.customer_id = o.customer_id
121
+ GROUP BY c.country;
122
+ """,
123
+ 'SQLResult': "([('USA', 200), ('Canada', 100), ('Mexico', 50)])",
124
+ 'Answer': "There are 200 orders from the USA, 100 orders from Canada, and 50 orders from Mexico."
125
+ },
126
+ {
127
+ 'Question': "Which product has the highest profit margin?",
128
+ 'SQLQuery': """
129
+ SELECT p.product_name, (p.price - p.cost) / p.price as profit_margin
130
+ FROM products p
131
+ ORDER BY profit_margin DESC
132
+ LIMIT 1;
133
+ """,
134
+ 'SQLResult': "('iPhone 13', 0.30)",
135
+ 'Answer': "The iPhone 13 has the highest profit margin of 30%."
136
+ },
137
+ {
138
+ 'Question': "What is the total revenue by month?",
139
+ 'SQLQuery': """
140
+ SELECT MONTH(o.order_date) as month, SUM(o.total_amount) as total_revenue
141
+ FROM orders o
142
+ GROUP BY MONTH(o.order_date);
143
+ """,
144
+ 'SQLResult': "([(1, 1000.00), (2, 1200.00), (3, 1500.00)])",
145
+ 'Answer': "The total revenue for January is $1,000.00, February is $1,200.00, and March is $1,500.00."
146
+ },
147
+ {
148
+ 'Question': "Which customer has placed orders in the most categories?",
149
+ 'SQLQuery': """
150
+ SELECT c.customer_id, c.first_name, c.last_name, COUNT(DISTINCT p.category) as category_count
151
+ FROM customers c
152
+ JOIN orders o ON c.customer_id = o.customer_id
153
+ JOIN products p ON o.product_id = p.id
154
+ GROUP BY c.customer_id
155
+ ORDER BY category_count DESC
156
+ LIMIT 1;
157
+ """,
158
+ 'SQLResult': "(123, 'John', 'Doe', 5)",
159
+ 'Answer': "John Doe with customer ID 123 has placed orders in 5 different categories."
160
+ },
161
+ {
162
+ 'Question': "What is the average order value by payment method?",
163
+ 'SQLQuery': """
164
+ SELECT o.payment_method, AVG(o.total_amount) as average_order_value
165
+ FROM orders o
166
+ GROUP BY o.payment_method;
167
+ """,
168
+ 'SQLResult': "([('Credit Card', 50.00), ('PayPal', 40.00), ('Bank Transfer', 60.00)])",
169
+ 'Answer': "The average order value for Credit Card is $50.00, PayPal is $40.00, and Bank Transfer is $60.00."
170
+ },
171
+ {
172
+ "Question": "how many orders were cancelled on monthly basis",
173
+ 'SQLQuery': """
174
+ SELECT strftime('%m', order_date) as month, COUNT(order_id) as cancelled_orders
175
+ FROM orders
176
+ WHERE order_status = 'Cancelled'
177
+ GROUP BY month;
178
+ """,
179
+ "SQLResult": "[('01', 108), ('02', 94), ('03', 111), ('04', 104), ('05', 108), ('06', 90), ('07', 117), ('08', 91), ('09', 102), ('10', 90), ('11', 103), ('12', 108)]",
180
+ "Answer": "There were 108 cancelled orders in January, 94 in February, 111 in March, 104 in April, 108 in May, 90 in June, 117 in July, 91 in August, 102 in September, 90 in October, 103 in November, and 108 in December."
181
+ }
182
+ ]
langchain_helper.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain_openai import AzureOpenAI
3
+ from langchain_core.prompts import ChatPromptTemplate
4
+ from langchain.agents.agent_types import AgentType
5
+ from langchain_experimental.agents import create_pandas_dataframe_agent
6
+ from langchain_community.utilities import SQLDatabase
7
+ from langchain_experimental.sql import SQLDatabaseChain
8
+ from langchain.prompts import SemanticSimilarityExampleSelector
9
+ from langchain_openai import AzureOpenAIEmbeddings
10
+ from langchain_community.vectorstores import Chroma
11
+ from langchain.prompts import FewShotPromptTemplate
12
+ from langchain.prompts.prompt import PromptTemplate
13
+ from langchain.chains.sql_database.prompt import PROMPT_SUFFIX, _mysql_prompt
14
+ from sqlalchemy import create_engine
15
+ from project_prompts import sqlite_prompt
16
+ from few_shots import few_shots
17
+ import pandas as pd
18
+ import plotly
19
+ import plotly.express as px
20
+ from plotly.express import bar, line, scatter, area, pie
21
+
22
+ from dotenv import load_dotenv
23
+ load_dotenv()
24
+
25
def get_few_shot_db_chain(user_message):
    """Answer `user_message` against ecomm.db via a few-shot SQLDatabaseChain.

    Builds a semantic-similarity few-shot prompt from `few_shots`, runs the
    SQL chain, re-executes the generated SQL with pandas, and returns a dict:
    result_df, sql_command, response, input, graph_data (a plotly figure or
    None).  Returns None when the chain produced no extractable SQL command.
    """
    llm = AzureOpenAI(deployment_name="gpt-35-turbo-instruct", temperature=0.2)

    engine = create_engine("sqlite:///ecomm.db")
    db = SQLDatabase(engine=engine, sample_rows_in_table_info=3)

    embeddings = AzureOpenAIEmbeddings(model="text-embedding-3-small")

    # Concatenate every field of each example so semantic search can match
    # on question wording as well as on SQL/answer content.
    to_vectorize = [" ".join(example.values()) for example in few_shots]
    vectorstore = Chroma.from_texts(to_vectorize, embeddings, metadatas=few_shots)
    example_selector = SemanticSimilarityExampleSelector(vectorstore=vectorstore, k=2)

    example_prompt = PromptTemplate(
        input_variables=["Question", "SQLQuery", "SQLResult", "Answer"],
        template="\nQuestion: {Question}\nSQLQuery: {SQLQuery}\nSQLResult: {SQLResult}\nAnswer: {Answer}",
    )

    few_shot_prompt = FewShotPromptTemplate(
        example_selector=example_selector,
        example_prompt=example_prompt,
        prefix=sqlite_prompt,
        suffix=PROMPT_SUFFIX,
        input_variables=["input", "table_info", "top_k"],
    )

    chain = SQLDatabaseChain.from_llm(
        llm, db, verbose=True, prompt=few_shot_prompt, return_intermediate_steps=True
    )

    response_llm = chain.invoke(user_message)

    print(f"sql query : {response_llm['intermediate_steps'][1]}")
    step = response_llm['intermediate_steps'][2]
    if 'sql_cmd' not in step:
        # Bug fix: the original read intermediate_sql_query unconditionally,
        # raising NameError whenever no SQL command was extracted.
        return None
    intermediate_sql_query = step['sql_cmd']

    # Re-run the generated SQL ourselves so the caller gets a DataFrame.
    result_df = pd.read_sql_query(intermediate_sql_query, engine)

    return {
        "result_df": result_df,
        "sql_command": intermediate_sql_query,
        "response": response_llm['result'],
        "input": response_llm['query'],
        # A chart only makes sense with at least 2 rows and 2 columns.
        "graph_data": None if (result_df.shape[0] < 2 or result_df.shape[1] < 2)
        else get_graph_details(user_message, result_df),
    }
71
+
72
+
73
def get_graph_details(usermessage: str, df=None):
    """Return a plotly Figure visualising `df` for the user's question, or None.

    Strategy:
      1. Run a pandas-dataframe agent; if any intermediate step already
         produced a plotly Figure, return it directly.
      2. Otherwise ask the LLM for a single line of plotly-express code
         (``fig = px....``) and exec() it against `df`.
    """
    llm = AzureOpenAI(deployment_name="gpt-35-turbo-instruct", temperature=0.15)
    template = ChatPromptTemplate.from_messages(
        [("system", "You are a visualisation expert and plotly developer, your task is to come up with best suitable \
            chart representing user ask for the given data. please use plotly express library in python for \
            charting purposes.. and provide code for generating the figure.. there should not be any displaying \
            instructions..like fig.show() etc.."),
         ("human", "For the given dataframe below \
            ---------------------------------\
            Dataframe = {dataframe} \
            ---------------------------------\
            and user question \
            ---------------------------------\
            user_ask = {question} \
            ----------------------------------\
            Please provide the plotly chart which \
            would be best suitable to represent the user ask graphically \
            Please double check the code is not having any fig.show() or display commands"
          )]
    )

    customer_messages = template.format_messages(dataframe=df, question=usermessage)

    agent = create_pandas_dataframe_agent(
        llm,
        df,
        agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
        return_intermediate_steps=True,
    )

    agent_response = agent.invoke(customer_messages)

    # If the agent already built a plotly figure in one of its tool steps,
    # use it directly.
    for _, step_output in agent_response['intermediate_steps']:
        if isinstance(step_output, plotly.graph_objects.Figure):
            return step_output

    # Fallback: ask for exactly one line of plotly-express code and exec it.
    # Bug fix: the original few-shot correction messages told the model to
    # start with `fig = plt.` (matplotlib) while every other instruction
    # demands `fig = px.`; the "correct" example also referenced an unrelated
    # Plant_Name dataframe.  Both are now consistent with the State example.
    template = ChatPromptTemplate.from_messages([
        ("system", "You are a visualisation expert and plotly developer, your task is to come up with best suitable \
            chart representing user ask for the given data. please use plotly express library in python for \
            charting purposes.. and provide code for generating the figure.. there should not be any displaying \
            instructions..like fig.show() etc.."),
        ("human", "For the given dataframe below \
            ---------------------------------\
            df = State Total_GDP\
            0 Florida 7743.0\
            1 Texas 9934.0\
            2 New_York 6634.5\
            3 Denver 4456.0\
            4 Atlanta 993.5 \
            ---------------------------------\
            and user question \
            ---------------------------------\
            user_ask = What is the distribution of Total_GDP for each state? \
            ----------------------------------\
            Please provide the code using plotly express in less than 30 words which should clearly satisfy user ask\
            in terms of best representation of data. please use dataframe variable as 'df' and \
            strictly output only one line of python code start your code with initializing a figure object \n\
            like `fig = px.`"),
        ("ai", "bar(df, x='State', y='Total_GDP', title='Distribution of Total_GDP per State')"),
        ("human", "This is incorrect.. the required response should be \
            `fig = px.bar(df, x='State', y='Total_GDP', title='Distribution of Total_GDP per State')`\
            as it starts with `fig = px.` as user specified"),
        ("ai", "Sounds good, now I will remember to start with `fig = px.`"),
        ("human", "For the given dataframe below \
            ---------------------------------\
            df = {dataframe} \
            ---------------------------------\
            and user question \
            ---------------------------------\
            user_ask = {question} \
            ----------------------------------\
            Please provide the code using plotly express in less than 40 words which should clearly satisfy user ask\
            in terms of best representation of data. please use dataframe variable as 'df' and \
            strictly output only one line of python code start your code with initializing a figure object \n\
            like `fig = px.`"),
    ])
    customer_messages = template.format_messages(dataframe=df, question=usermessage)
    print(f"This is the customer message : {customer_messages}")
    code_response_llm = llm.invoke(customer_messages)
    print(f"This is the code returned by LLM : {code_response_llm}")
    try:
        print("## Executing the code line generated by llm ##")
        if "fig = " in code_response_llm:
            code_response_llm = code_response_llm.replace("AI: ", "")
            namespace = {'df': df}
            # SECURITY: exec() of LLM-generated code runs unsandboxed in this
            # process; consider a restricted evaluator before production use.
            exec(code_response_llm, globals(), namespace)
            if 'fig' in namespace:
                print("fig is there returning fig>>>>>")
                return namespace['fig']
        return None
    except Exception as e:
        print(f"Some exception occurred : {str(e)}")
        return None
174
+
175
+
176
+
177
+
178
+
179
+
180
+
181
+
project_prompts.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prefix prompt for the few-shot SQLDatabaseChain (see langchain_helper.py).
# It instructs the model to produce SQLite-dialect SQL over the ecomm.db
# tables injected via {table_info}, and appends output-formatting rules for
# the final natural-language answer.  {input} is the user's question; the
# Question/SQLQuery/SQLResult/Answer format must match the few-shot examples.
sqlite_prompt = """You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most 10 results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use date('now') function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result of the SQLQuery
Answer: Final answer here

Only use the following tables:
{table_info}

Question: {input}

If the final answer has a numerical value, convert it into words like 1234123 (One Million), only print whole number.
If the final answer has a numerical value with a decimal, print it without decimal values.
If the final answer has a numerical value and some units, print the number with units or metrics.
If the final answer has multiple decimal points reduce it into two decimal points, for example: if it is like 0.3933333333333333 then convert that into 0.39 and if it is like 161.5760959724 then convert into 161.5.
For month calculation from the existing table please use strftime formula NOT MONTH function.
"""
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain==0.1.16
2
+ langchain-community==0.0.34
3
+ langchain-core==0.1.45
4
+ langchain-experimental==0.0.57
5
+ langchain-openai==0.1.3
6
+ numpy==1.24.4
7
+ openai==1.23.2
8
+ pandas==2.0.3
9
+ SQLAlchemy==2.0.29
10
+ streamlit==1.33.0
11
+ python-dotenv
12
+ chromadb==0.3.29
13
+ plotly
14
+ tabulate
15
+ Faker