Spaces:

Nikhil-Murade
/

TalentEdge-AI-Agent

Sleeping

App Files Files Community

Nikhil-Murade committed on Sep 24, 2024

Commit

3d51c0d

verified ·

1 Parent(s): 4d59c5e

Upload 3 files

Browse files

Files changed (3) hide show

data_science_100k.db +0 -0
requirements.txt +82 -0
sql_agent_db.py +166 -0

data_science_100k.db ADDED Viewed

Binary file (209 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,82 @@

+aiohappyeyeballs==2.4.0
+aiohttp==3.10.5
+aiosignal==1.3.1
+altair==5.4.1
+annotated-types==0.7.0
+anyio==4.4.0
+async-timeout==4.0.3
+attrs==24.2.0
+blinker==1.8.2
+cachetools==5.5.0
+certifi==2024.8.30
+charset-normalizer==3.3.2
+click==8.1.7
+dataclasses-json==0.6.7
+distro==1.9.0
+dnspython==2.6.1
+exceptiongroup==1.2.2
+frozenlist==1.4.1
+gitdb==4.0.11
+GitPython==3.1.43
+greenlet==3.1.0
+h11==0.14.0
+httpcore==1.0.5
+httpx==0.27.2
+idna==3.10
+Jinja2==3.1.4
+jsonpatch==1.33
+jsonpointer==3.0.0
+jsonschema==4.23.0
+jsonschema-specifications==2023.12.1
+langchain==0.1.6
+langchain-community==0.0.20
+langchain-core==0.1.23
+langchain-experimental==0.0.49
+langchain-openai==0.0.5
+langsmith==0.0.87
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+marshmallow==3.22.0
+mdurl==0.1.2
+multidict==6.1.0
+mypy-extensions==1.0.0
+narwhals==1.8.1
+numpy==1.26.4
+openai==1.12.0
+packaging==23.2
+pandas==2.2.2
+pillow==10.4.0
+protobuf==5.28.1
+pyarrow==17.0.0
+pydantic==2.9.1
+pydantic_core==2.23.3
+pydeck==0.9.1
+Pygments==2.18.0
+pymongo==4.8.0
+pyodbc==5.1.0
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+pytz==2024.2
+PyYAML==6.0.2
+referencing==0.35.1
+regex==2024.9.11
+requests==2.32.3
+rich==13.8.1
+rpds-py==0.20.0
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.1
+SQLAlchemy==2.0.30
+streamlit==1.38.0
+tabulate==0.9.0
+tenacity==8.5.0
+tiktoken==0.5.2
+toml==0.10.2
+tornado==6.4.1
+tqdm==4.66.5
+typing-inspect==0.9.0
+typing_extensions==4.12.2
+tzdata==2024.1
+urllib3==2.2.3
+watchdog==4.0.2
+yarl==1.11.1

sql_agent_db.py ADDED Viewed

	@@ -0,0 +1,166 @@

+import os
+from dotenv import load_dotenv
+from langchain_openai import ChatOpenAI
+import pandas as pd
+from sqlalchemy import create_engine
+# load environment variables from .env file
+load_dotenv()
+openai_key = os.getenv("OPENAI_API_KEY")
+llm_name = "gpt-3.5-turbo"
+model = ChatOpenAI(api_key=openai_key, model=llm_name)
+# read csv file
+df = pd.read_csv("Struct Data_Data Science 100K.csv")
+from langchain.agents import create_sql_agent
+from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
+from langchain_community.utilities import SQLDatabase
+# Create db from csv file
+# Path to your SQLite database file
+database_file_path = "./db/data_science_100k.db"
+# Create an engine to connect to the SQLite database
+# SQLite only requires the path to the database file
+engine = create_engine(f"sqlite:///{database_file_path}")
+file_url = "./ds_salaries.csv"
+os.makedirs(os.path.dirname(database_file_path), exist_ok=True)
+df = pd.read_csv(file_url)
+df.to_sql("DataScience100k", con=engine, if_exists="replace", index=False)
+print(f"Database created successfully! {df}")
+# db = SQLDatabase.from_uri(f"sqlite:///{database_file_path}")
+# toolkit = SQLDatabaseToolkit(db=db, llm=model)
+# QUESTION = """How many data scientists are there and their avg salaries, and also how many of them are from US"""
+# sql_agent = create_sql_agent(
+#     toolkit=toolkit,
+#     llm=model,
+#     verbose=True
+# )
+# sql_agent.invoke(QUESTION)
+# res = sql_agent.invoke(QUESTION)
+# # print(res)
+# Part 2 : Prepare the sql prompt
+# Prefix of the agent prompt: the behavioural rules the SQL agent must follow.
+# It is passed as `prefix=` to create_sql_agent further down and ends with a
+# "## Tools:" heading.
+# The text contains {dialect} and {top_k} placeholders.
+# NOTE(review): presumably create_sql_agent fills both placeholders, so any
+# literal braces added to this text would need escaping — confirm against the
+# installed langchain version.
+# NOTE(review): the constant is named MSSQL_*, but the database opened further
+# down is SQLite.
+MSSQL_AGENT_PREFIX = """
+You are an agent designed to interact with a SQL database.
+## Instructions:
+- Given an input question, create a syntactically correct {dialect} query
+to run, then look at the results of the query and return the answer.
+- Unless the user specifies a specific number of examples they wish to
+obtain, **ALWAYS** limit your query to at most {top_k} results.
+- You can order the results by a relevant column to return the most
+interesting examples in the database.
+- Never query for all the columns from a specific table, only ask for
+the relevant columns given the question.
+- You have access to tools for interacting with the database.
+- You MUST double check your query before executing it.If you get an error
+while executing a query,rewrite the query and try again.
+- DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.)
+to the database.
+- DO NOT MAKE UP AN ANSWER OR USE PRIOR KNOWLEDGE, ONLY USE THE RESULTS
+OF THE CALCULATIONS YOU HAVE DONE.
+- Your response should be in Markdown. However, **when running  a SQL Query
+in "Action Input", do not include the markdown backticks**.
+Those are only for formatting the response, not for executing the command.
+- ALWAYS, as part of your final answer, explain how you got to the answer
+on a section that starts with: "Explanation:". Include the SQL query as
+part of the explanation section.
+- If the question does not seem related to the database, just return
+"I don\'t know" as the answer.
+- Only use the below tools. Only use the information returned by the
+below tools to construct your query and final answer.
+- Do not make up table names, only use the tables returned by any of the
+tools below.
+- as part of your final answer, please include the SQL query you used in json format or code format
+## Tools:
+"""
+MSSQL_AGENT_FORMAT_INSTRUCTIONS = """
+## Use the following format:
+Question: the input question you must answer.
+Thought: you should always think about what to do.
+Action: the action to take, should be one of [{tool_names}].
+Action Input: the input to the action.
+Observation: the result of the action.
+... (this Thought/Action/Action Input/Observation can repeat N times)
+Thought: I now know the final answer.
+Final Answer: the final answer to the original input question.
+Example of Final Answer:
+<=== Beginning of example
+Action: query_sql_db
+Action Input:
+SELECT TOP (10) [base_salary], [grade]
+FROM salaries_2023
+WHERE state = 'Division'
+Observation:
+[(27437.0,), (27088.0,), (26762.0,), (26521.0,), (26472.0,), (26421.0,), (26408.0,)]
+Thought:I now know the final answer
+Final Answer: There were 27437 workers making 100,000.
+Explanation:
+I queried the `xyz` table for the `salary` column where the department
+is 'IGM' and the date starts with '2020'. The query returned a list of tuples
+with the bazse salary for each day in 2020. To answer the question,
+I took the sum of all the salaries in the list, which is 27437.
+I used the following query
+```sql
+SELECT [salary] FROM xyztable WHERE department = 'IGM' AND date LIKE '2020%'"
+```
+===> End of Example
+"""
+db = SQLDatabase.from_uri(f"sqlite:///{database_file_path}")
+toolkit = SQLDatabaseToolkit(db=db, llm=model)
+# QUESTION = """How many data scietists are their and their avg salaries, and also how many of them are from US"""
+sql_agent = create_sql_agent(
+    prefix=MSSQL_AGENT_PREFIX,
+    format_instructions=MSSQL_AGENT_FORMAT_INSTRUCTIONS,
+    toolkit=toolkit,
+    llm=model,
+    tok_k=30,
+    verbose=True
+)
+# res = sql_agent.invoke(QUESTION)
+import streamlit as st
+st.title("SQL Query AI Agent")
+question = st.text_input("Enter your query:")
+if st.button("Run Query"):
+    if question:
+        res = sql_agent.invoke(question)
+        st.markdown(res["output"])
+else:
+    st.error("Please Enter a Query.")