Spaces:
Sleeping
Sleeping
| import os | |
| from dotenv import load_dotenv | |
| from langchain_openai import ChatOpenAI | |
| import pandas as pd | |
| from sqlalchemy import create_engine | |
| # load environment variables from .env file | |
| load_dotenv() | |
| openai_key = os.getenv("OPENAI_API_KEY") | |
| llm_name = "gpt-3.5-turbo" | |
| model = ChatOpenAI(api_key=openai_key, model=llm_name) | |
| # read csv file | |
| df = pd.read_csv("Struct Data_Data Science 100K.csv") | |
| from langchain.agents import create_sql_agent | |
| from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit | |
| from langchain_community.utilities import SQLDatabase | |
| # Create db from csv file | |
| # Path to your SQLite database file | |
| database_file_path = "data_science_100k.db" | |
| # Create an engine to connect to the SQLite database | |
| # SQLite only requires the path to the database file | |
| engine = create_engine(f"sqlite:///{database_file_path}") | |
| file_url = "./Struct Data_Data Science 100K.csv" | |
| # os.makedirs(os.path.dirname(database_file_path), exist_ok=True) | |
| df = pd.read_csv(file_url) | |
| df.to_sql("DataScience100k", con=engine, if_exists="replace", index=False) | |
| print(f"Database created successfully! {df}") | |
| # db = SQLDatabase.from_uri(f"sqlite:///{database_file_path}") | |
| # toolkit = SQLDatabaseToolkit(db=db, llm=model) | |
| # QUESTION = """How many data scietists are their and their avg salaries, and also how many of them are from US""" | |
| # sql_agent = create_sql_agent( | |
| # toolkit=toolkit, | |
| # llm=model, | |
| # verbose=True | |
| # ) | |
| # sql_agent.invoke(QUESTION) | |
| # res = sql_agent.invoke(QUESTION) | |
| # # print(res) | |
| # Part 2 : Prepare the sql prompt | |
| MSSQL_AGENT_PREFIX = """ | |
| You are an agent designed to interact with a SQL database. | |
| ## Instructions: | |
| - Given an input question, create a syntactically correct {dialect} query | |
| to run, then look at the results of the query and return the answer. | |
| - Unless the user specifies a specific number of examples they wish to | |
| obtain, **ALWAYS** limit your query to at most {top_k} results. | |
| - You can order the results by a relevant column to return the most | |
| interesting examples in the database. | |
| - Never query for all the columns from a specific table, only ask for | |
| the relevant columns given the question. | |
| - You have access to tools for interacting with the database. | |
| - You MUST double check your query before executing it.If you get an error | |
| while executing a query,rewrite the query and try again. | |
| - DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) | |
| to the database. | |
| - DO NOT MAKE UP AN ANSWER OR USE PRIOR KNOWLEDGE, ONLY USE THE RESULTS | |
| OF THE CALCULATIONS YOU HAVE DONE. | |
| - Your response should be in Markdown. However, **when running a SQL Query | |
| in "Action Input", do not include the markdown backticks**. | |
| Those are only for formatting the response, not for executing the command. | |
| - ALWAYS, as part of your final answer, explain how you got to the answer | |
| on a section that starts with: "Explanation:". Include the SQL query as | |
| part of the explanation section. | |
| - If the question does not seem related to the database, just return | |
| "I don\'t know" as the answer. | |
| - Only use the below tools. Only use the information returned by the | |
| below tools to construct your query and final answer. | |
| - Do not make up table names, only use the tables returned by any of the | |
| tools below. | |
| - as part of your final answer, please include the SQL query you used in json format or code format | |
| ## Tools: | |
| """ | |
| MSSQL_AGENT_FORMAT_INSTRUCTIONS = """ | |
| ## Use the following format: | |
| Question: the input question you must answer. | |
| Thought: you should always think about what to do. | |
| Action: the action to take, should be one of [{tool_names}]. | |
| Action Input: the input to the action. | |
| Observation: the result of the action. | |
| (this Thought/Action/Action Input/Observation can repeat N times) | |
| Thought: I now know the final answer. | |
| Final Answer: the final answer to the original input question. | |
| Example of Final Answer: | |
| <=== Beginning of example | |
| Action: query_sql_db | |
| Action Input: | |
| SELECT TOP (10) [base_salary], [grade] | |
| FROM salaries_2023 | |
| WHERE state = 'Division' | |
| Observation: | |
| [(27437.0,), (27088.0,), (26762.0,), (26521.0,), (26472.0,), (26421.0,), (26408.0,)] | |
| Thought:I now know the final answer | |
| Final Answer: There were 27437 workers making 100,000. | |
| Explanation: | |
| I queried the `xyz` table for the `salary` column where the department | |
| is 'IGM' and the date starts with '2020'. The query returned a list of tuples | |
| with the bazse salary for each day in 2020. To answer the question, | |
| I took the sum of all the salaries in the list, which is 27437. | |
| I used the following query | |
| ```sql | |
| SELECT [salary] FROM xyztable WHERE department = 'IGM' AND date LIKE '2020%'" | |
| ``` | |
| ===> End of Example | |
| """ | |
| db = SQLDatabase.from_uri(f"sqlite:///{database_file_path}") | |
| toolkit = SQLDatabaseToolkit(db=db, llm=model) | |
| # QUESTION = """How many data scietists are their and their avg salaries, and also how many of them are from US""" | |
| sql_agent = create_sql_agent( | |
| prefix=MSSQL_AGENT_PREFIX, | |
| format_instructions=MSSQL_AGENT_FORMAT_INSTRUCTIONS, | |
| toolkit=toolkit, | |
| llm=model, | |
| tok_k=30, | |
| verbose=True | |
| ) | |
| # res = sql_agent.invoke(QUESTION) | |
| import streamlit as st | |
| st.title("SQL Query AI Agent") | |
| question = st.text_input("Enter your query:") | |
| if st.button("Run Query"): | |
| if question: | |
| res = sql_agent.invoke(question) | |
| st.markdown(res["output"]) | |
| else: | |
| st.error("Please Enter a Query.") |