Omkar1872 committed on
Commit
3743009
·
verified ·
1 Parent(s): 557eef3

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +125 -0
  2. model_api.py +46 -0
  3. questions.txt +32 -0
  4. requirements.txt +7 -3
  5. sample_data.csv +11 -0
app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import sqlite3
3
+ import streamlit as st
4
+ import tempfile
5
+ from model_api import query_hf_model # Hugging Face API wrapper
6
+
7
+ # -----------------------------
8
+ # Connect to SQLite
9
+ # -----------------------------
10
@st.cache_resource
def connect_sqlite(db_path="data.db"):
    """Open the SQLite database at *db_path* and return the connection.

    The function is wrapped in ``st.cache_resource``. The connection is
    opened with ``check_same_thread=False``.

    Args:
        db_path: Path of the SQLite database file.

    Returns:
        The ``sqlite3`` connection, or ``None`` if connecting raised (the
        error is shown in the Streamlit UI).
    """
    try:
        return sqlite3.connect(db_path, check_same_thread=False)
    except Exception as exc:
        st.error(f"Error connecting to SQLite: {exc}")
    return None
18
+
19
+ # -----------------------------
20
+ # Load CSV to SQLite
21
+ # -----------------------------
22
def load_csv_to_sqlite(file, conn):
    """Read a CSV and store it as the ``csv_data`` table, replacing any old one.

    Args:
        file: Anything ``pandas.read_csv`` accepts (path or file-like object).
        conn: Open SQLite connection that receives the table.

    Success and failure are both reported through the Streamlit UI; nothing
    is returned and no exception escapes from the load itself.
    """
    try:
        pd.read_csv(file).to_sql(
            name="csv_data",
            con=conn,
            if_exists="replace",
            index=False,
        )
        st.success("CSV data loaded into SQLite successfully.")
    except Exception as exc:
        st.error(f"Error loading CSV: {exc}")
29
+
30
+ # -----------------------------
31
+ # Generate SQL query using HF API
32
+ # -----------------------------
33
def generate_query(user_input, conn):
    """Turn a plain-English request into a SQLite SELECT statement.

    The column names of the ``csv_data`` table are read from *conn*, embedded
    in a prompt together with *user_input*, and sent to the Hugging Face
    model through ``query_hf_model``.

    Args:
        user_input: The user's request in natural language.
        conn: Open SQLite connection holding the ``csv_data`` table.

    Returns:
        The generated SQL string, or ``None`` when no table is loaded, the
        token is missing, the model call fails, or the model output is not a
        SELECT statement. Every failure is reported through ``st.error``.
    """
    try:
        # Column names are the second field of each PRAGMA table_info row.
        cursor = conn.cursor()
        cursor.execute("PRAGMA table_info(csv_data)")
        field_names = [row[1] for row in cursor.fetchall()]
        if not field_names:
            # No table (or an empty schema): asking the model would only
            # produce a query that cannot run.
            st.error("No data loaded. Please upload a CSV file first.")
            return None

        # The query is executed by SQLite, so the prompt must ask for the
        # SQLite dialect; MySQL-only functions such as YEAR() do not exist
        # there and make the generated query fail at execution time.
        prompt = f"""
        You are a SQLite expert. Only respond with a SQLite SELECT query in this exact format:
        SELECT column1, column2 FROM csv_data WHERE condition;

        Rules:
        - Use only these fields from the 'csv_data' table: {field_names}
        - All field names are case-sensitive.
        - String values must be in single quotes.
        - For GROUP BY queries, do not include non-aggregated columns in SELECT unless they are also in GROUP BY.
        - If extracting year from a date column (e.g., LaunchDate), use strftime('%Y', column) in SQLite.
        - Assume all dates are stored as TEXT in the format 'YYYY-MM-DD'.

        User request:
        \"{user_input}\"
        """

        # .get() returns None for a missing key, which makes the friendly
        # message below reachable; plain indexing raised before it could run.
        hf_token = st.secrets.get("HF_TOKEN")
        if not hf_token:
            st.error("HF_TOKEN not found. Please add it in Streamlit secrets.")
            return None

        query = query_hf_model(prompt, hf_token)

        # Safety check: only allow SELECT queries (also rejects empty output).
        if not query or not query.lower().strip().startswith("select"):
            st.error("Generated SQL is not a SELECT query. Aborting.")
            return None

        return query
    except Exception as e:
        st.error(f"Error generating query: {e}")
        return None
75
+
76
+ # -----------------------------
77
+ # Execute SQL query on SQLite
78
+ # -----------------------------
79
def execute_query(query, conn):
    """Run *query* against *conn* and return the rows as a DataFrame.

    Args:
        query: SQL text to execute.
        conn: Open SQLite connection.

    Returns:
        The result set as a ``pandas.DataFrame``. If execution raises, the
        error is shown in the Streamlit UI and an empty DataFrame is returned.
    """
    try:
        result = pd.read_sql_query(query, conn)
    except Exception as exc:
        st.error(f"Query execution error: {exc}")
        result = pd.DataFrame()
    return result
86
+
87
+ # -----------------------------
88
+ # Streamlit app
89
+ # -----------------------------
90
def main():
    """Render the Streamlit page: upload a CSV, ask a question, show results.

    Flow: connect to SQLite, optionally load an uploaded CSV into the
    ``csv_data`` table, turn the user's question into SQL with
    ``generate_query``, run it with ``execute_query``, then display the
    result and offer it as a CSV download.
    """
    st.title("Natural Language to SQL Query and Output Generator (SQLite + HF API)")

    # Connect to SQLite
    conn = connect_sqlite()
    if not conn:
        st.stop()

    # Upload CSV
    uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
    if uploaded_file:
        # The upload is a file-like object that pandas can read directly.
        # Copying it to a NamedTemporaryFile(delete=False) left one orphaned
        # file on disk per rerun, because nothing ever removed it.
        load_csv_to_sqlite(uploaded_file, conn)

    st.markdown("---")
    user_input = st.text_input("Ask your query (in plain English):")
    if not user_input:
        return

    query = generate_query(user_input, conn)
    if not query:
        st.error("Could not generate a valid SQL query.")
        return

    st.code(query, language="sql")
    data = execute_query(query, conn)
    if data.empty:
        st.warning("No matching records found.")
        return

    st.dataframe(data)

    # Download result as CSV
    csv = data.to_csv(index=False).encode("utf-8")
    st.download_button("Download Result as CSV", csv, "result.csv", "text/csv")


if __name__ == "__main__":
    main()
model_api.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model_api.py
2
+ import requests
3
+ import re
4
+
5
+ HF_MODEL = "mistralai/Mistral-7B-Instruct-v0.2" # You can change to any hosted HF model
6
+
7
def _extract_sql(text):
    """Return the first SELECT statement found in raw model output."""
    # Models often wrap SQL in ``` fences. Keep the fenced CONTENT (minus an
    # optional language tag line) instead of deleting it along with the fence.
    fenced = re.search(r"```(?:[A-Za-z]+[ \t]*\n)?(.*?)```", text, flags=re.S)
    if fenced:
        text = fenced.group(1)
    text = text.strip()

    # Take a line that starts with SELECT and read up to the first semicolon
    # outside single quotes, so trailing explanations are not sent to the
    # database. Without a semicolon the match runs to the end of the text.
    match = re.search(r"(?im)^[ \t]*(select\b(?:'[^']*'|[^;'])*;?)", text)
    if match:
        return match.group(1).strip()
    return text


def query_hf_model(prompt: str, hf_token: str, max_tokens: int = 256):
    """Call the Hugging Face Inference API to generate SQL from a prompt.

    Args:
        prompt: Full instruction text sent to the model.
        hf_token: Hugging Face access token used as a Bearer credential.
        max_tokens: Upper bound on newly generated tokens.

    Returns:
        The SELECT statement extracted from the model output, or the stripped
        raw output when no SELECT statement can be found (callers are
        expected to validate it).

    Raises:
        requests.HTTPError: If the API answers with an error status.
        RuntimeError: If the JSON body carries an ``error`` field.
    """
    api_url = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
    headers = {"Authorization": f"Bearer {hf_token}"}
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_tokens,
            # Greedy decoding. A literal temperature of 0.0 is rejected by
            # text-generation-inference backends ("must be strictly
            # positive"), so sampling is switched off instead.
            "do_sample": False,
            # Without this the response echoes the prompt before the answer.
            "return_full_text": False,
        },
        "options": {"wait_for_model": True},
    }

    resp = requests.post(api_url, headers=headers, json=payload, timeout=60)
    resp.raise_for_status()
    data = resp.json()

    # Extract generated text safely
    if isinstance(data, list) and len(data) > 0:
        item = data[0]
        if isinstance(item, dict):
            text = item.get("generated_text") or item.get("text") or str(item)
        else:
            text = str(item)
    elif isinstance(data, dict):
        if "error" in data:
            raise RuntimeError(f"Model error: {data['error']}")
        text = data.get("generated_text") or data.get("text") or str(data)
    else:
        text = str(data)
    text = str(text)

    # Some backends ignore return_full_text; drop an echoed prompt so the
    # example query inside the prompt is never mistaken for the answer.
    if text.startswith(prompt):
        text = text[len(prompt):]

    return _extract_sql(text)
questions.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Basic Level
2
+ Select all product details.
3
+
4
+ List all products in the 'Electronics' category.
5
+
6
+ Get the names and prices of all products with price > 100.
7
+
8
+ Count how many products are available in the dataset.
9
+
10
+ List all distinct product categories.
11
+
12
+ 🧩 Medium Level
13
+ Find the average rating for each category.
14
+
15
+ List the top 3 most expensive products.
16
+
17
+ Find the product with the highest number of reviews.
18
+
19
+ Count the number of products launched in each year.
20
+
21
+ List products with discounts greater than 10%.
22
+
23
+ 🧠 Advanced Level
24
+ Find the category with the highest average price.
25
+
26
+ List brands with more than 1 product and their average rating.
27
+
28
+ Find products where stock is below the average stock for their category.
29
+
30
+ Convert LaunchDate to date format and list products launched after Jan 1, 2022.
31
+
32
+ Rank products within each category based on rating and review count.
requirements.txt CHANGED
@@ -1,3 +1,7 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ mysql-connector-python
4
+ langchain
5
+ langchain-community
6
+ langchain-ollama
7
+ ollama
sample_data.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ProductID,ProductName,Category,Price,Rating,ReviewCount,Stock,Discount,Brand,LaunchDate
2
+ 101,Wireless Mouse,Electronics,25.99,4.5,200,150,10%,Logitech,15-01-2022
3
+ 102,Gaming Keyboard,Electronics,75.49,4.7,350,85,5%,Corsair,20-11-2021
4
+ 103,Noise Cancelling Headphones,Electronics,199.99,4.8,125,60,15%,Sony,05-09-2021
5
+ 104,Running Shoes,Sports,49.99,4.3,500,200,20%,Nike,10-02-2022
6
+ 105,Smartwatch,Electronics,159.99,4.6,220,45,10%,Apple,30-03-2022
7
+ 106,Blender,Home & Kitchen,39.99,4.2,180,120,12%,Ninja,25-12-2021
8
+ 107,Coffee Maker,Home & Kitchen,89.99,4.1,240,90,8%,Keurig,15-08-2021
9
+ 108,Yoga Mat,Sports,29.99,4.4,320,300,5%,Manduka,05-01-2022
10
+ 109,Smart TV,Electronics,399.99,4.7,150,25,15%,Samsung,10-06-2022
11
+ 110,Vacuum Cleaner,Home & Kitchen,129.99,4.3,290,70,10%,Dyson,20-10-2021