Balaprime committed on
Commit
3851cd3
·
verified ·
1 Parent(s): 6d641fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -0
app.py CHANGED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import os
3
+ from sentence_transformers import SentenceTransformer
4
+ import gradio as gr
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ from groq import Groq
7
+ import sqlite3
8
+ import pandas as pd
9
+
10
# Load variables from a .env file into the process environment.
load_dotenv()

# Groq API key read from the environment; None when "groq_api_key" is unset.
api = os.environ.get("groq_api_key")
12
+
13
+ # 🔹 STEP 1: Create a sample file-backed SQLite database (college.db) with mock data
14
def setup_database():
    """Create (or reset) the sample ``college.db`` SQLite database.

    Drops and recreates the ``student``, ``employee`` and ``course_info``
    tables and inserts one mock row into each, so every call leaves the
    database file in the same known state.

    Raises:
        sqlite3.Error: If any statement fails. The connection is closed
            before the error propagates.
    """
    conn = sqlite3.connect("college.db")
    try:
        cursor = conn.cursor()

        # Drop existing tables
        cursor.execute("DROP TABLE IF EXISTS student;")
        cursor.execute("DROP TABLE IF EXISTS employee;")
        cursor.execute("DROP TABLE IF EXISTS course_info;")

        # Student table
        cursor.execute("""
            CREATE TABLE student (
                student_id INTEGER,
                first_name TEXT,
                last_name TEXT,
                date_of_birth TEXT,
                email TEXT,
                phone_number TEXT,
                major TEXT,
                year_of_enrollment INTEGER
            );
        """)

        cursor.execute("INSERT INTO student VALUES (1, 'Alice', 'Smith', '2000-05-01', 'alice@example.com', '1234567890', 'Computer Science', 2019);")

        # Employee table
        cursor.execute("""
            CREATE TABLE employee (
                employee_id INTEGER,
                first_name TEXT,
                last_name TEXT,
                email TEXT,
                department TEXT,
                position TEXT,
                salary REAL,
                date_of_joining TEXT
            );
        """)

        cursor.execute("INSERT INTO employee VALUES (101, 'John', 'Doe', 'john@college.edu', 'CSE', 'Professor', 80000, '2015-08-20');")

        # Course table
        cursor.execute("""
            CREATE TABLE course_info (
                course_id INTEGER,
                course_name TEXT,
                course_code TEXT,
                instructor_id INTEGER,
                department TEXT,
                credits INTEGER,
                semester TEXT
            );
        """)

        cursor.execute("INSERT INTO course_info VALUES (501, 'AI Basics', 'CS501', 101, 'CSE', 4, 'Fall');")

        conn.commit()
    finally:
        # Close the connection even when a statement above raises, so the
        # database file handle is never leaked.
        conn.close()


# Call it once to set up the database at import time
setup_database()
75
+
76
+ # 🔹 STEP 2: Embedding & LLM logic (unchanged mostly)
77
def create_metadata_embeddings():
    """Return the table-metadata embeddings together with their encoder.

    The sentence-transformer model is loaded and the three metadata strings
    are encoded only on the first call; the resulting tuple is stored on the
    function object and returned as-is on later calls. This avoids reloading
    the model for every incoming query.

    Returns:
        tuple: ``(embeddings, model, student, employee, course)`` where
        ``embeddings`` is the output of ``model.encode`` for the metadata
        strings in the order student, employee, course.
    """
    cached = getattr(create_metadata_embeddings, "_cached_result", None)
    if cached is not None:
        return cached

    student = """Table: student..."""  # (same as your original metadata)
    employee = """Table: employee..."""
    course = """Table: course_info..."""
    metadata_list = [student, employee, course]
    model = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = model.encode(metadata_list)

    result = (embeddings, model, student, employee, course)
    create_metadata_embeddings._cached_result = result
    return result
85
+
86
def find_best_fit(embeddings, model, user_query, student, employee, course):
    """Pick the table metadata most similar to the user's question.

    The query is encoded with ``model`` and compared to ``embeddings`` by
    cosine similarity; the index of the highest score selects one of
    ``student``, ``employee`` or ``course`` (in that order).
    """
    candidates = [student, employee, course]
    query_vector = model.encode([user_query])
    scores = cosine_similarity(query_vector, embeddings)
    return candidates[scores.argmax()]
91
+
92
def create_prompt(user_query, table_metadata):
    """Build the system and user prompts for the SQL-generating LLM.

    Args:
        user_query: The natural-language question.
        table_metadata: Description of the table the query should target.

    Returns:
        tuple: ``(system_prompt, user_prompt)``; the system prompt is a fixed
        instruction text, the user prompt embeds both arguments.
    """
    instructions = """You are a SQL query generator specialized in generating SQL queries for a single table at a time. Your task is to accurately convert natural language queries into SQL statements based on the user's intent and the provided table metadata.

Rules:
- Multi-Table Queries Allowed: You can generate queries involving multiple tables using appropriate SQL JOIN operations, based on the provided metadata.
- Join Logic: Use INNER JOIN, LEFT JOIN, or other appropriate joins based on logical relationships (e.g., foreign keys like `student_id`, `instructor_id`, etc.) inferred from the metadata.
- Metadata-Based Validation: Always ensure the generated query matches the table names, columns, and data types as described in the metadata.
- User Intent: Accurately capture the user's requirements such as filters, sorting, aggregations, and selections across one or more tables.
- SQL Syntax: Use standard SQL syntax that is compatible with most relational database systems.
- Output Format: Provide only the SQL query in a single line. Do not include explanations or any extra text.

Input Format:
User Query: The user's natural language request.
Table Metadata: The structure of the relevant table, including the table name, column names, and data types.

Output Format:
SQL Query: A valid SQL query formatted for readability.
Do not output anything else except the SQL query.Not even a single word extra.Ouput the whole query in a single line only.
You are ready to generate SQL queries based on the user input and table metadata."""
    request = "User Query: {}\nTable Metadata: {}".format(user_query, table_metadata)
    return instructions, request
113
+
114
def generate_sql(system_prompt, user_prompt):
    """Ask the Groq chat model for a SQL query and validate the answer.

    Args:
        system_prompt: Instruction text sent with the ``system`` role.
        user_prompt: Question plus table metadata, sent with the ``user`` role.

    Returns:
        The model's answer (with any surrounding Markdown code fence removed)
        when it starts with ``SELECT`` (case-insensitive); otherwise ``None``.
    """
    client = Groq(api_key=api)
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        model="llama3-70b-8192",
    )

    content = chat_completion.choices[0].message.content
    if not content:
        # The message content may be missing or empty; nothing to validate.
        return None

    res = content.strip()
    if res.startswith("```"):
        # Models often wrap the query in a Markdown fence (```sql ... ```)
        # despite the prompt; unwrap it instead of rejecting a valid query.
        res = res.strip("`").strip()
        if res[:3].lower() == "sql" and res[3:4].isspace():
            res = res[3:].strip()

    # Only read queries are accepted; anything else is treated as a failure.
    if res.lower().startswith("select"):
        return res
    return None
128
+
129
+ # 🔹 STEP 3: Execute SQL and return results
130
def execute_sql(sql_query):
    """Run ``sql_query`` against ``college.db`` and return the result.

    Args:
        sql_query: SQL text to execute.

    Returns:
        A ``pandas.DataFrame`` with the query result on success, or the
        error message as a ``str`` when connecting or executing fails.
    """
    try:
        conn = sqlite3.connect("college.db")
        try:
            return pd.read_sql_query(sql_query, conn)
        finally:
            # Always release the connection, including when the query fails.
            conn.close()
    except Exception as e:
        # Best-effort contract: callers receive the message instead of a raise.
        return str(e)
138
+
139
+ # 🔹 STEP 4: Final combined response
140
def response(user_query):
    """Turn a natural-language question into SQL, run it, and report both.

    Args:
        user_query: The question typed by the user.

    Returns:
        tuple: ``(message, table)``. ``message`` is the generated SQL (or a
        failure notice); ``table`` is a DataFrame with the query result, a
        one-row DataFrame carrying the error text when execution failed, or
        ``None`` when no SQL could be generated.
    """
    if not user_query or not user_query.strip():
        # Nothing to translate; skip the embedding and LLM round trip.
        return "❌ Couldn't generate a valid SQL query.", None

    embeddings, model, student, employee, course = create_metadata_embeddings()
    table_metadata = find_best_fit(embeddings, model, user_query, student, employee, course)
    system_prompt, user_prompt = create_prompt(user_query, table_metadata)
    sql_query = generate_sql(system_prompt, user_prompt)

    if not sql_query:
        return "❌ Couldn't generate a valid SQL query.", None

    result = execute_sql(sql_query)
    if isinstance(result, str):
        # execute_sql reports failures as a plain message string, which is
        # not tabular data for the Dataframe output; wrap it so the error is
        # displayed in the result table.
        result = pd.DataFrame({"error": [result]})
    return f"🧠 SQL Query:\n{sql_query}", result
151
+
152
+ # 🔹 Gradio UI
153
# Text shown under the title of the web UI.
desc = """Ask a natural language question about students, employees, or courses. I'll generate and run a SQL query for you."""

# One text input, two outputs: the generated SQL and the query result table.
demo = gr.Interface(
    title="Natural Language to SQL + Result",
    description=desc,
    fn=response,
    inputs=gr.Textbox(label="Your Question"),
    outputs=[
        gr.Textbox(label="Generated SQL Query"),
        gr.Dataframe(label="Query Result"),
    ],
)

# Start the app; share=True requests a public share link.
demo.launch(share=True)