hmm404 commited on
Commit
530a106
·
verified ·
1 Parent(s): 7a7a0e6

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +242 -0
  2. requirements.txt +3 -0
  3. schema.py +297 -0
app.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import sqlite3
4
+ import warnings
5
+ import gradio as gr
6
+ import pandas as pd
7
+ from schema import schema
8
+ from langchain_nvidia_ai_endpoints import ChatNVIDIA
9
+
10
# Suppress every Python warning for the whole process so that library
# deprecation notices do not clutter the application logs.
warnings.filterwarnings("ignore")

# SECURITY: the NVIDIA API key must never be committed to source control.
# It is read from the NVIDIA_API_KEY environment variable (on Hugging Face
# Spaces, define it as a repository secret). Any key that was previously
# committed in this file is public and must be revoked and rotated.
API_KEY = os.environ.get("NVIDIA_API_KEY")

# SQLite database file queried by execute_sql_query().
db_path = "wash_db.db"

# Shared chat-model client used for both SQL generation and summarization.
client = ChatNVIDIA(
    model="deepseek-ai/deepseek-r1",
    api_key=API_KEY,
    temperature=0.1,
    top_p=1,
    max_tokens=4096,
)
21
+
22
def get_table_names(schema: str):
    """Return the table names declared in *schema*, in order of appearance.

    A declaration is the literal word ``TABLE``, one space, and then a run
    of word characters; that run is the table name.
    """
    table_pattern = re.compile(r'TABLE (\w+)')
    return [hit.group(1) for hit in table_pattern.finditer(schema)]
24
+
25
def get_table_columns(schema: str, table: str):
    """Return the column names declared for *table* in *schema*.

    The table body is delimited by balanced parentheses, so nested
    parentheses such as ``FOREIGN KEY (x) REFERENCES t(x)`` do not truncate
    it. The body is split on top-level commas, and the first word of each
    definition is taken as the column name. Table-level constraint clauses
    (PRIMARY KEY / FOREIGN KEY / UNIQUE / CHECK / CONSTRAINT) are skipped.

    Args:
        schema: Schema text containing ``TABLE <name> ( ... )`` declarations.
        table: Name of the table to look up (matched literally).

    Returns:
        Column names in declaration order, or an empty list when the table
        is not declared in the schema.
    """
    # re.escape keeps a table name with regex metacharacters from being
    # interpreted as a pattern.
    header = re.search(rf'\bTABLE\s+{re.escape(table)}\s*\(', schema)
    if header is None:
        return []

    # Scan forward from the opening parenthesis, tracking nesting depth, and
    # cut a new definition at every comma that sits directly in the table
    # body (depth 1). Stop at the parenthesis that closes the table.
    depth = 1
    definitions = []
    current = []
    for ch in schema[header.end():]:
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
            if depth == 0:
                break
        if ch == ',' and depth == 1:
            definitions.append(''.join(current))
            current = []
        else:
            current.append(ch)
    definitions.append(''.join(current))

    constraint_keywords = {"primary", "foreign", "unique", "check", "constraint"}
    columns = []
    for definition in definitions:
        tokens = re.findall(r'\w+', definition)
        # The first word is the column name unless the clause is a
        # table-level constraint.
        if tokens and tokens[0].lower() not in constraint_keywords:
            columns.append(tokens[0])
    return columns
32
+
33
def agent_select_table(user_query, schema):
    """Pick the table whose name best matches the words of *user_query*.

    The query is lower-cased and split into word tokens, so punctuation such
    as a trailing "?" or "," does not prevent a match. The table chosen is
    the one containing the longest query word as a substring of its name;
    ties go to the table declared first.

    Args:
        user_query: Natural-language question typed by the user.
        schema: Schema text understood by get_table_names().

    Returns:
        The selected table name. Falls back to the first declared table when
        no word matches, and to an empty string when the schema declares no
        tables at all.
    """
    tables = get_table_names(schema)
    if not tables:
        return ""

    # Tokenize once, outside the table loop.
    words = re.findall(r"\w+", user_query.lower())

    best = ""
    best_len = 0
    for table in tables:
        lowered = table.lower()
        for word in words:
            if len(word) > best_len and word in lowered:
                best = table
                best_len = len(word)

    # Fallback: first table.
    return best or tables[0]
47
+
48
def agent_select_columns(user_query, table, schema):
    """Return the columns of *table* that are mentioned in *user_query*.

    A column is selected when any word token of the lower-cased query occurs
    as a substring of the lower-cased column name. Tokens are runs of word
    characters, so surrounding punctuation is ignored.

    Args:
        user_query: Natural-language question typed by the user.
        table: Table whose columns are considered.
        schema: Schema text understood by get_table_columns().

    Returns:
        The matching column names in declaration order, or every column of
        the table when nothing matches.
    """
    columns = get_table_columns(schema, table)
    # Tokenize once instead of re-splitting the query for every column.
    words = re.findall(r"\w+", user_query.lower())
    selected = [
        col for col in columns
        if any(word in col.lower() for word in words)
    ]
    return selected or columns  # fallback: all columns
55
+
56
def build_sql_prompt(table, columns, schema, user_question, error_reason=None):
    """Assemble the text prompt that asks the model for a SQLite query.

    The prompt contains the schema and the user's question. When
    *error_reason* is truthy, a trailing paragraph reports the previous
    failure and asks for a corrected query. *table* and *columns* are
    accepted but are not interpolated into the prompt text.
    """
    sections = [
        "You are an expert SQL assistant.\n",
        f"Schema: {schema}\n",
        f"User question: {user_question}\n",
        "Write a valid SQLite SQL query answering the question using only the given table and columns.\n",
    ]
    if error_reason:
        sections.append(
            f"The previous SQL query failed with the error: {error_reason}\nPlease fix and regenerate the SQL only."
        )
    return "".join(sections)
67
+
68
def extract_sql_query(text):
    """Extract the SQL statement from a raw LLM response.

    Strategies are tried in order:

    1. Drop everything up to the last ``</think>`` tag, so that reasoning
       text emitted before the answer is never mistaken for the query.
    2. Return the contents of a fenced code block, preferring one tagged
       ``sql``/``sqlite`` over an untagged one.
    3. Return the first span that starts at a whole-word SQL keyword and
       ends at the next semicolon.
    4. Join the lines that contain common SQL keywords.
    5. Return the stripped text unchanged.

    Args:
        text: The model's response text.

    Returns:
        The best-effort SQL string with surrounding whitespace removed.
    """
    # Reasoning models (the configured model is deepseek-r1) may prefix the
    # answer with a <think>...</think> block that mentions SQL keywords.
    _, closing_tag, answer = text.rpartition("</think>")
    if closing_tag:
        text = answer

    flags = re.DOTALL | re.IGNORECASE
    fence_patterns = (
        # Tagged fence; tolerates trailing spaces and CRLF after the tag.
        r"```(?:sqlite|sql)\b[ \t]*\r?\n?(.*?)```",
        # Any other fence.
        r"```(.*?)```",
    )
    for pattern in fence_patterns:
        match = re.search(pattern, text, flags)
        if match:
            return match.group(1).strip()

    # No fence: take "<KEYWORD> ... ;". The word boundaries stop prose such
    # as "selected" or "updated" from being treated as the statement start.
    match = re.search(
        r"\b(SELECT|INSERT|UPDATE|DELETE|CREATE|DROP|ALTER)\b.*?;",
        text,
        flags,
    )
    if match:
        return match.group(0).strip()

    keywords = ('SELECT', 'FROM', 'WHERE', 'INSERT', 'UPDATE', 'DELETE')
    sql_lines = [
        line for line in text.split('\n')
        if any(keyword in line.upper() for keyword in keywords)
    ]
    if sql_lines:
        return ' '.join(sql_lines)
    return text.strip()
88
+
89
def execute_sql_query(sql_query, db_path=db_path):
    """Run *sql_query* against the SQLite database and return the rows.

    Args:
        sql_query: SQL text to execute.
        db_path: Path of the SQLite database file.

    Returns:
        A ``(dataframe, error)`` pair: ``(DataFrame, None)`` on success, or
        ``(None, message)`` when connecting or executing fails.
    """
    from contextlib import closing

    # NOTE(review): the SQL comes from an LLM and runs on a read-write
    # connection; consider opening the database read-only
    # (sqlite3.connect("file:...?mode=ro", uri=True)) -- confirm with owner.
    try:
        # closing() guarantees the connection is released even when the
        # query raises; previously a failed query leaked the connection.
        with closing(sqlite3.connect(db_path)) as conn:
            return pd.read_sql_query(sql_query, conn), None
    except Exception as e:
        # Deliberately broad: the message is fed back to the model so it
        # can repair the query on the next attempt.
        return None, str(e)
97
+
98
def summarize_with_llm(table, columns, data, user_query):
    """Ask the model for a short summary of the query result.

    Only the first five rows of *data* are sent to the model. *table* and
    *columns* are accepted but are not used in the prompt.

    Args:
        table: Name of the selected table (unused).
        columns: Selected column names (unused).
        data: Result DataFrame, or None when no result is available.
        user_query: The user's original question.

    Returns:
        The model's reply: its ``content`` attribute when present, otherwise
        the string form of the response object.
    """
    if data is not None and not data.empty:
        head = data.head(5)
        try:
            preview = head.to_markdown(index=False)
        except ImportError:
            # DataFrame.to_markdown needs the optional 'tabulate' package,
            # which requirements.txt does not list; fall back to a plain
            # text table instead of failing the whole request.
            preview = head.to_string(index=False)
    else:
        preview = "No data returned."

    prompt = (
        f"User query: {user_query}\n"
        f"SQL result preview \n{preview}\n"
        f"Summarize the result, referencing the user query and the preview.)."
    )
    resp = client.invoke([{"role": "user", "content": prompt}])
    return resp.content if hasattr(resp, "content") else str(resp)
107
+
108
+ # def full_pipeline(user_question):
109
+ # table = agent_select_table(user_question, schema)
110
+ # columns = agent_select_columns(user_question, table, schema)
111
+ # yield {
112
+ # table_output: gr.update(value=table),
113
+ # columns_output: gr.update(value=", ".join(columns)),
114
+ # }
115
+ # sql_prompt = build_sql_prompt(table, columns, user_question)
116
+ # sql_query, error = "", None
117
+
118
+ # # Error-handling and retry loop
119
+ # for _ in range(5):
120
+ # llm_resp = client.invoke([{"role": "user", "content": sql_prompt}])
121
+ # llm_text = getattr(llm_resp, "content", llm_resp) if hasattr(llm_resp, "content") else str(llm_resp)
122
+ # sql_query = extract_sql_query(llm_text)
123
+ # results_df, error = execute_sql_query(sql_query)
124
+ # if not error:
125
+ # break
126
+ # sql_prompt = build_sql_prompt(table, columns, user_question, error_reason=error)
127
+ # # Summarize
128
+ # summary = summarize_with_llm(table, columns, results_df, user_question)
129
+ # # Format outputs
130
+ # columns_view = ", ".join(columns)
131
+ # sql_view = f"```sql\n{sql_query}\n```"
132
+ # status_view = f"Success" if not error else f"Query error: {error}"
133
+ # out_df = results_df if results_df is not None else pd.DataFrame()
134
+ # return sql_view, status_view, summary, table, columns_view, out_df
135
+
136
def full_pipeline_stream(user_question):
    """Run the NL-to-SQL pipeline, yielding a progress tuple at each step.

    Every yielded value is a 6-tuple:
    ``(status, sql_view, summary, table, columns_view, results_dataframe)``.

    Steps: pick a table and columns, ask the model for SQL (up to five
    attempts, feeding each execution error back into the next prompt),
    execute it, and finally summarize the result.

    Args:
        user_question: Natural-language question typed by the user.
    """
    max_attempts = 5

    yield "Identifying relevant table and columns...", "", "", "", "", pd.DataFrame()
    table = agent_select_table(user_question, schema)
    columns = agent_select_columns(user_question, table, schema)
    columns_view = ", ".join(columns)
    yield f"Table '{table}' selected.", "", "", table, columns_view, pd.DataFrame()

    # build_sql_prompt takes (table, columns, schema, user_question, ...);
    # the schema argument was previously missing, which raised TypeError.
    sql_prompt = build_sql_prompt(table, columns, schema, user_question)
    sql_query, error, results_df = "", None, None
    sql_view = ""

    for attempt in range(1, max_attempts + 1):
        yield f"Generating SQL (attempt {attempt})...", "", "", table, columns_view, pd.DataFrame()
        llm_resp = client.invoke([{"role": "user", "content": sql_prompt}])
        llm_text = llm_resp.content if hasattr(llm_resp, "content") else str(llm_resp)
        sql_query = extract_sql_query(llm_text)
        # Show the generated query (the view used to be an empty fence).
        sql_view = f"```sql\n{sql_query}\n```"
        results_df, error = execute_sql_query(sql_query)
        if not error:
            yield "SQL executed successfully.", sql_view, "", table, columns_view, results_df
            break
        sql_prompt = build_sql_prompt(table, columns, schema, user_question, error_reason=error)
        # Only announce a retry when another attempt will actually follow.
        if attempt < max_attempts:
            yield f"Retrying due to error: {error}", sql_view, "", table, columns_view, pd.DataFrame()

    if not error:
        summary = summarize_with_llm(table, columns, results_df, user_question)
        yield "Summarization complete.", sql_view, summary, table, columns_view, results_df
    else:
        yield f"Final error: {error}", sql_view, "No summary due to error.", table, columns_view, pd.DataFrame()
162
def full_pipeline(user_question):
    """Run the NL-to-SQL pipeline for the Gradio UI as a generator.

    Yields, in order:
      1. a 6-tuple of blank values that clears all six output components;
      2. a dict updating the table and columns boxes;
      3. a dict updating the SQL, status and results components;
      4. a dict updating the summary box.

    SQL generation is attempted up to five times; each execution error is
    fed back into the next prompt.

    Args:
        user_question: Natural-language question typed by the user.
    """
    # Step 1: clear previous outputs, then identify table and columns.
    yield "", "", "", "", "", pd.DataFrame()
    table = agent_select_table(user_question, schema)
    columns = agent_select_columns(user_question, table, schema)

    # Show these two outputs immediately, before the slow LLM calls.
    yield {
        table_output: gr.update(value=table),
        columns_output: gr.update(value=", ".join(columns)),
    }

    # Step 2: generate SQL, retrying with the error message on failure.
    sql_prompt = build_sql_prompt(table, columns, schema, user_question)
    sql_query, error, results_df = "", None, None

    for _ in range(5):
        llm_resp = client.invoke([{"role": "user", "content": sql_prompt}])
        llm_text = llm_resp.content if hasattr(llm_resp, "content") else str(llm_resp)
        sql_query = extract_sql_query(llm_text)
        results_df, error = execute_sql_query(sql_query)
        if not error:
            break
        sql_prompt = build_sql_prompt(table, columns, schema, user_question, error_reason=error)

    sql_view = f"\n{sql_query.strip()}\n"
    status_view = "Success" if not error else f"Query error: {error}"
    out_df = results_df if results_df is not None else pd.DataFrame()
    yield {
        sql_output: gr.update(value=sql_view),
        status_output: gr.update(value=status_view),
        results_output: gr.update(value=out_df),
    }

    # Step 3: summarize only a successful result. After a failed query the
    # model would be told "No data returned." and would describe an empty
    # result rather than an error, so skip the call, as
    # full_pipeline_stream does.
    if error:
        summary = "No summary due to error."
    else:
        summary = summarize_with_llm(table, columns, results_df, user_question).strip()

    yield {
        summary_output: gr.update(value=summary),
    }
207
+
208
+
209
# Gradio UI: one question box, a submit button, six output components and
# an abort button. The title previously carried a stray ")".
# NOTE(review): the nesting below is reconstructed from a flattened source;
# confirm it matches the intended layout.
with gr.Blocks(title="NL2SQL Pipeline") as gradio_interface:
    gr.Markdown("## NL2SQL Pipeline ")
    gr.Markdown("Enter a question about the water supply database. The agent will select relevant table/columns, generate and retry SQL on error, show results and a grounded summary.")
    with gr.Row():
        input_text = gr.Textbox(label="Enter your natural language question", lines=3)
    with gr.Row():
        submit_btn = gr.Button("Generate, Execute & Summarize", variant="primary")
    with gr.Row():
        table_output = gr.Textbox(label="Table Used", lines=1)
        columns_output = gr.Textbox(label="Columns Used", lines=2)
    with gr.Row():
        sql_output = gr.Textbox(label="Generated SQL Query", lines=5)
    with gr.Row():
        status_output = gr.Textbox(label="Execution Status", lines=2)
    with gr.Row():
        results_output = gr.Dataframe(label="Query Results", interactive=False)
    with gr.Row():
        summary_output = gr.Textbox(label="LLM-Grounded Summary", lines=5)
    with gr.Row():
        abort_btn = gr.Button("Abort / Stop Task")

    # The order of `outputs` must match the 6-tuple yielded first by
    # full_pipeline; its later yields address components by key.
    running_event = submit_btn.click(
        fn=full_pipeline,
        inputs=input_text,
        outputs=[sql_output, status_output, summary_output, table_output, columns_output, results_output],
    )
    # The abort button runs no function; it only cancels the running event.
    abort_btn.click(
        None,
        inputs=None,
        outputs=None,
        cancels=[running_event],
        queue=False,
    )

if __name__ == "__main__":
    gradio_interface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ pandas
3
+ langchain-nvidia-ai-endpoints
schema.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Plain-text description of the database tables. app.py imports this string,
# embeds it in the SQL-generation prompt and parses it with regular
# expressions to list table and column names. Each table is written as
# "TABLE <name> ( ... )" (no CREATE keyword), so the text is a description
# for the model and the parsers rather than executable DDL.
# NOTE(review): presumably mirrors the tables in wash_db.db -- verify against
# the actual database before relying on it.
schema = """
TABLE states (
state_id INTEGER PRIMARY KEY,
lgd_state_id INTEGER NOT NULL,
state_name TEXT NOT NULL,
census_state INTEGER NOT NULL
)

TABLE districts (
district_id INTEGER PRIMARY KEY,
lgd_district_id INTEGER NOT NULL,
district_name TEXT NOT NULL,
census_district INTEGER NOT NULL
)

TABLE blocks (
block_id INTEGER PRIMARY KEY,
lgd_block_id INTEGER NOT NULL,
block_name TEXT NOT NULL
)

TABLE panchayats (
panchayat_id INTEGER PRIMARY KEY,
lgd_panchayat_id INTEGER NOT NULL,
panchayat_name TEXT NOT NULL
)

TABLE divisions (
division_id INTEGER PRIMARY KEY,
division_name TEXT NOT NULL
)

TABLE villages (
village_id INTEGER PRIMARY KEY,
lgd_village_id INTEGER NOT NULL,
village_name TEXT NOT NULL,
census_village TEXT NOT NULL,
village_type TEXT NOT NULL,
village_status TEXT NOT NULL,
vap_status TEXT NOT NULL,
vwsc_formed INTEGER NOT NULL,
village_certificate INTEGER NOT NULL,
gp_resolution INTEGER NOT NULL,
declaration_video INTEGER NOT NULL,
total_no_households INTEGER NOT NULL,
total_no_house_connection INTEGER NOT NULL,
no_of_ftk_trained_women INTEGER NOT NULL,
no_of_school INTEGER NOT NULL,
school_with_tap_connection INTEGER NOT NULL,
no_of_aws INTEGER NOT NULL,
no_of_aws_with_tap_connection INTEGER NOT NULL,
total_pop INTEGER NOT NULL,
gen_pop INTEGER NOT NULL,
sc_pop INTEGER NOT NULL,
st_pop INTEGER NOT NULL,
sanctioned_approved_status INTEGER,
work_order_updated_status INTEGER,
scheme_is_work_started_status INTEGER
)

TABLE habitations (
habitation_id INTEGER PRIMARY KEY,
habitation_name TEXT NOT NULL,
is_pvtg INTEGER NOT NULL,
community_access_planned INTEGER NOT NULL,
pvtg_fully_partial INTEGER NOT NULL,
pvtg_households INTEGER NOT NULL,
total_no_households INTEGER NOT NULL,
total_no_house_connection INTEGER NOT NULL,
is_pvtg_given_by_mota INTEGER NOT NULL
)

TABLE source_type_categories (
source_type_category_id INTEGER PRIMARY KEY,
description TEXT NOT NULL
)

TABLE source_types (
source_type_id INTEGER PRIMARY KEY,
description TEXT NOT NULL
)

TABLE storage_structure_types (
storage_structure_type_id INTEGER PRIMARY KEY,
description TEXT NOT NULL
)

TABLE categories (
category_id INTEGER PRIMARY KEY,
description TEXT NOT NULL
)

TABLE water_sources (
source_id INTEGER PRIMARY KEY,
location TEXT,
source_type_category_id INTEGER,
source_type_id INTEGER,
response_on TEXT,
scheme_id INTEGER,
latitude TEXT,
longitude TEXT,
pws_fhtc_status INTEGER
)

TABLE schemes (
scheme_id INTEGER PRIMARY KEY,
scheme_name TEXT,
category TEXT,
no_of_villages INTEGER,
household_planned INTEGER,
fhtc_provided INTEGER,
is_pws INTEGER,
fhtc_scheme TEXT,
is_jjm INTEGER,
sanction_year TEXT,
type TEXT,
work_order_date TEXT,
status TEXT,
physical_progress_in_percentage REAL,
handed_over_community_status TEXT,
handed_over_community_date TEXT,
estimated_cost REAL,
csr_donation REAL,
om_cost REAL,
expenditure REAL,
total_central_expenditure REAL,
central_expenditure_sc REAL,
central_expenditure_st REAL,
central_expenditure_gen REAL,
total_state_expenditure REAL,
state_expenditure_sc REAL,
state_expenditure_st REAL,
state_expenditure_gen REAL,
total_world_bank_expenditure REAL,
total_community_expenditure REAL,
total_csr_expenditure REAL,
total_other_expenditure REAL,
total_expenditure_during_JJM REAL,
latitude REAL NOT NULL,
longitude REAL NOT NULL,
location TEXT NOT NULL
)

TABLE scheme_assets (
id INTEGER PRIMARY KEY,
habitation_id INTEGER,
scheme_id INTEGER,
scheme_name TEXT,
latitude REAL,
longitude REAL,
location TEXT,
category_id INTEGER,
FOREIGN KEY (habitation_id) REFERENCES habitations(habitation_id),
FOREIGN KEY (scheme_id) REFERENCES schemes(scheme_id),
FOREIGN KEY (category_id) REFERENCES categories(category_id)
)

TABLE district_state_mapping (
district_id INTEGER PRIMARY KEY,
state_id INTEGER,
FOREIGN KEY (district_id) REFERENCES districts(district_id),
FOREIGN KEY (state_id) REFERENCES states(state_id)
)

TABLE block_district_mapping (
block_id INTEGER PRIMARY KEY,
district_id INTEGER,
FOREIGN KEY (block_id) REFERENCES blocks(block_id),
FOREIGN KEY (district_id) REFERENCES districts(district_id)
)

TABLE block_division_mapping (
block_id INTEGER PRIMARY KEY,
division_id INTEGER,
FOREIGN KEY (block_id) REFERENCES blocks(block_id),
FOREIGN KEY (division_id) REFERENCES divisions(division_id)
)

TABLE panchayat_block_mapping (
panchayat_id INTEGER PRIMARY KEY,
block_id INTEGER,
FOREIGN KEY (panchayat_id) REFERENCES panchayats(panchayat_id),
FOREIGN KEY (block_id) REFERENCES blocks(block_id)
)

TABLE village_panchayat_mapping (
village_id INTEGER PRIMARY KEY,
panchayat_id INTEGER,
FOREIGN KEY (village_id) REFERENCES villages(village_id),
FOREIGN KEY (panchayat_id) REFERENCES panchayats(panchayat_id)
)

TABLE habitation_village_mapping (
habitation_id INTEGER PRIMARY KEY,
village_id INTEGER,
FOREIGN KEY (habitation_id) REFERENCES habitations(habitation_id),
FOREIGN KEY (village_id) REFERENCES villages(village_id)
)

TABLE source_habitation_mapping (
source_id INTEGER PRIMARY KEY,
habitation_id INTEGER,
FOREIGN KEY (source_id) REFERENCES water_sources(source_id),
FOREIGN KEY (habitation_id) REFERENCES habitations(habitation_id)
)

TABLE scheme_village_mapping (
scheme_id INTEGER,
village_id INTEGER,
PRIMARY KEY (scheme_id, village_id),
FOREIGN KEY (scheme_id) REFERENCES schemes(scheme_id),
FOREIGN KEY (village_id) REFERENCES villages(village_id)
)

TABLE scheme_division_mapping (
scheme_id INTEGER PRIMARY KEY,
division_id INTEGER,
FOREIGN KEY (scheme_id) REFERENCES schemes(scheme_id),
FOREIGN KEY (division_id) REFERENCES divisions(division_id)
)

TABLE source_type_source_type_category_mapping (
source_type_id INTEGER PRIMARY KEY,
source_type_category_id INTEGER,
FOREIGN KEY (source_type_id) REFERENCES source_types(source_type_id),
FOREIGN KEY (source_type_category_id) REFERENCES source_type_categories(source_type_category_id)
)

TABLE wtps (
wtp_id INTEGER PRIMARY KEY,
wtp_name INTEGER NOT NULL
)

TABLE labs (
lab_id INTEGER PRIMARY KEY,
lab_name TEXT NOT NULL,
lab_type TEXT NOT NULL,
lab_group TEXT NOT NULL,
latitude REAL,
longitude REAL,
wtp_id INTEGER NOT NULL,
is_in_house INTEGER NOT NULL,
FOREIGN KEY (wtp_id) REFERENCES wtps(wtp_id)
)

TABLE parameters (
parameterid INTEGER PRIMARY KEY,
parameter_name TEXT NOT NULL,
measurement_unit TEXT NOT NULL,
acceptable_limit REAL NOT NULL,
permissible_limit TEXT NOT NULL,
value_type TEXT NOT NULL,
value_type_description TEXT NOT NULL,
public_rate INTEGER NOT NULL,
department_rate INTEGER NOT NULL,
commercial_rate INTEGER NOT NULL,
test_parameter_type TEXT NOT NULL
)

TABLE types (
type_id INTEGER PRIMARY KEY,
type_name TEXT NOT NULL,
description TEXT NOT NULL
)

TABLE wtp_village_mapping (
wtp_id INTEGER PRIMARY KEY,
village_id INTEGER,
FOREIGN KEY (wtp_id) REFERENCES wtps(wtp_id),
FOREIGN KEY (village_id) REFERENCES villages(village_id)
)

TABLE lab_village_mapping (
lab_id INTEGER,
village_id INTEGER,
FOREIGN KEY (lab_id) REFERENCES labs(lab_id),
FOREIGN KEY (village_id) REFERENCES villages(village_id)
)
"""
280
+
281
+
282
# System prompt for SQL generation. Because this is an f-string, the schema
# text is interpolated once, when this module is imported.
# NOTE(review): app.py in this commit imports only `schema`, so this prompt
# appears to be unused -- confirm before removing or wiring it in.
system_prompt = f"""
You are a precise SQL query generator assistant working with the database schema below.

Only use the tables and columns explicitly provided in the schema when generating SQL.

Schema definition:
{schema}

Guidelines:
- Use the correct primary and foreign key relationships.
- Do not invent tables or columns not listed in the schema.
- If the natural language question is ambiguous, make a reasonable assumption about the intent.
- Output only the final SQL query. Do not add any explanations or commentary.

Instructions: The user question will be provided after this prompt. Write the SQL query that answers it.
"""