victor7246 committed on
Commit
9f885f5
·
verified ·
1 Parent(s): 3a28483

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +124 -48
utils.py CHANGED
@@ -32,8 +32,6 @@ from langchain.schema.language_model import BaseLanguageModel
32
  from langchain.utilities.sql_database import SQLDatabase
33
  from langchain_experimental.pydantic_v1 import Extra, Field, root_validator
34
 
35
- import streamlit as st
36
-
37
  emb_model = SentenceTransformer("all-MiniLM-L6-v2")
38
 
39
  class EmbeddingsSearch:
@@ -182,6 +180,50 @@ warnings.filterwarnings('ignore', message="pandas only supports SQLAlchemy conne
182
 
183
  intermediate_steps_KEY = "intermediate_steps"
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  def num_tokens_from_string(string: str, encoding_name: str) -> int:
186
  encoding = tiktoken.get_encoding(encoding_name)
187
  num_tokens = len(encoding.encode(string))
@@ -276,7 +318,7 @@ class SQLDatabaseChainPatched(SQLDatabaseChain):
276
  history = inputs['history'].copy()
277
  history.reverse()
278
 
279
- inputs[self.input_key] = translate_to_english(self.llms['4k'], inputs[self.input_key])
280
  input_text = f"{inputs[self.input_key]} \nHistory: {history} \nSQLQuery:"
281
  _run_manager.on_text(input_text, verbose=self.verbose)
282
  # If not present, then defaults to None which is all tables.
@@ -306,50 +348,84 @@ class SQLDatabaseChainPatched(SQLDatabaseChain):
306
  # list to store estimated num of tokens
307
  self.intermediate_steps['n_tokens_list'] = []
308
  input_text_bkp = input_text
309
-
310
- # get sql
311
- self.llm_chain, n_tokens1 = self.prepare_llm(llm_inputs, chain=self.llm_chain)
312
- # self.intermediate_steps['n_tokens_list'].append(n_tokens1)
313
- sql_cmd = self.llm_chain.predict(
314
- callbacks=_run_manager.get_child(),
315
- **llm_inputs,
316
- ).strip()
317
-
318
- st.markdown(sql_cmd)
319
 
320
- # self.llm_chain = self.revert_to_small_model(chain=self.llm_chain)
321
- self.intermediate_steps['sql_cmd_unchecked'] = sql_cmd
322
-
323
- self.intermediate_steps['sql_cmd'] = clean_sql(sql_cmd)
324
 
325
- # run sql
326
- sql_data = self.database._execute(self.intermediate_steps['sql_cmd'], fetch='all')
327
- self.intermediate_steps['sql_data'] = sql_data
328
-
329
- # provide human answer
330
- input_text += f"{sql_cmd}\nSQLResult: {str(sql_data)}\nAnswer:"
331
- llm_inputs["input"] = input_text
332
- self.llm_chain, n_tokens3 = self.prepare_llm(llm_inputs, chain=self.llm_chain)
333
- # self.intermediate_steps['n_tokens_list'].append(n_tokens3)
334
- final_result = self.llm_chain.predict(
335
- callbacks=_run_manager.get_child(),
336
- **llm_inputs,
337
- ).strip()
338
- # self.llm_chain = self.revert_to_small_model(chain=self.llm_chain)
339
- self.intermediate_steps['result'] = final_result
340
-
341
- # provide explanation
342
- input_text += f"{final_result}\nExplanation:"
343
- llm_inputs["input"] = input_text
344
- self.llm_chain, n_tokens4 = self.prepare_llm(llm_inputs, chain=self.llm_chain)
345
- # self.intermediate_steps['n_tokens_list'].append(n_tokens3)
346
- explanation = self.llm_chain.predict(
347
- callbacks=_run_manager.get_child(),
348
- **llm_inputs,
349
- ).strip()
350
- # self.llm_chain = self.revert_to_small_model(chain=self.llm_chain)
351
- self.intermediate_steps['query_explanation'] = explanation
352
-
353
- if 'result' in self.intermediate_steps:
354
- self.intermediate_steps['translated_result'] = translate(self.llms['4k'], orig_question, self.intermediate_steps['result'])
355
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  from langchain.utilities.sql_database import SQLDatabase
33
  from langchain_experimental.pydantic_v1 import Extra, Field, root_validator
34
 
 
 
35
  emb_model = SentenceTransformer("all-MiniLM-L6-v2")
36
 
37
  class EmbeddingsSearch:
 
180
 
181
  intermediate_steps_KEY = "intermediate_steps"
182
 
183
+ template = """
184
+
185
+ You are a database expert. Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer.
186
+ The final answer should be in a concise natural language.
187
+
188
+ Use the history if you can not understand the question.
189
+
190
+ If the question is in another language, translate it to English before proceeding.
191
+
192
+ Do not repeat the question while generating the SQL query.
193
+
194
+ Only generate a correct {dialect} query.
195
+
196
+ Once the SQLResult is available, generate the final answer in natural language format. Do not regenerate the question or SQL query in the final answer.
197
+
198
+ If the question asks any information for any particular number of days, use the lookback from the maximum date in the table, not from today's date.
199
+
200
+ Please note that MSSQL does not use LIMIT, but uses TOP clause.
201
+
202
+ You may also need to resolve the column name, as per the metadata. For instance, if the user asks about families and the column name is family, you should use family in the generated SQL.
203
+
204
+ Make sure that the column names are present in the table, by looking at the metadata.
205
+
206
+ If a question asks about availability over a period of time, you need to use SUM to calculate the total availability over that time period.
207
+
208
+ If a question mentions SKU, then use SKU column for filter, do not use any other column like comodity
209
+
210
+ If a question asks about AV of shortage, do not use AV in the SQL query as AV is not a valid column name. AV is the key in the Shortage column.
211
+
212
+ In the OpenOrderShotage table, the column Item should be used to extract the part ids, to answer questions related to shortage.
213
+
214
+ In the OpenOrderShotage table, Customer_Part_Name column is equivalent to SKU.
215
+
216
+ Use the following format:
217
+ Question: Question here
218
+ SQLQuery: SQL Query to run
219
+ SQLResult: Result of the SQLQuery
220
+ Answer: Final answer here.
221
+
222
+ Only use the following tables:
223
+ {table_info}
224
+ Question: {input}
225
+ """
226
+
227
  def num_tokens_from_string(string: str, encoding_name: str) -> int:
228
  encoding = tiktoken.get_encoding(encoding_name)
229
  num_tokens = len(encoding.encode(string))
 
318
  history = inputs['history'].copy()
319
  history.reverse()
320
 
321
+ #inputs[self.input_key] = translate_to_english(self.llms['4k'], inputs[self.input_key])
322
  input_text = f"{inputs[self.input_key]} \nHistory: {history} \nSQLQuery:"
323
  _run_manager.on_text(input_text, verbose=self.verbose)
324
  # If not present, then defaults to None which is all tables.
 
348
  # list to store estimated num of tokens
349
  self.intermediate_steps['n_tokens_list'] = []
350
  input_text_bkp = input_text
351
+ try:
352
+ # get sql
353
+ self.llm_chain, n_tokens1 = self.prepare_llm(llm_inputs, chain=self.llm_chain)
354
+ # self.intermediate_steps['n_tokens_list'].append(n_tokens1)
355
+ sql_cmd = self.llm_chain.predict(
356
+ callbacks=_run_manager.get_child(),
357
+ **llm_inputs,
358
+ ).strip()
359
+ # self.llm_chain = self.revert_to_small_model(chain=self.llm_chain)
360
+ self.intermediate_steps['sql_cmd_unchecked'] = sql_cmd
361
 
362
+ self.intermediate_steps['sql_cmd'] = clean_sql(sql_cmd)
 
 
 
363
 
364
+ # run sql
365
+ sql_data = self.database._execute(self.intermediate_steps['sql_cmd'], fetch='all')
366
+ self.intermediate_steps['sql_data'] = sql_data
367
+
368
+ # provide human answer
369
+ input_text += f"{sql_cmd}\nSQLResult: {str(sql_data)}\nAnswer:"
370
+ llm_inputs["input"] = input_text
371
+ self.llm_chain, n_tokens3 = self.prepare_llm(llm_inputs, chain=self.llm_chain)
372
+ # self.intermediate_steps['n_tokens_list'].append(n_tokens3)
373
+ final_result = self.llm_chain.predict(
374
+ callbacks=_run_manager.get_child(),
375
+ **llm_inputs,
376
+ ).strip()
377
+ # self.llm_chain = self.revert_to_small_model(chain=self.llm_chain)
378
+ self.intermediate_steps['result'] = final_result
379
+
380
+ # provide explanation
381
+ input_text += f"{final_result}\nExplanation:"
382
+ llm_inputs["input"] = input_text
383
+ self.llm_chain, n_tokens4 = self.prepare_llm(llm_inputs, chain=self.llm_chain)
384
+ # self.intermediate_steps['n_tokens_list'].append(n_tokens3)
385
+ explanation = self.llm_chain.predict(
386
+ callbacks=_run_manager.get_child(),
387
+ **llm_inputs,
388
+ ).strip()
389
+ # self.llm_chain = self.revert_to_small_model(chain=self.llm_chain)
390
+ self.intermediate_steps['query_explanation'] = explanation
391
+
392
+ #if 'result' in self.intermediate_steps:
393
+ # self.intermediate_steps['translated_result'] = translate(self.llms['4k'], orig_question, self.intermediate_steps['result'])
394
+
395
+ except:
396
+ #try:
397
+ sql_data_new = sql_data[-20:] + sql_data[:20]
398
+ input_text = input_text_bkp + f"{sql_cmd}\nSQLResult: {str(sql_data_new)}\nAnswer:"
399
+ llm_inputs["input"] = input_text
400
+ self.llm_chain, n_tokens3 = self.prepare_llm(llm_inputs, chain=self.llm_chain)
401
+ # self.intermediate_steps['n_tokens_list'].append(n_tokens3)
402
+ final_result = self.llm_chain.predict(
403
+ callbacks=_run_manager.get_child(),
404
+ **llm_inputs,
405
+ ).strip()
406
+ # self.llm_chain = self.revert_to_small_model(chain=self.llm_chain)
407
+ self.intermediate_steps['result'] = final_result
408
+
409
+ # provide explanation
410
+ input_text += f"{final_result}\nExplanation:"
411
+ llm_inputs["input"] = input_text
412
+ self.llm_chain, n_tokens4 = self.prepare_llm(llm_inputs, chain=self.llm_chain)
413
+ # self.intermediate_steps['n_tokens_list'].append(n_tokens3)
414
+ explanation = self.llm_chain.predict(
415
+ callbacks=_run_manager.get_child(),
416
+ **llm_inputs,
417
+ ).strip()
418
+ # self.llm_chain = self.revert_to_small_model(chain=self.llm_chain)
419
+ self.intermediate_steps['query_explanation'] = explanation
420
+ #if 'result' in self.intermediate_steps:
421
+ # self.intermediate_steps['translated_result'] = translate(self.llms['4k'], orig_question, self.intermediate_steps['result'])
422
+
423
+ except Exception as exc:
424
+ # Append intermediate steps to exception, to aid in logging and later
425
+ # improvement of few shot prompt seeds
426
+ #exc.intermediate_steps = self.intermediate_steps # type: ignore
427
+ #raise exc
428
+ self.intermediate_steps['result'] = "I don't know the answer for this."
429
+ #self.intermediate_steps['translated_result'] = "I don't know the answer for this."
430
+
431
+