Spaces:
Runtime error
Runtime error
| import torch | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| model_name = "defog/sqlcoder-7b-2" | |
| tokenizer = AutoTokenizer.from_pretrained(model_name) | |
| model = AutoModelForCausalLM.from_pretrained( | |
| model_name, | |
| trust_remote_code=True, | |
| # torch_dtype=torch.float16, | |
| load_in_8bit=True, | |
| device_map="auto", | |
| use_cache=True, | |
| ) | |
| prompt = """### Task | |
| Generate a SQL query to answer [QUESTION]{question}[/QUESTION] | |
| ### Instructions | |
| - If you cannot answer the question with the available database schema, return 'I do not know' | |
| - Remember that revenue is price multiplied by quantity | |
| - Remember that cost is supply_price multiplied by quantity | |
| ### Database Schema | |
| This query will run on a database whose schema is represented in this string: | |
| CREATE TABLE products ( | |
| product_id INTEGER PRIMARY KEY, -- Unique ID for each product | |
| name VARCHAR(50), -- Name of the product | |
| price DECIMAL(10,2), -- Price of each unit of the product | |
| quantity INTEGER -- Current quantity in stock | |
| ); | |
| CREATE TABLE customers ( | |
| customer_id INTEGER PRIMARY KEY, -- Unique ID for each customer | |
| name VARCHAR(50), -- Name of the customer | |
| address VARCHAR(100) -- Mailing address of the customer | |
| ); | |
| CREATE TABLE salespeople ( | |
| salesperson_id INTEGER PRIMARY KEY, -- Unique ID for each salesperson | |
| name VARCHAR(50), -- Name of the salesperson | |
| region VARCHAR(50) -- Geographic sales region | |
| ); | |
| CREATE TABLE sales ( | |
| sale_id INTEGER PRIMARY KEY, -- Unique ID for each sale | |
| product_id INTEGER, -- ID of product sold | |
| customer_id INTEGER, -- ID of customer who made purchase | |
| salesperson_id INTEGER, -- ID of salesperson who made the sale | |
| sale_date DATE, -- Date the sale occurred | |
| quantity INTEGER -- Quantity of product sold | |
| ); | |
| CREATE TABLE product_suppliers ( | |
| supplier_id INTEGER PRIMARY KEY, -- Unique ID for each supplier | |
| product_id INTEGER, -- Product ID supplied | |
| supply_price DECIMAL(10,2) -- Unit price charged by supplier | |
| ); | |
| -- sales.product_id can be joined with products.product_id | |
| -- sales.customer_id can be joined with customers.customer_id | |
| -- sales.salesperson_id can be joined with salespeople.salesperson_id | |
| -- product_suppliers.product_id can be joined with products.product_id | |
| ### Answer | |
| Given the database schema, here is the SQL query that answers [QUESTION]{question}[/QUESTION] | |
| [SQL] | |
| """ | |
| import sqlparse | |
| def generate_query(question): | |
| updated_prompt = prompt.format(question=question) | |
| inputs = tokenizer(updated_prompt, return_tensors="pt").to("cuda") | |
| generated_ids = model.generate( | |
| **inputs, | |
| num_return_sequences=1, | |
| eos_token_id=tokenizer.eos_token_id, | |
| pad_token_id=tokenizer.eos_token_id, | |
| max_new_tokens=400, | |
| do_sample=False, | |
| num_beams=1, | |
| ) | |
| outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) | |
| torch.cuda.empty_cache() | |
| torch.cuda.synchronize() | |
| # empty cache so that you do generate more results w/o memory crashing | |
| # particularly important on Colab β memory management is much more straightforward | |
| # when running on an inference service | |
| return sqlparse.format(outputs[0].split("[SQL]")[-1], reindent=True) | |
| question = "What was our revenue by product in the New York region last month?" | |
| generated_sql = generate_query(question) | |
| print(generated_sql) |