Spaces:

SpyroSigma
/

defog-sqlcoder-7b-2

Runtime error

App Files Files Community

defog-sqlcoder-7b-2 / app.py

SpyroSigma

Update app.py

491b679 verified over 1 year ago

raw

history blame contribute delete

3.33 kB

	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM

	model_name = "defog/sqlcoder-7b-2"
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForCausalLM.from_pretrained(
	model_name,
	trust_remote_code=True,
	# torch_dtype=torch.float16,
	load_in_8bit=True,
	device_map="auto",
	use_cache=True,
	)

	prompt = """### Task
	Generate a SQL query to answer [QUESTION]{question}[/QUESTION]

	### Instructions
	- If you cannot answer the question with the available database schema, return 'I do not know'
	- Remember that revenue is price multiplied by quantity
	- Remember that cost is supply_price multiplied by quantity

	### Database Schema
	This query will run on a database whose schema is represented in this string:
	CREATE TABLE products (
	product_id INTEGER PRIMARY KEY, -- Unique ID for each product
	name VARCHAR(50), -- Name of the product
	price DECIMAL(10,2), -- Price of each unit of the product
	quantity INTEGER -- Current quantity in stock
	);

	CREATE TABLE customers (
	customer_id INTEGER PRIMARY KEY, -- Unique ID for each customer
	name VARCHAR(50), -- Name of the customer
	address VARCHAR(100) -- Mailing address of the customer
	);

	CREATE TABLE salespeople (
	salesperson_id INTEGER PRIMARY KEY, -- Unique ID for each salesperson
	name VARCHAR(50), -- Name of the salesperson
	region VARCHAR(50) -- Geographic sales region
	);

	CREATE TABLE sales (
	sale_id INTEGER PRIMARY KEY, -- Unique ID for each sale
	product_id INTEGER, -- ID of product sold
	customer_id INTEGER, -- ID of customer who made purchase
	salesperson_id INTEGER, -- ID of salesperson who made the sale
	sale_date DATE, -- Date the sale occurred
	quantity INTEGER -- Quantity of product sold
	);

	CREATE TABLE product_suppliers (
	supplier_id INTEGER PRIMARY KEY, -- Unique ID for each supplier
	product_id INTEGER, -- Product ID supplied
	supply_price DECIMAL(10,2) -- Unit price charged by supplier
	);

	-- sales.product_id can be joined with products.product_id
	-- sales.customer_id can be joined with customers.customer_id
	-- sales.salesperson_id can be joined with salespeople.salesperson_id
	-- product_suppliers.product_id can be joined with products.product_id

	### Answer
	Given the database schema, here is the SQL query that answers [QUESTION]{question}[/QUESTION]
	[SQL]
	"""

	import sqlparse

	def generate_query(question):
	updated_prompt = prompt.format(question=question)
	inputs = tokenizer(updated_prompt, return_tensors="pt").to("cuda")
	generated_ids = model.generate(
	**inputs,
	num_return_sequences=1,
	eos_token_id=tokenizer.eos_token_id,
	pad_token_id=tokenizer.eos_token_id,
	max_new_tokens=400,
	do_sample=False,
	num_beams=1,
	)
	outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

	torch.cuda.empty_cache()
	torch.cuda.synchronize()
	# empty cache so that you do generate more results w/o memory crashing
	# particularly important on Colab – memory management is much more straightforward
	# when running on an inference service
	return sqlparse.format(outputs[0].split("[SQL]")[-1], reindent=True)

	question = "What was our revenue by product in the New York region last month?"
	generated_sql = generate_query(question)
	print(generated_sql)