|
|
--- |
|
|
library_name: transformers |
|
|
base_model: |
|
|
- Qwen/Qwen2.5-Coder-7B-Instruct |
|
|
pipeline_tag: text-generation |
|
|
--- |
|
|
|
|
|
## Usage |
|
|
|
|
|
|
|
|
```python |
|
|
import torch |
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
from peft import PeftModel |
|
|
|
|
|
|
|
|
MODEL_NAME = "alpecevit/Qwen2.5-Coder-7B-Instruct-text2sql" |
|
|
|
|
|
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) |
|
|
model = AutoModelForCausalLM.from_pretrained( |
|
|
MODEL_NAME, |
|
|
torch_dtype="auto", |
|
|
device_map="auto" |
|
|
) |
|
|
|
|
|
model = PeftModel.from_pretrained(model, MODEL_NAME) |
|
|
model.eval() |
|
|
|
|
|
test_data = { |
|
|
'db_id': 'cre_Students_Information_Systems', |
|
|
'schema': 'CREATE TABLE Students (\n`student_id` INTEGER NOT NULL,\n`bio_data` VARCHAR(255) NOT NULL,\n`student_details` VARCHAR(255) NOT NULL,\nPRIMARY KEY (`student_id`)\n)\nCREATE TABLE Transcripts (\n`transcript_id` INTEGER NOT NULL,\n`student_id` INTEGER NOT NULL,\n`date_of_transcript` DATETIME(3),\n`transcript_details` VARCHAR(255) NOT NULL,\nPRIMARY KEY (`transcript_id`),\nFOREIGN KEY (student_id) REFERENCES Students (student_id)\n)\nCREATE TABLE Behaviour_Monitoring (\n`behaviour_monitoring_id` INTEGER NOT NULL,\n`student_id` INTEGER NOT NULL,\n`behaviour_monitoring_details` VARCHAR(255) NOT NULL,\nPRIMARY KEY (`behaviour_monitoring_id`),\nFOREIGN KEY (student_id) REFERENCES Students (student_id)\n)\nCREATE TABLE Addresses (\n`address_id` INTEGER NOT NULL,\n`address_details` VARCHAR(255) NOT NULL,\nPRIMARY KEY (`address_id`)\n)\nCREATE TABLE Ref_Event_Types (\n`event_type_code` CHAR(10) NOT NULL,\n`event_type_description` VARCHAR(255) NOT NULL,\nPRIMARY KEY (`event_type_code`)\n)\nCREATE TABLE Ref_Achievement_Type (\n`achievement_type_code` CHAR(15) NOT NULL,\n`achievement_type_description` VARCHAR(80),\nPRIMARY KEY (`achievement_type_code`)\n)\nCREATE TABLE Ref_Address_Types (\n`address_type_code` CHAR(10) NOT NULL,\n`address_type_description` VARCHAR(255) NOT NULL,\nPRIMARY KEY (`address_type_code`)\n)\nCREATE TABLE Ref_Detention_Type (\n`detention_type_code` CHAR(10) NOT NULL,\n`detention_type_description` VARCHAR(80),\nPRIMARY KEY (`detention_type_code`)\n)\nCREATE TABLE Student_Events (\n`event_id` INTEGER NOT NULL,\n`event_type_code` CHAR(10) NOT NULL,\n`student_id` INTEGER NOT NULL,\n`event_date` DATETIME(3),\n`other_details` VARCHAR(255) NOT NULL,\nPRIMARY KEY (`event_id`),\nFOREIGN KEY (student_id) REFERENCES Students (student_id),\nFOREIGN KEY (event_type_code) REFERENCES Ref_Event_Types (event_type_code)\n)\nCREATE TABLE Teachers (\n`teacher_id` INTEGER NOT NULL,\n`teacher_details` VARCHAR(255),\nPRIMARY KEY (`teacher_id`)\n)\nCREATE TABLE Student_Loans (\n`student_loan_id` INTEGER NOT NULL,\n`student_id` INTEGER NOT NULL,\n`date_of_loan` DATETIME(3),\n`amount_of_loan` DECIMAL(15,4),\n`other_details` VARCHAR(255),\nPRIMARY KEY (`student_loan_id`),\nFOREIGN KEY (student_id) REFERENCES Students (student_id)\n)\nCREATE TABLE Classes (\n`class_id` INTEGER NOT NULL,\n`student_id` INTEGER NOT NULL,\n`teacher_id` INTEGER NOT NULL,\n`class_details` VARCHAR(255) NOT NULL,\nPRIMARY KEY (`class_id`),\nFOREIGN KEY (student_id) REFERENCES Students (student_id),\nFOREIGN KEY (teacher_id) REFERENCES Teachers (teacher_id)\n)\nCREATE TABLE Students_Addresses (\n`student_address_id` INTEGER NOT NULL,\n`address_id` INTEGER NOT NULL,\n`address_type_code` CHAR(10) NOT NULL,\n`student_id` INTEGER NOT NULL,\n`date_from` DATETIME(3),\n`date_to` DATETIME(3),\nPRIMARY KEY (`student_address_id`),\nFOREIGN KEY (student_id) REFERENCES Students (student_id),\nFOREIGN KEY (address_id) REFERENCES Addresses (address_id),\nFOREIGN KEY (address_type_code) REFERENCES Ref_Address_Types (address_type_code)\n)\nCREATE TABLE Detention (\n`detention_id` INTEGER NOT NULL,\n`detention_type_code` CHAR(10) NOT NULL,\n`student_id` INTEGER NOT NULL,\n`datetime_detention_start` DATETIME(3),\n`datetime_detention_end` DATETIME(3),\n`detention_summary` VARCHAR(255),\n`other_details` VARCHAR(255),\nPRIMARY KEY (`detention_id`),\nFOREIGN KEY (student_id) REFERENCES Students (student_id),\nFOREIGN KEY (detention_type_code) REFERENCES Ref_Detention_Type (detention_type_code)\n)\nCREATE TABLE Achievements (\n`achievement_id` INTEGER NOT NULL,\n`achievement_type_code` CHAR(15) NOT NULL,\n`student_id` INTEGER NOT NULL,\n`date_achievement` DATETIME(3),\n`achievement_details` VARCHAR(255),\n`other_details` VARCHAR(255),\nPRIMARY KEY (`achievement_id`),\nFOREIGN KEY (student_id) REFERENCES Students (student_id),\nFOREIGN KEY (achievement_type_code) REFERENCES Ref_Achievement_Type (achievement_type_code)\n)', |
|
|
'question': 'What are the type code, details, and date of each achievement?' |
|
|
} |
|
|
|
|
|
system_prompt = ( |
|
|
"You are an SQL query generator that converts natural language inputs into SQL queries, " |
|
|
"strictly adhering to the provided database schema. Your task is to:\n\n" |
|
|
"Carefully analyze the natural language input to understand its requirements. " |
|
|
"Generate a SQL query that retrieves the correct results based only on the database schema provided.\n\n" |
|
|
"Rules:\n\n" |
|
|
"Do NOT use any tables, columns, or data outside the provided database schema.\n" |
|
|
"Ensure the SQL query is syntactically correct and efficient.\n" |
|
|
"If the input involves ambiguity or missing details, " |
|
|
"assume the most common or straightforward interpretation.\n\n" |
|
|
"Respond with only the SQL query, formatted cleanly and consistently. " |
|
|
"Do not include explanations, comments, or extra text." |
|
|
) |
|
|
|
|
|
user_prompt = ( |
|
|
"Database schema: {schema}\n\n" |
|
|
"Natural language input: {user_query}\n\n" |
|
|
"Answer: " |
|
|
) |
|
|
|
|
|
messages = [ |
|
|
{"role": "system", "content": system_prompt}, |
|
|
{"role": "user", "content": user_prompt.format(schema=test_data['schema'], user_query=test_data['question'])} |
|
|
] |
|
|
|
|
|
text = tokenizer.apply_chat_template( |
|
|
messages, |
|
|
tokenize=False, |
|
|
add_generation_prompt=True |
|
|
) |
|
|
|
|
|
model_inputs = tokenizer([text], return_tensors="pt").to(model.device) |
|
|
|
|
|
generated_ids = model.generate( |
|
|
**model_inputs, |
|
|
max_new_tokens=512 |
|
|
) |
|
|
|
|
|
generated_ids = [ |
|
|
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids) |
|
|
] |
|
|
|
|
|
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] |
|
|
``` |