| { |
| "architectures": [ |
| "Qwen2ForCausalLM" |
| ], |
| "attention_dropout": 0.0, |
| "dtype": "float16", |
| "eos_token_id": 151645, |
| "hidden_act": "silu", |
| "hidden_size": 3584, |
| "initializer_range": 0.02, |
| "intermediate_size": 18944, |
| "layer_types": [ |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention" |
| ], |
| "max_position_embeddings": 32768, |
| "max_window_layers": 28, |
| "model_type": "qwen2", |
| "num_attention_heads": 28, |
| "num_hidden_layers": 28, |
| "num_key_value_heads": 4, |
| "pad_token_id": 151643, |
| "quantization_config": { |
| "backend": "auto", |
| "batch_size": 1, |
| "bits": 8, |
| "block_name_to_quantize": null, |
| "cache_block_outputs": true, |
| "checkpoint_format": "gptq", |
| "damp_percent": 0.1, |
| "dataset": [ |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nList all organizations in the 'Healthcare' industry located in 'Germany'.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nCount the total number of 'Active' resources currently running in the 'us-east-1' region.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nRetrieve the names of all users who have a 'permission_level' of 5.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nFind the top 5 most expensive resources based on their hourly_cost.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nList all organizations that have 'Overdue' invoices.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nCalculate the total data_egress_gb for the entire month of February 2026.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nDisplay each organization name and the total number of users they have assigned to the 'Admin' role.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nFind the average cpu_utilization_avg for all 'Compute' resources over the last 7 days.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nIdentify organizations that have at least one resource of every resource_type (Compute, Storage, and Database).\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nCalculate the Effective Tax Rate for each organization (Tax Amount / Total Amount) for the '2026-01' period.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nList all resources that have never recorded any usage in the usage_metrics table.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nFind the total billing amount per industry for the first quarter of 2026.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nList resources where the cpu_utilization_avg was consistently below 10% (0.1) for the last 30 days.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nFind organizations that have 'Active' resources but have not been generated an invoice for the current billing period.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nIdentify the Top User for each organization (the user who logged in most recently).\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nFor each region, find which resource_type accounts for the highest total hourly_cost.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nList 'Startup' tier organizations whose total resource spend in a single month exceeded $5,000.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nCalculate the percentage of organizations created in 2025 that still have 'Active' resources in 2026.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nFind resources whose data_egress_gb on any given day was more than 3 standard deviations above the average egress for that specific resource type.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n", |
| "<|im_start|>system\nYou are a data science expert. Below, you are provided with a database schema and a natural language question. Your task is to understand the schema and generate a valid SQL query to answer the question.<|im_end|>\n<|im_start|>user\nDatabase Engine:\nSQLite\n\nDatabase Schema:\n```sql\nCREATE TABLE organizations (\n org_id INTEGER PRIMARY KEY,\n org_name TEXT NOT NULL,\n industry TEXT, -- 'Finance', 'Healthcare', 'Tech'\n hq_country TEXT,\n tier TEXT, -- 'Startup', 'Mid-Market', 'Fortune 500'\n created_at TIMESTAMP\n);\n\nCREATE TABLE cloud_resources (\n resource_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n resource_type TEXT, -- 'Compute', 'Storage', 'Database'\n region TEXT, -- 'us-east-1', 'eu-west-1', 'ap-southeast-1'\n status TEXT, -- 'Active', 'Provisioning', 'Terminated'\n hourly_cost REAL,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE usage_metrics (\n metric_id INTEGER PRIMARY KEY,\n resource_id INTEGER,\n usage_date DATE,\n cpu_utilization_avg REAL, -- 0 to 1.0\n memory_usage_gb REAL,\n data_egress_gb REAL,\n FOREIGN KEY (resource_id) REFERENCES cloud_resources(resource_id)\n);\n\nCREATE TABLE invoices (\n invoice_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n billing_period TEXT, -- '2026-01'\n total_amount REAL,\n tax_amount REAL,\n payment_status TEXT, -- 'Paid', 'Pending', 'Overdue'\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE users (\n user_id INTEGER PRIMARY KEY,\n org_id INTEGER,\n full_name TEXT,\n role_id INTEGER,\n last_login TIMESTAMP,\n FOREIGN KEY (org_id) REFERENCES organizations(org_id)\n);\n\nCREATE TABLE roles (\n role_id INTEGER PRIMARY KEY,\n role_name TEXT, -- 'Admin', 'Developer', 'Billing_Reader'\n permission_level INTEGER -- 1 (Low) to 5 (Critical)\n);\n```\n\nQuestion:\nCreate a monthly summary for each organization showing: Total Spend, Peak CPU utilization, and the Year-over-Year growth in data usage using the formula (Current_Usage - Previous_Usage) / Previous_Usage.\n\nInstructions:\n- Make sure you only output the information that is asked in the question.\n- If the question asks for a specific column, make sure to only include that column in the SELECT clause, nothing more.\n- The generated query should return all of the information asked in the question without any missing or extra information.\n- Before generating the final SQL query, please think through the steps of how to write the query.\n\nOutput Format:\nIn your answer, please enclose the generated SQL query in a code block:\n```sql\n-- Your SQL query\n```<|im_end|>\n<|im_start|>assistant\n" |
| ], |
| "desc_act": false, |
| "exllama_config": { |
| "version": 1 |
| }, |
| "group_size": 128, |
| "max_input_length": null, |
| "meta": { |
| "quantizer": [ |
| "optimum:2.1.0", |
| "gptqmodel:5.7.0" |
| ] |
| }, |
| "model_seqlen": null, |
| "module_name_preceding_first_block": null, |
| "modules_in_block_to_quantize": null, |
| "pad_token_id": null, |
| "quant_method": "gptq", |
| "sym": true, |
| "tokenizer": null, |
| "true_sequential": true, |
| "use_cuda_fp16": false, |
| "use_exllama": true |
| }, |
| "rms_norm_eps": 1e-06, |
| "rope_scaling": null, |
| "rope_theta": 1000000.0, |
| "sliding_window": null, |
| "tie_word_embeddings": false, |
| "transformers_version": "4.57.6", |
| "use_cache": true, |
| "use_sliding_window": false, |
| "vocab_size": 152064 |
| } |
|
|