Update README.md
Browse files
README.md
CHANGED
|
@@ -96,37 +96,91 @@ model, tokenizer = FastLanguageModel.from_pretrained(
|
|
| 96 |
FastLanguageModel.for_inference(model) # Enable faster inference
|
| 97 |
```
|
| 98 |
|
| 99 |
-
###
|
| 100 |
|
| 101 |
```python
|
| 102 |
-
def
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.
|
| 105 |
|
| 106 |
### Schema:
|
| 107 |
-
{}
|
| 108 |
|
| 109 |
### Question:
|
| 110 |
-
{}<end_of_turn>
|
| 111 |
<start_of_turn>model
|
| 112 |
"""
|
| 113 |
|
| 114 |
-
|
| 115 |
inputs = tokenizer([input_prompt], return_tensors="pt").to("cuda")
|
| 116 |
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
result = tokenizer.batch_decode(outputs)[0]
|
|
|
|
| 119 |
|
| 120 |
-
|
| 121 |
-
sql_result = result.split("<start_of_turn>model")[-1].replace("<end_of_turn>", "").strip()
|
| 122 |
-
return sql_result
|
| 123 |
```
|
| 124 |
|
| 125 |
-
### Example
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
```python
|
| 128 |
# E-commerce Database Schema
|
| 129 |
-
|
| 130 |
CREATE TABLE users (
|
| 131 |
user_id INT PRIMARY KEY,
|
| 132 |
username TEXT,
|
|
@@ -157,15 +211,14 @@ CREATE TABLE order_items (
|
|
| 157 |
);
|
| 158 |
"""
|
| 159 |
|
| 160 |
-
# Complex Question
|
| 161 |
-
|
| 162 |
List the usernames and emails of users who have spent more than $500 in total on products
|
| 163 |
in the 'Electronics' category.
|
| 164 |
"""
|
| 165 |
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
print(sql_query)
|
| 169 |
```
|
| 170 |
|
| 171 |
**Expected Output:**
|
|
@@ -180,6 +233,23 @@ GROUP BY u.user_id, u.username, u.email
|
|
| 180 |
HAVING SUM(oi.quantity * p.price) > 500;
|
| 181 |
```
|
| 182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
## Training Details
|
| 184 |
|
| 185 |
### Dataset
|
|
|
|
| 96 |
FastLanguageModel.for_inference(model) # Enable faster inference
|
| 97 |
```
|
| 98 |
|
| 99 |
+
### Inference Function
|
| 100 |
|
| 101 |
```python
|
| 102 |
+
def inference_text_to_sql(model, tokenizer, schema, question, max_new_tokens=300):
    """
    Generate a SQL query from a natural-language question and a database schema.

    Args:
        model: Fine-tuned Gemma model (already switched to inference mode).
        tokenizer: Tokenizer matching the model.
        schema: Database schema (e.g. CREATE TABLE statements) as a string.
        question: Natural-language question about the database.
        max_new_tokens: Maximum number of tokens to generate (default 300).

    Returns:
        The generated SQL query as a stripped string.
    """
    # Format the input prompt with the exact Gemma chat markers the model was
    # fine-tuned on; do not alter the <start_of_turn>/<end_of_turn> tokens.
    input_prompt = f"""<start_of_turn>user
You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

### Schema:
{schema}

### Question:
{question}<end_of_turn>
<start_of_turn>model
"""

    # Tokenize and move tensors to the model's own device instead of
    # hard-coding "cuda", so the example also runs on CPU-only machines.
    inputs = tokenizer([input_prompt], return_tensors="pt").to(model.device)

    # Generate without tracking gradients — this is inference only.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            use_cache=True,
            do_sample=True,
            temperature=0.1,  # low temperature keeps output near-deterministic
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode the full sequence, keep only the model's turn, and strip the
    # end-of-turn marker plus surrounding whitespace.
    result = tokenizer.batch_decode(outputs)[0]
    sql_query = result.split("<start_of_turn>model")[-1].replace("<end_of_turn>", "").strip()

    return sql_query
|
|
|
|
|
|
|
| 148 |
```
|
| 149 |
|
| 150 |
+
### Example Usage
|
| 151 |
+
|
| 152 |
+
#### Example 1: Simple Single-Table Query
|
| 153 |
+
|
| 154 |
+
```python
|
| 155 |
+
# Simple employee database
|
| 156 |
+
simple_schema = """
|
| 157 |
+
CREATE TABLE employees (
|
| 158 |
+
employee_id INT PRIMARY KEY,
|
| 159 |
+
name TEXT,
|
| 160 |
+
department TEXT,
|
| 161 |
+
salary DECIMAL,
|
| 162 |
+
hire_date DATE
|
| 163 |
+
);
|
| 164 |
+
"""
|
| 165 |
+
|
| 166 |
+
simple_question = "Find all employees in the 'Engineering' department with salary greater than 75000"
|
| 167 |
+
|
| 168 |
+
sql_result = inference_text_to_sql(model, tokenizer, simple_schema, simple_question)
|
| 169 |
+
print(f"Generated SQL:\n{sql_result}")
|
| 170 |
+
```
|
| 171 |
+
|
| 172 |
+
**Expected Output:**
|
| 173 |
+
```sql
|
| 174 |
+
SELECT * FROM employees
|
| 175 |
+
WHERE department = 'Engineering'
|
| 176 |
+
AND salary > 75000;
|
| 177 |
+
```
|
| 178 |
+
|
| 179 |
+
#### Example 2: Multi-Table JOIN Query
|
| 180 |
|
| 181 |
```python
|
| 182 |
# E-commerce Database Schema
|
| 183 |
+
complex_schema = """
|
| 184 |
CREATE TABLE users (
|
| 185 |
user_id INT PRIMARY KEY,
|
| 186 |
username TEXT,
|
|
|
|
| 211 |
);
|
| 212 |
"""
|
| 213 |
|
| 214 |
+
# Complex Question requiring 4-table JOIN
|
| 215 |
+
complex_question = """
|
| 216 |
List the usernames and emails of users who have spent more than $500 in total on products
|
| 217 |
in the 'Electronics' category.
|
| 218 |
"""
|
| 219 |
|
| 220 |
+
sql_result = inference_text_to_sql(model, tokenizer, complex_schema, complex_question)
|
| 221 |
+
print(f"Generated SQL:\n{sql_result}")
|
|
|
|
| 222 |
```
|
| 223 |
|
| 224 |
**Expected Output:**
|
|
|
|
| 233 |
HAVING SUM(oi.quantity * p.price) > 500;
|
| 234 |
```
|
| 235 |
|
| 236 |
+
#### Example 3: Aggregation with GROUP BY
|
| 237 |
+
|
| 238 |
+
```python
|
| 239 |
+
agg_question = "Find the average salary by department for departments with more than 5 employees"
|
| 240 |
+
|
| 241 |
+
sql_result = inference_text_to_sql(model, tokenizer, simple_schema, agg_question)
|
| 242 |
+
print(f"Generated SQL:\n{sql_result}")
|
| 243 |
+
```
|
| 244 |
+
|
| 245 |
+
**Expected Output:**
|
| 246 |
+
```sql
|
| 247 |
+
SELECT department, AVG(salary) AS avg_salary
|
| 248 |
+
FROM employees
|
| 249 |
+
GROUP BY department
|
| 250 |
+
HAVING COUNT(*) > 5;
|
| 251 |
+
```
|
| 252 |
+
|
| 253 |
## Training Details
|
| 254 |
|
| 255 |
### Dataset
|