Update app.py
app.py CHANGED
@@ -2,53 +2,61 @@ import gradio as gr
 import torch
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

-# Set up device
+# Set up device: use GPU if available, else CPU.
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

-# Load the fine-tuned model and tokenizer
+# Load the fine-tuned model and tokenizer.
 model_name = "aarohanverma/text2sql-flan-t5-base-qlora-finetuned"
 model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device)
 tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")

+# For CPU inference, convert the model to FP32 for better compatibility.
+if device.type == "cpu":
+    model = model.float()
+
+# Optionally compile the model for speed improvements (requires PyTorch 2.0+).
+try:
+    model = torch.compile(model)
+except Exception as e:
+    print("torch.compile optimization failed:", e)
+
 def generate_sql(context: str, query: str) -> str:
     """
     Generates a SQL query given the provided context and natural language query.
     Constructs a prompt from the inputs, then performs deterministic generation
+    using beam search with repetition handling.
     """
     prompt = f"""Context:
 {context}

 Query:
 {query}

 Response:
 """
-    # Tokenize the prompt and move to device
+    # Tokenize the prompt with truncation and max length; move to device.
     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(device)

-    # Ensure
+    # Ensure the decoder start token is set.
     if model.config.decoder_start_token_id is None:
         model.config.decoder_start_token_id = tokenizer.pad_token_id

-    # Generate
+    # Generate SQL output with no_grad to optimize CPU usage.
+    with torch.no_grad():
+        generated_ids = model.generate(
+            input_ids=inputs["input_ids"],
+            decoder_start_token_id=model.config.decoder_start_token_id,
+            max_new_tokens=100,
+            temperature=0.0,  # Deterministic output
+            num_beams=5,
+            repetition_penalty=1.2,
+            early_stopping=True,
+        )

-    # Decode and clean the generated SQL statement
+    # Decode and clean the generated SQL statement.
     generated_sql = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
-    generated_sql = generated_sql.split(";")[0] + ";"
+    generated_sql = generated_sql.split(";")[0].strip() + ";"  # Keep only the first valid SQL query

     return generated_sql

-# Create Gradio interface with two input boxes: one for context and one for query
+# Create Gradio interface with two input boxes: one for context and one for query.
 iface = gr.Interface(
     fn=generate_sql,
     inputs=[
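
For a quick sanity check outside the Gradio UI, the updated generate_sql can be called directly once the script above has loaded the model. The schema and question below are illustrative assumptions, not part of the commit:

# Hypothetical example inputs for a local smoke test (not from the commit).
example_context = "CREATE TABLE employees (id INTEGER, name TEXT, department TEXT, salary REAL);"
example_query = "List the names of employees in the Sales department."

sql = generate_sql(example_context, example_query)
print(sql)  # Expect a single SQL statement ending in a semicolon.

Because generation uses beam search with sampling disabled, repeated calls with the same context and query should return the same statement.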