import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM

# Select device: GPU if available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load tokenizer and base model from the Hub, then attach the local LoRA adapter.
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0").to(device)
model = PeftModel.from_pretrained(model, "LoRA_model")
model.eval()  # inference only: disable dropout and other train-time behavior


def generate_sql(prompt: str) -> str:
    """Generate a single SQL statement from a natural-language prompt.

    Tokenizes *prompt*, samples a continuation from the LoRA-adapted model,
    and returns only the first generated SQL statement, terminated with ';'.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():  # no gradients needed at inference time
        outputs = model.generate(
            **inputs,
            max_new_tokens=64,  # keep generation short for responsiveness
            # Plain nucleus sampling. The original combined do_sample=True with
            # num_beams=5 and early_stopping=True, which selects slow
            # beam-multinomial sampling (and warns that early_stopping only
            # applies to beam search) — contradicting the speed intent.
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,  # silence missing-pad-token warning
        )
    # Decode only the newly generated token ids. Slicing the decoded string by
    # len(prompt) (as the original did) is fragile: the re-decoded prompt is not
    # guaranteed to be byte-identical to the input, so slicing can cut mid-token.
    prompt_len = inputs["input_ids"].shape[1]
    generated = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    # Keep only the first SQL statement and normalize the trailing ';'.
    return generated.strip().split(";", 1)[0].strip() + ";"


# Gradio UI
interface = gr.Interface(
    fn=generate_sql,
    inputs=gr.Textbox(lines=3, placeholder="Enter instruction, e.g. 'Show all users with age > 30' or 'Show all users where gender is female.'"),
    outputs="text",
    title="SQL Generator",
    description="Type a natural language prompt and get a SQL query generated by the fine-tuned TinyLlama model.",
    theme="default",
)

if __name__ == "__main__":
    interface.launch(share=True)