import gradio as gr from transformers import AutoTokenizer, AutoModelForCausalLM import torch # Model configurations MODELS = { "BM1_CS1_Syn (33M)": "withmartian/sql_interp_bm1_cs1_experiment_1.10", "BM1_CS2_Syn (33M)": "withmartian/sql_interp_bm1_cs2_experiment_2.10", "BM1_CS3_Syn (33M)": "withmartian/sql_interp_bm1_cs3_experiment_3.10", "BM1_CS4_Syn (33M)": "withmartian/sql_interp_bm1_cs4_dataset_synonyms_experiment_1.1", "BM1_CS5_Syn (33M)": "withmartian/sql_interp_bm1_cs5_dataset_synonyms_experiment_1.2", "BM2_CS1_Syn (0.5B)": "withmartian/sql_interp_bm2_cs1_experiment_4.3", "BM2_CS2_Syn (0.5B)": "withmartian/sql_interp_bm2_cs2_experiment_5.3", "BM2_CS3_Syn (0.5B)": "withmartian/sql_interp_bm2_cs3_experiment_6.3", "BM3_CS1_Syn (1B)": "withmartian/sql_interp_bm3_cs1_experiment_7.3", "BM3_CS2_Syn (1B)": "withmartian/sql_interp_bm3_cs2_experiment_8.3", "BM3_CS3_Syn (1B)": "withmartian/sql_interp_bm3_cs3_experiment_9.3", } model_cache = {} def load_model(model_name): if model_name not in model_cache: model_id = MODELS[model_name] tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained( model_id, torch_dtype=torch.float16, device_map="auto" ) model_cache[model_name] = (tokenizer, model) return model_cache[model_name] def generate_sql(model_name, instruction, schema, max_length=256, temperature=0.7): if not model_name or not instruction or not schema: return "โ ๏ธ Please fill in all fields and select a model" try: tokenizer, model = load_model(model_name) prompt = f"""### Instruction: {instruction} ### Context: {schema} ### Response:""" inputs = tokenizer(prompt, return_tensors="pt").to(model.device) outputs = model.generate( **inputs, max_length=max_length, temperature=temperature, do_sample=temperature > 0, pad_token_id=tokenizer.eos_token_id ) generated = tokenizer.decode(outputs[0], skip_special_tokens=True) if "### Response:" in generated: sql = generated.split("### Response:")[-1].strip() else: sql = generated.strip() return sql except Exception as e: return f"โ Error: {str(e)}" # Example queries examples = [ [ "BM1_CS1_Syn (33M)", "Show me the name and salary from employees", "CREATE TABLE employees (name VARCHAR(100), salary INT, department VARCHAR(100))" ], [ "BM2_CS2_Syn (0.5B)", "List worker earnings from highest to lowest", "CREATE TABLE employees (name VARCHAR(100), salary INT, department VARCHAR(100))" ], [ "BM3_CS3_Syn (1B)", "Count how many employees in each department", "CREATE TABLE employees (name VARCHAR(100), salary INT, department VARCHAR(100))" ], ] # Custom CSS for beautiful styling custom_css = """ .gradio-container { font-family: 'Inter', sans-serif; } .header-section { text-align: center; padding: 2rem 0; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 12px; margin-bottom: 2rem; color: white; } .logo-container { display: flex; justify-content: center; align-items: center; gap: 1rem; margin-bottom: 1rem; } .martian-badge { background: rgba(255, 255, 255, 0.2); padding: 0.5rem 1rem; border-radius: 20px; font-size: 0.9rem; backdrop-filter: blur(10px); } .info-box { background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); border-radius: 12px; padding: 1.5rem; margin: 1rem 0; border-left: 4px solid #667eea; } .citation-box { background: #f8f9fa; border: 1px solid #dee2e6; border-radius: 8px; padding: 1.5rem; margin: 2rem 0; font-family: 'Monaco', 'Courier New', monospace; font-size: 0.85rem; } .citation-header { font-weight: bold; color: #495057; margin-bottom: 0.5rem; display: flex; align-items: center; gap: 0.5rem; } .resource-links { display: flex; gap: 1rem; justify-content: center; margin: 1.5rem 0; flex-wrap: wrap; } .resource-link { background: white; padding: 0.75rem 1.5rem; border-radius: 8px; text-decoration: none; color: #667eea; border: 2px solid #667eea; font-weight: 500; transition: all 0.3s ease; } .resource-link:hover { background: #667eea; color: white; } footer { text-align: center; padding: 2rem 0; color: #6c757d; border-top: 1px solid #dee2e6; margin-top: 3rem; } """ # Create Gradio interface with gr.Blocks(css=custom_css, title="TinySQL Demo | Martian", theme=gr.themes.Soft()) as demo: # Header with Martian branding gr.HTML("""
Transform natural language into SQL queries using mechanistically interpretable models
@misc{harrasse2025tinysqlprogressivetexttosqldataset,
title={TinySQL: A Progressive Text-to-SQL Dataset for Mechanistic Interpretability Research},
author={Abir Harrasse and Philip Quirke and Clement Neo and Dhruv Nathawani and Luke Marks and Amir Abdullah},
year={2025},
eprint={2503.12730},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2503.12730}
}