# app.py
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Model identifier handed to every `from_pretrained` call in this file
# (tokenizer and both model-loading attempts).
MODEL_ID = "microsoft/CADFusion"

def load_model():
    """Load the tokenizer and model for ``MODEL_ID``.

    The model is first requested as a 4-bit NF4 bitsandbytes quantization
    (double quantization, fp16 compute dtype). If that attempt raises for
    any reason, the error is printed and the model is loaded in plain fp16
    instead. Both attempts use ``device_map="auto"`` and
    ``trust_remote_code=True``.

    Returns:
        A ``(tokenizer, model)`` tuple; ``model.eval()`` has been called on
        the model before it is returned.
    """
    print("Loading tokenizer...")
    tok = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

    # Arguments common to the quantized attempt and the fp16 fallback.
    # NOTE(review): trust_remote_code=True lets the model repository supply
    # its own Python code — confirm the repo is trusted.
    common_kwargs = {"device_map": "auto", "trust_remote_code": True}

    print("Trying to load model in 4-bit (bitsandbytes)...")
    try:
        quant_cfg = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.float16,
        )
        net = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            quantization_config=quant_cfg,
            **common_kwargs,
        )
        print("Loaded in 4-bit")
    except Exception as err:
        # Deliberate catch-all: any failure of the quantized path (config
        # construction or loading) triggers the unquantized fallback.
        print("4-bit load failed:", err)
        print("Falling back to fp16 (may require larger GPU RAM)...")
        net = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float16,
            **common_kwargs,
        )

    net.eval()
    return tok, net

# Load once when the module is executed; `generate` reads these module-level
# globals on every call instead of reloading the model.
tokenizer, model = load_model()

def generate(prompt, max_new_tokens=256):
    """Run deterministic (non-sampling) generation for a text description.

    Args:
        prompt: Text description of the CAD model. ``None`` or a string that
            is empty after stripping whitespace short-circuits with a help
            message and never touches the model.
        max_new_tokens: Upper bound on newly generated tokens; coerced with
            ``int()`` before being handed to ``model.generate``.

    Returns:
        The first returned sequence decoded with special tokens skipped, or
        the help message when no usable prompt was given.
    """
    if prompt is None or not prompt.strip():
        return "Please provide a text description of the CAD model."

    # Tokenize to PyTorch tensors and move them to the model's device.
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Inference only: no gradients needed.
    with torch.no_grad():
        sequences = model.generate(
            **encoded,
            max_new_tokens=int(max_new_tokens),
            do_sample=False,
        )

    # NOTE(review): for decoder-only models the returned sequence usually
    # begins with the prompt tokens, so the decoded text likely echoes the
    # prompt — confirm this is intended.
    return tokenizer.decode(sequences[0], skip_special_tokens=True)

# Build the UI: a prompt box and a token-budget slider side by side, an output
# box below them, and a button that feeds both inputs to `generate`.
with gr.Blocks() as demo:
    gr.Markdown("# CADFusion demo (microsoft/CADFusion)\nEnter a design description and hit Generate.")
    with gr.Row():
        # Explicit labels so the input and output boxes are distinguishable
        # instead of both showing the component's generic default label.
        prompt = gr.Textbox(
            label="Design description",
            lines=5,
            placeholder="e.g. 'a coffee mug with cylindrical body and curved handle'",
        )
        # step=1 keeps the slider on whole numbers; `generate` still coerces
        # the value with int() before use.
        tokens = gr.Slider(64, 1024, value=256, step=1, label="max_new_tokens")
    out = gr.Textbox(label="Generated output", lines=20)
    btn = gr.Button("Generate")
    btn.click(fn=generate, inputs=[prompt, tokens], outputs=out)

# Only start the server when this file is run as a script.
if __name__ == "__main__":
    demo.launch()