# app.py
"""Gradio demo that feeds a text description to microsoft/CADFusion."""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_ID = "microsoft/CADFusion"

# Single source of truth for the default generation budget (function default
# and slider initial value).
DEFAULT_MAX_NEW_TOKENS = 256


def load_model():
    """Load the tokenizer and model for ``MODEL_ID``.

    The model is first loaded with 4-bit NF4 quantization (bitsandbytes).
    If that fails for any reason, it is loaded again in fp16 instead.

    Returns:
        A ``(tokenizer, model)`` tuple; the model has been put in eval mode.
    """
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

    print("Trying to load model in 4-bit (bitsandbytes)...")
    try:
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.float16,
        )
        # SECURITY NOTE: trust_remote_code=True lets the model repository
        # execute its own Python code locally. Keep MODEL_ID pointed at a
        # trusted repository.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            quantization_config=bnb_config,
            device_map="auto",
            trust_remote_code=True,
        )
        print("Loaded in 4-bit")
    except Exception as e:
        # Deliberately broad: any failure on the quantized path (missing
        # bitsandbytes, no compatible GPU, ...) should trigger the fallback
        # rather than abort startup. The error is reported, not swallowed.
        print("4-bit load failed:", e)
        print("Falling back to fp16 (may require larger GPU RAM)...")
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            device_map="auto",
            torch_dtype=torch.float16,
            trust_remote_code=True,
        )

    model.eval()
    return tokenizer, model


# Loaded once at import time so every request reuses the same weights.
tokenizer, model = load_model()


def generate(prompt, max_new_tokens=DEFAULT_MAX_NEW_TOKENS):
    """Generate text for a CAD description using greedy decoding.

    Args:
        prompt: Text description of the CAD model. ``None``, empty, or
            whitespace-only input returns a hint message instead of
            running the model.
        max_new_tokens: Upper bound on newly generated tokens. Any value
            accepted by ``int()`` works (the UI slider may send a float);
            invalid values fall back to the default and the result is
            clamped to at least 1.

    Returns:
        The decoded first output sequence with special tokens removed.
        NOTE(review): for a causal LM the output sequence normally starts
        with the prompt tokens, so the text is expected to echo the
        prompt - confirm this is the desired display.
    """
    if not prompt or not prompt.strip():
        return "Please provide a text description of the CAD model."

    try:
        token_budget = int(max_new_tokens)
    except (TypeError, ValueError):
        token_budget = DEFAULT_MAX_NEW_TOKENS
    # A budget below 1 is rejected by generation; clamp instead of crashing.
    token_budget = max(1, token_budget)

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=token_budget,
            do_sample=False,
        )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


with gr.Blocks() as demo:
    gr.Markdown("# CADFusion demo (microsoft/CADFusion)\nEnter a design description and hit Generate.")
    with gr.Row():
        prompt = gr.Textbox(lines=5, placeholder="e.g. 'a coffee mug with cylindrical body and curved handle'")
        # step=1 keeps the slider on whole token counts.
        tokens = gr.Slider(64, 1024, value=DEFAULT_MAX_NEW_TOKENS, step=1, label="max_new_tokens")
    out = gr.Textbox(lines=20)
    btn = gr.Button("Generate")
    btn.click(fn=generate, inputs=[prompt, tokens], outputs=out)


if __name__ == "__main__":
    demo.launch()