Apple
Add CADFusion Gradio demo
c02daf6
raw
history blame
2.14 kB
# app.py
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
# Model identifier handed to the tokenizer and model `from_pretrained` calls
# in `load_model`.
MODEL_ID = "microsoft/CADFusion"
def load_model():
    """Load the tokenizer and causal-LM weights for ``MODEL_ID``.

    A 4-bit NF4 quantized load is attempted first. If that attempt raises
    for any reason, the error is printed and the model is loaded again in
    float16 without quantization.

    Returns:
        A ``(tokenizer, model)`` tuple; ``model.eval()`` has been called on
        the model before it is returned.
    """
    print("Loading tokenizer...")
    tok = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

    # Options common to the quantized attempt and the fp16 fallback.
    # NOTE(review): trust_remote_code=True lets the model repository run its
    # own Python code at load time -- confirm this is intended.
    shared_kwargs = {"device_map": "auto", "trust_remote_code": True}

    print("Trying to load model in 4-bit (bitsandbytes)...")
    try:
        quant_cfg = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.float16,
        )
        net = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            quantization_config=quant_cfg,
            **shared_kwargs,
        )
        print("Loaded in 4-bit")
    except Exception as err:
        # Deliberate best-effort: any failure of the quantized path is
        # reported and the unquantized half-precision load is tried instead.
        print("4-bit load failed:", err)
        print("Falling back to fp16 (may require larger GPU RAM)...")
        net = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float16,
            **shared_kwargs,
        )

    net.eval()
    return tok, net
# Loaded once when the module is imported; `generate` reads these
# module-level globals on every call.
tokenizer, model = load_model()
def generate(prompt, max_new_tokens=256):
    """Run greedy generation for a text description and return the output.

    Args:
        prompt: Text description of the CAD model. ``None``, empty, or
            whitespace-only input is answered with a hint message instead
            of being sent to the model.
        max_new_tokens: Upper bound on newly generated tokens; coerced with
            ``int`` before being passed to ``model.generate``.

    Returns:
        The decoded continuation produced by the model (the prompt itself
        is not repeated), or the hint message when no prompt was given.
    """
    if prompt is None or not prompt.strip():
        return "Please provide a text description of the CAD model."

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            do_sample=False,
        )

    # A causal LM returns the prompt tokens followed by the continuation.
    # Drop the prompt so the output shows only the newly generated text
    # rather than echoing the user's description back.
    completion_ids = out[0][prompt_len:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)
# Build the demo page: a heading, a row with the prompt box and the token
# budget slider, an output box, and a button that calls `generate`.
# NOTE(review): the original indentation was lost; the Row is assumed to hold
# only the prompt box and the slider -- confirm against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown(
        "# CADFusion demo (microsoft/CADFusion)\n"
        "Enter a design description and hit Generate."
    )

    with gr.Row():
        prompt = gr.Textbox(
            lines=5,
            placeholder="e.g. 'a coffee mug with cylindrical body and curved handle'",
        )
        tokens = gr.Slider(
            minimum=64,
            maximum=1024,
            value=256,
            label="max_new_tokens",
        )

    out = gr.Textbox(lines=20)
    btn = gr.Button("Generate")

    # Clicking the button sends the prompt text and slider value to
    # `generate` and writes its return value into the output box.
    btn.click(generate, inputs=[prompt, tokens], outputs=out)

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()