"""Gradio demo for OptimAI: a Qwen2.5-Math-7B LoRA fine-tune for OR problems."""

import gradio as gr
import spaces
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE = "Qwen/Qwen2.5-Math-7B"
ADAPTER = "billwang37/WWang-Lab-OptimAI"

# Load on CPU at import time; the @spaces.GPU decorator moves work to GPU per call.
print("Loading base model...")
base = AutoModelForCausalLM.from_pretrained(
    BASE,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
print("Applying LoRA adapter...")
model = PeftModel.from_pretrained(base, ADAPTER)
model.eval()

tokenizer = AutoTokenizer.from_pretrained(ADAPTER, trust_remote_code=True)
if tokenizer.pad_token is None:
    # Qwen tokenizers may ship without a pad token; generate() needs one.
    tokenizer.pad_token = tokenizer.eos_token


@spaces.GPU(duration=120)
def solve(prompt, max_new_tokens=1024):
    """Generate a greedy completion for an optimization problem statement.

    Args:
        prompt: Problem text entered by the user.
        max_new_tokens: Generation budget. Cast to int because the Gradio
            slider may deliver a float.

    Returns:
        The model's completion with the prompt tokens stripped, or a short
        usage hint when the prompt is blank.
    """
    if not prompt.strip():
        return "Please enter an optimization problem."
    model.to("cuda")
    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=2048
    ).to("cuda")
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            do_sample=False,  # deterministic greedy decoding
            pad_token_id=tokenizer.pad_token_id,
        )
    # Strip the prompt at the *token* level rather than by comparing decoded
    # strings: decode(encode(prompt)) is not guaranteed to round-trip
    # byte-for-byte, so length-based string slicing can cut the completion
    # at the wrong offset.
    prompt_len = inputs.input_ids.shape[1]
    return tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True).strip()


examples = [
    ["A factory produces A and B. A needs 2h labor, 3 units material, profit $5. B needs 1h labor, 2 units material, profit $4. Available: 100h labor, 200 units material. Maximize profit. Give the optimal x_A, x_B, and total profit.", 1024],
    ["Shortest path from A to D in graph: A-B (cost 3), A-C (cost 1), B-D (cost 2), C-D (cost 5), B-C (cost 1). Find the cost and path.", 512],
    ["When would you use Lagrangian relaxation instead of Benders decomposition?", 1024],
    ["Integer program: max 5x+4y s.t. 6x+4y<=24, x+2y<=6, x,y in {0,1,2,3,4,5,6}. Find optimal integer solution.", 1024],
]

with gr.Blocks(title="OptimAI Demo") as demo:
    gr.Markdown("# OptimAI: Operations Research Assistant")
    gr.Markdown(
        "A Qwen2.5-Math-7B fine-tune (SFT + DPO) for OR problems: LP, IP, "
        "shortest path, queueing, KKT, duality, etc. "
        "Try an example or paste your own."
    )
    with gr.Row():
        with gr.Column(scale=2):
            prompt = gr.Textbox(
                label="Problem",
                lines=8,
                placeholder="Maximize 3x + 5y subject to ...",
            )
            tokens = gr.Slider(128, 2048, value=1024, step=64, label="Max new tokens")
            btn = gr.Button("Solve", variant="primary")
        with gr.Column(scale=3):
            output = gr.Textbox(label="Model output", lines=20)
    btn.click(solve, inputs=[prompt, tokens], outputs=output)
    gr.Examples(examples=examples, inputs=[prompt, tokens])
    gr.Markdown(
        "**Model:** [billwang37/WWang-Lab-OptimAI](https://huggingface.co/billwang37/WWang-Lab-OptimAI) \n"
        "**Note:** First call takes ~30-60s (cold start). Subsequent calls ~10-20s."
    )

if __name__ == "__main__":
    demo.launch()