# OptimAI demo — Gradio app for a Hugging Face Space.
| import gradio as gr | |
| import torch | |
| import spaces | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from peft import PeftModel | |
# Hub identifiers: the frozen base model and the fine-tuned LoRA adapter.
BASE = "Qwen/Qwen2.5-Math-7B"
ADAPTER = "billwang37/WWang-Lab-OptimAI"

# Load the base weights in bf16, then graft the adapter on top.
print("Loading base model...")
base = AutoModelForCausalLM.from_pretrained(
    BASE, torch_dtype=torch.bfloat16, trust_remote_code=True
)

print("Applying LoRA adapter...")
model = PeftModel.from_pretrained(base, ADAPTER)
model.eval()

# Tokenizer ships with the adapter repo; fall back to EOS as the pad token
# (Qwen tokenizers define no pad token by default).
tokenizer = AutoTokenizer.from_pretrained(ADAPTER, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
@spaces.GPU  # required on ZeroGPU Spaces: CUDA only exists inside decorated calls
def solve(prompt, max_new_tokens=1024):
    """Run greedy decoding on an optimization-problem prompt.

    Args:
        prompt: The problem statement typed or pasted by the user.
        max_new_tokens: Upper bound on generated tokens (coerced to int
            because Gradio sliders may deliver floats).

    Returns:
        The model's completion with the prompt removed, or a short hint
        string when the input is empty/whitespace.
    """
    if not prompt.strip():
        return "Please enter an optimization problem."
    model.to("cuda")
    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=2048
    ).to("cuda")
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            do_sample=False,
            pad_token_id=tokenizer.pad_token_id,
        )
    # Strip the prompt at the TOKEN level. Slicing the decoded string by
    # len(decoded_prompt) is fragile: decode() may normalize whitespace or
    # drop special tokens, shifting the cut point into the answer.
    generated = out[0][inputs.input_ids.shape[1]:]
    return tokenizer.decode(generated, skip_special_tokens=True).strip()
# Canned prompts for the gr.Examples widget: (problem text, max_new_tokens).
examples = [
    # Linear program: production planning with two resource constraints.
    ["A factory produces A and B. A needs 2h labor, 3 units material, profit $5. B needs 1h labor, 2 units material, profit $4. Available: 100h labor, 200 units material. Maximize profit. Give the optimal x_A, x_B, and total profit.", 1024],
    # Shortest path on a small weighted graph.
    ["Shortest path from A to D in graph: A-B (cost 3), A-C (cost 1), B-D (cost 2), C-D (cost 5), B-C (cost 1). Find the cost and path.", 512],
    # Conceptual OR methodology question.
    ["When would you use Lagrangian relaxation instead of Benders decomposition?", 1024],
    # Small bounded integer program.
    ["Integer program: max 5x+4y s.t. 6x+4y<=24, x+2y<=6, x,y in {0,1,2,3,4,5,6}. Find optimal integer solution.", 1024],
]
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks(title="OptimAI Demo") as demo:
    gr.Markdown("# OptimAI: Operations Research Assistant")
    gr.Markdown(
        "A Qwen2.5-Math-7B fine-tune (SFT + DPO) for OR problems: LP, IP, "
        "shortest path, queueing, KKT, duality, etc. Try an example or paste your own."
    )

    # Two-column layout: inputs on the left, model output on the right.
    with gr.Row():
        with gr.Column(scale=2):
            problem_box = gr.Textbox(
                label="Problem",
                lines=8,
                placeholder="Maximize 3x + 5y subject to ...",
            )
            token_slider = gr.Slider(
                128, 2048, value=1024, step=64, label="Max new tokens"
            )
            solve_btn = gr.Button("Solve", variant="primary")
        with gr.Column(scale=3):
            answer_box = gr.Textbox(label="Model output", lines=20)

    # Wire the button to the inference function defined above.
    solve_btn.click(solve, inputs=[problem_box, token_slider], outputs=answer_box)
    gr.Examples(examples=examples, inputs=[problem_box, token_slider])

    gr.Markdown(
        "**Model:** [billwang37/WWang-Lab-OptimAI](https://huggingface.co/billwang37/WWang-Lab-OptimAI) \n"
        "**Note:** First call takes ~30-60s (cold start). Subsequent calls ~10-20s."
    )

if __name__ == "__main__":
    demo.launch()