# app.py — Gradio Space demo for the OptimAI LoRA adapter.
# (Hub metadata: created by billwang37, commit ee569f0, verified.)
import gradio as gr
import torch
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# Hub IDs: the frozen base model and the LoRA adapter fine-tuned on top of it.
BASE = "Qwen/Qwen2.5-Math-7B"
ADAPTER = "billwang37/WWang-Lab-OptimAI"
print("Loading base model...")
# Load the base model in bfloat16 on CPU; it is moved to "cuda" inside solve(),
# so a ZeroGPU Space only holds a GPU while a request is being served.
base = AutoModelForCausalLM.from_pretrained(
    BASE, torch_dtype=torch.bfloat16, trust_remote_code=True
)
print("Applying LoRA adapter...")
# Wrap the base model with the fine-tuned LoRA weights.
model = PeftModel.from_pretrained(base, ADAPTER)
model.eval()
# Tokenizer comes from the adapter repo — presumably so any tokens added during
# fine-tuning match; TODO confirm it differs from the base tokenizer.
tokenizer = AutoTokenizer.from_pretrained(ADAPTER, trust_remote_code=True)
# generate() needs a pad token id; fall back to EOS when none is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
@spaces.GPU(duration=120)
def solve(prompt, max_new_tokens=1024):
    """Run greedy decoding on the fine-tuned model for an OR problem.

    Args:
        prompt: The optimization problem statement entered by the user.
        max_new_tokens: Upper bound on generated tokens. Cast to int because
            the Gradio slider may deliver a float.

    Returns:
        The model's completion with the prompt stripped, or a short hint
        when the input is empty/whitespace.
    """
    if not prompt.strip():
        return "Please enter an optimization problem."
    # ZeroGPU attaches a GPU only for the duration of this decorated call.
    model.to("cuda")
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to("cuda")
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            do_sample=False,
            pad_token_id=tokenizer.pad_token_id,
        )
    # Strip the prompt at the TOKEN level rather than decoding the full
    # sequence and slicing off len(decoded_prompt) characters: tokenizer
    # decode(encode(text)) is not guaranteed to round-trip byte-for-byte
    # (normalization, special handling), so a character-based slice can clip
    # the start of the answer or leak the tail of the prompt.
    prompt_len = inputs.input_ids.shape[1]
    new_tokens = out[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
# Preset [prompt, max_new_tokens] pairs surfaced through gr.Examples below;
# the second element seeds the "Max new tokens" slider.
examples = [
    ["A factory produces A and B. A needs 2h labor, 3 units material, profit $5. B needs 1h labor, 2 units material, profit $4. Available: 100h labor, 200 units material. Maximize profit. Give the optimal x_A, x_B, and total profit.", 1024],
    ["Shortest path from A to D in graph: A-B (cost 3), A-C (cost 1), B-D (cost 2), C-D (cost 5), B-C (cost 1). Find the cost and path.", 512],
    ["When would you use Lagrangian relaxation instead of Benders decomposition?", 1024],
    ["Integer program: max 5x+4y s.t. 6x+4y<=24, x+2y<=6, x,y in {0,1,2,3,4,5,6}. Find optimal integer solution.", 1024],
]
# Assemble the UI: problem text + token budget on the left, model output on
# the right, with clickable preset examples underneath.
with gr.Blocks(title="OptimAI Demo") as demo:
    gr.Markdown("# OptimAI: Operations Research Assistant")
    gr.Markdown(
        "A Qwen2.5-Math-7B fine-tune (SFT + DPO) for OR problems: LP, IP, "
        "shortest path, queueing, KKT, duality, etc. Try an example or paste your own."
    )
    with gr.Row():
        with gr.Column(scale=2):
            problem_box = gr.Textbox(label="Problem", lines=8, placeholder="Maximize 3x + 5y subject to ...")
            token_slider = gr.Slider(128, 2048, value=1024, step=64, label="Max new tokens")
            solve_btn = gr.Button("Solve", variant="primary")
        with gr.Column(scale=3):
            answer_box = gr.Textbox(label="Model output", lines=20)
    # Wire the button to the GPU-backed inference function.
    solve_btn.click(solve, inputs=[problem_box, token_slider], outputs=answer_box)
    gr.Examples(examples=examples, inputs=[problem_box, token_slider])
    gr.Markdown(
        "**Model:** [billwang37/WWang-Lab-OptimAI](https://huggingface.co/billwang37/WWang-Lab-OptimAI) \n"
        "**Note:** First call takes ~30-60s (cold start). Subsequent calls ~10-20s."
    )
# Start the Gradio server when executed directly.
if __name__ == "__main__":
    demo.launch()