# OptimAI demo — Gradio app for a Hugging Face Space.
| import gradio as gr | |
| import torch | |
| import spaces | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from peft import PeftModel | |
# Hub identifiers: the frozen base model and the fine-tuned LoRA adapter.
BASE = "Qwen/Qwen2.5-Math-7B"
ADAPTER = "billwang37/WWang-Lab-OptimAI"

# Load the base weights in bf16, then graft the adapter on top.
print("Loading base model...")
base = AutoModelForCausalLM.from_pretrained(
    BASE, torch_dtype=torch.bfloat16, trust_remote_code=True
)

print("Applying LoRA adapter...")
model = PeftModel.from_pretrained(base, ADAPTER)
model.eval()

# Tokenizer ships with the adapter repo; fall back to EOS as the pad token
# (Qwen tokenizers define no pad token by default).
tokenizer = AutoTokenizer.from_pretrained(ADAPTER, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
@spaces.GPU  # required on ZeroGPU Spaces: CUDA only exists inside decorated calls
def solve(prompt, max_new_tokens=1024):
    """Run greedy decoding on an optimization-problem prompt.

    Args:
        prompt: The problem statement typed or pasted by the user.
        max_new_tokens: Upper bound on generated tokens (coerced to int
            because Gradio sliders may deliver floats).

    Returns:
        The model's completion with the prompt removed, or a short hint
        string when the input is empty/whitespace.
    """
    if not prompt.strip():
        return "Please enter an optimization problem."
    model.to("cuda")
    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=2048
    ).to("cuda")
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            do_sample=False,
            pad_token_id=tokenizer.pad_token_id,
        )
    # Strip the prompt at the TOKEN level. Slicing the decoded string by
    # len(decoded_prompt) is fragile: decode() may normalize whitespace or
    # drop special tokens, shifting the cut point into the answer.
    generated = out[0][inputs.input_ids.shape[1]:]
    return tokenizer.decode(generated, skip_special_tokens=True).strip()
# Canned prompts for the gr.Examples widget: (problem text, max_new_tokens).
examples = [
    # Linear program: production planning with two resource constraints.
    ["A factory produces A and B. A needs 2h labor, 3 units material, profit $5. B needs 1h labor, 2 units material, profit $4. Available: 100h labor, 200 units material. Maximize profit. Give the optimal x_A, x_B, and total profit.", 1024],
    # Shortest path on a small weighted graph.
    ["Shortest path from A to D in graph: A-B (cost 3), A-C (cost 1), B-D (cost 2), C-D (cost 5), B-C (cost 1). Find the cost and path.", 512],
    # Conceptual OR methodology question.
    ["When would you use Lagrangian relaxation instead of Benders decomposition?", 1024],
    # Small bounded integer program.
    ["Integer program: max 5x+4y s.t. 6x+4y<=24, x+2y<=6, x,y in {0,1,2,3,4,5,6}. Find optimal integer solution.", 1024],
]
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks(title="OptimAI Demo") as demo:
    gr.Markdown("# OptimAI: Operations Research Assistant")
    gr.Markdown(
        "A Qwen2.5-Math-7B fine-tune (SFT + DPO) for OR problems: LP, IP, "
        "shortest path, queueing, KKT, duality, etc. Try an example or paste your own."
    )

    # Two-column layout: inputs on the left, model output on the right.
    with gr.Row():
        with gr.Column(scale=2):
            problem_box = gr.Textbox(
                label="Problem",
                lines=8,
                placeholder="Maximize 3x + 5y subject to ...",
            )
            token_slider = gr.Slider(
                128, 2048, value=1024, step=64, label="Max new tokens"
            )
            solve_btn = gr.Button("Solve", variant="primary")
        with gr.Column(scale=3):
            answer_box = gr.Textbox(label="Model output", lines=20)

    # Wire the button to the inference function defined above.
    solve_btn.click(solve, inputs=[problem_box, token_slider], outputs=answer_box)
    gr.Examples(examples=examples, inputs=[problem_box, token_slider])

    gr.Markdown(
        "**Model:** [billwang37/WWang-Lab-OptimAI](https://huggingface.co/billwang37/WWang-Lab-OptimAI) \n"
        "**Note:** First call takes ~30-60s (cold start). Subsequent calls ~10-20s."
    )

if __name__ == "__main__":
    demo.launch()