Spaces:

SALEETAI
/

coding-agent-demo

Runtime error

Update app.py

db57385 verified about 1 month ago

1.21 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from peft import PeftModel

	# 1. Load the base model and the trained adapter.
	# Both ids are passed to `from_pretrained` calls below.
	model_id = "Qwen/Qwen2.5-Coder-7B-Instruct" # Base model
	adapter_id = "SALEETAI/coding-agent-qwen-sft" # Your trained adapters

	print("Loading model... this may take a few minutes on CPU.")
	# The tokenizer is loaded from the base model id, not from the adapter id.
	tokenizer = AutoTokenizer.from_pretrained(model_id)

	# Load the base model on CPU with float32 weights.
	# NOTE(review): no 8-bit / quantization option is passed here, so the
	# weights are requested in full float32 precision; a 7B-parameter model
	# at 4 bytes per parameter is presumably around 28 GB of RAM -- confirm
	# the host has enough memory.
	base_model = AutoModelForCausalLM.from_pretrained(
	model_id,
	torch_dtype=torch.float32,
	device_map="cpu"
	)

	# Attach the trained SFT adapter to the base model. No explicit merge
	# call is made here; `model` is whatever PeftModel.from_pretrained returns.
	model = PeftModel.from_pretrained(base_model, adapter_id)
	print("Model Loaded!")

	def chat(message, history):
	    """Generate a reply to ``message`` using the module-level model.

	    Args:
	        message: The latest user message from the chat UI; it is tokenized
	            as-is and used as the whole prompt.
	        history: Earlier turns supplied by ``gr.ChatInterface``. Accepted
	            for interface compatibility; it is not used in the prompt.

	    Returns:
	        The newly generated text (at most 200 new tokens), with special
	        tokens removed and surrounding whitespace stripped.
	    """
	    # Prepare the prompt
	    inputs = tokenizer(message, return_tensors="pt").to("cpu")
	    prompt_length = inputs["input_ids"].shape[-1]

	    # Generate
	    with torch.no_grad():
	        outputs = model.generate(**inputs, max_new_tokens=200)

	    # For a causal LM the returned sequence starts with the prompt tokens.
	    # Drop them by position instead of string-replacing `message` in the
	    # decoded text: replace() would also delete every later occurrence of
	    # the user's text inside the answer, and would remove nothing if the
	    # decoded prompt did not match `message` exactly.
	    new_tokens = outputs[0][prompt_length:]
	    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

	# Chat front end: Gradio calls `chat` for each submitted message.
	demo = gr.ChatInterface(
	    fn=chat,
	    title="Coding Agent (CPU Mode)",
	)

	# Launch the UI only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()