# Uploaded to HuggingFace Spaces by CaptainNimo (commit 616e144, verified)
"""
Nimo's Personal Coder Agent - HuggingFace Spaces Demo
A fine-tuned LLM for code generation, deployed on HuggingFace Spaces.
"""
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
# Configuration
# Hub repo holding the fine-tuned LoRA adapter weights.
MODEL_ID = "CaptainNimo/nimos-coder-agent-v2"
# Base model the adapter was trained on; tokenizer is loaded from here too.
BASE_MODEL_ID = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
# Global variables for model and tokenizer
# Populated once by load_model() at startup; generate_code() reads them and
# returns a "please wait" message while model is still None.
model = None
tokenizer = None
def load_model():
    """Load the base model plus the fine-tuned LoRA adapter into the globals.

    Populates the module-level ``model`` and ``tokenizer`` and also returns
    them as a ``(model, tokenizer)`` pair. Downloads weights from the Hub on
    first call.
    """
    global model, tokenizer

    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
    # The tokenizer ships without a dedicated pad token; reuse EOS for padding.
    tokenizer.pad_token = tokenizer.eos_token

    print("Loading base model...")
    # Prefer a 4-bit NF4 quantized load on GPU; otherwise fall back to
    # full-precision CPU (slower but works on free Spaces hardware).
    if torch.cuda.is_available():
        load_kwargs = {
            "quantization_config": BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
            ),
            "device_map": "auto",
        }
    else:
        load_kwargs = {
            "torch_dtype": torch.float32,
            "device_map": "cpu",
        }
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_ID,
        trust_remote_code=True,
        **load_kwargs,
    )

    print("Loading fine-tuned adapter...")
    # Wrap the base model with the LoRA adapter weights and switch to eval mode.
    model = PeftModel.from_pretrained(base_model, MODEL_ID)
    model.eval()

    print("Model loaded successfully!")
    return model, tokenizer
def generate_code(instruction: str, context: str = "", max_tokens: int = 256, temperature: float = 0.7):
    """Generate code for *instruction*, optionally conditioned on *context*.

    Args:
        instruction: Natural-language description of the code to produce.
        context: Optional existing code (inserted as the "### Input:" section
            for debugging/refactoring tasks).
        max_tokens: Maximum number of NEW tokens to generate. Gradio sliders
            deliver floats, so the value is coerced to int before generation.
        temperature: Sampling temperature (used with nucleus sampling, top_p=0.9).

    Returns:
        The model's response text, or a human-readable status message when the
        model is still loading or the instruction is empty.
    """
    global model, tokenizer
    if model is None:
        return "Model is loading, please wait..."
    if not instruction.strip():
        return "Please enter an instruction."
    # Build an Alpaca-style prompt; the exact section markers must match the
    # fine-tuning format, so they are kept verbatim.
    if context.strip():
        prompt = f"""### Instruction:
{instruction}
### Input:
{context}
### Response:
"""
    else:
        prompt = f"""### Instruction:
{instruction}
### Response:
"""
    # Generate
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            # Sliders return floats; generate() requires an int here.
            max_new_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Extract the text after the FIRST response marker (the one from our
    # prompt). The previous [-1] split grabbed the LAST marker, so a
    # hallucinated follow-up turn replaced the real answer.
    if "### Response:" in response:
        response = response.split("### Response:", 1)[1].strip()
    # The model sometimes continues with an invented next turn; cut it off.
    if "### Instruction:" in response:
        response = response.split("### Instruction:", 1)[0].strip()
    return response
# Example prompts
EXAMPLES = [
["Write a Python function to check if a number is prime", ""],
["Create a JavaScript function to debounce API calls", ""],
["Write a SQL query to find the top 5 customers by sales", ""],
["Fix the bug in this code", "def factorial(n):\n return n * factorial(n-1)"],
["Add error handling to this function", "def divide(a, b):\n return a / b"],
]
# Load model at startup
# NOTE: this blocks app startup until weights are downloaded and loaded;
# generate_code() returns a "please wait" message only if called before this
# completes (e.g. if loading is moved to a background thread later).
print("Initializing Nimo's Coder Agent...")
load_model()

# Create interface
# Component creation order inside Row/Column blocks determines layout.
with gr.Blocks(title="Nimo's Coder Agent", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# Nimo's Personal Coder Agent
A fine-tuned LLM for code generation, debugging, and code review.
**Model**: Qwen2.5-Coder-0.5B + QLoRA fine-tuned on CodeAlpaca-20k
[GitHub](https://github.com/CaptainNimo/nimos-personal-coder-agent) |
[Model](https://huggingface.co/CaptainNimo/nimos-coder-agent-v2)
"""
    )
    with gr.Row():
        # Left column: user inputs and generation controls.
        with gr.Column():
            instruction = gr.Textbox(
                label="What code do you need?",
                placeholder="e.g., Write a Python function to sort a list...",
                lines=2
            )
            context = gr.Textbox(
                label="Context/Existing Code (optional)",
                placeholder="Paste code here for debugging or refactoring...",
                lines=4
            )
            with gr.Row():
                # Slider values arrive as floats in the click handler.
                max_tokens = gr.Slider(64, 512, value=256, step=32, label="Max Length")
                temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Creativity")
            btn = gr.Button("Generate Code", variant="primary")
        # Right column: generated output (syntax-highlighted as Python).
        with gr.Column():
            output = gr.Code(label="Generated Code", language="python", lines=15)
    # Clicking an example fills the two textboxes; it does not auto-generate.
    gr.Examples(examples=EXAMPLES, inputs=[instruction, context])
    # Input order must match generate_code's parameter order.
    btn.click(generate_code, inputs=[instruction, context, max_tokens, temperature], outputs=output)
    gr.Markdown("---\n*Fine-tuned by Nimo using QLoRA on free Google Colab GPU*")

demo.launch()