# Source: Hugging Face Space by nvhuynh16 — "Update app.py", commit 26a4c95 (verified)
"""
Gradio demo for Gemma Code Generator.
Loads the fine-tuned model directly using PEFT.
Includes production monitoring and logging.
"""
import ast
import json
import os
import time
from datetime import datetime
from typing import Optional

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
# Model configuration: frozen base checkpoint plus the fine-tuned LoRA adapter.
BASE_MODEL = "google/gemma-2-2b-it"
ADAPTER_MODEL = "nvhuynh16/gemma-2b-code-alpaca-best"
# Optional HF access token (gated models); None falls back to anonymous access.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
# Global variables for lazy loading — populated by load_model() on first request.
tokenizer = None
model = None
# Production monitoring: JSONL request log, toggled via the ENABLE_LOGGING env var
# (any value other than "true", case-insensitive, disables logging).
ENABLE_LOGGING = os.environ.get("ENABLE_LOGGING", "true").lower() == "true"
LOG_FILE = "production_logs.jsonl"
def check_syntax(code: str) -> bool:
"""Check if generated code has valid Python syntax"""
try:
ast.parse(code)
return True
except:
return False
def log_request(
    instruction: str,
    generated_code: str,
    tokens_generated: int,
    latency: float,
    error: Optional[str] = None,
) -> None:
    """Append one JSON-lines record of a request for production monitoring.

    Args:
        instruction: The user's raw prompt (only its length is stored —
            no personal content is persisted).
        generated_code: The text returned to the user.
        tokens_generated: Number of new tokens the model produced.
        latency: End-to-end request time in seconds.
        error: Error/blocked marker for failed requests, or None on success.
            (Annotated ``Optional[str]`` — the original ``str`` annotation
            contradicted the ``None`` default.)

    No-op when ENABLE_LOGGING is false; never raises.
    """
    if not ENABLE_LOGGING:
        return
    log_entry = {
        "timestamp": datetime.now().isoformat(),
        "instruction_length": len(instruction),
        "response_length": len(generated_code),
        "tokens_generated": tokens_generated,
        "latency_seconds": round(latency, 2),
        # Blocked/errored/empty responses are counted as syntax errors so the
        # monitoring dashboard treats them as failed generations.
        "has_syntax_error": not check_syntax(generated_code) if generated_code and not error else True,
        "error": error,
    }
    try:
        with open(LOG_FILE, "a", encoding="utf-8") as f:
            f.write(json.dumps(log_entry) + "\n")
    except OSError as e:
        # Best-effort logging: a full disk or read-only FS (common on Spaces)
        # must never break the request path. OSError covers open/write failures.
        print(f"Logging failed: {e}")
# Safety filters - Layer 1: Input Validation
DANGEROUS_KEYWORDS = [
    "delete all files", "rm -rf", "shutil.rmtree",
    "sql injection", "drop table", "truncate table",
    "keylogger", "backdoor", "exploit",
    "hack into", "steal password", "crack password",
    "ddos", "denial of service", "fork bomb",
    "malware", "ransomware", "trojan"
]
def validate_input(instruction: str) -> tuple:
    """
    Validate input for dangerous keywords.
    Returns: (is_valid: bool, error_message: str)
    """
    lowered = instruction.lower()
    # Find the first blocked keyword, if any (case-insensitive substring match).
    hit = next((kw for kw in DANGEROUS_KEYWORDS if kw in lowered), None)
    if hit is None:
        return True, ""
    return False, f"⚠️ Safety Filter: Request blocked. Your instruction contains potentially unsafe content related to '{hit}'.\n\nPlease rephrase your request to focus on legitimate programming tasks."
# Safety filters - Layer 2: Output Filtering
DANGEROUS_PATTERNS = [
    ("os.remove", "file deletion"),
    ("shutil.rmtree", "directory deletion"),
    ("os.unlink", "file deletion"),
    ("DROP TABLE", "database destruction"),
    ("TRUNCATE TABLE", "database destruction"),
    ("DELETE FROM", "database deletion"),
    ("eval(", "arbitrary code execution"),
    ("exec(", "arbitrary code execution"),
    ("__import__", "dynamic imports"),
    ("os.system", "system command execution"),
    ("subprocess.call", "system command execution"),
    ("subprocess.run", "system command execution"),
]
def filter_dangerous_code(code: str) -> str:
    """
    Filter dangerous code patterns from output.
    Returns: filtered code or safety warning
    """
    lowered = code.lower()
    for pattern, reason in DANGEROUS_PATTERNS:
        # Case-insensitive substring scan; skip patterns that don't appear.
        if pattern.lower() not in lowered:
            continue
        # First match wins: replace the entire output with a comment-only warning.
        return f"""# ⚠️ SAFETY FILTER ACTIVATED
#
# Code generation blocked: Potentially dangerous pattern detected ({reason})
# Pattern: {pattern}
#
# This is a safety feature to prevent generating code that could:
# - Delete files or data
# - Execute arbitrary system commands
# - Compromise system security
#
# Please rephrase your request with safer requirements.
# For educational purposes, consult official documentation or security resources.
"""
    return code
def load_model():
    """Lazily load the tokenizer and fine-tuned model on first request.

    Returns:
        (tokenizer, model): cached in module globals after the first call,
        so subsequent requests skip the expensive load entirely.
    """
    global tokenizer, model
    if model is None:
        print("Loading model for the first time...")
        # Tokenizer comes from the base model; the LoRA adapter doesn't change it.
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, token=HF_TOKEN)
        # 4-bit quantization via BitsAndBytesConfig: the bare `load_in_4bit=True`
        # kwarg is deprecated (and removed in recent transformers releases).
        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
        )
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL,
            device_map="auto",
            torch_dtype=torch.float16,
            quantization_config=quant_config,
            token=HF_TOKEN,
        )
        # Attach the LoRA adapter weights on top of the quantized base model.
        model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL, token=HF_TOKEN)
        model.eval()
        print("Model loaded successfully!")
    return tokenizer, model
def generate_code(instruction: str, max_tokens: int = 256, temperature: float = 0.7):
    """Generate code for *instruction* with monitoring and safety filters.

    Applies input validation (Layer 1) before inference and output
    filtering (Layer 2) after; every non-trivial request is logged,
    including blocked and errored ones.
    """
    start_time = time.time()
    if not instruction.strip():
        return "Please enter an instruction."

    # Layer 1: refuse obviously dangerous requests before touching the model.
    is_valid, validation_error = validate_input(instruction)
    if not is_valid:
        log_request(
            instruction, validation_error, 0,
            time.time() - start_time, "BLOCKED_BY_SAFETY_FILTER",
        )
        return validation_error

    generated_code, tokens_generated, error = "", 0, None
    try:
        # Model and tokenizer are cached after the first call.
        tok, mdl = load_model()
        # Alpaca-style prompt matching the fine-tuning format.
        prompt = f"""### Instruction:
{instruction}
### Input:
### Response:
"""
        inputs = tok(prompt, return_tensors="pt").to(mdl.device)
        prompt_len = len(inputs.input_ids[0])
        with torch.no_grad():
            outputs = mdl.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tok.eos_token_id,
            )
        tokens_generated = len(outputs[0]) - prompt_len
        decoded = tok.decode(outputs[0], skip_special_tokens=True)
        # Keep only the model's answer; fall back to the full text when the
        # response marker is absent.
        marker = "### Response:"
        if marker in decoded:
            generated_code = decoded.split(marker)[-1].strip()
        else:
            generated_code = decoded.strip()
        # Layer 2: scrub dangerous patterns from the model output.
        generated_code = filter_dangerous_code(generated_code)
    except Exception as e:
        error = str(e)
        generated_code = f"Error: {error}\n\nPlease try again."
    finally:
        # Always log, even when generation failed.
        log_request(
            instruction, generated_code, tokens_generated,
            time.time() - start_time, error,
        )
    return generated_code
# Custom CSS for better appearance (applied to the Blocks container below).
custom_css = """
.container {
max-width: 900px;
margin: auto;
}
.output-code {
font-family: 'Courier New', monospace;
font-size: 14px;
}
"""
# Create Gradio interface: two-column layout (inputs left, code output right),
# followed by click-to-use examples and a performance/details footer.
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
    # Header: model summary, cold-start note, and safety-feature disclosure.
    gr.Markdown(
        """
# 🤖 Gemma Code Generator
Fine-tuned Gemma-2B model for multi-language code generation using QLoRA.
**Performance**: 76% syntax correctness | **BLEU Score: 16.83** (+53% improvement over baseline 11.00)
**Note**: First request may take 1-2 minutes as the model loads on HuggingFace servers. Subsequent requests are instant!
---
### 🛡️ Safety Features
This demo includes production-grade safety filters:
- **Input Validation**: Blocks requests with potentially dangerous keywords
- **Output Filtering**: Prevents generation of code that could delete files, execute arbitrary commands, or compromise security
- **Production Monitoring**: All requests are logged for quality tracking (privacy-respecting, no personal data stored)
⚠️ **AI-Generated Code Disclaimer**: Always review generated code before use. AI models can make mistakes.
"""
    )
    with gr.Row():
        # Left column: instruction input, sampling controls, and the trigger button.
        with gr.Column(scale=1):
            instruction_input = gr.Textbox(
                label="Code Instruction",
                placeholder="Describe the function you want to create...",
                lines=3,
            )
            # Advanced sampling knobs, collapsed by default.
            with gr.Accordion("Advanced Settings", open=False):
                max_tokens_slider = gr.Slider(
                    minimum=64,
                    maximum=512,
                    value=256,
                    step=64,
                    label="Max Tokens",
                    info="Maximum length of generated code"
                )
                temperature_slider = gr.Slider(
                    minimum=0.1,
                    maximum=1.5,
                    value=0.7,
                    step=0.1,
                    label="Temperature",
                    info="Higher = more creative, Lower = more deterministic"
                )
            generate_btn = gr.Button("Generate Code", variant="primary", size="lg")
        # Right column: syntax-highlighted output panel.
        with gr.Column(scale=1):
            output_code = gr.Code(
                label="Generated Code",
                language="python",
                elem_classes="output-code"
            )
    # Examples: clicking one fills the instruction textbox.
    gr.Examples(
        examples=[
            ["Write a function to check if a number is prime"],
            ["Create a function to reverse a string"],
            ["Write a function to find the factorial of a number"],
            ["Implement binary search on a sorted list"],
            ["Create a function to merge two sorted lists"],
            ["Write a function to calculate Fibonacci numbers"],
            ["Implement a function to find the longest common subsequence"],
            ["Create a function to validate an email address using regex"],
            ["Write a function to convert a decimal number to binary"],
            ["Implement a simple LRU cache using OrderedDict"],
        ],
        inputs=[instruction_input],
        label="Example Prompts (Click to use)"
    )
    # Event handler: wire the button to generate_code with both sliders as args.
    generate_btn.click(
        fn=generate_code,
        inputs=[instruction_input, max_tokens_slider, temperature_slider],
        outputs=[output_code],
    )
    # Model information footer: metrics table, training details, and links.
    gr.Markdown(
        """
---
### 📊 Model Performance
| Metric | Baseline (Pretrained) | Fine-Tuned (Actual) | Improvement |
|--------|----------------------|---------------------|-------------|
| **BLEU Score** | 11.00 | **16.83** | **+53%** ✅ |
| **Syntax Correctness** | 81% | 76% | -5% |
| **Trainable Parameters** | 2.5B | 3.2M (0.12%) | 100x fewer |
*Evaluated on 100 multi-language test samples (72 Python, 28 other languages)*
### 🛠️ Technical Details
- **Base Model**: google/gemma-2-2b-it (2.5B parameters)
- **Fine-tuning**: QLoRA (4-bit quantization + LoRA rank 16)
- **Dataset**: CodeAlpaca-20k (18,000 training examples)
- **Checkpoint**: Step 2000 (~1.8 epochs, selected for best BLEU score)
- **Training**: 10-15 hours on free Google Colab T4 GPU
- **Cost**: $0 (free Colab + free HF Spaces hosting)
### 🔗 Links
[Model on HuggingFace](https://huggingface.co/nvhuynh16/gemma-2b-code-alpaca-best) •
[GitHub Repository](https://github.com/nvhuynh16/Gemma-Code-Fine-Tuning) •
[Portfolio](https://portfolio-nvhuynh.vercel.app) •
[Base Model](https://huggingface.co/google/gemma-2-2b-it)
---
**Built for portfolio demonstration** • Targeting AI/ML Applied Scientist roles
*This demo uses HuggingFace Inference API for serverless, cost-free inference*
"""
    )
# Script entry point: launch the Gradio server when run directly.
if __name__ == "__main__":
    demo.launch()