import gc
import json
import os
import threading
import time

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# ---------------------------------------------------------------------------
# Model registry
# ---------------------------------------------------------------------------
MODELS = {
    "ISO27001-Expert-1.5B": {
        "base": "Qwen/Qwen2.5-1.5B-Instruct",
        "adapter": "AYI-NEDJIMI/ISO27001-Expert-1.5B",
        "default_prompt": (
            "You are ISO 27001 Expert, a specialized AI assistant for "
            "ISO/IEC 27001 information security management systems. "
            "You help organizations understand, implement, and maintain "
            "ISO 27001 certification, including risk assessment, controls "
            "from Annex A, Statement of Applicability, and audit preparation."
        ),
    },
    "RGPD-Expert-1.5B": {
        "base": "Qwen/Qwen2.5-1.5B-Instruct",
        "adapter": "AYI-NEDJIMI/RGPD-Expert-1.5B",
        "default_prompt": (
            "You are RGPD Expert, a specialized AI assistant for GDPR/RGPD "
            "data protection regulations. You help organizations understand "
            "their obligations under the General Data Protection Regulation, "
            "including data subject rights, Data Protection Impact Assessments, "
            "lawful bases for processing, and breach notification procedures."
        ),
    },
    "CyberSec-Assistant-3B": {
        "base": "Qwen/Qwen2.5-3B-Instruct",
        "adapter": "AYI-NEDJIMI/CyberSec-Assistant-3B",
        "default_prompt": (
            "You are CyberSec Assistant, an expert AI specialized in "
            "cybersecurity, compliance (GDPR, NIS2, DORA, AI Act, ISO 27001), "
            "penetration testing, SOC operations, and AI security."
        ),
    },
}

# ---------------------------------------------------------------------------
# Global model state
# ---------------------------------------------------------------------------
_lock = threading.Lock()
_loaded_model_name = None
_tokenizer = None
_model = None


def load_model(model_name: str):
    """Load or switch to a different model."""
    global _loaded_model_name, _tokenizer, _model

    with _lock:
        if _loaded_model_name == model_name and _model is not None:
            return  # Already loaded

        # Unload the previous model and reclaim memory
        if _model is not None:
            del _model
            del _tokenizer
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        # Load the new base model, then attach its LoRA adapter
        cfg = MODELS[model_name]
        hf_token = os.getenv("HF_TOKEN")

        _tokenizer = AutoTokenizer.from_pretrained(
            cfg["base"],
            trust_remote_code=True,
            token=hf_token,
        )
        base = AutoModelForCausalLM.from_pretrained(
            cfg["base"],
            torch_dtype=torch.float32,
            device_map="cpu",
            trust_remote_code=True,
            token=hf_token,
        )
        _model = PeftModel.from_pretrained(
            base,
            cfg["adapter"],
            torch_dtype=torch.float32,
            token=hf_token,
        )
        _model.eval()
        _loaded_model_name = model_name
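

# Optional alternative, not used by the app above: a minimal sketch of
# pre-merging the LoRA adapter into the base weights with peft's
# merge_and_unload(). A merged model drops the adapter indirection from every
# forward pass, which can help on CPU-only Spaces. `load_merged_model` is a
# hypothetical helper name, not part of the original code.
def load_merged_model(model_name: str):
    cfg = MODELS[model_name]
    base = AutoModelForCausalLM.from_pretrained(
        cfg["base"],
        torch_dtype=torch.float32,
        device_map="cpu",
        token=os.getenv("HF_TOKEN"),
    )
    # merge_and_unload() folds the adapter deltas into the base weights and
    # returns a plain transformers model
    merged = PeftModel.from_pretrained(base, cfg["adapter"]).merge_and_unload()
    merged.eval()
    return merged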


def generate_single(
    model_name: str,
    system_prompt: str,
    user_prompt: str,
    temperature: float,
    top_p: float,
    top_k: int,
    max_tokens: int,
    repetition_penalty: float,
) -> tuple[str, dict]:
    """
    Generate a single response with metrics.

    Returns: (response_text, metrics_dict)
    """
    if not user_prompt.strip():
        return "", {}

    # Load model
    try:
        load_model(model_name)
    except Exception as e:
        return f"Error loading model: {e}", {}

    # Build messages
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    input_text = _tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = _tokenizer(input_text, return_tensors="pt").to("cpu")
    input_length = inputs.input_ids.shape[1]

    # Generation
    start_time = time.time()
    with torch.no_grad():
        outputs = _model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k if top_k > 0 else None,
            do_sample=temperature > 0,
            repetition_penalty=repetition_penalty,
            pad_token_id=_tokenizer.eos_token_id,
        )
    end_time = time.time()
    elapsed = end_time - start_time

    # Decode
    generated_ids = outputs[0][input_length:]
    response = _tokenizer.decode(generated_ids, skip_special_tokens=True)

    # Metrics
    num_tokens = len(generated_ids)
    tokens_per_sec = num_tokens / elapsed if elapsed > 0 else 0
    metrics = {
        "tokens": num_tokens,
        "time_sec": round(elapsed, 2),
        "tokens_per_sec": round(tokens_per_sec, 2),
    }

    return response, metrics


# ---------------------------------------------------------------------------
# UI handlers
# ---------------------------------------------------------------------------
def generate_response(
    model_name: str,
    system_prompt: str,
    user_prompt: str,
    temperature: float,
    top_p: float,
    top_k: int,
    max_tokens: int,
    repetition_penalty: float,
):
    """Handler for single-turn Q&A."""
    response, metrics = generate_single(
        model_name, system_prompt, user_prompt,
        temperature, top_p, top_k, max_tokens, repetition_penalty,
    )

    metrics_text = ""
    if metrics:
        metrics_text = (
            f"**Generation Metrics:**\n"
            f"- Tokens: {metrics['tokens']}\n"
            f"- Time: {metrics['time_sec']}s\n"
            f"- Speed: {metrics['tokens_per_sec']} tokens/sec"
        )

    return response, metrics_text


def export_json(model_name: str, system_prompt: str, user_prompt: str,
                response: str, metrics_text: str):
    """Export the conversation as JSON."""
    data = {
        "model": model_name,
        "system_prompt": system_prompt,
        "user_prompt": user_prompt,
        "response": response,
        "metrics": metrics_text,
    }
    return json.dumps(data, indent=2, ensure_ascii=False)


def generate_comparison(
    model_name: str,
    system_prompt: str,
    user_prompt: str,
    # Config A
    temp_a: float, top_p_a: float, top_k_a: int, max_tok_a: int, rep_pen_a: float,
    # Config B
    temp_b: float, top_p_b: float, top_k_b: int, max_tok_b: int, rep_pen_b: float,
):
    """Generate a side-by-side comparison with two different parameter sets."""
    response_a, metrics_a = generate_single(
        model_name, system_prompt, user_prompt,
        temp_a, top_p_a, top_k_a, max_tok_a, rep_pen_a,
    )
    response_b, metrics_b = generate_single(
        model_name, system_prompt, user_prompt,
        temp_b, top_p_b, top_k_b, max_tok_b, rep_pen_b,
    )

    metrics_text_a = ""
    if metrics_a:
        metrics_text_a = (
            f"**Config A Metrics:**\n"
            f"- Tokens: {metrics_a['tokens']}\n"
            f"- Time: {metrics_a['time_sec']}s\n"
            f"- Speed: {metrics_a['tokens_per_sec']} tok/s"
        )

    metrics_text_b = ""
    if metrics_b:
        metrics_text_b = (
            f"**Config B Metrics:**\n"
            f"- Tokens: {metrics_b['tokens']}\n"
            f"- Time: {metrics_b['time_sec']}s\n"
            f"- Speed: {metrics_b['tokens_per_sec']} tok/s"
        )

    return response_a, metrics_text_a, response_b, metrics_text_b


def update_system_prompt(model_name: str):
    """Update the system prompt textbox when the model changes."""
    return MODELS[model_name]["default_prompt"]
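

# A minimal streaming sketch, not wired into the UI below: transformers'
# TextIteratorStreamer yields decoded text chunks while generate() runs in a
# worker thread, which a Gradio handler could re-yield for incremental output.
# `stream_single` is a hypothetical helper; it assumes load_model() has
# already been called for the desired model.
def stream_single(system_prompt: str, user_prompt: str, max_tokens: int = 512):
    from transformers import TextIteratorStreamer

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    input_text = _tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = _tokenizer(input_text, return_tensors="pt")
    streamer = TextIteratorStreamer(
        _tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # Run generation in a background thread; the streamer is consumed here
    thread = threading.Thread(
        target=_model.generate,
        kwargs={**inputs, "streamer": streamer, "max_new_tokens": max_tokens},
    )
    thread.start()
    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial  # Gradio re-renders the output on every yield
    thread.join()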


# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
DESCRIPTION = """\
## Model Playground

Experiment with **3 fine-tuned cybersecurity models** using customizable parameters.

**Features:**
- Single-turn Q&A (no chat history)
- Adjustable generation parameters (temperature, top-p, top-k, max tokens, repetition penalty)
- Real-time generation metrics (tokens/sec, total time, token count)
- Export conversations as JSON
- Side-by-side comparison mode with 2 different parameter configurations
- Dark theme optimized for readability

**Models:**
- **ISO27001-Expert-1.5B**: ISO/IEC 27001 ISMS specialist
- **RGPD-Expert-1.5B**: GDPR/RGPD compliance expert
- **CyberSec-Assistant-3B**: General cybersecurity assistant
"""

theme = gr.themes.Monochrome(
    primary_hue="red",
    secondary_hue="purple",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
).set(
    body_background_fill="#0a0a0a",
    body_background_fill_dark="#0a0a0a",
    block_background_fill="#1a1a1a",
    block_background_fill_dark="#1a1a1a",
    input_background_fill="#262626",
    input_background_fill_dark="#262626",
    button_primary_background_fill="#dc2626",
    button_primary_background_fill_dark="#dc2626",
)

with gr.Blocks(theme=theme, title="Model Playground") as demo:
    gr.Markdown("# Model Playground")
    gr.Markdown(DESCRIPTION)

    with gr.Tabs():
        # ===================================================================
        # Tab 1: Single-Turn Q&A
        # ===================================================================
        with gr.Tab("Single-Turn Q&A"):
            with gr.Row():
                with gr.Column(scale=2):
                    model_select = gr.Dropdown(
                        choices=list(MODELS.keys()),
                        value="ISO27001-Expert-1.5B",
                        label="Select Model",
                    )
                with gr.Column(scale=3):
                    system_prompt_box = gr.Textbox(
                        value=MODELS["ISO27001-Expert-1.5B"]["default_prompt"],
                        label="System Prompt (Editable)",
                        lines=4,
                    )

            user_prompt_box = gr.Textbox(
                label="Your Question",
                placeholder="Enter your question here...",
                lines=3,
            )

            with gr.Accordion("Generation Parameters", open=True):
                with gr.Row():
                    temperature_slider = gr.Slider(
                        minimum=0, maximum=2, value=0.7, step=0.1,
                        label="Temperature",
                        info="Higher = more creative, lower = more deterministic",
                    )
                    top_p_slider = gr.Slider(
                        minimum=0, maximum=1, value=0.9, step=0.05,
                        label="Top-p (nucleus sampling)",
                    )
                    top_k_slider = gr.Slider(
                        minimum=0, maximum=100, value=50, step=5,
                        label="Top-k (0 = disabled)",
                    )
                with gr.Row():
                    max_tokens_slider = gr.Slider(
                        minimum=128, maximum=2048, value=512, step=128,
                        label="Max Tokens",
                    )
                    repetition_penalty_slider = gr.Slider(
                        minimum=1.0, maximum=2.0, value=1.1, step=0.1,
                        label="Repetition Penalty",
                    )

            generate_btn = gr.Button("Generate Response", variant="primary", size="lg")

            with gr.Row():
                with gr.Column(scale=3):
                    response_box = gr.Textbox(
                        label="Response",
                        lines=15,
                        interactive=False,
                    )
                with gr.Column(scale=1):
                    metrics_box = gr.Markdown(label="Metrics")

            with gr.Row():
                export_btn = gr.Button("Export as JSON")
            json_output = gr.Textbox(label="JSON Export", lines=10, visible=False)

            # Wire up events
            model_select.change(
                fn=update_system_prompt,
                inputs=[model_select],
                outputs=[system_prompt_box],
            )
            generate_btn.click(
                fn=generate_response,
                inputs=[
                    model_select,
                    system_prompt_box,
                    user_prompt_box,
                    temperature_slider,
                    top_p_slider,
                    top_k_slider,
                    max_tokens_slider,
                    repetition_penalty_slider,
                ],
                outputs=[response_box, metrics_box],
            )
            export_btn.click(
                fn=export_json,
                inputs=[model_select, system_prompt_box, user_prompt_box,
                        response_box, metrics_box],
                outputs=[json_output],
            ).then(
                fn=lambda: gr.update(visible=True),
                outputs=[json_output],
            )
            gr.Examples(
                examples=[
                    ["What are the mandatory clauses of ISO 27001:2022?"],
                    ["What are the 6 lawful bases for processing under GDPR?"],
                    ["Explain the MITRE ATT&CK framework."],
                    ["What are the main requirements of the NIS2 directive?"],
                ],
                inputs=user_prompt_box,
            )

        # ===================================================================
        # Tab 2: Side-by-Side Comparison
        # ===================================================================
        with gr.Tab("Side-by-Side Comparison"):
            gr.Markdown(
                "### Compare responses from the same model "
                "with 2 different parameter configurations"
            )

            with gr.Row():
                with gr.Column(scale=2):
                    model_select_comp = gr.Dropdown(
                        choices=list(MODELS.keys()),
                        value="ISO27001-Expert-1.5B",
                        label="Select Model",
                    )
                with gr.Column(scale=3):
                    system_prompt_comp = gr.Textbox(
                        value=MODELS["ISO27001-Expert-1.5B"]["default_prompt"],
                        label="System Prompt (Editable)",
                        lines=4,
                    )

            user_prompt_comp = gr.Textbox(
                label="Your Question",
                placeholder="Enter your question here...",
                lines=3,
            )

            with gr.Row():
                # Config A
                with gr.Column():
                    gr.Markdown("#### Configuration A")
                    temp_a = gr.Slider(0, 2, value=0.3, step=0.1, label="Temperature")
                    top_p_a = gr.Slider(0, 1, value=0.9, step=0.05, label="Top-p")
                    top_k_a = gr.Slider(0, 100, value=50, step=5, label="Top-k")
                    max_tok_a = gr.Slider(128, 2048, value=512, step=128, label="Max Tokens")
                    rep_pen_a = gr.Slider(1.0, 2.0, value=1.1, step=0.1, label="Rep. Penalty")
                # Config B
                with gr.Column():
                    gr.Markdown("#### Configuration B")
                    temp_b = gr.Slider(0, 2, value=1.2, step=0.1, label="Temperature")
                    top_p_b = gr.Slider(0, 1, value=0.95, step=0.05, label="Top-p")
                    top_k_b = gr.Slider(0, 100, value=40, step=5, label="Top-k")
                    max_tok_b = gr.Slider(128, 2048, value=512, step=128, label="Max Tokens")
                    rep_pen_b = gr.Slider(1.0, 2.0, value=1.2, step=0.1, label="Rep. Penalty")

            compare_btn = gr.Button("Generate Comparison", variant="primary", size="lg")

            with gr.Row():
                with gr.Column():
                    response_a = gr.Textbox(label="Response A", lines=12, interactive=False)
                    metrics_a = gr.Markdown()
                with gr.Column():
                    response_b = gr.Textbox(label="Response B", lines=12, interactive=False)
                    metrics_b = gr.Markdown()

            # Wire up events
            model_select_comp.change(
                fn=update_system_prompt,
                inputs=[model_select_comp],
                outputs=[system_prompt_comp],
            )
            compare_btn.click(
                fn=generate_comparison,
                inputs=[
                    model_select_comp, system_prompt_comp, user_prompt_comp,
                    temp_a, top_p_a, top_k_a, max_tok_a, rep_pen_a,
                    temp_b, top_p_b, top_k_b, max_tok_b, rep_pen_b,
                ],
                outputs=[response_a, metrics_a, response_b, metrics_b],
            )

            gr.Examples(
                examples=[
                    ["What is a Data Protection Impact Assessment?"],
                    ["Explain the concept of Zero Trust security."],
                    ["What are the penalties for GDPR non-compliance?"],
                ],
                inputs=user_prompt_comp,
            )
    # Footer
    gr.HTML(
        """
        <div style="text-align: center;">
            <p>Built by Ayi NEDJIMI | Models: ISO27001, RGPD, CyberSec-3B | Portfolio</p>
            <p>Fine-tuned with QLoRA on Qwen 2.5 | Model Playground</p>
        </div>
        """
    )
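
# The handlers above read and replace shared globals (_model, _tokenizer) when
# switching models, so serializing requests through Gradio's queue avoids
# races between concurrent generations. The limit of 1 is an assumption suited
# to a single-CPU Space, not a value from the original app.
demo.queue(default_concurrency_limit=1)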

if __name__ == "__main__":
    demo.launch()
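
# Headless smoke-test sketch (hypothetical, not part of the Space): exercises
# generate_single() without the UI. Assumes the Hub repos are reachable and
# that HF_TOKEN is set if they are gated.
#
#     name = "ISO27001-Expert-1.5B"
#     text, metrics = generate_single(
#         name,
#         MODELS[name]["default_prompt"],
#         "Which clauses of ISO 27001:2022 are mandatory?",
#         temperature=0.7, top_p=0.9, top_k=50,
#         max_tokens=128, repetition_penalty=1.1,
#     )
#     print(text, metrics, sep="\n")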