Spaces:

BenjaminKaindu0506
/

bug-bounty-chatbot

Build error

File size: 3,248 Bytes

9b4e2a5

"""
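Model configuration for the Bug Bounty Chatbot.

This file contains configuration settings for different model types and sizes,
default model paths, security prompt templates, security keyword lists, and
small helpers for looking up and validating model settings.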
"""

import os
from typing import Dict, Any

# Model configurations, keyed by model identifier.
# Every entry carries the same five fields: model_type, max_length,
# batch_size, device and description.
MODEL_CONFIGS = dict(
    distilbert_classification=dict(
        model_type="classification",
        max_length=512,
        batch_size=16,
        device="auto",
        description="Fine-tuned DistilBERT for security classification tasks",
    ),
    codegemma_2b=dict(
        model_type="generation",
        max_length=2048,
        batch_size=8,
        device="auto",
        description="CodeGemma 2B fine-tuned for security code analysis",
    ),
    codegemma_7b=dict(
        model_type="generation",
        max_length=4096,
        batch_size=4,
        # Requires GPU for 7B model.
        # NOTE(review): the literal "gpu" is passed through as-is; confirm the
        # consumer of this value understands it.
        device="gpu",
        description="CodeGemma 7B fine-tuned for advanced security analysis",
    ),
)

# Default model paths, keyed by short model name.
#
# Each entry can be overridden without editing this file by setting the
# corresponding environment variable before the module is imported. An unset
# or empty variable falls back to the built-in default shown here, so the
# values are unchanged unless an override is supplied.
#
# The built-in defaults are machine-specific; update them (or set the
# environment variables) to point at your actual model directories.
DEFAULT_MODEL_PATHS = {
    "distilbert": (
        os.environ.get("BUG_BOUNTY_DISTILBERT_PATH")
        or "/Users/macbook/Downloads/finetuned_model"
    ),
    "distilbert_2": (
        os.environ.get("BUG_BOUNTY_DISTILBERT_2_PATH")
        or "/Users/macbook/Downloads/finetuned_model 2"
    ),
    # No CodeGemma checkpoints are configured by default; these stay None
    # until a path is provided.
    "codegemma_2b": os.environ.get("BUG_BOUNTY_CODEGEMMA_2B_PATH") or None,
    "codegemma_7b": os.environ.get("BUG_BOUNTY_CODEGEMMA_7B_PATH") or None,
}

# Security testing prompts and templates.
# Each template starts with a newline, is laid out as <|system|>, <|user|>
# and <|assistant|> sections, and ends with a newline after <|assistant|>.
# "vulnerability_analysis" and "methodology" contain a {query} placeholder;
# "code_review" contains a {code} placeholder.
SECURITY_PROMPTS = {
    "vulnerability_analysis": (
        "\n"
        "<|system|>\n"
        "You are a cybersecurity expert specializing in bug bounty hunting and penetration testing.\n"
        "Analyze the following security scenario and provide detailed guidance.\n"
        "<|user|>\n"
        "{query}\n"
        "<|assistant|>\n"
    ),
    "code_review": (
        "\n"
        "<|system|>\n"
        "You are a security code reviewer. Analyze the following code for security vulnerabilities.\n"
        "<|user|>\n"
        "Review this code for security issues:\n"
        "{code}\n"
        "<|assistant|>\n"
    ),
    "methodology": (
        "\n"
        "<|system|>\n"
        "You are a penetration testing methodology expert. Provide step-by-step guidance for security testing.\n"
        "<|user|>\n"
        "{query}\n"
        "<|assistant|>\n"
    ),
}

# Security categories and their associated keywords.
# Keys are category names; values are lists of lowercase keyword phrases.
SECURITY_KEYWORDS = dict(
    web_app=[
        "sql injection",
        "xss",
        "csrf",
        "authentication",
        "authorization",
        "file upload",
        "directory traversal",
        "ssrf",
        "xxe",
        "idor",
    ],
    network=[
        "port scan",
        "network",
        "tcp",
        "udp",
        "sniffing",
        "mitm",
        "dns",
        "arp",
        "wireless",
        "vpn",
        "firewall",
    ],
    infrastructure=[
        "server",
        "privilege escalation",
        "container",
        "cloud",
        "api",
        "database",
        "misconfiguration",
        "default credentials",
    ],
)

def get_model_config(model_name: str) -> Dict[str, Any]:
    """Look up the settings dictionary for ``model_name``.

    Args:
        model_name: Key into ``MODEL_CONFIGS``.

    Returns:
        The matching entry of ``MODEL_CONFIGS``; if ``model_name`` is not a
        known key, the ``"distilbert_classification"`` entry is returned
        instead. The returned dictionary is the stored object itself, not a
        copy.
    """
    # Resolve the fallback first so a missing default entry surfaces as a
    # KeyError regardless of which model was requested.
    fallback_config = MODEL_CONFIGS["distilbert_classification"]
    if model_name in MODEL_CONFIGS:
        return MODEL_CONFIGS[model_name]
    return fallback_config

def get_model_path(model_name: str) -> str:
    """Get the path for a specific model.

    Args:
        model_name: Key into ``DEFAULT_MODEL_PATHS``.

    Returns:
        The configured path for ``model_name``, or an empty string when the
        name is unknown or no path has been configured for it.
    """
    # Some entries in DEFAULT_MODEL_PATHS are None placeholders. Normalize
    # those to "" so the function always honours its ``str`` return type and
    # callers see the same "no path" value as for an unknown model name.
    return DEFAULT_MODEL_PATHS.get(model_name) or ""

def validate_model_path(model_path: str) -> bool:
    """Check that ``model_path`` exists and holds the expected model files.

    Args:
        model_path: Filesystem location to inspect. A falsy value (empty
            string or ``None``) is treated as invalid.

    Returns:
        ``True`` only when ``model_path`` exists and both ``config.json`` and
        ``tokenizer.json`` exist directly beneath it; ``False`` otherwise.
    """
    # Guard clauses: reject a missing value before touching the filesystem.
    if not model_path:
        return False
    if not os.path.exists(model_path):
        return False

    # Every expected file must be present; all() stops at the first miss.
    expected_names = ("config.json", "tokenizer.json")
    return all(
        os.path.exists(os.path.join(model_path, name))
        for name in expected_names
    )