File size: 7,383 Bytes
b5386e2
 
 
a2e1879
b5386e2
a2e1879
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c26a471
a2e1879
 
 
 
 
 
 
 
 
 
 
 
 
 
b45369f
 
a2e1879
b45369f
a2e1879
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5386e2
a2e1879
b5386e2
 
 
 
 
 
 
 
 
 
 
a2e1879
 
 
 
 
 
 
 
 
b5386e2
 
a2e1879
 
 
 
b5386e2
 
 
 
 
 
 
 
 
 
 
 
 
 
a2e1879
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5386e2
 
a2e1879
 
 
 
 
 
 
 
 
 
 
 
 
b5386e2
a2e1879
b5386e2
a2e1879
 
 
b5386e2
a2e1879
 
 
 
 
 
b5386e2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# config.py
import os

# === API KEYS ===
# It's recommended to set your API key as an environment variable for security.
# For Hugging Face Spaces, set this in the Spaces settings under "Repository secrets"
# SECURITY FIX: a real API key was previously hard-coded here as the fallback
# value. A secret committed to source control must be treated as compromised —
# revoke and rotate it in the provider console. With no fallback, the warning
# below actually fires when the environment variable is missing (previously it
# was dead code, since the fallback made GEMINI_API_KEY always truthy).
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

if not GEMINI_API_KEY:
    print("⚠️  WARNING: GEMINI_API_KEY environment variable not set!")
    print("   Please set your Gemini API key in the environment variables.")

# === INPUT GUARDRAILS CONFIGURATION ===
# Prompt-injection detection on incoming user input, backed by a locally
# hosted fine-tuned classifier model.
AI_DETECTION_MODE = {
    "enabled": True,                           # master switch for input guarding
    "attack_llm_provider": "finetuned_guard",  # provider id resolved by the guard loader
    "attack_llm_config": {
        # Personal fine-tuned model; it runs locally, so no endpoint or
        # credential configuration is needed beyond the model name.
        "model_name": "zazaman/fmb",
    },
}

# === NON-ENGLISH TRANSLATION ===
# Prompts detected as non-English are first translated to English with a small
# Qwen model, and the translation is then handed to the ModernBERT classifier.
# This lets the English-only classifier cover multilingual inputs.
# Weights are pre-quantized GGUF files from unsloth served through a pre-built
# llama.cpp binary — no bitsandbytes needed, so it works on Hugging Face Spaces.
NON_ENGLISH_TRANSLATOR = {
    "enabled": True,
    # Translation client id: GGUF models via the pre-built llama.cpp binary.
    "provider": "qwen_translator",
    "config": {
        # Pre-quantized GGUF repository (unsloth).
        "repo_id": "unsloth/Qwen3-0.6B-GGUF",
        # Other files available in that repo, roughly by size/quality:
        #   Qwen3-0.6B-Q2_K.gguf    (~200MB, smallest, lowest quality)
        #   Qwen3-0.6B-Q2_K_L.gguf  (~300MB, better than Q2_K)
        #   Qwen3-0.6B-IQ4_XS.gguf  (~250MB, small with good quality)
        #   Qwen3-0.6B-IQ4_NL.gguf  (~300MB, medium with better quality)
        #   Qwen3-0.6B-BF16.gguf    (full precision)
        # IQ4_XS is the chosen size/quality balance (Q4_K_M is not in the repo).
        "model_file": "Qwen3-0.6B-IQ4_XS.gguf",
        # Sampling kept conservative so translations stay accurate and consistent.
        "temperature": 0.3,
        "top_p": 0.9,
        "top_k": 40,
        # Translations are short, so a small generation budget keeps latency low.
        "max_tokens": 128,
        # Small context window — translation does not need a large context.
        "context_size": 256,
        # CPU threads for inference; 0 = auto-detect (uses all available cores).
        "n_threads": 0,
        # GPU offload layers; 0 = CPU only. With a GPU, try 20-35 depending on VRAM.
        "n_gpu_layers": 0,
        # Prompt-processing batch size; smaller is faster for short prompts.
        "n_batch": 256,
    },
}

# === PERFORMANCE OPTIMIZATION SETTINGS ===
# Feature flags that trade flexibility for lower memory use and faster startup.
PERFORMANCE_OPTIMIZATIONS = {
    "shared_models": True,     # share model instances instead of duplicating them
    "lazy_loading": True,      # defer model loading until first use
    "disable_warnings": True,  # silence verbose library warnings
    "cpu_optimization": True,  # tune inference for CPU-only hosts
}

# === LLM CONFIGURATION ===
# Selects the active response-generation backend.
LLM_PROVIDER = "gemini"  # one of: "gemini", "ollama", "lmstudio", "manual"

# Per-provider settings; only the entry matching LLM_PROVIDER is used.
LLM_CONFIG = {
    "gemini": {
        # Other generation options (temperature, top_p, ...) may be added here.
        "model": "gemini-2.5-flash",
    },
    "ollama": {
        "model": "llama3",
        "host": "http://localhost:11434",
        # Further Ollama-specific settings go here.
    },
    "lmstudio": {
        "model": "qwen2.5-0.5b-instruct",
        "host": "http://localhost:1234",
        "temperature": 0.7,
        "max_tokens": 2000,
    },
    # Manual client for output testing — needs no configuration.
    "manual": {},
}

# === SYSTEM PROMPT ===
# The system prompt is used by all LLM providers that support it
# NOTE(review): SYSTEM_PROMPT is currently a single space, which effectively
# disables the system prompt, while the full production prompt sits unused in
# SYSTEM_PROMPTZ below. This looks like a testing/debugging leftover — confirm
# with the team whether SYSTEM_PROMPT should hold the SYSTEM_PROMPTZ text.
SYSTEM_PROMPT = """ """
# Full Alfredo's Pizza Cafe support prompt, parked under an alternate name.
# Presumably not read by any provider unless referenced elsewhere — verify
# against callers before deleting or renaming.
SYSTEM_PROMPTZ = """You are a customer support chatbot for Alfredo's Pizza Cafe. Your responses should be based solely on the provided information.

Here are your instructions:

### Role and Behavior
- You are a friendly and helpful customer support representative for Alfredo's Pizza Cafe.
- Only answer questions related to Alfredo's Pizza Cafe's menu, account management on the website, delivery times, and other directly relevant topics.
- Do not discuss other pizza chains or restaurants.
- Do not answer questions about topics unrelated to Alfredo's Pizza Cafe or its services.

### Knowledge Limitations:
- Only use information provided in the knowledge base above.
- If a question cannot be answered using the information in the knowledge base, politely state that you don't have that information and offer to connect the user with a human representative.
- Do not make up or infer information that is not explicitly stated in the knowledge base.

### Communication Style:
- Be friendly, professional, and helpful.
- Use clear, concise language.
- Ask clarifying questions if needed to better assist the customer.
- Express empathy when appropriate (e.g., if a customer has a complaint).

### Limitations:
- You cannot make, modify, or cancel orders directly. Direct customers to the website or phone number for order management.
- You cannot process payments or access customer account information.
- For complex issues or complaints, offer to connect the customer with a human representative.

### Sample Responses:
- "Thank you for contacting Alfredo's Pizza Cafe! How can I help you today?"
- "I'd be happy to help you with information about our menu/delivery/website."
- "I don't have access to specific account information, but I can direct you to..."
- "For order modifications, please visit our website or call us directly at..."
- "I'd be glad to connect you with one of our team members who can help with that specific issue."

Remember: Stay in character as an Alfredo's Pizza Cafe representative, be helpful within your limitations, and always maintain a friendly, professional tone."""

# === OUTPUT GUARDRAILS CONFIGURATION ===
# Applied to the LLM response AFTER generation, before it reaches the user.
OUTPUT_GUARDRAILS_CONFIG = {
    "pii_output_guard": {
        "enabled": True,
        "on_output": True,
        # PII entity labels to anonymize when found in model output.
        "anonymize_entities": [
            "PHONE_NUMBER",
            "EMAIL_ADDRESS",
            "CREDIT_CARD",
            "US_SSN",
            "IP_ADDRESS",
            "US_BANK_NUMBER",
            "IN_AADHAAR",
        ],
    },
    # Register additional output guardrails here as needed.
}

# === ATTACHMENT GUARDRAILS CONFIGURATION ===
# Screens uploaded files before their content is forwarded to the LLM.
# For each file type: chunk_size is in tokens, confidence_threshold is the
# score cutoff used by the guardrail's classifier (semantics defined by the
# guardrail implementation), and max_file_size_mb caps accepted uploads.
ATTACHMENT_GUARDRAILS_CONFIG = {
    "txt_guardrail": {
        "enabled": True,
        "chunk_size": 500,
        "confidence_threshold": 0.75,
        "max_file_size_mb": 10,
    },
    "pdf_guardrail": {
        "enabled": True,
        "chunk_size": 800,
        "confidence_threshold": 0.80,  # slightly stricter than plain text
        "max_file_size_mb": 50,
    },
    "docx_guardrail": {
        "enabled": True,
        "chunk_size": 600,
        "confidence_threshold": 0.75,
        "max_file_size_mb": 25,
    },
}