# guardrails-final / config.py
# Provenance (Hugging Face Spaces file header), kept as comments so this file parses:
#   commit b45369f by zazaman —
#   "Optimize translation: reduce max_tokens and context_size, add no-display-prompt flag"
# config.py
import os

# === API KEYS ===
# The Gemini key MUST come from the environment. For Hugging Face Spaces, set
# it in the Space settings under "Repository secrets".
# SECURITY: never hard-code a real key as the fallback here — this file is
# committed, so a literal key both leaks the secret and makes the
# missing-configuration warning below unreachable.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
if not GEMINI_API_KEY:
    print("⚠️ WARNING: GEMINI_API_KEY environment variable not set!")
    print(" Please set your Gemini API key in the environment variables.")
# === INPUT GUARDRAILS CONFIGURATION ===
# Prompt-injection detection handled by a locally hosted fine-tuned classifier.
AI_DETECTION_MODE = {
    "enabled": True,
    "attack_llm_provider": "finetuned_guard",
    "attack_llm_config": {
        # Personal fine-tuned model; runs locally, so no further settings are needed.
        "model_name": "zazaman/fmb",
    },
}
# === NON-ENGLISH TRANSLATION ===
# Prompts detected as non-English are translated to English with a small Qwen
# GGUF model (run through a pre-built llama.cpp binary) before being handed to
# the English-only ModernBERT classifier. The pre-quantized unsloth GGUF files
# need no bitsandbytes, so this also works on Hugging Face Spaces.
NON_ENGLISH_TRANSLATOR = {
    "enabled": True,
    "provider": "qwen_translator",
    "config": {
        # Pre-quantized GGUF weights published by unsloth. Other files in the
        # repo: Qwen3-0.6B-Q2_K.gguf (~200MB, lowest quality),
        # Qwen3-0.6B-Q2_K_L.gguf (~300MB, better than Q2_K),
        # Qwen3-0.6B-IQ4_NL.gguf (~300MB, better quality),
        # Qwen3-0.6B-BF16.gguf (full precision).
        "repo_id": "unsloth/Qwen3-0.6B-GGUF",
        # IQ4_XS (~250MB) balances size and quality; Q4_K_M does not exist in
        # this repo.
        "model_file": "Qwen3-0.6B-IQ4_XS.gguf",
        # Sampling tuned for faithful translation — a low temperature keeps
        # output consistent and literal.
        "temperature": 0.3,
        "top_p": 0.9,
        "top_k": 40,
        # Translations are short, so a small generation budget and context
        # window keep inference fast.
        "max_tokens": 128,
        "context_size": 256,
        # 0 = auto-detect CPU threads (uses every available core).
        "n_threads": 0,
        # 0 = CPU only; with a GPU try ~20-35 layers depending on VRAM for a
        # large speedup.
        "n_gpu_layers": 0,
        # Prompt-processing batch size; smaller is faster for short prompts.
        "n_batch": 256,
    },
}
# === PERFORMANCE OPTIMIZATION SETTINGS ===
# Flags read elsewhere to trim memory usage and startup time.
PERFORMANCE_OPTIMIZATIONS = {
    "shared_models": True,     # share model instances instead of per-guard copies
    "lazy_loading": True,      # defer model loading until first use
    "disable_warnings": True,  # silence verbose library warnings
    "cpu_optimization": True,  # tune inference for CPU
}
# === LLM CONFIGURATION ===
# Active response-generating backend. One of: "gemini", "ollama", "lmstudio",
# "manual".
LLM_PROVIDER = "gemini"

# Per-provider settings; only the entry matching LLM_PROVIDER is used.
LLM_CONFIG = {
    # Hosted Gemini API. Extend with generation settings (temperature, top_p,
    # ...) as needed.
    "gemini": {"model": "gemini-2.5-flash"},
    # Local Ollama server; add further Ollama-specific settings here.
    "ollama": {"model": "llama3", "host": "http://localhost:11434"},
    # Local LM Studio server.
    "lmstudio": {
        "model": "qwen2.5-0.5b-instruct",
        "host": "http://localhost:1234",
        "temperature": 0.7,
        "max_tokens": 2000,
    },
    # Manual client for output testing — no extra settings required.
    "manual": {},
}
# === SYSTEM PROMPT ===
# The system prompt is used by all LLM providers that support it
# NOTE(review): SYSTEM_PROMPT is currently a blank placeholder (a single
# space), while the full customer-support prompt is parked in SYSTEM_PROMPTZ
# (note the trailing "Z"). This looks like a deliberate on/off toggle —
# confirm which variable the providers actually read before changing either.
SYSTEM_PROMPT = """ """
# Parked/inactive copy of the full Alfredo's Pizza Cafe support prompt.
SYSTEM_PROMPTZ = """You are a customer support chatbot for Alfredo's Pizza Cafe. Your responses should be based solely on the provided information.
Here are your instructions:
### Role and Behavior
- You are a friendly and helpful customer support representative for Alfredo's Pizza Cafe.
- Only answer questions related to Alfredo's Pizza Cafe's menu, account management on the website, delivery times, and other directly relevant topics.
- Do not discuss other pizza chains or restaurants.
- Do not answer questions about topics unrelated to Alfredo's Pizza Cafe or its services.
### Knowledge Limitations:
- Only use information provided in the knowledge base above.
- If a question cannot be answered using the information in the knowledge base, politely state that you don't have that information and offer to connect the user with a human representative.
- Do not make up or infer information that is not explicitly stated in the knowledge base.
### Communication Style:
- Be friendly, professional, and helpful.
- Use clear, concise language.
- Ask clarifying questions if needed to better assist the customer.
- Express empathy when appropriate (e.g., if a customer has a complaint).
### Limitations:
- You cannot make, modify, or cancel orders directly. Direct customers to the website or phone number for order management.
- You cannot process payments or access customer account information.
- For complex issues or complaints, offer to connect the customer with a human representative.
### Sample Responses:
- "Thank you for contacting Alfredo's Pizza Cafe! How can I help you today?"
- "I'd be happy to help you with information about our menu/delivery/website."
- "I don't have access to specific account information, but I can direct you to..."
- "For order modifications, please visit our website or call us directly at..."
- "I'd be glad to connect you with one of our team members who can help with that specific issue."
Remember: Stay in character as an Alfredo's Pizza Cafe representative, be helpful within your limitations, and always maintain a friendly, professional tone."""
# === OUTPUT GUARDRAILS CONFIGURATION ===
# Applied to the LLM's response AFTER generation, before it reaches the user.
OUTPUT_GUARDRAILS_CONFIG = {
    # PII scrubber: anonymizes the listed entity types found in model output.
    "pii_output_guard": {
        "enabled": True,
        "on_output": True,
        "anonymize_entities": [
            "PHONE_NUMBER",
            "EMAIL_ADDRESS",
            "CREDIT_CARD",
            "US_SSN",
            "IP_ADDRESS",
            "US_BANK_NUMBER",
            "IN_AADHAAR",
        ],
    },
    # Register additional output guardrails here as needed.
}
# === ATTACHMENT GUARDRAILS CONFIGURATION ===
# Uploaded files are screened by these guardrails before their content is
# forwarded to the LLM. Each file type gets its own chunking granularity,
# classifier confidence threshold, and size cap.
ATTACHMENT_GUARDRAILS_CONFIG = {
    # Plain-text uploads.
    "txt_guardrail": {
        "enabled": True,
        "chunk_size": 500,             # tokens per chunk
        "confidence_threshold": 0.75,
        "max_file_size_mb": 10,
    },
    # PDFs: larger chunks and a slightly stricter threshold.
    "pdf_guardrail": {
        "enabled": True,
        "chunk_size": 800,             # tokens per chunk
        "confidence_threshold": 0.80,
        "max_file_size_mb": 50,
    },
    # Word documents.
    "docx_guardrail": {
        "enabled": True,
        "chunk_size": 600,             # tokens per chunk
        "confidence_threshold": 0.75,
        "max_file_size_mb": 25,
    },
}