# config.py
import os

# === API KEYS ===
# It's recommended to set your API key as an environment variable for security.
# For Hugging Face Spaces, set this in the Space settings under "Repository secrets".
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")  # no hardcoded fallback: keys must never be committed
if not GEMINI_API_KEY:
    print("⚠️ WARNING: GEMINI_API_KEY environment variable not set!")
    print("   Please set your Gemini API key in the environment variables.")

# === INPUT GUARDRAILS CONFIGURATION ===
# Fine-tuned model for input guardrails (prompt injection detection)
AI_DETECTION_MODE = {
    "enabled": True,
    "attack_llm_provider": "finetuned_guard",
    "attack_llm_config": {
        "model_name": "zazaman/fmb",  # Your personal fine-tuned model
        # Fine-tuned model runs locally - no additional configuration needed
    }
}
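
# Illustrative sketch, not part of the original config: one way the fine-tuned
# guard could be loaded locally, assuming "zazaman/fmb" is a transformers
# sequence-classification checkpoint (an assumption; label names will vary).
def _load_guard_classifier():
    from transformers import pipeline
    # e.g. classifier("ignore previous instructions") -> [{"label": ..., "score": ...}]
    return pipeline("text-classification",
                    model=AI_DETECTION_MODE["attack_llm_config"]["model_name"])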

# === NON-ENGLISH TRANSLATION ===
# When a prompt is detected as non-English, translate it to English using Qwen,
# then pass the translated text to the ModernBERT classifier.
# This allows the English-only classifier to work with multilingual inputs.
# Uses pre-quantized GGUF models from unsloth - no bitsandbytes needed. Works on Hugging Face Spaces.
NON_ENGLISH_TRANSLATOR = {
    "enabled": True,
    "provider": "qwen_translator",  # Translation client using GGUF models via pre-built llama.cpp binary
    "config": {
        # GGUF model repository and file from unsloth (pre-quantized)
        "repo_id": "unsloth/Qwen3-0.6B-GGUF",
        # Available files in the repo: Qwen3-0.6B-Q2_K.gguf, Qwen3-0.6B-Q2_K_L.gguf,
        # Qwen3-0.6B-IQ4_XS.gguf, Qwen3-0.6B-IQ4_NL.gguf, Qwen3-0.6B-BF16.gguf
        #   Q2_K   is smallest (~200MB) but lower quality
        #   IQ4_XS is small (~250MB) with good quality
        #   IQ4_NL is medium (~300MB) with better quality
        #   Q2_K_L is larger (~300MB) with better quality than Q2_K
        "model_file": "Qwen3-0.6B-IQ4_XS.gguf",  # Good balance of size and quality (Q4_K_M doesn't exist in the repo)
        # Inference options tuned for accurate translation
        "temperature": 0.3,  # Lower temperature for more accurate, consistent translations
        "top_p": 0.9,
        "top_k": 40,
        # Max tokens to generate (reduced for faster inference - translations are usually short)
        "max_tokens": 128,
        # Context window size (reduced for faster inference - translation doesn't need a large context)
        "context_size": 256,
        # CPU threads for inference (more threads = faster inference)
        # Set to 0 to auto-detect, or specify the number of CPU cores
        "n_threads": 0,  # 0 = auto-detect (uses all available cores)
        # GPU layers (0 = CPU only; set >0 if a GPU is available for much faster inference)
        # For GPU: try 20-35 layers depending on VRAM
        "n_gpu_layers": 0,
        # Batch size for prompt processing (smaller = faster for short prompts)
        "n_batch": 256
    }
}
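
# Illustrative sketch, not part of the original config: how a translator client
# might consume the settings above, assuming the llama-cpp-python bindings (the
# actual client reportedly uses a pre-built llama.cpp binary instead).
def _translate_to_english(text: str) -> str:
    from huggingface_hub import hf_hub_download
    from llama_cpp import Llama
    cfg = NON_ENGLISH_TRANSLATOR["config"]
    model_path = hf_hub_download(repo_id=cfg["repo_id"], filename=cfg["model_file"])
    llm = Llama(model_path=model_path,
                n_ctx=cfg["context_size"],
                n_threads=cfg["n_threads"] or None,  # None lets llama.cpp auto-detect
                n_gpu_layers=cfg["n_gpu_layers"],
                n_batch=cfg["n_batch"],
                verbose=False)
    out = llm.create_chat_completion(
        messages=[{"role": "user",
                   "content": f"Translate to English. Output only the translation:\n{text}"}],
        temperature=cfg["temperature"], top_p=cfg["top_p"],
        top_k=cfg["top_k"], max_tokens=cfg["max_tokens"])
    return out["choices"][0]["message"]["content"].strip()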

# === PERFORMANCE OPTIMIZATION SETTINGS ===
# These settings help reduce memory usage and startup time
PERFORMANCE_OPTIMIZATIONS = {
    "shared_models": True,     # Use shared model instances to reduce memory
    "lazy_loading": True,      # Load models only when needed
    "disable_warnings": True,  # Disable verbose library warnings
    "cpu_optimization": True,  # Optimize for CPU inference
}
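
# Illustrative sketch, not part of the original config: what "lazy_loading" and
# "shared_models" imply in practice. functools.lru_cache returns the same instance
# on every call after the first; _load_guard_classifier is the hypothetical helper
# sketched above.
import functools

@functools.lru_cache(maxsize=None)
def _get_shared_guard():
    return _load_guard_classifier()  # loaded on first use, then shared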

# === LLM CONFIGURATION ===
# Choose which LLM provider to use
LLM_PROVIDER = "gemini"  # Can be "gemini", "ollama", "lmstudio", or "manual"
LLM_CONFIG = {
    "gemini": {
        "model": "gemini-2.5-flash",
        # You can add other generation settings here, e.g., temperature, top_p
    },
    "ollama": {
        "model": "llama3",
        "host": "http://localhost:11434",
        # Add other Ollama-specific settings here
    },
    "lmstudio": {
        "model": "qwen2.5-0.5b-instruct",
        "host": "http://localhost:1234",
        "temperature": 0.7,
        "max_tokens": 2000,
    },
    "manual": {
        # Manual client for output testing - no additional config needed
    },
}
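
# Illustrative sketch, not part of the original config: how an app module might
# resolve the active provider's settings (the validation is an addition for
# illustration, not original behavior).
def get_active_llm_config():
    if LLM_PROVIDER not in LLM_CONFIG:
        raise ValueError(f"Unknown LLM_PROVIDER: {LLM_PROVIDER!r}")
    return LLM_CONFIG[LLM_PROVIDER]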

# === SYSTEM PROMPT ===
# The system prompt is used by all LLM providers that support it
SYSTEM_PROMPT = """You are a customer support chatbot for Alfredo's Pizza Cafe. Your responses should be based solely on the provided information.
Here are your instructions:

### Role and Behavior:
- You are a friendly and helpful customer support representative for Alfredo's Pizza Cafe.
- Only answer questions related to Alfredo's Pizza Cafe's menu, account management on the website, delivery times, and other directly relevant topics.
- Do not discuss other pizza chains or restaurants.
- Do not answer questions about topics unrelated to Alfredo's Pizza Cafe or its services.

### Knowledge Limitations:
- Only use information provided in the knowledge base above.
- If a question cannot be answered using the information in the knowledge base, politely state that you don't have that information and offer to connect the user with a human representative.
- Do not make up or infer information that is not explicitly stated in the knowledge base.

### Communication Style:
- Be friendly, professional, and helpful.
- Use clear, concise language.
- Ask clarifying questions if needed to better assist the customer.
- Express empathy when appropriate (e.g., if a customer has a complaint).

### Limitations:
- You cannot make, modify, or cancel orders directly. Direct customers to the website or phone number for order management.
- You cannot process payments or access customer account information.
- For complex issues or complaints, offer to connect the customer with a human representative.

### Sample Responses:
- "Thank you for contacting Alfredo's Pizza Cafe! How can I help you today?"
- "I'd be happy to help you with information about our menu/delivery/website."
- "I don't have access to specific account information, but I can direct you to..."
- "For order modifications, please visit our website or call us directly at..."
- "I'd be glad to connect you with one of our team members who can help with that specific issue."

Remember: Stay in character as an Alfredo's Pizza Cafe representative, be helpful within your limitations, and always maintain a friendly, professional tone."""

# === OUTPUT GUARDRAILS CONFIGURATION ===
# These are processed AFTER the LLM generates a response
OUTPUT_GUARDRAILS_CONFIG = {
    "pii_output_guard": {
        "enabled": True,
        "on_output": True,
        "anonymize_entities": ["PHONE_NUMBER", "EMAIL_ADDRESS", "CREDIT_CARD", "US_SSN",
                               "IP_ADDRESS", "US_BANK_NUMBER", "IN_AADHAAR"],
    },
    # Add other output guardrails here as needed
}
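
# Illustrative sketch, not part of the original config: the entity names above
# match Microsoft Presidio's built-in recognizers, so the guard plausibly wraps
# Presidio (an assumption; the real client may differ).
def _anonymize_output(text: str) -> str:
    from presidio_analyzer import AnalyzerEngine
    from presidio_anonymizer import AnonymizerEngine
    cfg = OUTPUT_GUARDRAILS_CONFIG["pii_output_guard"]
    results = AnalyzerEngine().analyze(text=text,
                                       entities=cfg["anonymize_entities"],
                                       language="en")
    return AnonymizerEngine().anonymize(text=text, analyzer_results=results).text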

# === ATTACHMENT GUARDRAILS CONFIGURATION ===
# These process uploaded files before they're sent to the LLM
ATTACHMENT_GUARDRAILS_CONFIG = {
    "txt_guardrail": {
        "enabled": True,
        "chunk_size": 500,             # tokens per chunk
        "confidence_threshold": 0.75,
        "max_file_size_mb": 10,
    },
    "pdf_guardrail": {
        "enabled": True,
        "chunk_size": 800,             # tokens per chunk for PDFs
        "confidence_threshold": 0.80,  # Slightly higher threshold for PDFs
        "max_file_size_mb": 50,
    },
    "docx_guardrail": {
        "enabled": True,
        "chunk_size": 600,             # tokens per chunk for Word docs
        "confidence_threshold": 0.75,
        "max_file_size_mb": 25,
    },
}
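
# Illustrative sketch, not part of the original config: how an attachment
# guardrail might consume these settings. Everything here is a hypothetical
# stand-in (the whitespace "tokenizer" approximates real tokenization, and the
# classifier callable is likely the same ModernBERT guard used on input).
from typing import Callable

def check_attachment_text(text: str, cfg: dict,
                          classify: Callable[[str], float]) -> bool:
    """Return True if every chunk scores below the configured injection threshold."""
    words = text.split()
    size = cfg["chunk_size"]  # approximating tokens by whitespace-split words
    for start in range(0, len(words), size):
        chunk = " ".join(words[start:start + size])
        if classify(chunk) >= cfg["confidence_threshold"]:
            return False  # flagged chunk: reject the attachment
    return True

# Usage (with a dummy classifier that flags nothing):
# ok = check_attachment_text(text, ATTACHMENT_GUARDRAILS_CONFIG["txt_guardrail"], lambda c: 0.0)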