""" Smart Confidant - A Magic: The Gathering chatbot with support for local and API-based LLMs. Supports both local transformers models and HuggingFace API models with custom theming. """ import gradio as gr from gradio.themes.base import Base from huggingface_hub import InferenceClient import os import base64 from pathlib import Path import traceback from datetime import datetime from threading import Lock import time from prometheus_client import start_http_server, Counter, Summary, Gauge # Load environment variables from .env file try: from dotenv import load_dotenv load_dotenv() except ImportError: # If python-dotenv not installed, skip (will use system env vars only) pass # ============================================================================ # Configuration # ============================================================================ LOCAL_MODELS = ["arnir0/Tiny-LLM"] API_MODELS = ["meta-llama/Llama-3.2-3B-Instruct"] DEFAULT_SYSTEM_MESSAGE = "You are an expert assistant for Magic: The Gathering. You're name is Smart Confidant, but people tend to call you Bob." TITLE = "🎓🧙🏻‍♂️ Smart Confidant 🧙🏻‍♂️🎓" # Create labeled model options for the radio selector MODEL_OPTIONS = [] for model in LOCAL_MODELS: MODEL_OPTIONS.append(f"{model} (local)") for model in API_MODELS: MODEL_OPTIONS.append(f"{model} (api)") # Global state for local model pipeline (cached across requests) pipe = None stop_inference = False # Debug logging setup with thread-safe access debug_logs = [] debug_lock = Lock() MAX_LOG_LINES = 100 # ============================================================================ # Debug Logging Functions # ============================================================================ def log_debug(message, level="INFO"): """Add timestamped message to debug log (thread-safe, rotating buffer).""" timestamp = datetime.now().strftime("%H:%M:%S") log_entry = f"[{timestamp}] [{level}] {message}" with debug_lock: debug_logs.append(log_entry) if len(debug_logs) > MAX_LOG_LINES: debug_logs.pop(0) print(log_entry) return "\n".join(debug_logs) def get_debug_logs(): """Retrieve all debug logs as a single string.""" with debug_lock: return "\n".join(debug_logs) # ============================================================================ # Prometheus Metrics # ============================================================================ # Core request metrics REQUEST_COUNTER = Counter('smart_confidant_requests_total', 'Total number of chat requests') SUCCESSFUL_REQUESTS = Counter('smart_confidant_successful_requests_total', 'Total number of successful requests') FAILED_REQUESTS = Counter('smart_confidant_failed_requests_total', 'Total number of failed requests') REQUEST_DURATION = Summary('smart_confidant_request_duration_seconds', 'Time spent processing request') # Enhanced chatbot metrics MODEL_SELECTION_COUNTER = Counter('smart_confidant_model_selections_total', 'Count of model selections', ['model_name', 'model_type']) TOKEN_COUNT = Summary('smart_confidant_tokens_generated', 'Number of tokens generated per response') CONVERSATION_LENGTH = Gauge('smart_confidant_conversation_length', 'Number of messages in current conversation') ERROR_BY_TYPE = Counter('smart_confidant_errors_by_type_total', 'Count of errors by type', ['error_type']) # ============================================================================ # Asset Loading & Theme Configuration # ============================================================================ # Load background image as base64 data URL for CSS injection 
# ============================================================================
# Asset Loading & Theme Configuration
# ============================================================================

# Load background image as a base64 data URL for CSS injection
ASSETS_DIR = Path(__file__).parent / "assets"
BACKGROUND_IMAGE_PATH = ASSETS_DIR / "confidant_pattern.png"

try:
    with open(BACKGROUND_IMAGE_PATH, "rb") as _img_f:
        _encoded_img = base64.b64encode(_img_f.read()).decode("ascii")
    BACKGROUND_DATA_URL = f"data:image/png;base64,{_encoded_img}"
    log_debug("Background image loaded successfully")
except Exception as e:
    log_debug(f"Error loading background image: {e}", "ERROR")
    BACKGROUND_DATA_URL = ""


class TransparentTheme(Base):
    """Custom Gradio theme with a transparent body background to show the tiled image."""

    def __init__(self):
        super().__init__()
        super().set(
            body_background_fill="*neutral_950",
            body_background_fill_dark="*neutral_950",
        )


# Custom CSS for the dark theme with a tiled background image.
# Uses aggressive selectors to override Gradio's default styling.
fancy_css = f"""
/* Tiled background image on page body */
body {{
    background-image: url('{BACKGROUND_DATA_URL}') !important;
    background-repeat: repeat !important;
    background-size: auto !important;
    background-attachment: fixed !important;
    background-color: #1a1a1a !important;
}}

/* Make Gradio wrapper divs transparent to show the background */
gradio-app,
.gradio-container,
.gradio-container > div,
.gradio-container > div > div,
.main,
.contain,
[class*="svelte"] > div,
div[class*="wrap"]:not(.gr-button):not([class*="input"]):not([class*="textbox"]):not([class*="bubble"]):not([class*="message"]),
div[class*="container"]:not([class*="input"]):not([class*="button"]) {{
    background: transparent !important;
    background-color: transparent !important;
    background-image: none !important;
}}

/* Center and constrain the main container */
.gradio-container {{
    max-width: 700px !important;
    margin: 0 auto !important;
    padding: 20px !important;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1) !important;
    border-radius: 10px !important;
    font-family: 'Arial', sans-serif !important;
}}

/* Green title banner */
#title {{
    text-align: center !important;
    font-size: 2em !important;
    margin-bottom: 20px !important;
    color: #ffffff !important;
    background-color: #4CAF50 !important;
    padding: 20px !important;
    border-radius: 10px !important;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.3) !important;
}}

/* Dark grey backgrounds for chatbot and settings components */
.block.svelte-12cmxck {{
    background-color: rgba(60, 60, 60, 0.95) !important;
    border-radius: 10px !important;
}}

div[class*="bubble-wrap"],
div[class*="message-wrap"] {{
    background-color: rgba(60, 60, 60, 0.95) !important;
    border-radius: 10px !important;
    padding: 15px !important;
}}

.label-wrap,
div[class*="accordion"] {{
    background-color: rgba(60, 60, 60, 0.95) !important;
    border-radius: 10px !important;
}}

/* White text for readability on dark backgrounds */
.block.svelte-12cmxck,
.block.svelte-12cmxck *,
div[class*="bubble-wrap"] *,
div[class*="message-wrap"] *,
.label-wrap,
.label-wrap * {{
    color: #ffffff !important;
}}

/* Green buttons with hover effect */
.gr-button,
button {{
    background-color: #4CAF50 !important;
    background-image: none !important;
    color: white !important;
    border: none !important;
    border-radius: 5px !important;
    padding: 10px 20px !important;
    cursor: pointer !important;
    transition: background-color 0.3s ease !important;
}}

.gr-button:hover,
button:hover {{
    background-color: #45a049 !important;
}}

.gr-slider input {{
    color: #4CAF50 !important;
}}
"""
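
# For reference, with type="messages" Gradio hands the handler its history as a
# list of OpenAI-style role/content dicts. A hypothetical two-turn history
# (illustrative only, not produced by this file) would look like:
#
#   history = [
#       {"role": "user", "content": "What does Dark Confidant do?"},
#       {"role": "assistant", "content": "At your upkeep, you reveal the top card of your library..."},
#   ]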

# ============================================================================
# Chat Response Handler
# ============================================================================

def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    selected_model: str,
):
    """
    Handle chat responses using either local transformers models or the
    HuggingFace API.

    Args:
        message: User's input message
        history: List of previous messages in the conversation
        system_message: System prompt to guide model behavior
        max_tokens: Maximum number of tokens to generate
        temperature: Sampling temperature (higher = more random)
        top_p: Nucleus sampling threshold
        selected_model: Model identifier with a "(local)" or "(api)" suffix

    Yields:
        str: Generated response text or an error message
    """
    global pipe

    # Prometheus metrics: track request start
    REQUEST_COUNTER.inc()
    start_time = time.perf_counter()

    try:
        log_debug(f"New message received: '{message[:50]}...'")
        log_debug(f"Selected model: {selected_model}")
        log_debug(f"Parameters - max_tokens: {max_tokens}, temp: {temperature}, top_p: {top_p}")

        # Build the complete message history with the system prompt
        messages = [{"role": "system", "content": system_message}]
        messages.extend(history)
        messages.append({"role": "user", "content": message})
        log_debug(f"Message history length: {len(messages)}")

        # Parse the model type and name from the selection
        is_local = selected_model.endswith("(local)")
        model_name = selected_model.replace(" (local)", "").replace(" (api)", "")

        # Prometheus metrics: track model selection and conversation length
        model_type = "local" if is_local else "api"
        MODEL_SELECTION_COUNTER.labels(model_name=model_name, model_type=model_type).inc()
        CONVERSATION_LENGTH.set(len(messages))

        response = ""

        if is_local:
            # ===== LOCAL MODEL PATH =====
            log_debug(f"Using LOCAL mode with model: {model_name}")
            try:
                from transformers import pipeline
                import torch  # noqa: F401 - ensures the torch backend is present
                log_debug("Transformers imported successfully")

                # Load the pipeline, or reuse the cached one if the model matches
                if pipe is None or pipe.model.name_or_path != model_name:
                    log_debug(f"Loading model pipeline for: {model_name}")
                    pipe = pipeline("text-generation", model=model_name)
                    log_debug("Model pipeline loaded successfully")
                else:
                    log_debug("Using cached model pipeline")

                # Format the conversation as a plain-text prompt
                prompt = "\n".join([f"{m['role']}: {m['content']}" for m in messages])
                log_debug(f"Prompt length: {len(prompt)} characters")

                # Run inference
                log_debug("Starting inference...")
                outputs = pipe(
                    prompt,
                    max_new_tokens=max_tokens,
                    do_sample=True,
                    temperature=temperature,
                    top_p=top_p,
                )
                log_debug("Inference completed")

                # Extract only the new tokens (strip the original prompt)
                response = outputs[0]["generated_text"][len(prompt):]
                log_debug(f"Response length: {len(response)} characters")

                # Prometheus metrics: track success and approximate token count
                SUCCESSFUL_REQUESTS.inc()
                TOKEN_COUNT.observe(len(response.split()))  # Approximate token count via word count

                yield response.strip()

            except ImportError as e:
                # Prometheus metrics: track error
                FAILED_REQUESTS.inc()
                ERROR_BY_TYPE.labels(error_type="import_error").inc()
                error_msg = f"Import error: {str(e)}"
                log_debug(error_msg, "ERROR")
                log_debug(traceback.format_exc(), "ERROR")
                yield f"❌ Import Error: {str(e)}\n\nPlease check the debug logs for details."
            except Exception as e:
                # Prometheus metrics: track error
                FAILED_REQUESTS.inc()
                ERROR_BY_TYPE.labels(error_type="local_model_error").inc()
                error_msg = f"Local model error: {str(e)}"
                log_debug(error_msg, "ERROR")
                log_debug(traceback.format_exc(), "ERROR")
                yield f"❌ Local Model Error: {str(e)}\n\nPlease check the debug logs for details."
        else:
            # ===== API MODEL PATH =====
            log_debug(f"Using API mode with model: {model_name}")
            try:
                # Check for a HuggingFace API token
                hf_token = os.environ.get("HF_TOKEN", None)
                if hf_token:
                    log_debug("HF_TOKEN found in environment")
                else:
                    log_debug("No HF_TOKEN in environment - API call will likely fail", "WARN")

                # Create the HuggingFace Inference client
                log_debug("Creating InferenceClient...")
                client = InferenceClient(
                    api_key=hf_token,
                )
                log_debug("InferenceClient created successfully")

                # Call the chat completion API
                log_debug("Starting chat completion...")
                completion = client.chat.completions.create(
                    model=model_name,
                    messages=messages,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                )
                response = completion.choices[0].message.content
                log_debug(f"Completion received. Response length: {len(response)} characters")

                # Prometheus metrics: track success and approximate token count
                SUCCESSFUL_REQUESTS.inc()
                TOKEN_COUNT.observe(len(response.split()))  # Approximate token count via word count

                yield response

            except Exception as e:
                # Prometheus metrics: track error
                FAILED_REQUESTS.inc()
                ERROR_BY_TYPE.labels(error_type="api_error").inc()
                error_msg = f"API error: {str(e)}"
                log_debug(error_msg, "ERROR")
                log_debug(traceback.format_exc(), "ERROR")
                yield f"❌ API Error: {str(e)}\n\nPlease check the debug logs for details."

    except Exception as e:
        # Prometheus metrics: track error
        FAILED_REQUESTS.inc()
        ERROR_BY_TYPE.labels(error_type="unexpected_error").inc()
        error_msg = f"Unexpected error in respond function: {str(e)}"
        log_debug(error_msg, "ERROR")
        log_debug(traceback.format_exc(), "ERROR")
        yield f"❌ Unexpected Error: {str(e)}\n\nPlease check the debug logs for details."
    finally:
        # Prometheus metrics: record the request duration
        REQUEST_DURATION.observe(time.perf_counter() - start_time)
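
# Because `respond` is a generator, it can be smoke-tested without the UI. A
# hypothetical run (illustrative only; the label must match an entry in
# MODEL_OPTIONS, and a local model is downloaded on first use):
#
#   for chunk in respond(
#       "What does Dark Confidant do?",
#       history=[],
#       system_message=DEFAULT_SYSTEM_MESSAGE,
#       max_tokens=64,
#       temperature=0.7,
#       top_p=0.95,
#       selected_model="arnir0/Tiny-LLM (local)",
#   ):
#       print(chunk)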

# ============================================================================
# Gradio UI Definition
# ============================================================================

# Allow Gradio to serve static files from the assets directory (requires an absolute path)
ASSETS_DIR_ABSOLUTE = str(ASSETS_DIR.resolve())
gr.set_static_paths(paths=[ASSETS_DIR_ABSOLUTE])

with gr.Blocks(theme=TransparentTheme(), css=fancy_css) as demo:
    # Title banner (the "title" elem_id hooks into the #title rules in fancy_css)
    gr.Markdown(f"{TITLE}", elem_id="title")

    # Chatbot component with custom avatar icons. Gradio serves files via HTTP
    # URLs, which require forward slashes rather than Windows backslashes.
    MONSTER_ICON = (ASSETS_DIR / "monster_icon.png").as_posix()
    BOT_ICON = (ASSETS_DIR / "smart_confidant_icon.png").as_posix()
    log_debug(f"Monster icon path: {MONSTER_ICON}")
    log_debug(f"Bot icon path: {BOT_ICON}")

    chatbot = gr.Chatbot(
        type="messages",
        avatar_images=(MONSTER_ICON, BOT_ICON),  # (user avatar, bot avatar)
    )

    # Collapsible settings panel
    with gr.Accordion("⚙️ Additional Settings", open=False):
        system_message = gr.Textbox(value=DEFAULT_SYSTEM_MESSAGE, label="System message")
        max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
        temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
        selected_model = gr.Radio(choices=MODEL_OPTIONS, label="Select Model", value=MODEL_OPTIONS[1])

    # Wire the chat interface up to the response handler
    gr.ChatInterface(
        fn=respond,
        chatbot=chatbot,
        additional_inputs=[
            system_message,
            max_tokens,
            temperature,
            top_p,
            selected_model,
        ],
        type="messages",
    )


# ============================================================================
# Application Entry Point
# ============================================================================

if __name__ == "__main__":
    log_debug("=" * 50)
    log_debug("Smart Confidant Application Starting")
    log_debug(f"Available models: {MODEL_OPTIONS}")
    log_debug(f"HF_TOKEN present: {'Yes' if os.environ.get('HF_TOKEN') else 'No'}")
    log_debug("=" * 50)

    # Start the Prometheus metrics server on port 8000
    log_debug("Starting Prometheus metrics server on port 8000")
    start_http_server(8000)
    log_debug("Prometheus metrics server started - available at http://0.0.0.0:8000/metrics")

    # Launch on all interfaces for VM/container deployment, with a Gradio share link
    demo.launch(
        server_name="0.0.0.0",
        server_port=8012,
        share=True,
        allowed_paths=[ASSETS_DIR_ABSOLUTE],
    )