""" Lamonte Smith Digital Twin — Application Entry Point Security-first design: All user input passes through validation, sanitization, rate limiting, and conversation depth checks before reaching the LLM. All model output passes through disclosure filtering before rendering to the user. """ import logging import os import chromadb import gradio as gr from openai import OpenAI from openai.types.responses import ResponseInputItemParam import config import inference import prompts import rag import security import tools logger = logging.getLogger(__name__) logging.basicConfig(level=logging.WARNING) for name in (__name__, 'inference', 'rag', 'tools', 'security'): logging.getLogger(name).setLevel(config.LOG_LEVEL) ### Environment Setup on_hf_spaces = os.environ.get("SPACE_ID") is not None if on_hf_spaces: from huggingface_hub import snapshot_download snapshot_download( repo_id=config.HUGGINGFACE_DATASET_REPO, repo_type='dataset', local_dir=config.CHROMA_PATH.name, token=os.environ.get('HF_TOKEN'), ) else: from dotenv import load_dotenv load_dotenv() ### Startup Security Audit startup_warnings = security.audit_startup_security() if startup_warnings: for w in startup_warnings: logger.critical(w) oai_client = OpenAI() chroma_client = chromadb.PersistentClient(config.CHROMA_PATH, config.CHROMA_CLIENT_SETTINGS) collection = chroma_client.get_collection(config.CHROMA_COLLECTION_NAME) tool_registry = tools.build_all_tools() # Build BM25 keyword index for hybrid search bm25_index = rag.build_bm25_index(collection) logger.info("BM25 index built with %d documents", len(bm25_index._docs)) ### Session Callback with Security Controls _RATE_LIMITED_MSG = ("I need a moment to catch up — you're sending messages faster than I can " "process them. Give me a few seconds and try again.") _INPUT_TOO_LONG_MSG = ("That message is a bit long for me to process effectively. " "Could you shorten it and try again?") _INJECTION_MSG = ("I appreciate the creativity, but I'm designed to answer questions about " "Lamonte's background, research, and career. What can I help you with?") _DEPTH_LIMIT_MSG = ("We've had a great conversation! For best results, I'd recommend " "starting a fresh chat. I perform best in shorter sessions.") def _prune_stale_injections(api_messages: list, max_retained: int): """Remove old RAG context injections from conversation history to prevent context window bloat. Keeps the system prompt (first developer message) and only the most recent N developer-role context injections. Security benefit: Limits the accumulated context an attacker could probe through progressive extraction over many turns.""" # Find all developer-role messages that contain retrieval results (not the system prompt) injection_indices = [ i for i, m in enumerate(api_messages) if isinstance(m, dict) and m.get('role') == 'developer' and 'retrieved_context' in m.get('content', '') ] # Remove all but the most recent max_retained injections if len(injection_indices) > max_retained: to_remove = injection_indices[:-max_retained] for idx in reversed(to_remove): # reverse to preserve indices api_messages.pop(idx) logger.debug("Pruned %d stale RAG injections, kept %d", len(to_remove), max_retained) def gradio_input_callback(user_input: str, gradio_history: list[gr.ChatMessage], api_messages: list[ResponseInputItemParam]): """ Security-hardened callback. All input passes through validation, sanitization, rate limiting, and depth checks before reaching the LLM pipeline. """ # --- SECURITY GATE 1: Rate limiting --- if not security.rate_limiter.check_query_rate(): yield [gr.ChatMessage(role="assistant", content=_RATE_LIMITED_MSG)], api_messages return # --- SECURITY GATE 2: Input validation --- is_valid, reason = security.validate_input(user_input) if not is_valid: if reason == "input_too_long": msg = _INPUT_TOO_LONG_MSG elif reason == "injection_detected": msg = _INJECTION_MSG else: msg = _INJECTION_MSG yield [gr.ChatMessage(role="assistant", content=msg)], api_messages return # --- SECURITY GATE 3: Input sanitization --- user_input = security.sanitize_input(user_input) # --- SECURITY GATE 4: Conversation depth check --- if not security.check_conversation_depth(api_messages): yield [gr.ChatMessage(role="assistant", content=_DEPTH_LIMIT_MSG)], api_messages return # --- SECURE PIPELINE: Build context and stream response --- if not api_messages: api_messages.append({"role": "developer", "content": prompts.SYSTEM_MESSAGE}) # --- CONTEXT WINDOW MANAGEMENT: Prune stale RAG injections --- # Keep only the N most recent developer-role context injections to prevent # context bloat, token waste, and conflicting information across turns. _prune_stale_injections(api_messages, config.MAX_RETAINED_INJECTIONS) rag_context = rag.build_context_injection( oai_client, collection, user_input, bm25_index=bm25_index ) api_messages.append({"role": "developer", "content": rag_context}) api_messages.append({"role": "user", "content": user_input}) # Secure debug logging — never log full prompts or secrets logger.debug("Processing query (%d chars, %d messages in history)", len(user_input), len(api_messages)) yield from inference.stream_turn(oai_client, api_messages, tool_registry) ### Gradio UI greeting: gr.MessageDict = { "role": "assistant", "content": "Hey there! \U0001f44b I'm Virtual Lamonte. " "Ask me about my AI/ML research, my work at AT&T or GM, my doctoral journey at Walsh College, " "or anything about Agentic AI, cybersecurity, wireless infrastructure, or autonomous vehicles. " "How can I help?" } _avatar_path = config.BASE_DIR / 'assets' / 'avatar.png' _favicon_path = config.BASE_DIR / 'assets' / 'favicon.ico' chatbot = gr.Chatbot( [greeting], type='messages', show_label=False, avatar_images=(None, str(_avatar_path) if _avatar_path.exists() else None), scale=1, ) api_messages = gr.State([]) demo = gr.ChatInterface( fn=gradio_input_callback, chatbot=chatbot, additional_inputs=[api_messages], additional_outputs=[api_messages], additional_inputs_accordion=gr.Accordion(visible=False), title='Virtual Lamonte', fill_height=True, fill_width=False, ) custom_css = ( ".main { max-width: 800px !important; margin: auto !important; }\n" "h1 { text-align: left !important; }\n" ".avatar-container { width: 50px !important; height: 50px !important; }\n" ".avatar-container img { padding: 0 !important; }\n" ".role { align-self: center !important; }\n" ".message-buttons-left { display: none !important; }\n" ".thought-group { width: fit-content !important; padding-right: var(--spacing-xxl) !important}\n" "footer { height: 5px !important; visibility: hidden !important; }\n" ) if __name__ == "__main__": demo.launch()