lazy load vLLM
app.py CHANGED

@@ -1,38 +1,10 @@
  """
-
  """
 
  import os
-
- import time
- import json
- import uuid
- import logging
- import traceback
- from pathlib import Path
-
- from collections import Counter
- from typing import List, Dict, Any, Optional
-
-
- import pandas as pd
- import streamlit as st
- import plotly.express as px
- from langchain_core.messages import HumanMessage, AIMessage
-
-
- from src.agents import get_multi_agent_chatbot, get_smart_chatbot, get_gemini_chatbot
- from src.feedback import FeedbackManager
- from src.ui_components import get_custom_css, display_chunk_statistics_charts, display_chunk_statistics_table, extract_chunk_statistics
-
- from src.config.paths import (
-     IS_DEPLOYED,
-     PROJECT_DIR,
-     HF_CACHE_DIR,
-     FEEDBACK_DIR,
-     CONVERSATIONS_DIR,
- )
-
 
  # ===== CRITICAL: Fix OMP_NUM_THREADS FIRST, before ANY other imports =====
  # Some libraries load at import time and will fail if OMP_NUM_THREADS is invalid
@@ -56,6 +28,17 @@ try:
  except (ValueError, TypeError):
      os.environ["OMP_NUM_THREADS"] = "1"
 
  # ===== Setup HuggingFace cache directories BEFORE any model imports =====
  # CRITICAL: Set these before any imports that might use HuggingFace (like sentence-transformers)
  # Only override cache directories in deployed environment (local uses defaults)
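The guard above is shown only from its fallback branch. A minimal sketch of the full pattern the comments describe, assuming the try-branch parses the current value (the unshown context lines may differ):

import os

try:
    threads = int(os.environ.get("OMP_NUM_THREADS"))
    if threads < 1:
        raise ValueError
except (ValueError, TypeError):
    # int(None) raises TypeError, int("not-a-number") raises ValueError
    os.environ["OMP_NUM_THREADS"] = "1"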
@@ -73,1067 +56,857 @@ if IS_DEPLOYED and HF_CACHE_DIR:
  except (PermissionError, OSError):
      # If we can't create it, log but continue (might already exist from Dockerfile)
      pass
-
  else:
      from dotenv import load_dotenv
      load_dotenv()
 
- #
-
-
 
-
- logger.info(f"PROJECT_DIR: {PROJECT_DIR}")
- logger.info(f"Environment: {'DEPLOYED' if IS_DEPLOYED else 'LOCAL'}")
- logger.info(f"OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', 'NOT SET')}")
- logger.info(f"HuggingFace cache: {os.environ.get('HF_HOME', 'DEFAULT (not overridden)')}")
 
 
-
-
-
- if gpu_check not in st.session_state:
-     st.write(f"GPU check skipped: {e.__str__}")
- print("CUDA:", cuda_)
- logger.info("CUDA:", cuda_)
- if cuda_:
-     if gpu_check not in st.session_state:
-         st.write(f"Device: {torch.cuda.get_device_name(0)}")
-     print("Device:", torch.cuda.get_device_name(0))
-     logger.info(f"Device: {torch.cuda.get_device_name(0)}")
- except Exception as e:
-     if gpu_check not in st.session_state:
-         st.write(f"GPU check skipped: {e.__str__}")
-     logger.error(f"GPU check skipped: {e.__str__}")
-     print("GPU check skipped:", e, file=sys.stderr)
- finally:
-     st.session_state.gpu_check = True
-
 
- #
-
-
-
-     page_title="Intelligent Audit Report Chatbot"
  )
 
 
-
-
-
- def get_system_type():
-     """Get the current system type"""
-     system = os.environ.get('CHATBOT_SYSTEM', 'multi-agent')
-     if system == 'smart':
-         return "Smart Chatbot System"
-     else:
-         return "Multi-Agent System"
 
-
-
-
-
-
- # Check environment variable for system type (v1)
- system = os.environ.get('CHATBOT_SYSTEM', 'multi-agent')
- if system == 'smart':
-     return get_smart_chatbot()
- else:
-     return get_multi_agent_chatbot()
 
-
-
-
-
-
-
-     "type": type(msg).__name__,
-     "content": str(msg.content)
- })
- return serialized
 
-
-
-
 
-
-
 
-
- if content in seen_content:
-     continue
-
- seen_content.add(content)
 
-
-
-
-
- "
-
-
-
-
-
- "
-
-
 
-
 
 
-
 
 
-
-
  try:
-
-
-
-
-
-
 
-
-
-
- st.session_state.messages = []
- if 'conversation_id' not in st.session_state:
-     st.session_state.conversation_id = f"session_{uuid.uuid4().hex[:8]}"
- if 'session_start_time' not in st.session_state:
-     st.session_state.session_start_time = time.time()
- if 'active_filters' not in st.session_state:
-     st.session_state.active_filters = {'sources': [], 'years': [], 'districts': [], 'filenames': []}
- # Track RAG retrieval history for feedback
- if 'rag_retrieval_history' not in st.session_state:
-     st.session_state.rag_retrieval_history = []
- # Version selection (v1 or beta)
- if 'chatbot_version' not in st.session_state:
-     st.session_state.chatbot_version = "v1"
-
- # Initialize chatbot based on version (only if not already initialized for this version)
- chatbot_version_key = f"chatbot_{st.session_state.chatbot_version}"
-
- # Check if we need to initialize: chatbot doesn't exist OR version changed
- needs_init = (
-     chatbot_version_key not in st.session_state or
-     st.session_state.get('_last_version') != st.session_state.chatbot_version
- )
 
-
-
-
-
- else:
-     spinner_msg = "Loading AI models and connecting to database..."
-
- with st.spinner(spinner_msg):
-     st.session_state[chatbot_version_key] = get_chatbot(st.session_state.chatbot_version)
-     st.session_state['_last_version'] = st.session_state.chatbot_version
-     st.session_state.chatbot = st.session_state[chatbot_version_key]
-     print("AI system ready!")
- except Exception as e:
-     st.error(f"Failed to initialize chatbot: {str(e)}")
-     # Only show Gemini-specific error message for beta version
-     if st.session_state.chatbot_version == "beta":
-         st.error("Please check your environment variables (GEMINI_API_KEY, GEMINI_FILESTORE_NAME for beta)")
-     else:
-         st.error("Please check your configuration and ensure all required models and databases are accessible.")
-     # Reset to v1 to prevent infinite loop
-     st.session_state.chatbot_version = "v1"
-     st.session_state['_last_version'] = "v1"
-     if 'chatbot' in st.session_state:
-         del st.session_state['chatbot']
-     st.stop()  # Stop execution to prevent infinite loop
- else:
-     # Chatbot already initialized for this version, just use it
-     st.session_state.chatbot = st.session_state[chatbot_version_key]
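Condensed, the deleted block caches one chatbot instance per selected version in session state, so switching versions never rebuilds an existing instance. A sketch of that pattern using the app's own get_chatbot factory:

version = st.session_state.chatbot_version  # "v1" or "beta"
key = f"chatbot_{version}"
if key not in st.session_state or st.session_state.get('_last_version') != version:
    st.session_state[key] = get_chatbot(version)  # expensive build, done once per version
    st.session_state['_last_version'] = version
st.session_state.chatbot = st.session_state[key]  # point at the active instance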
 
- #
-
- st.session_state.messages = []
- st.session_state.conversation_id = f"session_{uuid.uuid4().hex[:8]}"
- st.session_state.session_start_time = time.time()
- st.session_state.rag_retrieval_history = []
- st.session_state.feedback_submitted = False
- st.session_state.reset_conversation = False
- st.rerun()
 
 
-
-
-
-
-
-
-
- "
-
-     index=0 if st.session_state.chatbot_version == "v1" else 1,
-     horizontal=True,
-     key="version_selector",
-     help="Select v1 (default RAG system) or beta (Gemini FSA)"
- )
 
-
-
-
-
-
 
- #
-
-
-
-
-
-
-
 
-
 
- #
-
- st.info("**Beta Mode**: Using Google Gemini FSA")
 
-
- duration = int(time.time() - st.session_state.session_start_time)
- duration_str = f"{duration // 60}m {duration % 60}s"
- st.markdown(f'''
- <div class="session-info">
-     <strong>Session Info:</strong> Messages: {len(st.session_state.messages)} | Duration: {duration_str} | Status: Active | ID: {st.session_state.conversation_id}
- </div>
- ''', unsafe_allow_html=True)
 
- # Load filter options
- filter_options = load_filter_options()
 
-
-
-
-
-
 
-
 
-
 
-
 
-
 
-
 
-
-
- - Check the "Retrieved Documents" tab to see source material
 
-
 
-
 
-
 
-
-
-
-
-
-
- st.markdown('<div class="filter-title">Specific Reports (Filename Filter)</div>', unsafe_allow_html=True)
- st.markdown('<p style="font-size: 0.85em; color: #666;">Selecting specific reports will ignore all other filters</p>', unsafe_allow_html=True)
- selected_filenames = st.multiselect(
-     "Select specific reports:",
-     options=filter_options.get('filenames', []),
-     default=st.session_state.active_filters.get('filenames', []),
-     key="filenames_filter",
-     help="Choose specific reports to search. When enabled, all other filters are ignored."
  )
-
 
- #
-
-
- # st.markdown('<div class="filter-section">', unsafe_allow_html=True)
- st.markdown('<div class="filter-title">Sources</div>', unsafe_allow_html=True)
- selected_sources = st.multiselect(
-     "Select sources:",
-     options=filter_options['sources'],
-     default=st.session_state.active_filters['sources'],
-     disabled=filename_mode,
-     key="sources_filter",
-     help="Choose which types of reports to search"
- )
- st.markdown('</div>', unsafe_allow_html=True)
 
-
-
- st.markdown('<div class="filter-title">Years</div>', unsafe_allow_html=True)
- selected_years = st.multiselect(
-     "Select years:",
-     options=filter_options['years'],
-     default=st.session_state.active_filters['years'],
-     disabled=filename_mode,
-     key="years_filter",
-     help="Choose which years to search"
- )
- st.markdown('</div>', unsafe_allow_html=True)
 
- #
-
-
- selected_districts = st.multiselect(
-     "Select districts:",
-     options=filter_options['districts'],
-     default=st.session_state.active_filters['districts'],
-     disabled=filename_mode,
-     key="districts_filter",
-     help="Choose which districts to search"
- )
- st.markdown('</div>', unsafe_allow_html=True)
 
-
- st.session_state.active_filters = {
-     'sources': selected_sources if not filename_mode else [],
-     'years': selected_years if not filename_mode else [],
-     'districts': selected_districts if not filename_mode else [],
-     'filenames': selected_filenames
- }
 
-
-
-
-
 
-
-
 
-
- #
-
 
-
-
-
-
-
-
-
 
- #
- st.
 
-
-
 
-
- # Use a counter to force input clearing
- if 'input_counter' not in st.session_state:
-     st.session_state.input_counter = 0
-
- # Handle pending question from example questions section
- if 'pending_question' in st.session_state and st.session_state.pending_question:
-     default_value = st.session_state.pending_question
-     # Increment counter to force new input widget
-     st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
-     del st.session_state.pending_question
-     key_suffix = st.session_state.input_counter
- else:
-     default_value = ""
-     key_suffix = st.session_state.input_counter
-
- user_input = st.text_input(
-     "Type your message here...",
-     placeholder="Ask about budget allocations, expenditures, or audit findings...",
-     key=f"user_input_{key_suffix}",
-     label_visibility="collapsed",
-     value=default_value if default_value else None
- )
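The counter trick above is a common Streamlit idiom: a widget resets when its key changes, because Streamlit treats it as a brand-new widget on the next rerun. A minimal standalone sketch of the same idea (names invented here):

if 'input_counter' not in st.session_state:
    st.session_state.input_counter = 0

def clear_input():
    st.session_state.input_counter += 1  # new key => fresh, empty text_input on rerun

user_input = st.text_input("Message", key=f"user_input_{st.session_state.input_counter}")
st.button("Clear", on_click=clear_input)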
 
-
-
 
-
- if st.button("Clear Chat", key="clear_chat_button"):
-     st.session_state.reset_conversation = True
-     # Clear all conversation files
-     conversations_path = CONVERSATIONS_DIR
-     if conversations_path.exists():
-         for file in conversations_path.iterdir():
-             if file.suffix == '.json':
-                 file.unlink()
-     st.rerun()
 
-
-
-
- filter_context_str = ""
- if selected_filenames:
-     filter_context_str += "FILTER CONTEXT:\n"
-     filter_context_str += f"Filenames: {', '.join(selected_filenames)}\n"
-     filter_context_str += "USER QUERY:\n"
- elif selected_sources or selected_years or selected_districts:
-     filter_context_str += "FILTER CONTEXT:\n"
-     if selected_sources:
-         filter_context_str += f"Sources: {', '.join(selected_sources)}\n"
-     if selected_years:
-         filter_context_str += f"Years: {', '.join(selected_years)}\n"
-     if selected_districts:
-         filter_context_str += f"Districts: {', '.join(selected_districts)}\n"
-     filter_context_str += "USER QUERY:\n"
-
- full_query = filter_context_str + user_input
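With, for example, Sources = ['Audit Report'] and Years = ['2021'] selected (values invented for illustration), the full_query string the deleted code sends to the chatbot has this shape:

FILTER CONTEXT:
Sources: Audit Report
Years: 2021
USER QUERY:
What were the main findings?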
 
- #
-
 
-
-
-
-
-
-
- if isinstance(chat_result, dict):
-     response = chat_result['response']
-     rag_result = chat_result.get('rag_result')
-     st.session_state.last_rag_result = rag_result
-
-     # Track RAG retrieval for feedback
-     if rag_result:
-         sources = rag_result.get('sources', []) if isinstance(rag_result, dict) else (rag_result.sources if hasattr(rag_result, 'sources') else [])
-
-         # For Gemini, also check gemini_result for sources
-         if not sources or len(sources) == 0:
-             gemini_result = chat_result.get('gemini_result')
-             print(f"DEBUG: Checking gemini_result for sources...")
-             print(f"  gemini_result exists: {gemini_result is not None}")
-             if gemini_result:
-                 print(f"  gemini_result type: {type(gemini_result)}")
-                 print(f"  has sources attr: {hasattr(gemini_result, 'sources')}")
-                 if hasattr(gemini_result, 'sources'):
-                     print(f"  sources length: {len(gemini_result.sources) if gemini_result.sources else 0}")
-
-             if gemini_result and hasattr(gemini_result, 'sources'):
-                 # Format Gemini sources for display
-                 if hasattr(st.session_state.chatbot, 'gemini_client'):
-                     sources = st.session_state.chatbot.gemini_client.format_sources_for_display(gemini_result)
-                     print(f"Formatted {len(sources)} sources from gemini_client")
-                 elif hasattr(st.session_state.chatbot, '_format_gemini_sources'):
-                     sources = st.session_state.chatbot._format_gemini_sources(gemini_result)
-                     print(f"Formatted {len(sources)} sources from _format_gemini_sources")
-
-                 # Update rag_result with sources if we found them
-                 if sources and len(sources) > 0:
-                     if isinstance(rag_result, dict):
-                         rag_result['sources'] = sources
-                     elif hasattr(rag_result, 'sources'):
-                         rag_result.sources = sources
-                     # Update last_rag_result with sources
-                     st.session_state.last_rag_result = rag_result
-                     print(f"Updated rag_result with {len(sources)} sources")
-
-         # Get the actual RAG query
-         actual_rag_query = chat_result.get('actual_rag_query', '')
-         if actual_rag_query:
-             # Format it like the log message
-             timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
-             formatted_query = f"{timestamp} - INFO - ACTUAL RAG QUERY: '{actual_rag_query}'"
-         else:
-             formatted_query = "No RAG query available"
-
-         # Extract filters from active filters
-         filters_used = {
-             "sources": st.session_state.active_filters.get('sources', []),
-             "years": st.session_state.active_filters.get('years', []),
-             "districts": st.session_state.active_filters.get('districts', []),
-             "filenames": st.session_state.active_filters.get('filenames', [])
-         }
-
-         retrieval_entry = {
-             "conversation_up_to": serialize_messages(st.session_state.messages),
-             "rag_query_expansion": formatted_query,
-             "docs_retrieved": serialize_documents(sources),
-             "filters_applied": filters_used,
-             "timestamp": time.time()
-         }
-         st.session_state.rag_retrieval_history.append(retrieval_entry)
-
-         # Debug logging
-         print(f"RETRIEVAL TRACKING: {len(sources)} sources stored in retrieval history")
- else:
-     response = chat_result
-     st.session_state.last_rag_result = None
-
- # Add bot response to history
- st.session_state.messages.append(AIMessage(content=response))
-
- except Exception as e:
-     error_msg = f"Sorry, I encountered an error: {str(e)}"
-     st.session_state.messages.append(AIMessage(content=error_msg))
 
- #
- st.session_state.
-
-
- with tab2:
-     # Document retrieval panel
-     if hasattr(st.session_state, 'last_rag_result') and st.session_state.last_rag_result:
-         rag_result = st.session_state.last_rag_result
 
-         #
-
-         if
-
-             sources = rag_result.sources
-         elif isinstance(rag_result, dict) and 'sources' in rag_result:
-             # Dictionary format from multi-agent system
-             sources = rag_result['sources']
 
-         #
-
-
-         if
-
-
-
-
-             sources = st.session_state.chatbot._format_gemini_sources(gemini_result)
 
-
-
-
-
-             filename = getattr(doc, 'metadata', {}).get('filename', 'Unknown')
-             unique_filenames.add(filename)
-
-         st.markdown(f"**Found {len(sources)} document chunks from {len(unique_filenames)} unique documents (showing top 20):**")
-         if len(unique_filenames) < len(sources):
-             st.info(f"**Note**: Each document is split into multiple chunks. You're seeing {len(sources)} chunks from {len(unique_filenames)} documents.")
-
-         # Extract and display statistics
-         stats = extract_chunk_statistics(sources)
-
-         # Show charts for 10+ results, tables for fewer
-         if len(sources) >= 10:
-             display_chunk_statistics_charts(stats, "Retrieval Statistics")
-             # Also show tables below charts for detailed view
-             st.markdown("---")
-             display_chunk_statistics_table(stats, "Retrieval Distribution")
-         else:
-             display_chunk_statistics_table(stats, "Retrieval Distribution")
-
-         st.markdown("---")
-         st.markdown("### Document Details")
-
-         for i, doc in enumerate(sources):  # Show all documents
-             # Get relevance score and ID if available
-             metadata = getattr(doc, 'metadata', {})
-             # Handle both standard RAG scores and Gemini scores
-             score = metadata.get('reranked_score') or metadata.get('original_score') or metadata.get('score')
-             chunk_id = metadata.get('_id') or metadata.get('chunk_id', 'Unknown')
-             if score is not None:
-                 try:
-                     score_text = f" (Score: {float(score):.3f})"
-                 except (ValueError, TypeError):
-                     score_text = ""
-             else:
-                 score_text = ""
-             if chunk_id and chunk_id != 'Unknown':
-                 score_text += f" (ID: {str(chunk_id)[:8]}...)" if score_text else f" (ID: {str(chunk_id)[:8]}...)"
-
-             with st.expander(f"Document {i+1}: {getattr(doc, 'metadata', {}).get('filename', 'Unknown')[:50]}...{score_text}"):
-                 # Display document metadata
-                 metadata = getattr(doc, 'metadata', {})
-                 col1, col2, col3, col4 = st.columns([2, 1.5, 1, 1])
-
-                 with col1:
-                     st.write(f"**File:** {metadata.get('filename', 'Unknown')}")
-                 with col2:
-                     st.write(f"**Source:** {metadata.get('source', 'Unknown')}")
-                 with col3:
-                     st.write(f"**Year:** {metadata.get('year', 'Unknown')}")
-                 with col4:
-                     # Display page number and chunk ID
-                     page = metadata.get('page_label', metadata.get('page', 'Unknown'))
-                     chunk_id = metadata.get('_id', 'Unknown')
-                     st.write(f"**Page:** {page}")
-                     st.write(f"**ID:** {chunk_id}")
-
-                 # Display full content (no truncation)
-                 content = getattr(doc, 'page_content', 'No content available')
-                 st.write(f"**Full Content:**")
-                 st.text_area("Full Content", value=content, height=300, disabled=True, label_visibility="collapsed", key=f"preview_{i}")
  else:
-     st.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- with col2:
-     is_feedback_about_last_retrieval = st.checkbox(
-         "Feedback about last retrieval only",
-         value=True,
-         help="If checked, feedback applies to the most recent document retrieval"
-     )
-
- open_ended_feedback = st.text_area(
-     "Your feedback (optional)",
-     placeholder="Tell us what went well or what could be improved...",
-     height=100
- )
-
- # Disable submit if no score selected
- submit_disabled = feedback_score is None
-
- submitted = st.form_submit_button(
-     "Submit Feedback",
-     width='stretch',
-     disabled=submit_disabled
- )
-
- if submitted:
-     # Log the feedback data being submitted
-     print("=" * 80)
-     print("FEEDBACK SUBMISSION: Starting...")
-     print("=" * 80)
-     st.write("**Debug: Feedback Data Being Submitted:**")
-
-     # Extract transcript from messages
-     transcript = feedback_manager.extract_transcript(st.session_state.messages)
-
-     # Build retrievals structure
-     retrievals = feedback_manager.build_retrievals_structure(
-
-         st.session_state.rag_retrieval_history.copy() if st.session_state.rag_retrieval_history else [],
-         st.session_state.messages
-     )
-
-     # Build feedback_score_related_retrieval_docs
-
-     feedback_score_related_retrieval_docs = feedback_manager.build_feedback_score_related_retrieval_docs(
-         is_feedback_about_last_retrieval,
-         st.session_state.messages,
-         st.session_state.rag_retrieval_history.copy() if st.session_state.rag_retrieval_history else []
  )
 
-
-
 
-
-
-
-         "score": feedback_score,
-         "is_feedback_about_last_retrieval": is_feedback_about_last_retrieval,
-         "conversation_id": st.session_state.conversation_id,
-         "timestamp": time.time(),
-         "message_count": len(st.session_state.messages),
-         "has_retrievals": has_retrievals,
-         "retrieval_count": len(st.session_state.rag_retrieval_history) if st.session_state.rag_retrieval_history else 0,
-         "transcript": transcript,
-         "retrievals": retrievals,
-         "feedback_score_related_retrieval_docs": feedback_score_related_retrieval_docs,
-         "retrieved_data": retrieved_data_old_format  # Preserved old column
-     }
-
-     print(f"FEEDBACK SUBMISSION: Score={feedback_score}, Retrievals={len(st.session_state.rag_retrieval_history) if st.session_state.rag_retrieval_history else 0}")
-
-     # Create UserFeedback dataclass instance
-     feedback_obj = None  # Initialize outside try block
-     try:
-         feedback_obj = feedback_manager.create_feedback_from_dict(feedback_dict)
-         print(f"FEEDBACK SUBMISSION: Feedback object created - ID={feedback_obj.feedback_id}")
-         st.write(f"**Feedback Object Created**")
-         st.write(f"- Feedback ID: {feedback_obj.feedback_id}")
-         st.write(f"- Score: {feedback_obj.score}/5")
-         st.write(f"- Has Retrievals: {feedback_obj.has_retrievals}")
-
-         # Convert back to dict for JSON serialization
-         feedback_data = feedback_obj.to_dict()
-     except Exception as e:
-         print(f"FEEDBACK SUBMISSION: Failed to create feedback object: {e}")
-         st.error(f"Failed to create feedback object: {e}")
-         feedback_data = feedback_dict
-
-     # Display the data being submitted
-     st.json(feedback_data)
-
-     # Save feedback to file - use PROJECT_DIR to ensure writability
-     feedback_dir = FEEDBACK_DIR
-     try:
-         # Ensure directory exists with write permissions (777 for compatibility)
-         feedback_dir.mkdir(parents=True, mode=0o777, exist_ok=True)
-     except (PermissionError, OSError) as e:
-         logger.warning(f"Could not create feedback directory at {feedback_dir}: {e}")
-         # Fallback to relative path
-         feedback_dir = Path("feedback")
-         feedback_dir.mkdir(parents=True, mode=0o777, exist_ok=True)
-
-     feedback_file = feedback_dir / f"feedback_{st.session_state.conversation_id}_{int(time.time())}.json"
-
-     try:
-         # Ensure parent directory exists before writing
-         feedback_file.parent.mkdir(parents=True, mode=0o777, exist_ok=True)
-
-         # Save to local file first
-         print(f"FEEDBACK SAVE: Saving to local file: {feedback_file}")
-         with open(feedback_file, 'w') as f:
-             json.dump(feedback_data, f, indent=2, default=str)
-
-         print(f"FEEDBACK SAVE: Local file saved successfully")
-
-         # Save to Snowflake if enabled and credentials available
-         logger.info("FEEDBACK SAVE: Starting Snowflake save process...")
-         logger.info(f"FEEDBACK SAVE: feedback_obj={'exists' if feedback_obj else 'None'}")
-
-         snowflake_success = False
-         try:
-             snowflake_enabled = os.getenv("SNOWFLAKE_ENABLED", "false").lower() == "true"
-             logger.info(f"SNOWFLAKE CHECK: enabled={snowflake_enabled}")
-
-             if snowflake_enabled:
-                 if feedback_obj:
-                     try:
-                         logger.info("SNOWFLAKE UI: Attempting to save feedback to Snowflake...")
-                         print("SNOWFLAKE UI: Attempting to save feedback to Snowflake...")
-
-                         # Show spinner while saving to Snowflake (can take 10-15 seconds)
-                         # This includes: connection establishment (~5s), data preparation, and SQL execution (~5s)
-                         with st.spinner("Saving feedback to Snowflake... This may take 10-15 seconds (connecting to database, preparing data, and executing query)"):
-                             snowflake_success = feedback_manager.save_to_snowflake(feedback_obj)
-
-                         if snowflake_success:
-                             logger.info("SNOWFLAKE UI: Successfully saved to Snowflake")
-                             print("SNOWFLAKE UI: Successfully saved to Snowflake")
-                         else:
-                             logger.warning("SNOWFLAKE UI: Save failed")
-                             print("SNOWFLAKE UI: Save failed")
-                     except Exception as e:
-                         logger.error(f"SNOWFLAKE UI ERROR: {e}")
-                         print(f"SNOWFLAKE UI ERROR: {e}")
-                         traceback.print_exc()
-                         snowflake_success = False
-                 else:
-                     logger.warning("SNOWFLAKE UI: Skipping (feedback object not created)")
-                     print("SNOWFLAKE UI: Skipping (feedback object not created)")
-                     snowflake_success = False
-             else:
-                 logger.info("SNOWFLAKE UI: Integration disabled")
-                 print("SNOWFLAKE UI: Integration disabled")
-                 # If Snowflake is disabled, consider it successful (local save only)
-                 snowflake_success = True
-
-         except Exception as e:
-             logger.error(f"Exception in Snowflake save: {type(e).__name__}: {e}")
-             print(f"Exception in Snowflake save: {type(e).__name__}: {e}")
-             snowflake_success = False
-
-         # Only show success if Snowflake save succeeded (or if Snowflake is disabled)
-         if snowflake_success:
-             st.success("Thank you for your feedback! It has been saved successfully.")
-             st.balloons()
  else:
-
-
-
-
-
-
-         print("=" * 80)
-
-         # Log file location
-         st.info(f"Feedback saved to: {feedback_file}")
-
-     except Exception as e:
-         print(f"FEEDBACK SUBMISSION: Error saving feedback: {e}")
-         print(f"FEEDBACK SUBMISSION: Error type: {type(e).__name__}")
-         traceback.print_exc()
-         st.error(f"Error saving feedback: {e}")
-         st.write(f"Debug error: {str(e)}")
- else:
-     # Feedback already submitted - show success message and reset option
-     st.success("Feedback already submitted for this conversation!")
-     col1, col2 = st.columns([1, 1])
-     with col1:
-         if st.button("Submit New Feedback", key="new_feedback_button", width='stretch'):
-             try:
-                 st.session_state.feedback_submitted = False
-                 st.rerun()
-             except Exception as e:
-                 # Handle any Streamlit API exceptions gracefully
-                 logger.error(f"Error resetting feedback state: {e}")
-                 st.error(f"Error resetting feedback. Please refresh the page.")
-     with col2:
-         if st.button("View Conversation", key="view_conversation_button", width='stretch'):
-             # Scroll to conversation - this is handled by the auto-scroll at bottom
-             pass
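The save path above follows a write-local-first pattern: the JSON file is always written, the Snowflake upload is attempted only when enabled, and success is reported only if the remote step succeeded or was deliberately disabled. Condensed into a sketch around the app's own FeedbackManager:

import json, os

def save_feedback(feedback_data, feedback_file, feedback_manager, feedback_obj):
    with open(feedback_file, 'w') as f:
        json.dump(feedback_data, f, indent=2, default=str)  # local copy always written
    if os.getenv("SNOWFLAKE_ENABLED", "false").lower() != "true":
        return True  # remote disabled: local-only save counts as success
    try:
        return feedback_manager.save_to_snowflake(feedback_obj)
    except Exception:
        return False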
-
- # Display retrieval history stats
- if st.session_state.rag_retrieval_history:
-     st.markdown("---")
-     st.markdown("#### Retrieval History")
-
-     with st.expander(f"View {len(st.session_state.rag_retrieval_history)} retrieval entries", expanded=True):
-         for idx, entry in enumerate(st.session_state.rag_retrieval_history, 1):
-             st.markdown(f"### **Retrieval #{idx}**")
-
-             # Display timestamp if available
-             if entry.get("timestamp"):
-                 timestamp_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(entry["timestamp"]))
-                 st.caption(f"{timestamp_str}")
-
-             # Display the actual RAG query
-             rag_query_expansion = entry.get("rag_query_expansion", "No query available")
-             st.markdown("**RAG Query:**")
-             st.code(rag_query_expansion, language="text")
-
-             # Display filters used
-             filters_applied = entry.get("filters_applied", {})
-             if filters_applied and any(filters_applied.values()):
-                 st.markdown("**Filters Applied:**")
-                 filter_display = {}
-                 if filters_applied.get("sources"):
-                     filter_display["Sources"] = filters_applied["sources"]
-                 if filters_applied.get("years"):
-                     filter_display["Years"] = filters_applied["years"]
-                 if filters_applied.get("districts"):
-                     filter_display["Districts"] = filters_applied["districts"]
-                 if filters_applied.get("filenames"):
-                     filter_display["Filenames"] = filters_applied["filenames"]
 
-             if
-             st.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                 st.info("No conversation history available")
-
-             # Display documents retrieved
-             docs_retrieved = entry.get("docs_retrieved", [])
-             if docs_retrieved:
-                 st.markdown(f"**Documents Retrieved ({len(docs_retrieved)}):**")
-                 with st.expander(f"View {len(docs_retrieved)} documents", expanded=False):
-                     for doc_idx, doc in enumerate(docs_retrieved, 1):
-                         st.markdown(f"**Document {doc_idx}:**")
-
-                         # Display metadata
-                         metadata = doc.get("metadata", {})
-                         if metadata:
-                             col1, col2, col3 = st.columns(3)
-                             with col1:
-                                 st.write(f"**File:** {metadata.get('filename', 'Unknown')}")
-                             with col2:
-                                 st.write(f"**Source:** {metadata.get('source', 'Unknown')}")
-                             with col3:
-                                 st.write(f"**Year:** {metadata.get('year', 'Unknown')}")
-
-                             # Additional metadata
-                             if metadata.get('district'):
-                                 st.write(f"**District:** {metadata.get('district')}")
-                             if metadata.get('page'):
-                                 st.write(f"**Page:** {metadata.get('page')}")
-                             if metadata.get('score') is not None:
-                                 st.write(f"**Score:** {metadata.get('score'):.3f}" if isinstance(metadata.get('score'), (int, float)) else f"**Score:** {metadata.get('score')}")
-
-                         # Display content preview (first 200 chars)
-                         content = doc.get("content", doc.get("page_content", ""))
-                         if content:
-                             st.markdown("**Content Preview:**")
-                             st.text_area(
-                                 "Content Preview",
-                                 value=content[:200] + ("..." if len(content) > 200 else ""),
-                                 height=100,
-                                 disabled=True,
-                                 label_visibility="collapsed",
-                                 key=f"retrieval_{idx}_doc_{doc_idx}_preview"
-                             )
-
-                         if doc_idx < len(docs_retrieved):
-                             st.markdown("---")
-             else:
-                 st.info("No documents retrieved")
-
-             # Display summary stats
-             st.markdown("**Summary:**")
-             st.json({
-                 "conversation_length": len(conversation_up_to),
-                 "documents_retrieved": len(docs_retrieved)
-             })
 
- if
- st.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
  st.rerun()
-
-
-
-
- st.info("**Filter to apply:** Select District(s) and Year(s) sidebar panel before asking this question.")
-
- st.markdown("---")
-
- # Question 3
- # st.markdown("**Question 3:**")
- custom_q2 = st.text_area(
-     "Edit question 3:",
-     value=st.session_state.custom_question_2,
-     height=80,
-     key="edit_question_3",
-     help="Modify this question to fit your needs, then click 'Use This Question'"
- )
- col1, col2 = st.columns([1, 4])
- with col1:
-     if st.button("Use Question 3", key="use_custom_2", width='stretch'):
-         if custom_q2.strip():
-             st.session_state.pending_question = custom_q2.strip()
-             st.session_state.custom_question_2 = custom_q2.strip()
-             st.session_state.input_counter = (st.session_state.get('input_counter', 0) + 1) % 1000
  st.rerun()
-
-
-
-
-
-
-
 
- #
- st.
-
-
-
-
 
-
-
-
-
-
-
-
- print("=" * 80)
- print("\nPlease run this app using:")
- print("  streamlit run app.py")
- print("\nNot: python app.py")
- print("\nThe app will not function correctly when run with 'python app.py'")
- print("=" * 80)
- import sys
- sys.exit(1)
- except ImportError:
-     # Streamlit not installed or not in Streamlit context
-     print("=" * 80)
-     print("WARNING: This is a Streamlit app!")
-     print("=" * 80)
-     print("\nPlease run this app using:")
-     print("  streamlit run app.py")
-     print("\nNot: python app.py")
-     print("=" * 80)
-     import sys
-     sys.exit(1)
- main()
app.py after the change (addition side of the same hunks):

  """
+ FempowerBot Training Simulator - Main Application
+ Interactive chatbot for practicing communication strategies.
  """
 
  import os
+ import sys
 
  # ===== CRITICAL: Fix OMP_NUM_THREADS FIRST, before ANY other imports =====
  # Some libraries load at import time and will fail if OMP_NUM_THREADS is invalid
 
  except (ValueError, TypeError):
      os.environ["OMP_NUM_THREADS"] = "1"
 
+ # ===== Import path configuration BEFORE other imports =====
+ from src.config.paths import (
+     IS_DEPLOYED,
+     PROJECT_DIR,
+     HF_CACHE_DIR,
+     FEEDBACK_DIR,
+     CONVERSATIONS_DIR,
+     PROMPTS_DIR,
+     LOGS_DIR,
+ )
+
  # ===== Setup HuggingFace cache directories BEFORE any model imports =====
  # CRITICAL: Set these before any imports that might use HuggingFace (like sentence-transformers)
  # Only override cache directories in deployed environment (local uses defaults)
 
  except (PermissionError, OSError):
      # If we can't create it, log but continue (might already exist from Dockerfile)
      pass
  else:
+     # Local development - load .env file and ensure NO cache vars are set
+     # Let HuggingFace use its defaults (~/.cache/huggingface)
      from dotenv import load_dotenv
      load_dotenv()
+
+     # Unset any HF cache variables that might exist in the environment
+     for var in ["HF_HOME", "TRANSFORMERS_CACHE", "HF_DATASETS_CACHE", "HF_HUB_CACHE", "SENTENCE_TRANSFORMERS_HOME"]:
+         if var in os.environ:
+             del os.environ[var]
 
+ # ===== NOW safe to import everything else =====
+ import streamlit as st
+ from pathlib import Path
+ import json
+ from datetime import datetime
+ import logging
 
+ from src.config.loader import config
 
+ # Import Ollama for local development (lightweight import)
+ if not IS_DEPLOYED:
+     import ollama
 
+ # NOTE: vLLM is NOT imported here! It's imported lazily in load_model()
+ # Reason: vLLM import takes 30-60 seconds due to CUDA initialization
+ # This keeps app startup fast and lets Streamlit render UI immediately
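Deferring a heavy import into the function that first needs it is the core of this commit; paired with st.cache_resource (used in load_model below), the import cost is paid once per process instead of at every app start. A minimal standalone sketch of the pattern:

import streamlit as st

@st.cache_resource
def get_engine(model_path: str):
    from vllm import LLM  # heavy import deferred until the first model load
    return LLM(model=model_path)  # cached, so subsequent reruns reuse the engine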
 
+ # ===== Setup logging =====
+ log_level = os.getenv("LOG_LEVEL", config.get("logging.level", "INFO"))
+ logging.basicConfig(
+     level=getattr(logging, log_level),
+     format=config.get("logging.format", "%(asctime)s - %(name)s - %(levelname)s - %(message)s")
  )
+ logger = logging.getLogger(__name__)
 
+ # Reduce noise from external libraries
+ logging.getLogger("httpcore").setLevel(logging.WARNING)
+ logging.getLogger("httpx").setLevel(logging.WARNING)
+ logging.getLogger("fsevents").setLevel(logging.WARNING)
 
+ logger.info(f"Starting FempowerBot - Deployed: {IS_DEPLOYED}")
 
+ # Log startup message
+ if IS_DEPLOYED:
+     logger.info("App starting in DEPLOYED mode (vLLM will be imported when model is loaded)")
+ else:
+     logger.info("App starting in LOCAL mode (using Ollama)")
 
+ # ===== Page Configuration =====
+ st.set_page_config(
+     page_title=config.get("app.title", "FempowerBot Training Simulator"),
+     page_icon=config.get("app.page_icon", "💬"),
+     layout=config.get("app.layout", "wide")
+ )
 
+ # ===== Initialize session state =====
+ if "messages" not in st.session_state:
+     st.session_state.messages = []
+ if "model_loaded" not in st.session_state:
+     st.session_state.model_loaded = False
+ if "current_model" not in st.session_state:
+     st.session_state.current_model = None
+ if "current_persona" not in st.session_state:
+     st.session_state.current_persona = None
+ if "current_prompt_type" not in st.session_state:
+     st.session_state.current_prompt_type = None
+ if "session_id" not in st.session_state:
+     st.session_state.session_id = datetime.now().strftime("%Y%m%d_%H%M%S")
+ if "response_times" not in st.session_state:
+     st.session_state.response_times = []
+ if "custom_prompt" not in st.session_state:
+     st.session_state.custom_prompt = None
+ if "loaded_prompt_text" not in st.session_state:
+     st.session_state.loaded_prompt_text = ""
+ if "prompt_edited" not in st.session_state:
+     st.session_state.prompt_edited = False
+ if "few_shot_examples" not in st.session_state:
+     st.session_state.few_shot_examples = ""
+ if "custom_gen_params" not in st.session_state:
+     st.session_state.custom_gen_params = None
+ if "show_save_dialog" not in st.session_state:
+     st.session_state.show_save_dialog = False
+ if "last_prompt_selection" not in st.session_state:
+     st.session_state.last_prompt_selection = ""
+
+
+ def load_prompt_from_file(persona: str, prompt_type: str) -> str:
+     """Load prompt text from file.
 
+     For 'modular' type: Combines base instructions + persona-specific module.
+     For other types: Loads single file.
+     """
+     # Handle modular prompts (base + persona-specific)
+     if prompt_type.lower() == "modular":
+         base_path = PROMPTS_DIR / "_base_instructions.txt"
+         persona_path = PROMPTS_DIR / f"{persona.lower()}_modular.txt"
 
+         prompt_parts = []
 
+         # Load base instructions
+         if base_path.exists():
+             with open(base_path, "r", encoding="utf-8") as f:
+                 prompt_parts.append(f.read())
+         else:
+             logger.warning(f"Base instructions not found: {base_path}")
+
+         # Load persona-specific module
+         if persona_path.exists():
+             with open(persona_path, "r", encoding="utf-8") as f:
+                 prompt_parts.append(f.read())
+         else:
+             logger.error(f"Persona module not found: {persona_path}")
+             return ""
+
+         # Combine with clear separation between the parts
+         return ("\n\n" + "=" * 80 + "\n\n").join(prompt_parts)
+
+     # Handle standard single-file prompts
+     filename = f"{persona.lower()}_{prompt_type.lower()}.txt"
+     prompt_path = PROMPTS_DIR / filename
 
+     if prompt_path.exists():
+         with open(prompt_path, "r", encoding="utf-8") as f:
+             return f.read()
+     else:
+         logger.error(f"Prompt file not found: {prompt_path}")
+         return ""
 
 
+ def get_available_prompt_types(persona: str) -> list:
+     """Get all available prompt types for a persona by scanning prompts directory."""
+     persona_lower = persona.lower()
+     prompt_files = list(PROMPTS_DIR.glob(f"{persona_lower}_*.txt"))
+
+     # Extract prompt types from filenames
+     types = []
+     for file in prompt_files:
+         # Format: {persona}_{type}.txt
+         type_name = file.stem.replace(f"{persona_lower}_", "")
+         types.append(type_name.capitalize())
+
+     return sorted(types)
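With the hypothetical files from the sketch above on disk, the scan yields the options for a type selector (note that _base_instructions.txt does not match the persona glob):

get_available_prompt_types("Nurse")  # -> ['Full', 'Modular']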
 
+ def save_custom_prompt(persona: str, prompt_type_name: str, prompt_text: str) -> bool:
+     """Save a custom prompt to disk."""
+     try:
+         filename = f"{persona.lower()}_{prompt_type_name.lower()}.txt"
+         prompt_path = PROMPTS_DIR / filename
+
+         # Don't overwrite compressed or full
+         if prompt_type_name.lower() in ['compressed', 'full']:
+             logger.error(f"Cannot overwrite default prompt types: {prompt_type_name}")
+             return False
+
+         with open(prompt_path, "w", encoding="utf-8") as f:
+             f.write(prompt_text)
+
+         logger.info(f"Saved custom prompt: {prompt_path}")
+         return True
+     except Exception as e:
+         logger.error(f"Error saving custom prompt: {e}")
+         return False
 
+
+ def load_prompt(persona: str, prompt_type: str) -> str:
+     """Load the system prompt based on persona and type.
 
+     Priority: custom_prompt (edited in UI) > loaded file
+     """
+     # Use edited prompt if it exists
+     if st.session_state.prompt_edited and st.session_state.custom_prompt:
+         return st.session_state.custom_prompt
 
+     # Otherwise load from file
+     return load_prompt_from_file(persona, prompt_type)
 
 
+ @st.cache_resource
+ def load_model(model_key: str):
+     """Load and cache the model (Ollama for local, vLLM for deployed)."""
+     try:
+         # Get model configuration
+         models = config.get_models()
+         if model_key not in models:
+             st.error(f"Model '{model_key}' not found in configuration")
+             return None
 
+         model_info = models[model_key]
+         model_name = model_info['name']
+
+         if IS_DEPLOYED:
+             # DEPLOYED: Use vLLM for optimized inference on T4 GPU
 
+             # Lazy import vLLM (takes 30-60s due to CUDA initialization)
+             # This keeps app startup fast - vLLM only imported when user loads model
+             logger.info("Importing vLLM (this may take 30-60 seconds)...")
+             from vllm import LLM, SamplingParams
+             logger.info("vLLM imported successfully")
+
+             model_path = model_info['hf_id']
+
+             logger.info(f"Loading model with vLLM: {model_name} ({model_path})")
+
+             with st.spinner(f"Loading {model_name} with vLLM... This may take a few minutes."):
+                 # Initialize vLLM with optimized settings for T4
+                 llm = LLM(
+                     model=model_path,
+                     download_dir=str(HF_CACHE_DIR) if HF_CACHE_DIR else None,
+                     dtype="half",  # FP16 for T4
+                     gpu_memory_utilization=0.90,  # Use 90% of GPU memory
+                     max_model_len=4096,  # Adjust based on model and T4 VRAM
+                     trust_remote_code=False,
+                 )
+
+             logger.info(f"vLLM model loaded successfully: {model_name}")
+             return {"type": "vllm", "llm": llm, "model_key": model_key, "model_name": model_name}
+         else:
+             # LOCAL: Use Ollama for Apple Silicon optimization
+             ollama_model = model_info.get('ollama_id', 'mistral')
+
+             logger.info(f"Checking Ollama for model: {ollama_model}")
+
+             with st.spinner(f"Checking Ollama model {ollama_model}..."):
+                 try:
+                     # Check if Ollama is running and get available models
+                     available_models_response = ollama.list()
+
+                     # Extract model names from response
+                     # Response format: {'models': [{'name': '...', 'model': '...', ...}]}
+                     models = available_models_response.get('models', [])
+                     model_names = []
+
+                     for m in models:
+                         # Handle both 'name' and 'model' keys
+                         name = m.get('name') or m.get('model', '')
+                         if name:
+                             model_names.append(name)
+
+                     logger.info(f"Available Ollama models: {model_names}")
+
+                     # Check if requested model is available (check base name without tag)
+                     model_available = any(
+                         ollama_model in name or name.startswith(ollama_model.split(':')[0])
+                         for name in model_names
+                     )
+
+                     if not model_available:
+                         st.warning(f"Model '{ollama_model}' not found locally. Pulling...")
+                         logger.info(f"Pulling Ollama model: {ollama_model}")
+
+                         # Pull the model
+                         with st.spinner(f"Downloading {ollama_model}... This may take a few minutes."):
+                             ollama.pull(ollama_model)
+
+                         st.success(f"Model '{ollama_model}' downloaded successfully!")
+
+                     logger.info(f"Ollama ready with model: {ollama_model}")
+                     return {"type": "ollama", "model_name": ollama_model, "model_key": model_key}
+
+                 except Exception as e:
+                     st.error(f"Ollama error: {str(e)}")
+                     st.info("Make sure Ollama is running: `ollama serve`")
+                     logger.error(f"Ollama error: {e}", exc_info=True)
+                     return None
 
+     except Exception as e:
+         logger.error(f"Error loading model: {str(e)}", exc_info=True)
+         st.error(f"Error loading model: {str(e)}")
+         return None
+
+
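Because load_model is wrapped in st.cache_resource, the returned handle is memoized per model_key, so each model loads once per server process and is shared across sessions and reruns. A usage sketch ('mistral-7b' is an invented key that would have to exist in the models config):

model_obj = load_model("mistral-7b")
if model_obj:
    st.sidebar.success(f"Loaded: {model_obj['model_name']}")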
+ def format_chat_prompt(system_prompt: str, conversation_history: list, few_shot_examples: str = "") -> str:
+     """Format the conversation history with system prompt and optional few-shot examples."""
+     # Build conversation string
+     formatted = system_prompt
+
+     # Add few-shot examples if provided
+     if few_shot_examples and few_shot_examples.strip():
+         formatted += "\n\n### Example Conversations\n\n" + few_shot_examples.strip()
+
+     formatted += "\n\n---\n\n"
+
+     for msg in conversation_history:
+         if msg["role"] == "user":
+             formatted += f"User: {msg['content']}\n\n"
+         elif msg["role"] == "assistant":
+             formatted += f"FempowerBot: {msg['content']}\n\n"
 
+     # Add prompt for next response
+     formatted += "FempowerBot:"
 
+     return formatted
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
|
|
|
|
|
|
|
| 354 |
|
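
# Illustrative output of format_chat_prompt (turn contents assumed) - the flat
# prompt string that the Ollama path in generate_response re-parses into role
# messages:
#
#   <system prompt text>
#
#   ### Example Conversations
#
#   <few-shot examples, if provided>
#
#   ---
#
#   User: What is feminism?
#
#   FempowerBot: It's about equal rights for everyone.
#
#   FempowerBot: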

def generate_response(model_obj, prompt: str) -> tuple[str, float]:
    """Generate a response from the model (Ollama for local, vLLM for deployed).

    Returns:
        tuple: (response_text, generation_time_seconds)
    """
    import time
    start_time = time.time()

    try:
        # Use custom params if set, otherwise use config defaults
        gen_params = st.session_state.custom_gen_params if st.session_state.custom_gen_params else config.get_generation_params()

        if model_obj["type"] == "ollama":
            # LOCAL: Use Ollama with proper conversation history

            # Get model type from config to handle response format
            model_key = model_obj.get('model_key')
            models = config.get_models()
            model_type = models.get(model_key, {}).get('model_type', 'standard')

            # Build options for Ollama
            max_tokens = gen_params.get('max_new_tokens', 2000)
            ollama_options = {
                "temperature": gen_params.get('temperature', 0.8),
                "num_predict": max_tokens,
                "top_p": gen_params.get('top_p', 0.9),
                "top_k": gen_params.get('top_k', 50),
            }
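            # num_predict is Ollama's cap on generated tokens - the counterpart of
            # max_new_tokens (Transformers) / max_tokens (vLLM) used elsewhere here.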

            # Build proper message history instead of one big prompt string.
            # Extract system prompt and conversation from the formatted prompt;
            # the prompt is: system_prompt + "---" + conversation history.
            messages = []

            # Add system message
            if prompt.startswith("You are"):
                parts = prompt.split("\n\n---\n\n", 1)
                system_prompt_text = parts[0]
                messages.append({"role": "system", "content": system_prompt_text})

                # Parse conversation history if present
                if len(parts) > 1:
                    conv_text = parts[1]
                    # Split by "User:" and "FempowerBot:"
                    lines = conv_text.split('\n\n')
                    for line in lines:
                        line = line.strip()
                        if line.startswith("User:"):
                            messages.append({"role": "user", "content": line[5:].strip()})
                        elif line.startswith("FempowerBot:") and len(line) > 12:
                            messages.append({"role": "assistant", "content": line[12:].strip()})
            else:
                # Fallback: use as single user message
                messages = [{"role": "user", "content": prompt}]

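            # Example of the parsed structure (turn contents assumed):
            # messages = [
            #     {"role": "system", "content": "You are Alex, ..."},
            #     {"role": "user", "content": "What is feminism?"},
            #     {"role": "assistant", "content": "It's about equal rights..."},
            #     {"role": "user", "content": "That makes sense!"},
            # ]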
            response = ollama.chat(
                model=model_obj['model_name'],
                messages=messages,
                options=ollama_options
            )

            # Extract content from response based on model type
            message = response.get('message', {})

            if model_type == "reasoning":
                # Reasoning models (Qwen3, DeepSeek-R1) use 'thinking' field for internal reasoning
                # and 'content' for final response. If content is empty, extract from thinking.
                generated = message.get('content', '').strip()

                if not generated:
                    # Fallback: extract actual response from thinking field
                    thinking = message.get('thinking', '').strip()
                    if thinking:
                        logger.info("Extracting response from 'thinking' field (reasoning model)")

                        # Try to find where the model formulated the actual response.
                        # Look for patterns like "Final response:", "I'll write:", quotation marks with response
                        best_match = None

                        # Pattern 1: Look for quoted responses (most reliable)
                        import re
                        quoted = re.findall(r'"([^"]+(?:\?|\.)[^"]*)"', thinking)
                        if quoted:
                            # Take the longest quoted response that looks like a conversational reply
                            best_match = max(quoted, key=len) if len(quoted[-1]) > 30 else quoted[-1]

                        # Pattern 2: Look for "Final response" or similar markers
                        if not best_match:
                            for delimiter in ['Final response (', 'I\'ll write:\n"', 'Revised to ', 'Final response:\n"']:
                                if delimiter in thinking:
                                    parts = thinking.split(delimiter, 1)
                                    if len(parts) > 1:
                                        # Extract text in quotes after delimiter
                                        text_after = parts[1]
                                        match = re.search(r'"([^"]+)"', text_after)
                                        if match:
                                            best_match = match.group(1)
                                            break

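                        # Illustrative 'thinking' text (assumed) and what the patterns
                        # extract from it:
                        #   ... the user sounds hostile. Final response:
                        #   "I hear you. Can I ask what shaped that view?"
                        # Pattern 1 captures the quoted sentence (it ends in '?');
                        # Pattern 2 would find the same text via 'Final response:\n"'.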
                        generated = best_match if best_match else "I apologize, but I couldn't generate a proper response."
                    else:
                        logger.error(f"Empty response from reasoning model. Full response: {response}")
                        generated = "I apologize, but I couldn't generate a response."
            else:
                # Standard models put response directly in 'content'
                generated = message.get('content', '').strip()

                if not generated:
                    logger.error(f"Empty response from Ollama. Full response: {response}")
                    generated = "I apologize, but I couldn't generate a response."

        else:
            # DEPLOYED: Use vLLM.
            # Note: vLLM is already imported in load_model() before this is called
            from vllm import SamplingParams

            llm = model_obj['llm']

            # Create sampling parameters
            sampling_params = SamplingParams(
                temperature=gen_params.get('temperature', 0.8),
                top_p=gen_params.get('top_p', 0.9),
                top_k=gen_params.get('top_k', 50),
                max_tokens=gen_params.get('max_new_tokens', 200),
                repetition_penalty=gen_params.get('repetition_penalty', 1.1),
            )

            # Generate
            outputs = llm.generate([prompt], sampling_params)
            generated = outputs[0].outputs[0].text.strip()
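            # llm.generate returns one RequestOutput per input prompt; each carries
            # a list of CompletionOutput objects in .outputs, whose .text holds the
            # generated continuation - hence outputs[0].outputs[0].text above.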

            # Clean up response
            if generated.startswith("FempowerBot:"):
                generated = generated[len("FempowerBot:"):].strip()

            if "User:" in generated:
                generated = generated.split("User:")[0].strip()

        # Calculate generation time
        generation_time = time.time() - start_time
        logger.info(f"Response generated in {generation_time:.2f}s ({len(generated)} chars)")

        return generated, generation_time

    except Exception as e:
        generation_time = time.time() - start_time
        logger.error(f"Error generating response: {str(e)}")
        st.error(f"Error generating response: {str(e)}")
        return "I apologize, but I encountered an error generating a response.", generation_time


def save_conversation():
    """Save the current conversation to disk."""
    if not config.get("storage.save_conversations", True):
        return

    try:
        # Ensure directory exists
        CONVERSATIONS_DIR.mkdir(parents=True, exist_ok=True)

        # Create conversation data
        conversation_data = {
            "session_id": st.session_state.session_id,
            "timestamp": datetime.now().isoformat(),
            "model": st.session_state.current_model,
            "persona": st.session_state.current_persona,
            "prompt_type": st.session_state.current_prompt_type,
            "messages": st.session_state.messages
        }

        # Save to file
        filename = f"conversation_{st.session_state.session_id}.json"
        filepath = CONVERSATIONS_DIR / filename

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(conversation_data, f, indent=2, ensure_ascii=False)

        logger.debug(f"Conversation saved: {filepath}")

    except Exception as e:
        logger.error(f"Error saving conversation: {str(e)}", exc_info=True)

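# Illustrative on-disk shape of a saved conversation (field values assumed):
#
# {
#   "session_id": "20250101_120000",
#   "timestamp": "2025-01-01T12:00:05.123456",
#   "model": "qwen2.5-3b",
#   "persona": "Skeptic",
#   "prompt_type": "compressed",
#   "messages": [
#     {"role": "user", "content": "What is feminism?"},
#     {"role": "assistant", "content": "...", "gen_time": 2.31}
#   ]
# }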

# ===== Main UI =====
st.title("💬 FempowerBot Training Simulator")
st.markdown("""
Practice your communication strategies with realistic conversation partners who are
**unfamiliar**, **skeptical**, or **antagonistic** toward feminism and gender equality.
""")

# ===== Sidebar for configuration =====
with st.sidebar:
    st.header("⚙️ Configuration")

    # Model selection
    st.subheader("🤖 Select Model")

    # Get available models from config
    available_models = config.get_models()
    model_display_names = {key: info['name'] for key, info in available_models.items()}

    selected_model_key = st.selectbox(
        "Choose Language Model",
        options=list(model_display_names.keys()),
        format_func=lambda x: model_display_names[x],
        help="Select a model that fits within T4 GPU constraints"
    )

    # Show model info
    model_info = available_models[selected_model_key]
    with st.expander("ℹ️ Model Info"):
        st.write(f"**Recommended GPU:** {model_info.get('recommended_gpu', 'N/A')}")
        st.write(f"**VRAM Required:** {model_info.get('vram_required', 'N/A')}")
        st.write(f"**Context Length:** {model_info.get('context_length', 'N/A')}")

    # Persona selection
    st.subheader("🎭 Select Persona")

    # Get personas from config
    personas_list = config.get_personas()
    persona_options = [p['name'] for p in personas_list]

    selected_persona = st.radio(
        "Choose conversation partner type",
        options=persona_options,
        help="Select who you want to practice talking with"
    )

    # Prompt type selection
    st.subheader("📝 Prompt Type")

    # Get available prompt types for selected persona (scans files dynamically)
    available_types = get_available_prompt_types(selected_persona)

    if not available_types:
        # Fallback to config if no files found
        prompt_types = config.get("prompts.types", [])
        available_types = [p['name'] for p in prompt_types]

    selected_prompt_type = st.radio(
        "Choose prompt type",
        options=available_types,
        help="Select prompt type. Custom types appear here after saving."
    )

    # Load model button
    if st.button("🚀 Load Model", type="primary"):
        model_obj = load_model(selected_model_key)
        if model_obj:
            st.session_state.model_loaded = True
            st.session_state.current_model = selected_model_key
            st.session_state.model_obj = model_obj
            backend = "Ollama (Apple Silicon)" if model_obj["type"] == "ollama" else "vLLM (T4 GPU)"
            st.success(f"✅ {model_display_names[selected_model_key]} loaded via {backend}!")

    # Reset conversation button
    if st.button("🔄 Reset Conversation"):
        st.session_state.messages = []
        st.session_state.response_times = []
        st.session_state.session_id = datetime.now().strftime("%Y%m%d_%H%M%S")
        st.rerun()

    # # Debug info (if debug mode enabled)
    # if os.getenv("DEBUG", "false").lower() == "true":
    #     st.divider()
    #     st.caption("🔧 Debug Info")
    #     st.caption(f"Deployed: {IS_DEPLOYED}")
    #     st.caption(f"Session: {st.session_state.session_id}")
    #     st.caption(f"Messages: {len(st.session_state.messages)}")

# ===== Main interface with tabs =====
if not st.session_state.model_loaded:
    st.info("👈 Please select and load a model from the sidebar to begin.")
else:
    # Check if settings changed
    settings_changed = (
        selected_persona != st.session_state.current_persona or
        selected_prompt_type != st.session_state.current_prompt_type
    )

    if settings_changed and len(st.session_state.messages) > 0:
        st.warning("⚠️ Settings changed! Click 'Reset Conversation' to apply new settings.")

    # Update current settings
    st.session_state.current_persona = selected_persona
    st.session_state.current_prompt_type = selected_prompt_type

    # Create tabs
    tab_chat, tab_config = st.tabs(["💬 Chat", "⚙️ Configuration"])

    with tab_chat:
        # ===== Chat Tab - Display messages only =====
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
                # Show timing for assistant messages
                if message["role"] == "assistant" and "gen_time" in message:
                    st.caption(f"⏱️ {message['gen_time']:.2f}s")

    with tab_config:
        # ===== Configuration Tab =====
        st.subheader("🎯 Advanced Configuration")

        col1, col2 = st.columns([2, 1])

        with col1:
            # ===== System Prompt Editor =====
            st.markdown("### 📝 System Prompt")

            # Auto-load prompt when persona/type changes
            current_selection = f"{selected_persona}_{selected_prompt_type}"
            if "last_prompt_selection" not in st.session_state:
                st.session_state.last_prompt_selection = current_selection

            if st.session_state.last_prompt_selection != current_selection:
                # Selection changed - reload prompt from file
                loaded_text = load_prompt_from_file(selected_persona, selected_prompt_type)
                st.session_state.loaded_prompt_text = loaded_text
                st.session_state.custom_prompt = loaded_text
                st.session_state.prompt_edited = False
                st.session_state.last_prompt_selection = current_selection

            # Initialize loaded prompt if empty
            if not st.session_state.loaded_prompt_text:
                st.session_state.loaded_prompt_text = load_prompt_from_file(selected_persona, selected_prompt_type)

            # Show current selection info
            st.caption(f"📄 Currently loaded: **{selected_persona} / {selected_prompt_type}**")
            if st.session_state.prompt_edited:
                st.caption("✏️ *Prompt has been edited (not saved to disk)*")

            # Prompt editor
            custom_prompt = st.text_area(
                "Edit system prompt",
                value=st.session_state.custom_prompt if st.session_state.custom_prompt else st.session_state.loaded_prompt_text,
                height=300,
                key="prompt_editor",
                help="Edit the prompt. Changes apply immediately to chat (RAM only)."
            )

            # Track if edited
            if custom_prompt != st.session_state.loaded_prompt_text:
                st.session_state.prompt_edited = True
                st.session_state.custom_prompt = custom_prompt
            else:
                st.session_state.prompt_edited = False
                st.session_state.custom_prompt = None

            # Action buttons
            col_a, col_b, col_c = st.columns(3)

            with col_a:
                if st.button("🔄 Reload from File"):
                    loaded_text = load_prompt_from_file(selected_persona, selected_prompt_type)
                    st.session_state.loaded_prompt_text = loaded_text
                    st.session_state.custom_prompt = loaded_text
                    st.session_state.prompt_edited = False
                    st.success("✅ Reloaded from file!")
                    st.rerun()

            with col_b:
                if st.button("💾 Save as New Type"):
                    st.session_state.show_save_dialog = True

            with col_c:
                if st.session_state.prompt_edited:
                    if st.button("❌ Discard Changes"):
                        st.session_state.custom_prompt = st.session_state.loaded_prompt_text
                        st.session_state.prompt_edited = False
                        st.rerun()

            # Save as new type dialog
            if st.session_state.get("show_save_dialog", False):
                with st.form("save_prompt_form"):
                    st.markdown("#### 💾 Save as New Prompt Type")
                    new_type_name = st.text_input(
                        "Prompt Type Name",
                        placeholder="e.g., detailed, brief, custom1",
                        help="Avoid 'compressed' and 'full' (reserved)"
                    )
                    col_save, col_cancel = st.columns(2)
                    with col_save:
                        save_submitted = st.form_submit_button("✅ Save", type="primary")
                    with col_cancel:
                        cancel_submitted = st.form_submit_button("❌ Cancel")

                    if save_submitted and new_type_name:
                        if new_type_name.lower() in ['compressed', 'full']:
                            st.error("❌ Cannot use reserved names: 'compressed' or 'full'")
                        else:
                            success = save_custom_prompt(selected_persona, new_type_name, custom_prompt)
                            if success:
                                st.success(f"✅ Saved as: {selected_persona.lower()}_{new_type_name.lower()}.txt")
                                st.session_state.show_save_dialog = False
                                st.rerun()
                            else:
                                st.error("❌ Failed to save prompt")

                    if cancel_submitted:
                        st.session_state.show_save_dialog = False
                        st.rerun()

            # Preview expander
            with st.expander("👁️ Preview Current Prompt", expanded=False):
                current = st.session_state.custom_prompt if st.session_state.prompt_edited else st.session_state.loaded_prompt_text
                st.code(current, language="text")

            # ===== Few-Shot Examples =====
            st.markdown("### 📚 Few-Shot Examples")
            st.caption("Add example conversations to guide the bot's responses (currently empty by default)")

            with st.expander("➕ Add Few-Shot Examples", expanded=False):
                few_shot_text = st.text_area(
                    "Paste example conversation (format: User: ... / FempowerBot: ...)",
                    value=st.session_state.few_shot_examples,
                    height=200,
                    placeholder="Example:\n\nUser: What is feminism?\n\nFempowerBot: It's about equal rights for everyone, regardless of gender.\n\nUser: That makes sense!\n\nFempowerBot: Glad I could help clarify!",
                    help="Provide multi-turn conversation examples to improve bot responses"
                )
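
                # These examples are inserted verbatim under "### Example Conversations"
                # by format_chat_prompt() ahead of the live conversation history.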

                if st.button("💾 Save Few-Shot Examples"):
                    st.session_state.few_shot_examples = few_shot_text
                    st.success("✅ Few-shot examples saved!")
                    st.rerun()

        with col2:
            # ===== Generation Parameters =====
            st.markdown("### ⚙️ Generation Parameters")

            # Get default params
            default_params = config.get_generation_params()
            current_params = st.session_state.custom_gen_params if st.session_state.custom_gen_params else default_params

            max_tokens = st.number_input(
                "Max Tokens",
                min_value=50,
                max_value=4000,
                value=current_params.get('max_new_tokens', 2000),
                step=50,
                help="Maximum length of generated response"
            )

            temperature = st.slider(
                "Temperature",
                min_value=0.0,
                max_value=2.0,
                value=current_params.get('temperature', 0.8),
                step=0.1,
                help="Higher = more creative, lower = more focused"
            )

            top_p = st.slider(
                "Top P",
                min_value=0.0,
                max_value=1.0,
                value=current_params.get('top_p', 0.9),
                step=0.05,
                help="Nucleus sampling threshold"
            )

            top_k = st.number_input(
                "Top K",
                min_value=1,
                max_value=100,
                value=current_params.get('top_k', 50),
                step=5,
                help="Sample from top K tokens"
            )

            repetition_penalty = st.slider(
                "Repetition Penalty",
                min_value=1.0,
                max_value=2.0,
                value=current_params.get('repetition_penalty', 1.1),
                step=0.05,
                help="Penalty for repeating tokens"
            )

            if st.button("💾 Apply Parameters"):
                st.session_state.custom_gen_params = {
                    'max_new_tokens': max_tokens,
                    'temperature': temperature,
                    'top_p': top_p,
                    'top_k': top_k,
                    'repetition_penalty': repetition_penalty
                }
                st.success("✅ Parameters applied!")
                st.rerun()
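
            # custom_gen_params overrides the config defaults: generate_response()
            # falls back to config.get_generation_params() when it is None.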

            if st.button("🔄 Reset Parameters"):
                st.session_state.custom_gen_params = None
                st.success("✅ Reset to defaults!")
                st.rerun()

            # ===== Performance Stats =====
            if st.session_state.response_times:
                st.markdown("### 📊 Performance Stats")
                avg_time = sum(r['time'] for r in st.session_state.response_times) / len(st.session_state.response_times)
                total_responses = len(st.session_state.response_times)

                st.metric("Avg Response Time", f"{avg_time:.2f}s")
                st.metric("Total Responses", total_responses)

    # ===== Chat Input (outside tabs - must be at this level) =====
    if prompt := st.chat_input(config.get("app.chat_input_placeholder", "Type your message here...")):
        # Add user message to chat
        st.session_state.messages.append({"role": "user", "content": prompt})

        # Generate bot response
        with st.spinner("Thinking..."):
            # Load system prompt
            system_prompt = load_prompt(
                st.session_state.current_persona,
                st.session_state.current_prompt_type
            )

            # Format prompt with conversation history and few-shot examples
            full_prompt = format_chat_prompt(
                system_prompt,
                st.session_state.messages,
                st.session_state.few_shot_examples
            )

            # Generate response
            response, gen_time = generate_response(
                st.session_state.model_obj,
                full_prompt
            )

            # Track response time
            st.session_state.response_times.append({
                "time": gen_time,
                "chars": len(response),
                "timestamp": datetime.now().isoformat()
            })

            # Add assistant response to chat with timing info
            st.session_state.messages.append({
                "role": "assistant",
                "content": response,
                "gen_time": gen_time
            })

            # Save conversation
            save_conversation()

        st.rerun()

# ===== Footer =====
st.divider()
st.markdown("""
<div style='text-align: center; color: gray; font-size: 0.9em;'>
    <p>🎯 <strong>FempowerBot</strong> is a training tool for practicing difficult conversations.</p>
    <p>The bot stays in character to provide realistic practice scenarios.</p>
</div>
""", unsafe_allow_html=True)