"""Streamlit frontend for the Indecimal RAG AI Assistant.""" import os import streamlit as st from dotenv import load_dotenv from rag_engine import RAGEngine from config import OPENROUTER_MODEL, OLLAMA_MODEL load_dotenv() # ── Page config ────────────────────────────────────────────────────────────── st.set_page_config( page_title="Indecimal AI Assistant", page_icon="🏗️", layout="wide", ) # ── Custom CSS ─────────────────────────────────────────────────────────────── st.markdown( """ """, unsafe_allow_html=True, ) # ── Helper: get API key from multiple sources ──────────────────────────────── def _resolve_api_key(sidebar_key: str) -> str: """Try HF Spaces secrets → .env → sidebar input.""" try: return st.secrets["OPENROUTER_API_KEY"] except Exception: pass env_key = os.getenv("OPENROUTER_API_KEY", "") if env_key and env_key != "your_key_here": return env_key return sidebar_key # ── Cached RAG engine ──────────────────────────────────────────────────────── @st.cache_resource(show_spinner="Loading embedding model & building index…") def get_rag_engine() -> RAGEngine: engine = RAGEngine() engine.initialize() return engine # ── Sidebar ────────────────────────────────────────────────────────────────── with st.sidebar: st.markdown("# 🏗️ Indecimal") st.markdown("**AI Construction Assistant**") st.divider() model_choice = st.selectbox( "LLM Provider", ["OpenRouter (Cloud)", "Ollama (Local)"], ) api_key_input = st.text_input( "OpenRouter API Key", value=os.getenv("OPENROUTER_API_KEY", ""), type="password", help="Get a free key at https://openrouter.ai", ) ollama_model = st.text_input("Ollama Model", value=OLLAMA_MODEL) compare_mode = st.toggle("Compare Models", value=False, help="Run both OpenRouter & Ollama side-by-side") st.divider() # System status engine = get_rag_engine() st.markdown("### System Status") if engine.ready: st.markdown(f'✅ Documents loaded: {len(engine.chunks)} chunks', unsafe_allow_html=True) st.markdown(f'✅ Embedding model: all-MiniLM-L6-v2', unsafe_allow_html=True) st.markdown(f'✅ FAISS index: ready', unsafe_allow_html=True) else: st.markdown('⏳ Engine loading…', unsafe_allow_html=True) current_llm = ollama_model if model_choice == "Ollama (Local)" else OPENROUTER_MODEL st.markdown(f"**LLM:** `{current_llm}`") st.divider() if st.button("🗑️ Clear Chat"): st.session_state.messages = [] st.rerun() # ── Resolve API key ───────────────────────────────────────────────────────── api_key = _resolve_api_key(api_key_input) engine.api_key = api_key engine.model = OPENROUTER_MODEL # ── Chat state ─────────────────────────────────────────────────────────────── if "messages" not in st.session_state: st.session_state.messages = [] # Welcome message if not st.session_state.messages: st.session_state.messages.append( { "role": "assistant", "content": ( "👋 Hi! I'm the Indecimal AI Assistant. I can answer questions about " "Indecimal's construction packages, pricing, quality assurance, customer " "journey, and policies. Ask me anything!" ), "chunks": [], } ) # ── Render chat history ───────────────────────────────────────────────────── for msg in st.session_state.messages: with st.chat_message(msg["role"]): st.markdown(msg["content"]) if msg.get("chunks"): with st.expander(f"📄 Retrieved Context ({len(msg['chunks'])} chunks)"): for i, c in enumerate(msg["chunks"], 1): st.markdown( f"""

Chunk {i} — {c['source']} {'· ' + c['header'] if c.get('header') else ''} · score: {c.get('score', 0):.3f}

{c['text'][:500]}

""", unsafe_allow_html=True, ) # Show comparison columns if present if msg.get("comparison"): comp = msg["comparison"] col1, col2 = st.columns(2) with col1: st.markdown(f"### ☁️ OpenRouter ({comp['openrouter']['response_time']}s)") st.markdown(comp["openrouter"]["answer"]) with col2: st.markdown(f"### 🖥️ Ollama ({comp['ollama']['response_time']}s)") st.markdown(comp["ollama"]["answer"]) # ── Chat input ─────────────────────────────────────────────────────────────── if prompt := st.chat_input("Ask about Indecimal…"): # Display user message st.session_state.messages.append({"role": "user", "content": prompt, "chunks": []}) with st.chat_message("user"): st.markdown(prompt) # Validate use_ollama = model_choice == "Ollama (Local)" if not use_ollama and not api_key: with st.chat_message("assistant"): st.warning("⚠️ Please enter your OpenRouter API key in the sidebar.") st.session_state.messages.append( {"role": "assistant", "content": "⚠️ Please enter your OpenRouter API key in the sidebar.", "chunks": []} ) else: with st.chat_message("assistant"): with st.spinner("Searching documents…"): if compare_mode: result = engine.query_both(prompt, ollama_model=ollama_model) chunks = result.get("retrieved_chunks", []) # Show comparison col1, col2 = st.columns(2) with col1: st.markdown(f"### ☁️ OpenRouter ({result['openrouter']['response_time']}s)") st.markdown(result["openrouter"]["answer"]) with col2: st.markdown(f"### 🖥️ Ollama ({result['ollama']['response_time']}s)") st.markdown(result["ollama"]["answer"]) with st.expander(f"📄 Retrieved Context ({len(chunks)} chunks)"): for i, c in enumerate(chunks, 1): st.markdown( f"""

Chunk {i} — {c['source']} {'· ' + c['header'] if c.get('header') else ''} · score: {c.get('score', 0):.3f}

{c['text'][:500]}

""", unsafe_allow_html=True, ) st.session_state.messages.append( { "role": "assistant", "content": "*(Comparison mode — see both responses above)*", "chunks": chunks, "comparison": { "openrouter": result["openrouter"], "ollama": result["ollama"], }, } ) else: result = engine.query(prompt, use_ollama=use_ollama, ollama_model=ollama_model) answer = result["answer"] chunks = result.get("retrieved_chunks", []) resp_time = result.get("response_time", 0) st.markdown(answer) st.caption(f"⏱️ Response time: {resp_time}s") if chunks: with st.expander(f"📄 Retrieved Context ({len(chunks)} chunks)"): for i, c in enumerate(chunks, 1): st.markdown( f"""

Chunk {i} — {c['source']} {'· ' + c['header'] if c.get('header') else ''} · score: {c.get('score', 0):.3f}

{c['text'][:500]}

""", unsafe_allow_html=True, ) st.session_state.messages.append( {"role": "assistant", "content": answer, "chunks": chunks} )