"""Streamlit entry point for the Gemini Research Assistant.

Boot order matters here: the vector DB must exist on disk before the
agent module is imported (the agent loads the index at import time), so
the cached ingestion step runs first and the agent import is deferred
until after it.
"""
import streamlit as st
import os
import logging

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger("APP_ENTRY")
logger.info("🚀 app.py module loaded. Streamlit starting up...")

# set_page_config must be the first Streamlit command executed in the script.
st.set_page_config(page_title="Gemini Research Assistant", layout="wide")
st.title("💎 Agentic RAG: Gemini 2.0 Research Assistant")

# --- AUTO-INGESTION SEQUENCE ---
# This ensures the vector DB exists before the agent tries to load it.

# --- CONFIGURATION ---
DB_PATH = "./chroma_db"
DATA_PATH = "./data"


@st.cache_resource(show_spinner=False)
def initialize_knowledge_base():
    """Checks and builds the vector database if missing.

    Cached with ``st.cache_resource`` so ingestion runs at most once per
    server process rather than on every Streamlit rerun. Raises whatever
    ``build_index`` raises so a failed build is not silently cached as
    success.
    """
    if not os.path.exists(DB_PATH) or not os.listdir(DB_PATH):
        logger.info("⚠️ VectorDB not found. Checking for PDF data...")
        if os.path.exists(DATA_PATH) and any(
            f.endswith('.pdf') for f in os.listdir(DATA_PATH)
        ):
            logger.info("📄 Data found. Starting ingestion process...")
            # We use a placeholder to show progress since st.spinner isn't
            # thread-safe in early startup sometimes
            status_placeholder = st.empty()
            status_placeholder.info(
                "🧠 Initializing Knowledge Base... Check Logs for progress."
            )
            # Lazy import: keeps heavy ingestion dependencies off the fast
            # path when the DB already exists.
            from src.processor import build_index
            try:
                build_index(DATA_PATH, DB_PATH)
                status_placeholder.success("✅ Knowledge Base Built! Refreshing...")
                logger.info("✅ Ingestion complete.")
                status_placeholder.empty()
            except Exception as e:
                logger.error(f"❌ Ingestion FAILED: {e}")
                status_placeholder.error(f"Failed to build index: {e}")
                # Bare `raise` re-raises the active exception with its
                # original traceback (was `raise e`).
                raise
        else:
            logger.warning("No data found in 'data' directory.")
            st.warning(
                "⚠️ No data found! Please add PDFs to the 'data' folder to use Local Research."
            )
    else:
        logger.info("✅ VectorDB exists. Skipping ingestion.")


# Run the initialization
initialize_knowledge_base()

# Lazy import agent AFTER DB check to prevent "Table not found" errors
logger.info("🤖 Loading Agent Logic...")
from src.agent import app as agent_app

logger.info("✅ Agent loaded. Ready to serve.")

# --- CHAT UI ---
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display history
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])

# Chat input
if prompt := st.chat_input("Ask about internal docs or latest tech..."):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        inputs = {"messages": [("user", prompt)]}
        config = {"configurable": {"thread_id": "1"}}
        # Execute LangGraph brain
        response = agent_app.invoke(inputs, config=config)
        answer = response["messages"][-1].content
        st.markdown(answer)
        st.session_state.messages.append({"role": "assistant", "content": answer})