# Rag-ag / app.py
# Uploaded by beastLucifer ("Upload 11 files", commit 9806c71, verified)
import streamlit as st
import os
import logging
# Configure logging
# Root-level config so modules imported later (processor, agent) inherit the same format.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger("APP_ENTRY")
logger.info("πŸš€ app.py module loaded. Streamlit starting up...")
# NOTE: set_page_config must be the first Streamlit command executed in the script.
st.set_page_config(page_title="Gemini Research Assistant", layout="wide")
st.title("πŸ’Ž Agentic RAG: Gemini 2.0 Research Assistant")
# --- AUTO-INGESTION SEQUENCE ---
# This ensures the vector DB exists before the agent tries to load it.
# --- CONFIGURATION ---
# Directory where the persisted Chroma vector store lives (created on first run).
DB_PATH = "./chroma_db"
# Directory scanned for source PDFs to ingest at startup.
DATA_PATH = "./data"
@st.cache_resource(show_spinner=False)
def initialize_knowledge_base():
    """Check for the vector database and build it from local PDFs if missing.

    Cached with st.cache_resource so the check/ingestion runs once per server
    process instead of on every Streamlit script rerun.

    Side effects:
        - Builds the Chroma index at DB_PATH from PDFs found in DATA_PATH.
        - Writes status messages to the Streamlit UI and the app logger.

    Raises:
        Exception: re-raised from build_index so Streamlit surfaces the failure.
    """
    if not os.path.exists(DB_PATH) or not os.listdir(DB_PATH):
        logger.info("⚠️ VectorDB not found. Checking for PDF data...")
        if os.path.exists(DATA_PATH) and any(f.endswith('.pdf') for f in os.listdir(DATA_PATH)):
            logger.info("πŸ“„ Data found. Starting ingestion process...")
            # We use a placeholder to show progress since st.spinner isn't thread-safe in early startup sometimes
            status_placeholder = st.empty()
            status_placeholder.info("🧠 Initializing Knowledge Base... Check Logs for progress.")
            # Lazy import: the processor pulls in heavy dependencies that are
            # only needed when an index actually has to be built.
            from src.processor import build_index
            try:
                build_index(DATA_PATH, DB_PATH)
                status_placeholder.success("βœ… Knowledge Base Built! Refreshing...")
                logger.info("βœ… Ingestion complete.")
                status_placeholder.empty()
            except Exception as e:
                # logger.exception records the full traceback; bare `raise`
                # re-raises the original exception without adding a frame.
                logger.exception("❌ Ingestion FAILED: %s", e)
                status_placeholder.error(f"Failed to build index: {e}")
                raise
        else:
            logger.warning("No data found in 'data' directory.")
            st.warning("⚠️ No data found! Please add PDFs to the 'data' folder to use Local Research.")
    else:
        logger.info("βœ… VectorDB exists. Skipping ingestion.")
# Run the initialization
initialize_knowledge_base()
# Lazy import agent AFTER DB check to prevent "Table not found" errors
# (the agent module opens the Chroma store at import time, so the DB must exist first).
logger.info("πŸ€– Loading Agent Logic...")
from src.agent import app as agent_app
logger.info("βœ… Agent loaded. Ready to serve.")
# Conversation transcript, persisted in session state across Streamlit reruns.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay every stored turn so the chat UI survives the rerun cycle.
for turn in st.session_state.messages:
    with st.chat_message(turn["role"]):
        st.markdown(turn["content"])
# Chat input
if prompt := st.chat_input("Ask about internal docs or latest tech..."):
st.session_state.messages.append({"role": "user", "content": prompt})
with st.chat_message("user"):
st.markdown(prompt)
with st.chat_message("assistant"):
inputs = {"messages": [("user", prompt)]}
config = {"configurable": {"thread_id": "1"}}
# Execute LangGraph brain
response = agent_app.invoke(inputs, config=config)
answer = response["messages"][-1].content
st.markdown(answer)
st.session_state.messages.append({"role": "assistant", "content": answer})