# Page header captured with the file (not code): author ShivanshCodex, commit "Update app.py" (4161b0f, verified).
# rag_app.py
import streamlit as st
import google.generativeai as genai
from dotenv import load_dotenv
from langchain.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from agent.config import GOOGLE_CSE_API_KEY, GOOGLE_CSE_CX, GEMINI_API_KEY
from agent.search_tool import GoogleCSESearchTool
from agent.scraper_tool import WebScraperTool
from agent.chunker import TextChunker
from pipeline.query_handler import analyze_query
from pipeline.search_and_scrape import search_and_scrape
from pipeline.embed_and_store import embed_and_store_chunks
from pipeline.answer_generator import generate_answer
import logging
# Module-level logger; the pipeline's except-branch reports failures through it.
logger = logging.getLogger(__name__)
# Let INFO-and-above records from this module pass the logger's own level check.
logger.setLevel(logging.INFO)
# Load variables from a .env file into the process environment.
# NOTE(review): agent.config is imported above, before this call runs. If that
# module reads the environment at import time without loading .env itself, the
# API keys it exports would not see values from .env — confirm.
load_dotenv()
# ─── Streamlit UI Setup ───────────────────────────────────────
# Page configuration comes first, ahead of every other Streamlit call in the script.
st.set_page_config(page_title="Gemini RAG Researcher", layout="wide")
st.title("🌐 Gemini-Powered RAG Web Research Assistant")

# Free-text research question. While it is empty the pipeline section further
# down does not run.
# The label's emoji was mis-encoded (UTF-8 bytes shown through a single-byte
# code page); it is restored here to the magnifying-glass glyph.
user_query = st.text_area(
    "🔍 Ask your question (web research will be performed):",
    placeholder="e.g., India-US space cooperation 2025",
    height=120,
)
# ─── Sidebar API Key Input ────────────────────────────────────
# Heading emoji restored from its mis-encoded form to the key glyph.
st.sidebar.markdown("### 🔑 API Keys")
user_gemini_key = st.sidebar.text_input(
    label="🧠 Gemini API Key",
    type="password",
    placeholder="Paste your Gemini API key...",
)

# Resolve the effective Gemini key exactly once: a key typed into the sidebar
# overrides the one imported from agent.config. Surrounding whitespace from a
# copy/paste is dropped so a blank-looking entry falls back to the configured key.
api_key = (user_gemini_key or "").strip() or GEMINI_API_KEY

# Configure the google.generativeai client with that same key.
genai.configure(api_key=api_key)
# ─── Sidebar Settings ─────────────────────────────────────────
# Label emoji below were mis-encoded (UTF-8 bytes shown through a single-byte
# code page) and are restored to the intended glyphs.
st.sidebar.markdown("### ⚙️ Settings")

# Passed to search_and_scrape as max_links (1–15, default 4).
num_links = st.sidebar.number_input(
    "🔗 Pages to scrape", min_value=1, max_value=15, value=4, step=1
)

# Passed to search_and_scrape as max_crawl_depth (1–3, default 2).
max_crawl_depth = st.sidebar.number_input(
    "🌐 Crawl depth", min_value=1, max_value=3, value=2, step=1
)

# Passed to search_and_scrape as max_crawl_pages (1–3, default 2).
max_crawl_pages = st.sidebar.number_input(
    "📄 Pages per homepage", min_value=1, max_value=3, value=2, step=1
)
# ─── Main Execution ───────────────────────────────────────────
# Run the pipeline only for a non-blank question; whitespace-only input would
# otherwise trigger a full search/scrape/embed cycle for nothing.
if user_query and user_query.strip():
    try:
        st.info("Running full RAG pipeline...")

        # Tool initialization
        search_tool = GoogleCSESearchTool(
            api_key=GOOGLE_CSE_API_KEY, cse_id=GOOGLE_CSE_CX
        )
        scraper = WebScraperTool()
        chunker = TextChunker()
        # Use the resolved key (sidebar override, else configured key) so the
        # embedding calls authenticate with the same key passed to
        # genai.configure. Previously this was hard-wired to GEMINI_API_KEY,
        # which silently ignored a key entered in the sidebar.
        embedding_model = GoogleGenerativeAIEmbeddings(
            model="models/text-embedding-004",
            google_api_key=api_key,
        )

        # RAG steps: query analysis -> search + scrape -> embed/store -> answer
        keyword_chunks = analyze_query(user_query)
        scraped_results, all_chunks = search_and_scrape(
            keyword_chunks,
            search_tool,
            scraper,
            chunker,
            user_query,
            max_links=num_links,
            max_pages=3,
            max_crawl_depth=max_crawl_depth,
            max_crawl_pages=max_crawl_pages,
        )

        chroma_store = embed_and_store_chunks(all_chunks, embedding_model)
        if chroma_store is None:
            # embed_and_store_chunks signals failure by returning None.
            st.error("Failed to embed and store documents.")
        else:
            # Retrieve the 4 closest chunks as context for the answer step.
            retriever = chroma_store.as_retriever(search_kwargs={"k": 4})
            generate_answer(user_query, retriever, scraped_results)
    except Exception as e:
        # Top-level UI boundary: log the full traceback and show the message
        # in the page instead of crashing the Streamlit script run.
        logger.exception("Pipeline failed.")
        st.error(f"Something went wrong: {e}")