Spaces:
Runtime error
Runtime error
| # rag_app.py | |
| import streamlit as st | |
| import google.generativeai as genai | |
| from dotenv import load_dotenv | |
| from langchain.vectorstores import Chroma | |
| from langchain_google_genai import GoogleGenerativeAIEmbeddings | |
| from agent.config import GOOGLE_CSE_API_KEY, GOOGLE_CSE_CX, GEMINI_API_KEY | |
| from agent.search_tool import GoogleCSESearchTool | |
| from agent.scraper_tool import WebScraperTool | |
| from agent.chunker import TextChunker | |
| from pipeline.query_handler import analyze_query | |
| from pipeline.search_and_scrape import search_and_scrape | |
| from pipeline.embed_and_store import embed_and_store_chunks | |
| from pipeline.answer_generator import generate_answer | |
| import logging | |
# Module-level logger named after this module, with its threshold set to INFO.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Pull settings from a local .env file into the environment.
# NOTE(review): agent.config is imported before this call; if it reads
# environment variables at import time without loading .env itself, the
# imported keys could be empty -- confirm against agent/config.py.
load_dotenv()
# --- Streamlit UI setup ---
# NOTE(review): the leading glyphs in the title/label strings look like
# mis-decoded emoji; they are reproduced unchanged here -- confirm the
# intended characters against the original file.
st.set_page_config(layout="wide", page_title="Gemini RAG Researcher")
st.title("π Gemini-Powered RAG Web Research Assistant")

# Free-form question box; its content drives the pipeline further down.
user_query = st.text_area(
    label="π Ask your question (web research will be performed):",
    height=120,
    placeholder="e.g., India-US space cooperation 2025",
)
# --- Sidebar API key input ---
st.sidebar.markdown("### π API Keys")
user_gemini_key = st.sidebar.text_input(
    label="π§ Gemini API Key",
    placeholder="Paste your Gemini API key...",
    type="password",
)

# A key typed into the sidebar takes precedence over the configured default;
# the resolved value is kept in `api_key` and also handed to the Gemini SDK.
api_key = user_gemini_key or GEMINI_API_KEY
genai.configure(api_key=api_key)
# --- Sidebar settings ---
st.sidebar.markdown("### βοΈ Settings")

# Each control is a bounded integer input; the values are consumed by the
# search_and_scrape() call in the main pipeline.
num_links = st.sidebar.number_input(
    label="π Pages to scrape",
    min_value=1,
    max_value=15,
    value=4,
    step=1,
)
max_crawl_depth = st.sidebar.number_input(
    label="π Crawl depth",
    min_value=1,
    max_value=3,
    value=2,
    step=1,
)
max_crawl_pages = st.sidebar.number_input(
    label="π Pages per homepage",
    min_value=1,
    max_value=3,
    value=2,
    step=1,
)
# --- Main execution ---
# Run the pipeline only for a non-blank question: a whitespace-only entry is
# truthy but would send an empty search through every stage.
if user_query and user_query.strip():
    try:
        st.info("Running full RAG pipeline...")

        # Tool initialization
        search_tool = GoogleCSESearchTool(
            api_key=GOOGLE_CSE_API_KEY,
            cse_id=GOOGLE_CSE_CX,
        )
        scraper = WebScraperTool()
        chunker = TextChunker()
        # Use the resolved key (sidebar entry if provided, otherwise the
        # configured default) so embeddings honour the user's override the
        # same way genai.configure() does; previously the sidebar key was
        # ignored here.
        embedding_model = GoogleGenerativeAIEmbeddings(
            model="models/text-embedding-004",
            google_api_key=api_key,
        )

        # RAG steps: analyze query -> search & scrape -> embed & store -> answer
        keyword_chunks = analyze_query(user_query)
        scraped_results, all_chunks = search_and_scrape(
            keyword_chunks, search_tool, scraper, chunker, user_query,
            max_links=num_links, max_pages=3,
            max_crawl_depth=max_crawl_depth, max_crawl_pages=max_crawl_pages,
        )
        chroma_store = embed_and_store_chunks(all_chunks, embedding_model)

        if chroma_store is None:
            # embed_and_store_chunks signals failure by returning None.
            st.error("Failed to embed and store documents.")
        else:
            retriever = chroma_store.as_retriever(search_kwargs={"k": 4})
            generate_answer(user_query, retriever, scraped_results)
    except Exception as e:
        # Top-level UI boundary: log the full traceback and show the user a
        # short message instead of letting the app crash.
        logger.exception("Pipeline failed.")
        st.error(f"Something went wrong: {e}")