"""Streamlit app: index one website and answer questions about its content.

Flow:
1. The user enters a URL; Tavily extracts the page text.
2. The text is split into overlapping chunks, embedded with Google Generative
   AI embeddings, and stored in a persisted Chroma collection.
3. Questions are answered by a RetrievalQA chain (Gemini) over that collection.
"""

import os

# === 🛠 Fix protobuf issue ===
# Set before the third-party imports below: protobuf picks its backend when it
# is first imported, so assigning this afterwards has no effect.
# NOTE(review): under `streamlit run`, protobuf may already be loaded by the
# time this script executes -- exporting the variable in the shell is the
# reliable option; confirm for your deployment.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

import streamlit as st  # noqa: E402
from dotenv import load_dotenv  # noqa: E402
from tavily import TavilyClient  # noqa: E402
from langchain.schema import Document  # noqa: E402
from langchain.text_splitter import RecursiveCharacterTextSplitter  # noqa: E402
from langchain_google_genai import (  # noqa: E402
    GoogleGenerativeAIEmbeddings,
    ChatGoogleGenerativeAI,
)
from langchain.chains import RetrievalQA  # noqa: E402
from langchain_chroma import Chroma  # noqa: E402

# Where the indexed website lives on disk.
COLLECTION_NAME = "website_collection"
PERSIST_DIRECTORY = "./chroma_db"


def _get_secret(name: str) -> str:
    """Return *name* from the environment, falling back to Streamlit secrets.

    Returns an empty string when the value is defined in neither place, so
    the caller can show one friendly "keys missing" message.
    """
    value = os.getenv(name)
    if value:
        return value
    try:
        return st.secrets.get(name, "")
    except FileNotFoundError:
        # No secrets.toml at all: treat it as "not configured" instead of
        # crashing before the validation message below can be displayed.
        return ""


def _extract_raw_text(data: dict) -> str:
    """Pull the page text out of a Tavily extract response.

    Prefers a top-level ``"text"`` value, then the ``"raw_content"`` of the
    first entry in ``"results"``. Returns ``""`` when neither is present,
    including when ``"results"`` is an empty list or the content is ``None``.
    """
    text = data.get("text")
    if text:
        return text
    results = data.get("results") or []
    first = results[0] if results else {}
    return first.get("raw_content") or ""


# === 🔐 Load Environment Variables ===
load_dotenv()
GOOGLE_API_KEY = _get_secret("GOOGLE_API_KEY")
TAVILY_API_KEY = _get_secret("TAVILY_API_KEY")

# === 🚨 Validate keys ===
if not GOOGLE_API_KEY or not TAVILY_API_KEY:
    st.error("API keys missing! Please check your .env file or Streamlit secrets.")
    st.stop()

# === 🤖 Set up clients ===
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)
llm = ChatGoogleGenerativeAI(
    model="models/gemini-1.5-flash",
    google_api_key=GOOGLE_API_KEY,
)

# === 🌐 Streamlit UI ===
st.title("🌐 Ask Questions About Any Website!")

# --- Step 1: Website input ---
url = st.text_input("🔗 Enter a website URL:")

if st.button("🚀 Extract and Index"):
    if not url.strip():
        st.warning("Please enter a valid URL.")
    else:
        with st.spinner("Extracting content..."):
            try:
                data = tavily_client.extract(urls=url.strip())
                raw_text = _extract_raw_text(data)

                if not raw_text.strip():
                    st.error("❌ Failed to extract content from the website.")
                    st.stop()

                doc = Document(page_content=raw_text)
                splitter = RecursiveCharacterTextSplitter(
                    chunk_size=1000,
                    chunk_overlap=200,
                )
                chunks = splitter.split_documents([doc])

                # The collection is persisted under a fixed name, so without a
                # reset every indexing run would append to whatever earlier
                # sites (or earlier runs of this site) left behind, and those
                # stale chunks would leak into the answers.
                if "vectorstore" in st.session_state:
                    # Forget the old handle first so a failure below cannot
                    # leave the session pointing at a deleted collection.
                    del st.session_state["vectorstore"]
                Chroma(
                    collection_name=COLLECTION_NAME,
                    embedding_function=embedding_model,
                    persist_directory=PERSIST_DIRECTORY,
                ).delete_collection()

                # Vectorstore with Chroma
                vectorstore = Chroma.from_documents(
                    chunks,
                    embedding=embedding_model,
                    collection_name=COLLECTION_NAME,
                    persist_directory=PERSIST_DIRECTORY,
                )

                st.session_state.vectorstore = vectorstore
                st.success("✅ Website content indexed successfully!")

            except Exception as e:
                # UI boundary: surface any extraction/embedding/storage failure
                # to the user instead of crashing the app.
                st.error(f"❌ Error during extraction/indexing: {str(e)}")

# --- Step 2: Ask a question ---
question = st.text_input("💬 Ask a question about the website content:")

if question and "vectorstore" not in st.session_state:
    # Previously a question asked before indexing was silently ignored.
    st.info("Please extract and index a website before asking a question.")
elif question:
    with st.spinner("Thinking..."):
        try:
            retriever = st.session_state.vectorstore.as_retriever()
            qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
            # `invoke` replaces the deprecated `run`; RetrievalQA reads the
            # question from "query" and returns the answer under "result".
            result = qa_chain.invoke({"query": question})["result"]
            st.subheader("✅ Answer")
            st.write(result)
        except Exception as e:
            # UI boundary: report retrieval/LLM failures in the page.
            st.error(f"❌ Failed to generate answer: {str(e)}")