Spaces:

ytrsoymr
/

WebQueryBot

Sleeping

App Files Community

ytrsoymr committed on Jul 10, 2025

Commit

08d1605

verified ·

1 Parent(s): 5635792

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -33

app.py CHANGED Viewed

@@ -8,50 +8,74 @@ from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGener
 from langchain.chains import RetrievalQA
 from langchain_chroma import Chroma
-# Load .env if needed
 load_dotenv()
-# Set API keys (can also use st.secrets or os.environ)
-os.environ["google_api_key"] = st.secrets["GOOGLE_API_KEY"] if "GOOGLE_API_KEY" in st.secrets else os.getenv("GOOGLE_API_KEY")
-TAVILY_API_KEY = st.secrets["TAVILY_API_KEY"] if "TAVILY_API_KEY" in st.secrets else os.getenv("TAVILY_API_KEY")
-# Initialize clients
 tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
-embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=os.environ["google_api_key"])
-llm = ChatGoogleGenerativeAI(model="models/gemini-1.5-flash", google_api_key=os.environ["google_api_key"])
-# Streamlit UI
-st.title("🌐 Website Q&A with Gemini + Tavily")
-url = st.text_input("Enter a website URL:")
-if st.button("Extract and Index Content"):
-    with st.spinner("Extracting and indexing website content..."):
-        data = tavily_client.extract(urls=url)
-        # Convert to LangChain Documents
-        documents = []
-        for doc in data.get("results", []):
-            raw = doc.get("raw_content", "")
-            if raw:
-                documents.append(Document(page_content=raw))
-        # Chunking
-        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-        chunks = splitter.split_documents(documents)
-        # Chroma vector store
-        vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, collection_name="inno", persist_directory="./chroma_db")
-        st.success("Website content indexed successfully!")
-        # Save vectorstore to session state
-        st.session_state.vectorstore = vectorstore
-question = st.text_input("Ask a question about the website content:")
 if question and "vectorstore" in st.session_state:
     with st.spinner("Thinking..."):
-        retriever = st.session_state.vectorstore.as_retriever()
-        chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
-        result = chain.run(question)
-        st.subheader("💬 Answer")
-        st.write(result)

 from langchain.chains import RetrievalQA
 from langchain_chroma import Chroma
+# === 🛠 Fix protobuf issue ===
+os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
+# === 🔐 Load Environment Variables ===
 load_dotenv()
+GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY") or st.secrets.get("GOOGLE_API_KEY", "")
+TAVILY_API_KEY = os.getenv("TAVILY_API_KEY") or st.secrets.get("TAVILY_API_KEY", "")
+# === 🚨 Validate keys ===
+if not GOOGLE_API_KEY or not TAVILY_API_KEY:
+    st.error("API keys missing! Please check your .env file or Streamlit secrets.")
+    st.stop()
+# === 🤖 Set up clients ===
 tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
+embedding_model = GoogleGenerativeAIEmbeddings(
+    model="models/embedding-001", google_api_key=GOOGLE_API_KEY
+)
+llm = ChatGoogleGenerativeAI(
+    model="models/gemini-1.5-flash", google_api_key=GOOGLE_API_KEY
+)
+# === 🌐 Streamlit UI ===
+st.title("🌐 Ask Questions About Any Website!")
+# --- Step 1: Website input ---
+url = st.text_input("🔗 Enter a website URL:")
+if st.button("🚀 Extract and Index"):
+    if not url.strip():
+        st.warning("Please enter a valid URL.")
+    else:
+        with st.spinner("Extracting content..."):
+            try:
+                data = tavily_client.extract(urls=url)
+                raw_text = data.get("text") or data.get("results", [{}])[0].get("raw_content", "")
+                if not raw_text.strip():
+                    st.error("❌ Failed to extract content from the website.")
+                    st.stop()
+                doc = Document(page_content=raw_text)
+                splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+                chunks = splitter.split_documents([doc])
+                # Vectorstore with Chroma
+                vectorstore = Chroma.from_documents(
+                    chunks,
+                    embedding=embedding_model,
+                    collection_name="website_collection",
+                    persist_directory="./chroma_db"
+                )
+                st.session_state.vectorstore = vectorstore
+                st.success("✅ Website content indexed successfully!")
+            except Exception as e:
+                st.error(f"❌ Error during extraction/indexing: {str(e)}")
+# --- Step 2: Ask a question ---
+question = st.text_input("💬 Ask a question about the website content:")
 if question and "vectorstore" in st.session_state:
     with st.spinner("Thinking..."):
+        try:
+            retriever = st.session_state.vectorstore.as_retriever()
+            qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
+            result = qa_chain.run(question)
+            st.subheader("✅ Answer")
+            st.write(result)
+        except Exception as e:
+            st.error(f"❌ Failed to generate answer: {str(e)}")