WebQueryBot / app.py
ytrsoymr's picture
Update app.py
08d1605 verified
import os

# === Fix protobuf issue ===
# Force the pure-Python protobuf backend. protobuf chooses its backend when
# ``google.protobuf`` is first imported, so the variable must be exported
# BEFORE the third-party imports below (several of them depend on protobuf);
# assigning it after those imports has no effect.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

import streamlit as st  # noqa: E402
from dotenv import load_dotenv  # noqa: E402
from tavily import TavilyClient  # noqa: E402
from langchain.schema import Document  # noqa: E402
from langchain.text_splitter import RecursiveCharacterTextSplitter  # noqa: E402
from langchain_google_genai import (  # noqa: E402
    GoogleGenerativeAIEmbeddings,
    ChatGoogleGenerativeAI,
)
from langchain.chains import RetrievalQA  # noqa: E402
from langchain_chroma import Chroma  # noqa: E402
# === Load environment variables ===
load_dotenv()


def _read_api_key(name: str) -> str:
    """Return the API key called *name*, or "" when it is not configured.

    The process environment (including anything ``load_dotenv`` picked up)
    is consulted first; Streamlit secrets are the fallback.
    """
    from_env = (os.getenv(name) or "").strip()
    if from_env:
        return from_env
    try:
        return str(st.secrets.get(name, "") or "").strip()
    except Exception:
        # Touching st.secrets raises when no secrets file exists (the exact
        # exception type depends on the Streamlit version). Treat that as
        # "not configured" so the validation below can show its message
        # instead of the app crashing with a traceback.
        return ""


GOOGLE_API_KEY = _read_api_key("GOOGLE_API_KEY")
TAVILY_API_KEY = _read_api_key("TAVILY_API_KEY")

# === Validate keys ===
# Both services are required; halt the script run early if either key is absent.
if not GOOGLE_API_KEY or not TAVILY_API_KEY:
    st.error("API keys missing! Please check your .env file or Streamlit secrets.")
    st.stop()
# === Set up clients ===
# Tavily fetches page content; the two Google Generative AI clients embed the
# text and answer questions. Both Google clients share the same API key.
_google_auth = {"google_api_key": GOOGLE_API_KEY}

tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", **_google_auth)
llm = ChatGoogleGenerativeAI(model="models/gemini-1.5-flash", **_google_auth)
# === Streamlit UI ===
st.title("🌐 Ask Questions About Any Website!")


def _first_raw_text(payload) -> str:
    """Return the page text found in an extraction response, or "".

    Performs the same lookups the script has always used: a top-level
    ``"text"`` entry, otherwise ``"raw_content"`` of the first ``"results"``
    item. An empty ``results`` list or a ``None``/non-string value yields ""
    instead of raising.
    """
    if not isinstance(payload, dict):
        return ""
    text = payload.get("text")
    if not text:
        results = payload.get("results") or []
        first = results[0] if results else {}
        text = first.get("raw_content") if isinstance(first, dict) else None
    return text if isinstance(text, str) else ""


def _new_collection_name() -> str:
    """Return a fresh, unique Chroma collection name for one indexing run."""
    import uuid

    return f"website_{uuid.uuid4().hex}"


# --- Step 1: Website input ---
url = st.text_input("🔗 Enter a website URL:")
if st.button("🚀 Extract and Index"):
    target_url = url.strip()
    if not target_url:
        st.warning("Please enter a valid URL.")
    else:
        with st.spinner("Extracting content..."):
            try:
                data = tavily_client.extract(urls=target_url)
                raw_text = _first_raw_text(data)
                if not raw_text.strip():
                    st.error("❌ Failed to extract content from the website.")
                    st.stop()

                doc = Document(page_content=raw_text)
                splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
                chunks = splitter.split_documents([doc])

                # Vectorstore with Chroma.
                # Each run gets its own collection: reusing one fixed name in
                # the persisted directory would keep adding new pages to the
                # chunks of every previously indexed site, so answers would
                # mix content from unrelated websites.
                previous_store = st.session_state.get("vectorstore")
                vectorstore = Chroma.from_documents(
                    chunks,
                    embedding=embedding_model,
                    collection_name=_new_collection_name(),
                    persist_directory="./chroma_db",
                )
                st.session_state.vectorstore = vectorstore
                # Drop this session's superseded collection only after the new
                # one exists, so a failed re-index keeps the old one usable.
                if previous_store is not None:
                    previous_store.delete_collection()
                st.success("✅ Website content indexed successfully!")
            except Exception as e:
                st.error(f"❌ Error during extraction/indexing: {str(e)}")
# --- Step 2: Ask a question ---
question = st.text_input("💬 Ask a question about the website content:")
if question.strip():
    if "vectorstore" not in st.session_state:
        # Without this hint a question typed before indexing is silently ignored.
        st.info("Please extract and index a website before asking a question.")
    else:
        with st.spinner("Thinking..."):
            try:
                retriever = st.session_state.vectorstore.as_retriever()
                qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
                # ``invoke`` replaces the deprecated ``Chain.run``; RetrievalQA
                # takes the question under "query" and returns the answer
                # under "result".
                response = qa_chain.invoke({"query": question})
                st.subheader("✅ Answer")
                st.write(response["result"])
            except Exception as e:
                st.error(f"❌ Failed to generate answer: {str(e)}")