# Streamlit app (Hugging Face Space): extract a website with Tavily, index it
# in Chroma with Gemini embeddings, and answer questions about its content.
import os

import streamlit as st
from dotenv import load_dotenv
from tavily import TavilyClient
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA
from langchain_chroma import Chroma

# Force the pure-Python protobuf implementation: the C extension can conflict
# when several protobuf-dependent libraries (grpc, chromadb) load together.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

# Load API keys from a local .env file first; st.secrets is the fallback.
load_dotenv()


def _get_secret(name: str) -> str:
    """Return the value of *name* from the environment or Streamlit secrets.

    Accessing ``st.secrets`` raises ``FileNotFoundError`` when no
    ``secrets.toml`` exists (the usual case for local .env-only runs), so
    that situation is treated as "not set" rather than crashing the app.
    """
    value = os.getenv(name)
    if value:
        return value
    try:
        return st.secrets.get(name, "")
    except FileNotFoundError:
        return ""


GOOGLE_API_KEY = _get_secret("GOOGLE_API_KEY")
TAVILY_API_KEY = _get_secret("TAVILY_API_KEY")

# Abort early with a clear message if either key is missing.
if not GOOGLE_API_KEY or not TAVILY_API_KEY:
    st.error("API keys missing! Please check your .env file or Streamlit secrets.")
    st.stop()

# Shared clients: Tavily for page extraction, Gemini for embeddings and chat.
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001", google_api_key=GOOGLE_API_KEY
)
llm = ChatGoogleGenerativeAI(
    model="models/gemini-1.5-flash", google_api_key=GOOGLE_API_KEY
)
# === Streamlit UI ===
st.title("🌐 Ask Questions About Any Website!")

# --- Step 1: Website input ---
url = st.text_input("🔗 Enter a website URL:")

if st.button("🔎 Extract and Index"):
    if not url.strip():
        st.warning("Please enter a valid URL.")
    else:
        with st.spinner("Extracting content..."):
            try:
                data = tavily_client.extract(urls=url)
                # Tavily returns extracted pages under "results"; each entry
                # carries the page text in "raw_content". The "text" key is
                # checked first for compatibility with older response shapes.
                results = data.get("results") or [{}]
                raw_text = data.get("text") or results[0].get("raw_content", "")
                # NOTE: st.stop() must NOT be called inside this try-block —
                # it raises StopException (a subclass of Exception), which the
                # broad handler below would swallow and misreport as an error.
                if raw_text.strip():
                    doc = Document(page_content=raw_text)
                    splitter = RecursiveCharacterTextSplitter(
                        chunk_size=1000, chunk_overlap=200
                    )
                    chunks = splitter.split_documents([doc])
                    # Persisted Chroma collection so the index survives reruns.
                    vectorstore = Chroma.from_documents(
                        chunks,
                        embedding=embedding_model,
                        collection_name="website_collection",
                        persist_directory="./chroma_db",
                    )
                    st.session_state.vectorstore = vectorstore
                    st.success("✅ Website content indexed successfully!")
                else:
                    st.error("❌ Failed to extract content from the website.")
            except Exception as e:
                st.error(f"❌ Error during extraction/indexing: {e}")
# --- Step 2: Ask a question ---
question = st.text_input("💬 Ask a question about the website content:")

if question:
    if "vectorstore" not in st.session_state:
        # Tell the user why nothing happens instead of silently ignoring
        # the question when no website has been indexed yet.
        st.info("Please extract and index a website first (Step 1).")
    else:
        with st.spinner("Thinking..."):
            try:
                retriever = st.session_state.vectorstore.as_retriever()
                qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
                # .invoke() is the supported chain API; Chain.run() is
                # deprecated since LangChain 0.1. The answer is returned
                # under the "result" key.
                result = qa_chain.invoke({"query": question})["result"]
                st.subheader("✅ Answer")
                st.write(result)
            except Exception as e:
                st.error(f"❌ Failed to generate answer: {e}")