Spaces:
Sleeping
Sleeping
File size: 3,134 Bytes
1f33f1c 5585981 bf2dd99 5585981 1f33f1c 08d1605 bf2dd99 08d1605 1f33f1c 08d1605 1f33f1c 08d1605 bf2dd99 08d1605 bf2dd99 08d1605 5585981 08d1605 5585981 08d1605 5585981 08d1605 5585981 08d1605 5585981 08d1605 5585981 08d1605 5585981 08d1605 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import os
import streamlit as st
from dotenv import load_dotenv
from tavily import TavilyClient
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA
from langchain_chroma import Chroma
# === Fix protobuf issue ===
# Force the pure-Python protobuf implementation to avoid binary-wheel
# incompatibilities between the installed grpc/protobuf versions.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

# === Load environment variables ===
load_dotenv()


def _get_secret(name: str) -> str:
    """Return the secret `name` from the environment, falling back to
    Streamlit secrets.

    Returns "" when the key is not found anywhere. Accessing `st.secrets`
    raises (e.g. FileNotFoundError / StreamlitSecretNotFoundError) when no
    secrets.toml exists, so the lookup is guarded — otherwise running the
    app locally with only a .env file would crash.
    """
    value = os.getenv(name)
    if value:
        return value
    try:
        return st.secrets.get(name, "")
    except Exception:  # no secrets file configured — fall back to empty
        return ""


GOOGLE_API_KEY = _get_secret("GOOGLE_API_KEY")
TAVILY_API_KEY = _get_secret("TAVILY_API_KEY")

# === Validate keys: both services are required, so fail fast with a message ===
if not GOOGLE_API_KEY or not TAVILY_API_KEY:
    st.error("API keys missing! Please check your .env file or Streamlit secrets.")
    st.stop()

# === Set up clients ===
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001", google_api_key=GOOGLE_API_KEY
)
llm = ChatGoogleGenerativeAI(
    model="models/gemini-1.5-flash", google_api_key=GOOGLE_API_KEY
)
# === Streamlit UI ===
st.title("Ask Questions About Any Website!")

# --- Step 1: Website input ---
url = st.text_input("Enter a website URL:")
if st.button("Extract and Index"):
    if not url.strip():
        st.warning("Please enter a valid URL.")
    else:
        with st.spinner("Extracting content..."):
            try:
                # Tavily `extract` returns {"results": [{"raw_content": ...}, ...]};
                # some client versions also expose a top-level "text" field,
                # so try both before giving up.
                data = tavily_client.extract(urls=url)
                raw_text = (
                    data.get("text")
                    or data.get("results", [{}])[0].get("raw_content")
                    or ""  # guard: raw_content may be None, which would break .strip()
                )
                if not raw_text.strip():
                    st.error("Failed to extract content from the website.")
                    st.stop()

                # Chunk the page so each piece fits the embedding model's
                # context; overlap preserves continuity across chunk borders.
                doc = Document(page_content=raw_text)
                splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
                chunks = splitter.split_documents([doc])

                # Vectorstore with Chroma, persisted to disk so re-runs can reuse it.
                vectorstore = Chroma.from_documents(
                    chunks,
                    embedding=embedding_model,
                    collection_name="website_collection",
                    persist_directory="./chroma_db",
                )
                # Keep the store in session state so Step 2 can query it.
                st.session_state.vectorstore = vectorstore
                # NOTE: the original success-message literal was split across
                # two physical lines (a SyntaxError) — rejoined onto one line.
                st.success("Website content indexed successfully!")
            except Exception as e:
                st.error(f"Error during extraction/indexing: {str(e)}")
# --- Step 2: Ask a question ---
# Only runs once Step 1 has stored a vectorstore in session state.
question = st.text_input("Ask a question about the website content:")
if question and "vectorstore" in st.session_state:
    with st.spinner("Thinking..."):
        try:
            # Retrieve the most relevant chunks and let the LLM answer
            # grounded on them ("stuff" chain by default).
            retriever = st.session_state.vectorstore.as_retriever()
            qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
            result = qa_chain.run(question)
            # NOTE: the original subheader literal was split across two
            # physical lines (a SyntaxError) — rejoined onto one line.
            st.subheader("Answer")
            st.write(result)
        except Exception as e:
            st.error(f"Failed to generate answer: {str(e)}")
|