File size: 3,134 Bytes
1f33f1c
5585981
bf2dd99
 
5585981
 
 
 
 
1f33f1c
08d1605
 
 
 
bf2dd99
08d1605
 
1f33f1c
08d1605
 
 
 
1f33f1c
08d1605
bf2dd99
08d1605
 
 
 
 
 
 
 
 
bf2dd99
08d1605
 
5585981
08d1605
 
 
 
 
 
 
 
 
 
 
5585981
08d1605
 
 
5585981
08d1605
 
 
 
 
 
 
5585981
08d1605
 
5585981
08d1605
 
5585981
08d1605
 
5585981
 
 
08d1605
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
import streamlit as st
from dotenv import load_dotenv
from tavily import TavilyClient
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA
from langchain_chroma import Chroma

# === πŸ›  Fix protobuf issue ===
# Force the pure-Python protobuf backend to sidestep C-extension version
# clashes between grpc/google-generativeai and chromadb.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

# === πŸ” Load Environment Variables ===
load_dotenv()


def _read_key(name: str) -> str:
    """Return the API key `name` from the environment, else Streamlit secrets.

    Accessing `st.secrets` raises when no secrets.toml exists (the common
    local-dev case where only a .env file is used), so the fallback is
    guarded instead of assumed to be available.
    """
    value = os.getenv(name)
    if value:
        return value
    try:
        return st.secrets.get(name, "")
    except Exception:  # FileNotFoundError / StreamlitSecretNotFoundError
        return ""


GOOGLE_API_KEY = _read_key("GOOGLE_API_KEY")
TAVILY_API_KEY = _read_key("TAVILY_API_KEY")

# === 🚨 Validate keys ===
# Fail fast with a visible message rather than erroring later mid-request.
if not GOOGLE_API_KEY or not TAVILY_API_KEY:
    st.error("API keys missing! Please check your .env file or Streamlit secrets.")
    st.stop()

# === πŸ€– Set up clients ===
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001", google_api_key=GOOGLE_API_KEY
)
llm = ChatGoogleGenerativeAI(
    model="models/gemini-1.5-flash", google_api_key=GOOGLE_API_KEY
)

# === 🌐 Streamlit UI ===
st.title("🌐 Ask Questions About Any Website!")

# --- Step 1: Website input ---
url = st.text_input("πŸ”— Enter a website URL:")

if st.button("πŸš€ Extract and Index"):
    if not url.strip():
        st.warning("Please enter a valid URL.")
    else:
        with st.spinner("Extracting content..."):
            try:
                data = tavily_client.extract(urls=url)
                # Tavily may return a top-level "text" or a "results" list.
                # Guard both failure modes the original missed:
                #  - `data.get("results", [{}])[0]` raises IndexError when the
                #    key is present but the list is EMPTY (the default only
                #    applies when the key is absent);
                #  - "raw_content" may be present with a None value, which
                #    would crash `.strip()` below.
                results = data.get("results") or [{}]
                raw_text = data.get("text") or results[0].get("raw_content") or ""
                if not raw_text.strip():
                    st.error("❌ Failed to extract content from the website.")
                    st.stop()

                # Chunk the page so retrieval returns focused passages;
                # overlap preserves context across chunk boundaries.
                doc = Document(page_content=raw_text)
                splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
                chunks = splitter.split_documents([doc])

                # Vectorstore with Chroma (persisted to disk so the index
                # survives app restarts).
                vectorstore = Chroma.from_documents(
                    chunks,
                    embedding=embedding_model,
                    collection_name="website_collection",
                    persist_directory="./chroma_db"
                )

                # Keep the store in session state so Step 2 can reuse it
                # across Streamlit reruns.
                st.session_state.vectorstore = vectorstore
                st.success("βœ… Website content indexed successfully!")

            except Exception as e:
                st.error(f"❌ Error during extraction/indexing: {str(e)}")

# --- Step 2: Ask a question ---
question = st.text_input("πŸ’¬ Ask a question about the website content:")

if question:
    if "vectorstore" not in st.session_state:
        # Previously this case was a silent no-op, leaving the user with no
        # clue why nothing happened; surface an explicit hint instead.
        st.warning("⚠️ Please extract and index a website first (Step 1).")
    else:
        with st.spinner("Thinking..."):
            try:
                retriever = st.session_state.vectorstore.as_retriever()
                qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
                # `Chain.run` is deprecated in LangChain; `invoke` takes the
                # input mapping and returns a dict whose "result" key holds
                # the answer text.
                result = qa_chain.invoke({"query": question})["result"]
                st.subheader("βœ… Answer")
                st.write(result)
            except Exception as e:
                st.error(f"❌ Failed to generate answer: {str(e)}")