# Streamlit app (Hugging Face Space): extract a website with Tavily, index it
# in Chroma with Gemini embeddings, and answer questions about its content.
import os

import streamlit as st
from dotenv import load_dotenv
from tavily import TavilyClient
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA
from langchain_chroma import Chroma

# Force the pure-Python protobuf implementation: the C extension can conflict
# when several protobuf-dependent libraries (grpc, chromadb) load together.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

# Load API keys from a local .env file first; st.secrets is the fallback.
load_dotenv()


def _get_secret(name: str) -> str:
    """Return the value of *name* from the environment or Streamlit secrets.

    Accessing ``st.secrets`` raises ``FileNotFoundError`` when no
    ``secrets.toml`` exists (the usual case for local .env-only runs), so
    that situation is treated as "not set" rather than crashing the app.
    """
    value = os.getenv(name)
    if value:
        return value
    try:
        return st.secrets.get(name, "")
    except FileNotFoundError:
        return ""


GOOGLE_API_KEY = _get_secret("GOOGLE_API_KEY")
TAVILY_API_KEY = _get_secret("TAVILY_API_KEY")

# Abort early with a clear message if either key is missing.
if not GOOGLE_API_KEY or not TAVILY_API_KEY:
    st.error("API keys missing! Please check your .env file or Streamlit secrets.")
    st.stop()

# Shared clients: Tavily for page extraction, Gemini for embeddings and chat.
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001", google_api_key=GOOGLE_API_KEY
)
llm = ChatGoogleGenerativeAI(
    model="models/gemini-1.5-flash", google_api_key=GOOGLE_API_KEY
)
# === Streamlit UI ===
st.title("🌐 Ask Questions About Any Website!")

# --- Step 1: Website input ---
url = st.text_input("🔗 Enter a website URL:")

if st.button("🔎 Extract and Index"):
    if not url.strip():
        st.warning("Please enter a valid URL.")
    else:
        with st.spinner("Extracting content..."):
            try:
                data = tavily_client.extract(urls=url)
                # Tavily returns extracted pages under "results"; each entry
                # carries the page text in "raw_content". The "text" key is
                # checked first for compatibility with older response shapes.
                results = data.get("results") or [{}]
                raw_text = data.get("text") or results[0].get("raw_content", "")
                # NOTE: st.stop() must NOT be called inside this try-block —
                # it raises StopException (a subclass of Exception), which the
                # broad handler below would swallow and misreport as an error.
                if raw_text.strip():
                    doc = Document(page_content=raw_text)
                    splitter = RecursiveCharacterTextSplitter(
                        chunk_size=1000, chunk_overlap=200
                    )
                    chunks = splitter.split_documents([doc])
                    # Persisted Chroma collection so the index survives reruns.
                    vectorstore = Chroma.from_documents(
                        chunks,
                        embedding=embedding_model,
                        collection_name="website_collection",
                        persist_directory="./chroma_db",
                    )
                    st.session_state.vectorstore = vectorstore
                    st.success("✅ Website content indexed successfully!")
                else:
                    st.error("❌ Failed to extract content from the website.")
            except Exception as e:
                st.error(f"❌ Error during extraction/indexing: {e}")
# --- Step 2: Ask a question ---
question = st.text_input("💬 Ask a question about the website content:")

if question:
    if "vectorstore" not in st.session_state:
        # Tell the user why nothing happens instead of silently ignoring
        # the question when no website has been indexed yet.
        st.info("Please extract and index a website first (Step 1).")
    else:
        with st.spinner("Thinking..."):
            try:
                retriever = st.session_state.vectorstore.as_retriever()
                qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
                # .invoke() is the supported chain API; Chain.run() is
                # deprecated since LangChain 0.1. The answer is returned
                # under the "result" key.
                result = qa_chain.invoke({"query": question})["result"]
                st.subheader("✅ Answer")
                st.write(result)
            except Exception as e:
                st.error(f"❌ Failed to generate answer: {e}")