Spaces:

ytrsoymr
/

WebQueryBot

Sleeping

App Files Files Community

ytrsoymr committed on Jul 10, 2025

Commit

5585981

verified ·

1 Parent(s): 410de35

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -47

app.py CHANGED Viewed

@@ -1,58 +1,57 @@
 import os
 from dotenv import load_dotenv
 from tavily import TavilyClient
-from langchain_google_genai import ChatGoogleGenerativeAI
-from langchain.chains import LLMChain
-from langchain.prompts import PromptTemplate
-import streamlit as st
-# Load .env
 load_dotenv()
-# API keys
-GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
-TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
-# LLM
-llm = ChatGoogleGenerativeAI(
-    model="models/gemini-1.5-flash",
-    google_api_key=GOOGLE_API_KEY
-)
-# Tavily client
 tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
-# ✅ FIXED: extract website text
-def extract_website_text(url):
-    result = tavily_client.extract(urls=url)
-    if result and "text" in result:
-        return result["text"]
-    return "Could not extract content from the URL."
-# Prompt
-prompt = PromptTemplate(
-    input_variables=["website_content", "question"],
-    template="""
-You are an intelligent assistant. Based on the following website content:
-{website_content}
-Answer the following question:
-{question}
-"""
-)
-qa_chain = LLMChain(llm=llm, prompt=prompt)
 # Streamlit UI
-st.title("🌐 WebQueryBot – Ask any website!")
 url = st.text_input("Enter a website URL:")
-question = st.text_area("What do you want to ask about the website?")
-if st.button("Get Answer"):
-    with st.spinner("Extracting and generating answer..."):
-        site_text = extract_website_text(url)
-        result = qa_chain.invoke({
-            "website_content": site_text,
-            "question": question
-        })
-        st.subheader("✅ Answer")
-        st.write(result["text"])

 import os
+import streamlit as st
 from dotenv import load_dotenv
 from tavily import TavilyClient
+from langchain.schema import Document
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
+from langchain.chains import RetrievalQA
+from langchain_chroma import Chroma
+# Load .env if needed
 load_dotenv()
+# Set API keys (can also use st.secrets or os.environ)
+os.environ["google_api_key"] = st.secrets["GOOGLE_API_KEY"] if "GOOGLE_API_KEY" in st.secrets else os.getenv("GOOGLE_API_KEY")
+TAVILY_API_KEY = st.secrets["TAVILY_API_KEY"] if "TAVILY_API_KEY" in st.secrets else os.getenv("TAVILY_API_KEY")
+# Initialize clients
 tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
+embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=os.environ["google_api_key"])
+llm = ChatGoogleGenerativeAI(model="models/gemini-1.5-flash", google_api_key=os.environ["google_api_key"])
 # Streamlit UI
+st.title("🌐 Website Q&A with Gemini + Tavily")
 url = st.text_input("Enter a website URL:")
+if st.button("Extract and Index Content"):
+    with st.spinner("Extracting and indexing website content..."):
+        data = tavily_client.extract(urls=url)
+        # Convert to LangChain Documents
+        documents = []
+        for doc in data.get("results", []):
+            raw = doc.get("raw_content", "")
+            if raw:
+                documents.append(Document(page_content=raw))
+        # Chunking
+        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+        chunks = splitter.split_documents(documents)
+        # Chroma vector store
+        vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, collection_name="inno", persist_directory="./chroma_db")
+        st.success("Website content indexed successfully!")
+        # Save vectorstore to session state
+        st.session_state.vectorstore = vectorstore
+question = st.text_input("Ask a question about the website content:")
+if question and "vectorstore" in st.session_state:
+    with st.spinner("Thinking..."):
+        retriever = st.session_state.vectorstore.as_retriever()
+        chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
+        result = chain.run(question)
+        st.subheader("💬 Answer")
+        st.write(result)