ytrsoymr commited on
Commit
08d1605
·
verified ·
1 Parent(s): 5635792

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -33
app.py CHANGED
@@ -8,50 +8,74 @@ from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGener
8
  from langchain.chains import RetrievalQA
9
  from langchain_chroma import Chroma
10
 
11
- # Load .env if needed
 
 
 
12
  load_dotenv()
 
 
13
 
14
- # Set API keys (can also use st.secrets or os.environ)
15
- os.environ["google_api_key"] = st.secrets["GOOGLE_API_KEY"] if "GOOGLE_API_KEY" in st.secrets else os.getenv("GOOGLE_API_KEY")
16
- TAVILY_API_KEY = st.secrets["TAVILY_API_KEY"] if "TAVILY_API_KEY" in st.secrets else os.getenv("TAVILY_API_KEY")
 
17
 
18
- # Initialize clients
19
  tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
20
- embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=os.environ["google_api_key"])
21
- llm = ChatGoogleGenerativeAI(model="models/gemini-1.5-flash", google_api_key=os.environ["google_api_key"])
 
 
 
 
 
 
 
22
 
23
- # Streamlit UI
24
- st.title("🌐 Website Q&A with Gemini + Tavily")
25
- url = st.text_input("Enter a website URL:")
26
 
27
- if st.button("Extract and Index Content"):
28
- with st.spinner("Extracting and indexing website content..."):
29
- data = tavily_client.extract(urls=url)
 
 
 
 
 
 
 
 
30
 
31
- # Convert to LangChain Documents
32
- documents = []
33
- for doc in data.get("results", []):
34
- raw = doc.get("raw_content", "")
35
- if raw:
36
- documents.append(Document(page_content=raw))
37
 
38
- # Chunking
39
- splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
40
- chunks = splitter.split_documents(documents)
 
 
 
 
41
 
42
- # Chroma vector store
43
- vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, collection_name="inno", persist_directory="./chroma_db")
44
- st.success("Website content indexed successfully!")
45
 
46
- # Save vectorstore to session state
47
- st.session_state.vectorstore = vectorstore
48
 
49
- question = st.text_input("Ask a question about the website content:")
 
50
 
51
  if question and "vectorstore" in st.session_state:
52
  with st.spinner("Thinking..."):
53
- retriever = st.session_state.vectorstore.as_retriever()
54
- chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
55
- result = chain.run(question)
56
- st.subheader("💬 Answer")
57
- st.write(result)
 
 
 
 
8
  from langchain.chains import RetrievalQA
9
  from langchain_chroma import Chroma
10
 
11
+ # === 🛠 Fix protobuf issue ===
12
+ os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
13
+
14
+ # === 🔐 Load Environment Variables ===
15
  load_dotenv()
16
+ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY") or st.secrets.get("GOOGLE_API_KEY", "")
17
+ TAVILY_API_KEY = os.getenv("TAVILY_API_KEY") or st.secrets.get("TAVILY_API_KEY", "")
18
 
19
+ # === 🚨 Validate keys ===
20
+ if not GOOGLE_API_KEY or not TAVILY_API_KEY:
21
+ st.error("API keys missing! Please check your .env file or Streamlit secrets.")
22
+ st.stop()
23
 
24
+ # === 🤖 Set up clients ===
25
  tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
26
+ embedding_model = GoogleGenerativeAIEmbeddings(
27
+ model="models/embedding-001", google_api_key=GOOGLE_API_KEY
28
+ )
29
+ llm = ChatGoogleGenerativeAI(
30
+ model="models/gemini-1.5-flash", google_api_key=GOOGLE_API_KEY
31
+ )
32
+
33
+ # === 🌐 Streamlit UI ===
34
+ st.title("🌐 Ask Questions About Any Website!")
35
 
36
+ # --- Step 1: Website input ---
37
+ url = st.text_input("🔗 Enter a website URL:")
 
38
 
39
+ if st.button("🚀 Extract and Index"):
40
+ if not url.strip():
41
+ st.warning("Please enter a valid URL.")
42
+ else:
43
+ with st.spinner("Extracting content..."):
44
+ try:
45
+ data = tavily_client.extract(urls=url)
46
+ raw_text = data.get("text") or data.get("results", [{}])[0].get("raw_content", "")
47
+ if not raw_text.strip():
48
+ st.error("❌ Failed to extract content from the website.")
49
+ st.stop()
50
 
51
+ doc = Document(page_content=raw_text)
52
+ splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
53
+ chunks = splitter.split_documents([doc])
 
 
 
54
 
55
+ # Vectorstore with Chroma
56
+ vectorstore = Chroma.from_documents(
57
+ chunks,
58
+ embedding=embedding_model,
59
+ collection_name="website_collection",
60
+ persist_directory="./chroma_db"
61
+ )
62
 
63
+ st.session_state.vectorstore = vectorstore
64
+ st.success("✅ Website content indexed successfully!")
 
65
 
66
+ except Exception as e:
67
+ st.error(f"❌ Error during extraction/indexing: {str(e)}")
68
 
69
+ # --- Step 2: Ask a question ---
70
+ question = st.text_input("💬 Ask a question about the website content:")
71
 
72
  if question and "vectorstore" in st.session_state:
73
  with st.spinner("Thinking..."):
74
+ try:
75
+ retriever = st.session_state.vectorstore.as_retriever()
76
+ qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
77
+ result = qa_chain.run(question)
78
+ st.subheader("✅ Answer")
79
+ st.write(result)
80
+ except Exception as e:
81
+ st.error(f"❌ Failed to generate answer: {str(e)}")