ytrsoymr committed on
Commit
5585981
·
verified ·
1 Parent(s): 410de35

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -47
app.py CHANGED
@@ -1,58 +1,57 @@
1
import os
from dotenv import load_dotenv
from tavily import TavilyClient
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import streamlit as st

# Load .env so local runs can pick up credentials from a dotfile.
load_dotenv()

# API keys (read from the environment; .env was loaded above)
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

# Gemini chat model used to answer questions about the extracted page text.
llm = ChatGoogleGenerativeAI(
    model="models/gemini-1.5-flash",
    google_api_key=GOOGLE_API_KEY
)

# Tavily client for fetching/readable-text extraction of arbitrary URLs.
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)


def extract_website_text(url):
    """Return the readable text of *url* via Tavily's extract endpoint.

    BUG FIX: the Tavily extract response has no top-level "text" key — it
    is shaped {"results": [{"url": ..., "raw_content": ...}, ...]} — so the
    original `if "text" in result` check never matched and this function
    always returned the failure string. We now join the raw content of
    every successfully extracted result.
    """
    result = tavily_client.extract(urls=url)
    texts = [
        item.get("raw_content", "")
        for item in (result or {}).get("results", [])
        if item.get("raw_content")
    ]
    if texts:
        return "\n\n".join(texts)
    return "Could not extract content from the URL."


# Prompt: stuff the whole page text plus the user's question into one call.
prompt = PromptTemplate(
    input_variables=["website_content", "question"],
    template="""
You are an intelligent assistant. Based on the following website content:
{website_content}
Answer the following question:
{question}
"""
)

qa_chain = LLMChain(llm=llm, prompt=prompt)

# Streamlit UI
st.title("🌐 WebQueryBot Ask any website!")
url = st.text_input("Enter a website URL:")
question = st.text_area("What do you want to ask about the website?")

if st.button("Get Answer"):
    with st.spinner("Extracting and generating answer..."):
        site_text = extract_website_text(url)
        # LLMChain.invoke returns a dict whose generated text is under "text".
        result = qa_chain.invoke({
            "website_content": site_text,
            "question": question
        })
        st.subheader("✅ Answer")
        st.write(result["text"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import streamlit as st
from dotenv import load_dotenv
from tavily import TavilyClient
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA
from langchain_chroma import Chroma

# Load .env so local runs can supply keys without a Streamlit secrets file.
load_dotenv()


def _get_credential(name):
    """Return *name* from st.secrets when available, else from the environment.

    BUG FIX: indexing `st.secrets` raises FileNotFoundError when no
    secrets.toml exists, so the lookup is guarded instead of accessed
    unconditionally as the original did.
    """
    try:
        if name in st.secrets:
            return st.secrets[name]
    except FileNotFoundError:
        pass
    return os.getenv(name)


GOOGLE_API_KEY = _get_credential("GOOGLE_API_KEY")
TAVILY_API_KEY = _get_credential("TAVILY_API_KEY")

# BUG FIX: the original did `os.environ["google_api_key"] = <maybe None>`,
# which raises TypeError whenever neither st.secrets nor the environment has
# the key. Fail with a readable message instead, and export the canonical
# upper-case variable name that Google client libraries look for.
if not GOOGLE_API_KEY or not TAVILY_API_KEY:
    st.error("Missing GOOGLE_API_KEY and/or TAVILY_API_KEY. "
             "Set them in .streamlit/secrets.toml or as environment variables.")
    st.stop()
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

# Initialize clients: Tavily for extraction, Gemini for embeddings + chat.
tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001", google_api_key=GOOGLE_API_KEY
)
llm = ChatGoogleGenerativeAI(
    model="models/gemini-1.5-flash", google_api_key=GOOGLE_API_KEY
)

# Streamlit UI
st.title("🌐 Website Q&A with Gemini + Tavily")
url = st.text_input("Enter a website URL:")

if st.button("Extract and Index Content"):
    if not url:
        # Guard: Tavily errors on an empty URL; tell the user instead.
        st.warning("Please enter a website URL first.")
    else:
        with st.spinner("Extracting and indexing website content..."):
            data = tavily_client.extract(urls=url)

            # Convert each successfully extracted page into a LangChain
            # Document; pages with no raw_content are skipped.
            documents = [
                Document(page_content=raw)
                for doc in data.get("results", [])
                if (raw := doc.get("raw_content"))
            ]

            if not documents:
                # Guard: Chroma.from_documents fails on an empty chunk list.
                st.error("No content could be extracted from that URL.")
            else:
                # Chunk for retrieval; overlap keeps context across splits.
                splitter = RecursiveCharacterTextSplitter(
                    chunk_size=1000, chunk_overlap=200
                )
                chunks = splitter.split_documents(documents)

                # NOTE(review): reusing the same persist_directory/collection
                # on every click accumulates chunks from earlier URLs —
                # consider clearing or namespacing per URL.
                vectorstore = Chroma.from_documents(
                    chunks,
                    embedding=embedding_model,
                    collection_name="inno",
                    persist_directory="./chroma_db",
                )
                # Save to session state so later reruns can answer questions.
                st.session_state.vectorstore = vectorstore
                st.success("Website content indexed successfully!")

question = st.text_input("Ask a question about the website content:")

if question and "vectorstore" in st.session_state:
    with st.spinner("Thinking..."):
        retriever = st.session_state.vectorstore.as_retriever()
        chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
        # BUG FIX: Chain.run() is deprecated in modern LangChain; invoke()
        # takes the input dict and returns {"query": ..., "result": ...}.
        result = chain.invoke({"query": question})
        st.subheader("💬 Answer")
        st.write(result["result"])