# QueryaWebsite / app.py
# eaglelandsonce's picture
# Update app.py
# 52795a6
import streamlit as st
from utils import *
import constants
# Create the session-state entries this script uses, defaulting each one to
# an empty string the first time it is seen in a session.
# NOTE: 'Pinecone_ENV' was previously guarded by a misspelled key
# ('Pinecone_EBV'), so the guard never matched and the entry was reset to ''
# on every rerun; every key is now guarded by its own name.
for _state_key in (
    'Website_URL',
    'HuggingFace_API_Key',
    'Pinecone_Index',
    'Pinecone_ENV',
    'Pinecone_API_Key',
    'API_Key',
    'history',
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = ''
def clear_history():
    """Remove the 'history' entry from Streamlit's session state, if present."""
    # Nothing to do when no history has been stored.
    if 'history' not in st.session_state:
        return
    del st.session_state['history']
# Page header: title plus an embedded YouTube video.
st.title('Query a Sitemap')
st.video("https://youtu.be/a6tZd-niM1o")

# ******** Sidebar functionality starts ********
# Sidebar inputs for the sitemap URL and the API credentials. Each widget's
# current value is written straight into session state on every rerun.
# The sidebar title is the key emoji; it had been stored as mis-decoded bytes.
st.sidebar.title("🗝️")
st.session_state['Website_URL'] = st.sidebar.text_input(
    'Input the Sitemap you want to Query against',
    'https://jobs.excelcult.com/wp-sitemap-posts-post-1.xml',
)
# Secrets use type="password" so the typed value is masked in the UI.
st.session_state['HuggingFace_API_Key'] = st.sidebar.text_input(
    'Input Your HuggingFace API key', type="password"
)
st.session_state['Pinecone_Index'] = st.sidebar.text_input(
    'Input Your Pinecone Index', 'chatbot'
)
st.session_state['Pinecone_ENV'] = st.sidebar.text_input(
    'Input Your Pinecone Environment', 'gcp-starter'
)
st.session_state['Pinecone_API_Key'] = st.sidebar.text_input(
    'Input your Pinecone API key?', type="password"
)
load_button = st.sidebar.button("Load data to Pinecone", key="load_button")
# If the button above is clicked, push the sitemap data to Pinecone.
if load_button:
    # Proceed only when every value used below has been provided.
    # 'Pinecone_Index' is checked as well because it is forwarded to
    # push_to_pinecone; previously an empty index name slipped through.
    load_required_keys = (
        'Website_URL',
        'HuggingFace_API_Key',
        'Pinecone_Index',
        'Pinecone_ENV',
        'Pinecone_API_Key',
    )
    if all(st.session_state[name] != "" for name in load_required_keys):
        # Fetch data from the site
        site_data = get_website_data(st.session_state['Website_URL'])
        st.write("Data pull done...")

        # Split data into chunks
        chunks_data = split_data(site_data)
        st.write("Spliting data done...")

        # Create the embeddings instance
        embeddings = create_embeddings()
        st.write("Embeddings instance creation done...")

        # Push data to Pinecone
        push_to_pinecone(
            st.session_state['Pinecone_API_Key'],
            st.session_state['Pinecone_ENV'],
            st.session_state['Pinecone_Index'],
            embeddings,
            chunks_data,
        )
        st.write("Pushing data to Pinecone done...")

        st.sidebar.success("Data pushed to Pinecone successfully!")
    else:
        st.sidebar.error("Ooopssss!!! Please provide API keys and website URL.....")
# ******** Sidebar functionality ends ********
# Capture the user's question and the number of results to return.
prompt = st.text_input('Ask a question about your uploaded website: ❓', key="prompt")  # The box for the text prompt
document_count = st.slider('No.Of links to return 🔗 - (0 LOW || 5 HIGH)', 0, 5, 2, step=1)
submit = st.button("Search")

if submit:
    # Proceed only when every value used below has been provided.
    # 'Pinecone_Index' is checked as well because it is forwarded to
    # pull_from_pinecone; previously an empty index name slipped through.
    search_required_keys = (
        'Website_URL',
        'HuggingFace_API_Key',
        'Pinecone_Index',
        'Pinecone_ENV',
        'Pinecone_API_Key',
    )
    if all(st.session_state[name] != "" for name in search_required_keys):
        # Create the embeddings instance
        embeddings = create_embeddings()
        st.write("Embeddings instance creation done...")

        # Pull index data from Pinecone
        index = pull_from_pinecone(
            st.session_state['Pinecone_API_Key'],
            st.session_state['Pinecone_ENV'],
            st.session_state['Pinecone_Index'],
            embeddings,
        )
        st.write("Pinecone index retrieval done...")

        # Fetch relevant documents from the Pinecone index
        relavant_docs = get_similar_docs(index, prompt, document_count)

        # Display the search results
        st.success("Please find the search results :")
        st.write("search results list....")
        # Number results by position. Looking each document up with
        # list.index() would rescan the list on every iteration and would
        # repeat a number whenever two returned documents compare equal.
        for position, document in enumerate(relavant_docs, start=1):
            st.write("👉**Result : " + str(position) + "**")
            st.write("**Info**: " + document.page_content)
            st.write("**Link**: " + document.metadata['source'])
    else:
        st.sidebar.error("Ooopssss!!! Please provide API keys.....")