# Streamlit front end: collects a sitemap URL plus HuggingFace / Pinecone
# settings in the sidebar and exposes a button that triggers loading the
# sitemap's content into a Pinecone index.

import streamlit as st
from utils import *
import constants

# Creating Session State Variables
# Every key gets an empty-string default the first time the script runs in a
# session; values already stored from an earlier rerun are left untouched.
_SESSION_STATE_KEYS = (
    'Website_URL',
    'HuggingFace_API_Key',
    'Pinecone_Index',
    'Pinecone_ENV',  # the guard used to test the misspelled 'Pinecone_EBV'
    'Pinecone_API_Key',
    'API_Key',
    'history',
)
for _state_key in _SESSION_STATE_KEYS:
    if _state_key not in st.session_state:
        st.session_state[_state_key] = ''


def clear_history():
    """Remove the 'history' entry from Streamlit session state, if present."""
    if 'history' in st.session_state:
        del st.session_state['history']


# NOTE(review): the whitespace-collapsed source leaves it ambiguous whether
# this title call was live or commented out -- confirm against the original.
st.title('Query a Sitemap')
st.video("https://youtu.be/a6tZd-niM1o")

#********SIDE BAR Functionality started*******

# Sidebar to capture the sitemap URL and the API keys. Each widget's current
# value is copied into session state on every rerun.
st.sidebar.title("🗝️")
st.session_state['Website_URL'] = st.sidebar.text_input(
    'Input the Sitemap you want to Query against',
    'https://jobs.excelcult.com/wp-sitemap-posts-post-1.xml',
)
st.session_state['HuggingFace_API_Key'] = st.sidebar.text_input(
    'Input Your HuggingFace API key',
    type="password",
)
st.session_state['Pinecone_Index'] = st.sidebar.text_input(
    'Input Your Pinecone Index',
    'chatbot',
)
st.session_state['Pinecone_ENV'] = st.sidebar.text_input(
    'Input Your Pinecone Environment',
    'gcp-starter',
)
st.session_state['Pinecone_API_Key'] = st.sidebar.text_input(
    'Input your Pinecone API key?',
    type="password",
)

load_button = st.sidebar.button("Load data to Pinecone", key="load_button")

# If the above button is clicked, the data is pushed to Pinecone...
# Session-state keys that must be non-empty before either action may run.
_REQUIRED_STATE_KEYS = (
    'Website_URL',
    'HuggingFace_API_Key',
    'Pinecone_ENV',
    'Pinecone_API_Key',
)


def _credentials_provided():
    """Return True when every required session-state value is non-empty."""
    # NOTE(review): the HuggingFace key is validated here but is never passed
    # to any helper called in this script; presumably `utils` obtains it some
    # other way -- confirm.
    return all(st.session_state[key] != "" for key in _REQUIRED_STATE_KEYS)


# Sidebar "Load data to Pinecone" button: fetch the site, split it into
# chunks, embed the chunks and push them to the configured Pinecone index.
if load_button:
    # Proceed only if API keys are provided
    if _credentials_provided():
        # Fetch data from site
        site_data = get_website_data(st.session_state['Website_URL'])
        st.write("Data pull done...")

        # Split data into chunks
        chunks_data = split_data(site_data)
        st.write("Spliting data done...")

        # Creating embeddings instance
        embeddings = create_embeddings()
        # st.session_state.vs=embeddings
        st.write("Embeddings instance creation done...")

        # Push data to Pinecone
        push_to_pinecone(
            st.session_state['Pinecone_API_Key'],
            st.session_state['Pinecone_ENV'],
            st.session_state['Pinecone_Index'],
            embeddings,
            chunks_data,
        )
        st.write("Pushing data to Pinecone done...")

        st.sidebar.success("Data pushed to Pinecone successfully!")
    else:
        st.sidebar.error("Ooopssss!!! Please provide API keys and webstie URL.....")

#********SIDE BAR Functionality ended*******

# Captures User Inputs
prompt = st.text_input('Ask a question about your uploaded website: ❓', key="prompt")  # The box for the text prompt
document_count = st.slider('No.Of links to return 🔗 - (0 LOW || 5 HIGH)', 0, 5, 2, step=1)

submit = st.button("Search")

# "Search" button: look up the documents most similar to the prompt in the
# Pinecone index and list them on the page.
if submit:
    # Proceed only if API keys are provided
    if _credentials_provided():
        # Creating embeddings instance
        embeddings = create_embeddings()
        st.write("Embeddings instance creation done...")

        # Pull index data from Pinecone
        index = pull_from_pinecone(
            st.session_state['Pinecone_API_Key'],
            st.session_state['Pinecone_ENV'],
            st.session_state['Pinecone_Index'],
            embeddings,
        )
        st.write("Pinecone index retrieval done...")

        # Fetch relevant documents from Pinecone index
        relavant_docs = get_similar_docs(index, prompt, document_count)
        # st.write(relavant_docs)

        # Displaying search results
        st.success("Please find the search results :")
        st.write("search results list....")
        # Number results by position. The previous list.index() lookup gave
        # duplicate documents the same number and rescanned the list per item.
        for position, document in enumerate(relavant_docs, start=1):
            st.write("👉**Result : " + str(position) + "**")
            st.write("**Info**: " + document.page_content)
            st.write("**Link**: " + document.metadata['source'])
    else:
        st.sidebar.error("Ooopssss!!! Please provide API keys.....")