# QueryaWebsite / app.py
# eaglelandsonce's picture
# Update app.py
# d374dbb
# raw
# history blame
# 5.92 kB
import streamlit as st
from utils import *
import constants
# Create the session-state entries this app reads, defaulting each to ''.
# Every key is tested and assigned under the same name, so an entry that
# already holds a value is never overwritten when this code runs again.
# (The Pinecone environment used to be tested as 'Pinecone_EBV' but stored
# as 'Pinecone_ENV'; the test never matched, so the value was always reset.)
for _state_key in (
    'Website_URL',
    'HuggingFace_API_Key',
    'Pinecone_Index',
    'Pinecone_ENV',
    'Pinecone_API_Key',
    'API_Key',
    'history',
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = ''
# Clear the chat history from the Streamlit session state.
def clear_history():
    """Delete the 'history' entry from ``st.session_state`` when it exists.

    Does nothing if no 'history' entry is present.
    """
    if 'history' not in st.session_state:
        return
    del st.session_state['history']
#
st.title('AI Assistance For Website')

# ******** Sidebar functionality starts ********
# Sidebar to capture the sitemap URL, the API keys and the Pinecone settings.
# The title is the key emoji (U+1F5DD + variation selector). It is written
# with escapes because the literal 4-byte emoji had been stored mis-encoded.
st.sidebar.title("\U0001F5DD\uFE0F")
st.session_state['Website_URL'] = st.sidebar.text_input(
    'Input the Sitemap you want to Query against?',
    "https://jobs.excelcult.com/wp-sitemap-posts-post-1.xml",
)
st.session_state['HuggingFace_API_Key'] = st.sidebar.text_input(
    "What's your HuggingFace API key?", type="password"
)
# These two labels must be double-quoted: the apostrophe in "What's" would
# otherwise terminate a single-quoted literal and make the file unparsable.
st.session_state['Pinecone_Index'] = st.sidebar.text_input(
    "What's your Pinecone Index?", "chatbot"
)
st.session_state['Pinecone_ENV'] = st.sidebar.text_input(
    "What's your Pinecone Environment?", "gcp-starter"
)
st.session_state['Pinecone_API_Key'] = st.sidebar.text_input(
    "What's your Pinecone API key?", type="password"
)

load_button = st.sidebar.button("Load data to Pinecone", key="load_button")

# If the above button is clicked, push the data to Pinecone.
if load_button:
    # Proceed only if the sitemap URL and the API keys/environment are provided.
    if (
        st.session_state['Website_URL'] != ""
        and st.session_state['HuggingFace_API_Key'] != ""
        and st.session_state['Pinecone_ENV'] != ""
        and st.session_state['Pinecone_API_Key'] != ""
    ):
        # Fetch data from site
        site_data = get_website_data(st.session_state['Website_URL'])
        st.write("Data pull done...")

        # Split data into chunks
        chunks_data = split_data(site_data)
        st.write("Spliting data done...")

        # Creating embeddings instance
        embeddings = create_embeddings()
        # st.session_state.vs=embeddings
        st.write("Embeddings instance creation done...")

        # Push data to Pinecone
        push_to_pinecone(
            st.session_state['Pinecone_API_Key'],
            st.session_state['Pinecone_ENV'],
            st.session_state['Pinecone_Index'],
            embeddings,
            chunks_data,
        )
        st.write("Pushing data to Pinecone done...")

        st.sidebar.success("Data pushed to Pinecone successfully!")
    else:
        st.sidebar.error("Ooopssss!!! Please provide API keys and webstie URL.....")
# ******** Sidebar functionality ends ********
# Capture the user inputs for a query.
api_key = st.session_state['API_Key'] = st.text_input("Paste in your OPENAI API key?", type="password")
# The box for the text prompt
prompt = st.text_input('Ask a question about your uploaded website: ❓', key="prompt")
# The label contains the link emoji (U+1F517), written as an escape because
# the literal 4-byte emoji had been stored mis-encoded.
document_count = st.slider('No.Of links to return \U0001F517 - (0 LOW || 5 HIGH)', 0, 5, 2, step=1)
submit = st.button("Search")

if submit:
    # Proceed only if the URL and all API keys/environment are provided.
    _required_keys = (
        'Website_URL',
        'HuggingFace_API_Key',
        'Pinecone_ENV',
        'Pinecone_API_Key',
        'API_Key',
    )
    if all(st.session_state[name] != "" for name in _required_keys):
        # The question comes from `prompt`, captured above. A text input
        # created inside this button branch would still be empty on the run
        # in which the button was pressed, so the search could never start.
        # There is also no check for a 'vs' session entry any more: nothing
        # in this file ever sets it, so that check blocked every search.
        if prompt:
            # Creating embeddings instance
            embeddings = create_embeddings()
            st.write("Embeddings instance creation done...")

            # The slider value is the document count; the name `k` that was
            # used here was never defined and raised NameError.
            st.write(f'k: {document_count}')
            # NOTE(review): ask_and_get_answer is not defined in this file;
            # presumably it comes from utils -- confirm that it accepts
            # (embeddings, question, k) and what it does when k is 0.
            answer = ask_and_get_answer(embeddings, prompt, document_count)

            # text area widget for the LLM answer
            st.text_area('LLM Answer: ', value=answer)

            st.divider()

            # if there's no chat history in the session state, create it
            if 'history' not in st.session_state:
                st.session_state.history = ''

            # Prepend the current question and answer to the history.
            value = f'Q: {prompt} \nA: {answer}'
            st.session_state.history = f'{value} \n {"-" * 100} \n {st.session_state.history}'
            h = st.session_state.history

            # Text area widget for the chat history. It deliberately has no
            # key='history': sharing the key with the stored history would
            # tie that entry to this widget's state and conflict with the
            # explicit `value` passed here.
            st.text_area(label='Chat History', value=h, height=400)

            # Disabled document-retrieval path, kept for reference. It is
            # held in comments rather than a bare triple-quoted string,
            # because Streamlit renders bare string expressions to the page.
            #
            # #Creating embeddings instance
            # embeddings=create_embeddings()
            # st.write("Embeddings instance creation done...")
            # #Pull index data from Pinecone
            # index=pull_from_pinecone(st.session_state['Pinecone_API_Key'],st.session_state['Pinecone_ENV'],st.session_state['Pinecone_Index'],embeddings)
            # st.write("Pinecone index retrieval done...")
            # #Fetch relevant documents from Pinecone index
            # relavant_docs=get_similar_docs(index,prompt,document_count)
            # st.write(relavant_docs)
            # #Displaying search results
            # st.success("Please find the search results :")
            # st.write("search results list....")
            # for document in relavant_docs:
            #     st.write("\U0001F449**Result : "+ str(relavant_docs.index(document)+1)+"**")
            #     st.write("**Info**: "+document.page_content)
            #     st.write("**Link**: "+ document.metadata['source'])
    else:
        st.sidebar.error("Ooopssss!!! Please provide API keys.....")