File size: 4,447 Bytes
5227dfc
 
 
 
a261a54
 
229bcf3
 
5227dfc
 
229bcf3
 
 
 
5227dfc
1f0812e
a261a54
 
 
 
 
 
 
 
 
 
 
 
 
5227dfc
 
4a9a15c
5227dfc
52795a6
 
5227dfc
 
 
229bcf3
08b2db5
 
 
 
 
5227dfc
 
 
 
 
 
eb8b7d0
5227dfc
 
24b629d
5227dfc
 
 
 
 
 
 
 
5b094ac
5227dfc
 
 
eb8b7d0
5227dfc
 
 
 
eb8b7d0
5227dfc
 
 
 
5bacc69
5227dfc
 
 
 
 
 
 
261f709
a261a54
c49ec9a
395b8cd
 
 
 
 
 
 
 
 
 
29ead90
395b8cd
 
 
 
 
 
 
 
 
 
 
a261a54
5227dfc
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import streamlit as st
from utils import *
import constants

# Creating Session State Variables

# Seed every session-state key with an empty string the first time the script
# runs, and leave existing values alone on later reruns.
# The guard and the assignment share one key name: the previous hand-written
# chain tested 'Pinecone_EBV' (a key that is never stored), so 'Pinecone_ENV'
# was reset to '' on every rerun.
for _state_key in (
    'Website_URL',
    'HuggingFace_API_Key',
    'Pinecone_Index',
    'Pinecone_ENV',
    'Pinecone_API_Key',
    'API_Key',
    'history',
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = ''



# clear the chat history from streamlit session state
def clear_history():
    """Remove the 'history' entry from Streamlit's session state, if present."""
    # Nothing to remove when the key was never set (or was already cleared).
    if 'history' not in st.session_state:
        return
    del st.session_state['history']



# Page header: the app title followed by an embedded YouTube video.
_page_title = 'Query a Sitemap'
_header_video_url = "https://youtu.be/a6tZd-niM1o"

st.title(_page_title)

st.video(_header_video_url)

#********SIDE BAR Functionality started*******

# Sidebar to capture the API keys
# The title is the key emoji (U+1F5DD followed by the emoji variation
# selector U+FE0F). It is written with escapes because the raw bytes had been
# decoded with the wrong code page and were rendering as garbage characters.
st.sidebar.title("\U0001F5DD\uFE0F")

# Each widget's current value is copied into session state on every rerun so
# the button handlers further down can read the settings from one place.
st.session_state['Website_URL']= st.sidebar.text_input('Input the Sitemap you want to Query against','https://jobs.excelcult.com/wp-sitemap-posts-post-1.xml')
st.session_state['HuggingFace_API_Key']= st.sidebar.text_input('Input Your HuggingFace API key',type="password")
st.session_state['Pinecone_Index']= st.sidebar.text_input('Input Your Pinecone Index','chatbot')
st.session_state['Pinecone_ENV']= st.sidebar.text_input('Input Your Pinecone Environment','gcp-starter')
st.session_state['Pinecone_API_Key']= st.sidebar.text_input('Input your Pinecone API key?',type="password")

# True only on the rerun triggered by clicking the button.
load_button = st.sidebar.button("Load data to Pinecone", key="load_button")

# If the button above is clicked, push the data to Pinecone...
if load_button:
    # Proceed only if every required setting is provided. 'Pinecone_Index' is
    # included because it is passed straight to push_to_pinecone below; it was
    # previously left unchecked, so a blank index name reached that call.
    required_settings = (
        'Website_URL',
        'HuggingFace_API_Key',
        'Pinecone_Index',
        'Pinecone_ENV',
        'Pinecone_API_Key',
    )
    if all(st.session_state[setting] != "" for setting in required_settings):

        # Fetch data from site
        site_data = get_website_data(st.session_state['Website_URL'])
        st.write("Data pull done...")

        # Split data into chunks
        chunks_data = split_data(site_data)
        st.write("Spliting data done...")

        # Creating embeddings instance
        embeddings = create_embeddings()
        st.write("Embeddings instance creation done...")

        # Push data to Pinecone
        push_to_pinecone(
            st.session_state['Pinecone_API_Key'],
            st.session_state['Pinecone_ENV'],
            st.session_state['Pinecone_Index'],
            embeddings,
            chunks_data,
        )
        st.write("Pushing data to Pinecone done...")

        st.sidebar.success("Data pushed to Pinecone successfully!")
    else:
        # The message lists everything the check above requires.
        st.sidebar.error("Ooopssss!!! Please provide API keys, Pinecone index and website URL.....")

#********SIDE BAR Functionality ended*******

# Captures user inputs
# Emoji in the labels are written as escapes (U+2753 question mark, U+1F517
# link) so they render correctly even if this file is re-encoded; the link
# emoji had been corrupted into garbage characters.
prompt = st.text_input('Ask a question about your uploaded website: \u2753', key="prompt")  # The box for the text prompt
# Slider arguments are (min, max, default): 0 to 5 links, 2 by default.
document_count = st.slider('No.Of links to return \U0001F517 - (0 LOW || 5 HIGH)', 0, 5, 2, step=1)

# True only on the rerun triggered by clicking the button.
submit = st.button("Search")


if submit:
    # Proceed only if every required setting is provided. 'Pinecone_Index' is
    # included because it is passed straight to pull_from_pinecone below.
    required_for_search = (
        'Website_URL',
        'HuggingFace_API_Key',
        'Pinecone_Index',
        'Pinecone_ENV',
        'Pinecone_API_Key',
    )
    if not all(st.session_state[setting] != "" for setting in required_for_search):
        st.sidebar.error("Ooopssss!!! Please provide API keys.....")
    elif not prompt.strip():
        # A blank question has nothing to match against; ask for one rather
        # than running a search on an empty string.
        st.warning("Please enter a question before searching.")
    else:
        # Creating embeddings instance
        embeddings = create_embeddings()
        st.write("Embeddings instance creation done...")

        # Pull index data from Pinecone
        index = pull_from_pinecone(
            st.session_state['Pinecone_API_Key'],
            st.session_state['Pinecone_ENV'],
            st.session_state['Pinecone_Index'],
            embeddings,
        )
        st.write("Pinecone index retrieval done...")

        # Fetch relevant documents from Pinecone index
        # NOTE(review): the slider allows document_count == 0 - confirm that
        # get_similar_docs accepts a count of zero.
        relevant_docs = get_similar_docs(index, prompt, document_count)

        # Displaying search results
        st.success("Please find the search results :")
        st.write("search results list....")
        # enumerate() numbers results by position. The earlier
        # list.index(document) lookup rescanned the list for every result and
        # gave the same number to documents that compare equal.
        for result_number, document in enumerate(relevant_docs, start=1):
            # U+1F449 is the "pointing right" emoji; written as an escape
            # because the raw bytes had been corrupted into garbage characters.
            st.write("\U0001F449**Result : " + str(result_number) + "**")
            st.write("**Info**: " + document.page_content)
            st.write("**Link**: " + document.metadata['source'])