jonathanjordan21 committed on
Commit
7eea2bf
·
1 Parent(s): d7743fd

Create streamlit app

Browse files
Files changed (1) hide show
  1. app.py +75 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from InstructorEmbedding import INSTRUCTOR
2
+ import streamlit as st
3
+ import pandas as pd
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+
6
+ # if 'model' is not in st.session_state:
7
+ # st.session_state['model'] = INSTRUCTOR('hkunlp/instructor-large')
8
+
9
@st.cache_resource
def load_model():
    """Instantiate the INSTRUCTOR model from the 'hkunlp/instructor-large' checkpoint.

    Wrapped in ``st.cache_resource`` so Streamlit keeps the instance instead
    of rebuilding it on every call.
    """
    instructor = INSTRUCTOR('hkunlp/instructor-large')
    return instructor


# Module-level handle used by the embedding and retrieval code below.
model = load_model()
14
+
15
+
16
def process_data(df, desc, message, embed=False):
    """Ask the user a question and display the best-matching row's text.

    Each row of ``df`` becomes an ``[instruction, text]`` pair. When the
    user submits a question, it is encoded with the module-level ``model``
    and the text of the row with the highest cosine similarity is shown.

    Args:
        df: DataFrame holding the corpus, one document per row.
        desc: Name of the column whose value is interpolated into each
            row's retrieval instruction.
        message: Name of the column holding the text that is embedded and
            displayed as the answer.
        embed: When true, the corpus is embedded up front through
            ``embed_data``; otherwise it is encoded with ``model`` only
            once a question is submitted.
    """
    # Fixed: the original f-string contained a stray ']' after x[desc],
    # which is a syntax error.
    data = [
        [
            f'Represent the document for retrieval of {row[desc]} information : ',
            row[message],
        ]
        for _, row in df.iterrows()
    ]

    corpus_embeddings = embed_data(data) if embed else None

    question = st.text_input("Question : ")
    # Explicit key: other buttons in this app share the "Submit" label, and
    # identical unkeyed widgets collide.
    btn_q = st.button("Submit", key="submit_question")

    if btn_q:
        # Covers embed=False (the original left the name unassigned and
        # crashed here) and an embed_data call that handed back nothing.
        if corpus_embeddings is None:
            corpus_embeddings = model.encode(data)

        query = [['Represent the question for retrieving supporting documents: ', question]]
        query_embeddings = model.encode(query)

        similarities = cosine_similarity(query_embeddings, corpus_embeddings)
        # The original called np.argmax without importing numpy; the array's
        # own argmax gives the same flat index with no extra import.
        retrieved_doc_id = int(similarities.argmax())
        st.text(f"{data[retrieved_doc_id][-1]}")
37
+
38
+
39
# Defined before the UI flow so that both data-source branches can reach it
# through process_data; the original defined it only after the first branch
# had already run.
@st.cache_data(persist='disk')
def embed_data(data):
    """Encode instruction/text pairs with the module-level ``model``.

    Args:
        data: List of ``[instruction, text]`` pairs, as built by
            ``process_data``.

    Returns:
        The result of ``model.encode(data)``.
    """
    # Fixed: the decorator keyword was misspelled ``presist`` and the
    # function dropped its result, so callers always received None.
    return model.encode(data)


opt = st.radio(
    "Choose Data : ",
    ["intent.csv", "upload file CSV"],
    captions=["LMD CSV intent data", "Custom upload CSV data"],
)

if opt == "intent.csv":
    df = pd.read_csv("intent.csv", delimiter=";")
    cols = list(df.columns)

    if len(cols) < 2:
        st.write("FAILED! At least 2 columns needed. Please check your dataset")
    else:
        # NOTE(review): the original passed undefined names `desc` and
        # `message` here (NameError). The schema of intent.csv is not visible
        # from this file, so the first two columns are assumed to be the
        # description and template columns -- confirm against the data.
        desc, message = cols[0], cols[1]
        process_data(df, desc, message, embed=True)

else:
    f = st.file_uploader("Upload CSV File with at least 2 columns", ['xlsx', 'csv'])
    delim = st.text_input('CSV File Delimiter')

    # st.button is only true on the rerun caused by its own click, so widgets
    # nested under it vanish on the next interaction. Clicks are remembered
    # in session_state instead. Explicit keys keep the identically labelled
    # "Submit" buttons from colliding.
    if st.button("Submit", key="submit_file"):
        st.session_state["file_submitted"] = True

    if st.session_state.get("file_submitted") and f is not None:
        # Rewind in case an earlier rerun already consumed the upload buffer.
        f.seek(0)
        if f.name.lower().endswith(".xlsx"):
            # The uploader accepts .xlsx, which read_csv cannot parse.
            df = pd.read_excel(f)
        else:
            # An empty delimiter field falls back to a comma.
            df = pd.read_csv(f, delimiter=delim or ",")
        cols = list(df.columns)

        if len(cols) < 2:
            st.write("FAILED! At least 2 columns needed. Please check your dataset")
        else:
            desc = st.radio("Description Column", cols)
            message = st.radio("Template Column", cols)
            if st.button("Submit", key="submit_columns"):
                st.session_state["columns_submitted"] = True
            if st.session_state.get("columns_submitted"):
                process_data(df, desc, message, embed=True)
70
+
71
+
72
+
73
+
74
+
75
+