jonathanjordan21 committed on
Commit
e08cd35
·
1 Parent(s): 1cefc76

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import numpy as np
4
+ import pandas as pd
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ from InstructorEmbedding import INSTRUCTOR
7
+
8
# Fallback classifier, called by predict() with `candidate_labels`.
# NOTE(review): no task name is passed, so the pipeline type is inferred from
# the model; presumably zero-shot classification -- confirm.
pipe = pipeline(model="facebook/bart-large-mnli")

# Embedding model; it encodes [instruction, text] pairs.
model = INSTRUCTOR('hkunlp/instructor-large')

# Semicolon-delimited intent table.
# NOTE(review): assumes the file has `desc` and `message` columns. The
# original indexed rows with the bare (undefined) names `desc` / `message`,
# which raises NameError -- confirm the column names against intent.csv.
df = pd.read_csv('intent.csv', delimiter=';')

# One [instruction, text] pair per row, in row order, so that a position in
# `data` matches the same position in `corpus_embeddings`.
data = [
    [
        f'Represent the document for retrieval of {row["desc"]} information : ',
        row["message"],
    ]
    for _, row in df.iterrows()
]

# Embed the whole corpus once at start-up. The original called an undefined
# `embed_data` helper; the already-loaded model's `encode` is used instead.
corpus_embeddings = model.encode(data)
21
+
22
+
23
def predict(question, threshold, tags):
    """Answer *question* by retrieval, falling back to label classification.

    The question is embedded and compared (cosine similarity) against
    ``corpus_embeddings``. If the best similarity is below *threshold*, the
    question text is classified by ``pipe`` against the labels in *tags*.

    Args:
        question: The user's question text.
        threshold: Minimum similarity needed to return the retrieved entry.
        tags: Comma-separated candidate labels for the fallback classifier.

    Returns:
        A dict. On a confident match: ``{"sequence": <matched text>,
        "query_similarity_score": <float>}``. Otherwise the output of
        ``pipe`` with ``"query_similarity_score"`` added.
        NOTE(review): assumes ``pipe`` returns a dict for a single input
        string -- confirm against the pipeline type.
    """
    query = [['Represent the question for retrieving supporting documents: ', question]]
    query_embeddings = model.encode(query)

    # cosine_similarity returns a 2-D array with one row per query; there is
    # a single query, so take row 0 to get one score per corpus entry.
    similarities = cosine_similarity(query_embeddings, corpus_embeddings)[0]
    retrieved_doc_id = int(np.argmax(similarities))
    # Plain float so the value is safe to emit through the JSON output.
    score = float(similarities[retrieved_doc_id])

    if score < threshold:
        # Drop blank entries such as the one produced by a trailing comma.
        labels = [tag.strip() for tag in tags.split(",") if tag.strip()]
        # Classify the raw question text, not its embedding.
        ans = pipe(question, candidate_labels=labels)
        ans['query_similarity_score'] = score
        return ans

    return {"sequence": data[retrieved_doc_id][-1], 'query_similarity_score': score}
34
+
35
+
36
+
37
+
38
+
39
# Web UI: three inputs (question text, similarity threshold, comma-separated
# tags) mapped positionally onto predict(); the returned dict is shown as JSON.
demo = gr.Interface(
    fn=predict,
    inputs=["text", "number", "text"],
    outputs="json",
)
demo.launch()
42
+
43
+