Commit ·
e08cd35
1
Parent(s): 1cefc76
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from transformers import pipeline
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
+
from InstructorEmbedding import INSTRUCTOR
|
| 7 |
+
|
| 8 |
+
# Zero-shot classifier used as a fallback when no stored intent is similar
# enough to the incoming question. The task is named explicitly instead of
# being inferred from the model card.
pipe = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Instruction-tuned sentence encoder used for both the corpus and the queries.
model = INSTRUCTOR('hkunlp/instructor-large')

# Intent corpus; a semicolon-separated file.
# NOTE(review): assumes the CSV has 'desc' and 'message' columns (the names
# the original code referenced as bare identifiers) — confirm against the file.
df = pd.read_csv('intent.csv', delimiter=';')

# One [instruction, text] pair per row, the input shape INSTRUCTOR.encode
# expects. The text stays the last element because predict() reads it back
# with data[i][-1].
data = [
    [
        f'Represent the document for retrieval of {desc} information : ',
        message,
    ]
    for desc, message in zip(df['desc'], df['message'])
]

# Embed the whole corpus once at start-up so each request only has to encode
# the query.
corpus_embeddings = model.encode(data)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def predict(question, threshold, tags):
    """Answer a question from the intent corpus, or classify it zero-shot.

    The question is embedded and compared against every corpus embedding by
    cosine similarity. If the best match reaches ``threshold`` the stored
    message is returned; otherwise the question is classified against the
    user-supplied tags.

    Args:
        question: Free-text user question.
        threshold: Minimum cosine similarity for a corpus match to be used.
        tags: Comma-separated candidate labels for the zero-shot fallback.

    Returns:
        A dict that always carries ``'query_similarity_score'``. On a corpus
        match it also holds ``'sequence'`` (the matched message); on the
        fallback path it is the classifier output with the score added.
    """
    query = [['Represent the question for retrieving supporting documents: ', question]]
    query_embeddings = model.encode(query)

    # cosine_similarity returns a (1, n_docs) matrix for a single query;
    # take the only row so the argmax index addresses a scalar score.
    similarities = cosine_similarity(query_embeddings, corpus_embeddings)[0]
    retrieved_doc_id = int(np.argmax(similarities))
    # Plain float so the value is serialisable by the JSON output component.
    score = float(similarities[retrieved_doc_id])

    if score >= threshold:
        return {"sequence": data[retrieved_doc_id][-1], 'query_similarity_score': score}

    # Drop blank entries so inputs such as "" or "a,,b" do not produce empty labels.
    labels = [tag.strip() for tag in tags.split(",") if tag.strip()]
    if not labels:
        # Nothing to classify against; return an empty result in the same
        # shape instead of calling the classifier with no labels.
        return {
            "sequence": question,
            "labels": [],
            "scores": [],
            'query_similarity_score': score,
        }

    # Classify the raw question text, not its embedding.
    ans = pipe(question, candidate_labels=labels)
    ans['query_similarity_score'] = score
    return ans
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
# Web UI: question (text), similarity threshold (number) and comma-separated
# tags (text) in; the prediction dict out, rendered as JSON.
demo = gr.Interface(
    fn=predict,
    inputs=["text", "number", "text"],
    outputs="json",
)
demo.launch()
|
| 42 |
+
|
| 43 |
+
|