import gradio as gr
from transformers import pipeline
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from InstructorEmbedding import INSTRUCTOR
# Zero-shot classifier that predict() falls back on when no stored intent is
# similar enough to the question.
# Alternative (disabled): pipe = pipeline(model="facebook/bart-large-mnli")
pipe = pipeline(
    "zero-shot-classification",
    model="MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7",
)

# Embedding model shared by the intent corpus and the incoming questions.
model = INSTRUCTOR('hkunlp/instructor-large')

# Semicolon-delimited intent table; the "description" and "message" columns
# are the ones read below.
df = pd.read_csv('intent.csv', delimiter=';')

# One [instruction, text] pair per CSV row, in row order, so an index into
# `data` is also an index into `corpus_embeddings`.
data = [
    [f'Represent the document for retrieval of {description} information : ', message]
    for description, message in zip(df["description"], df["message"])
]

# Embed the whole corpus once at start-up.
corpus_embeddings = model.encode(data)
def predict(question, lower_threshold, tags, multi_label):
    """Answer *question* from the stored intents, else classify it zero-shot.

    The question is embedded and compared (cosine similarity) against
    ``corpus_embeddings``. If the best similarity reaches ``lower_threshold``
    the stored message of the best-matching row is returned; otherwise the
    question is passed to the zero-shot classifier ``pipe`` with the
    user-supplied labels.

    Args:
        question: Free-text user question.
        lower_threshold: Minimum similarity (anything ``float()`` accepts)
            required to answer from the stored intents.
        tags: Comma-separated candidate labels for the zero-shot fallback;
            surrounding whitespace and empty entries are ignored.
        multi_label: Forwarded to the classifier as ``multi_label``.

    Returns:
        Either ``{"chatbot_response": ..., "query_similarity_score": ...}``
        for a retrieval hit, or the classifier's result with
        ``"query_similarity_score"`` added to it.

    Raises:
        ValueError: If the fallback is needed but ``tags`` contains no
            usable label.
    """
    query = [['Represent the question for retrieving supporting documents: ', question]]
    query_embeddings = model.encode(query)

    # Only one query is encoded, so the similarity matrix has a single row.
    scores = cosine_similarity(query_embeddings, corpus_embeddings)[0]
    best_doc_id = int(np.argmax(scores))
    # Cast the NumPy scalar to a built-in float so the JSON output does not
    # depend on the serializer understanding NumPy types.
    best_score = float(scores[best_doc_id])

    if best_score < float(lower_threshold):
        candidate_labels = [label.strip() for label in tags.split(",") if label.strip()]
        if not candidate_labels:
            # Fail fast with an actionable message instead of handing the
            # classifier an empty label list.
            raise ValueError(
                "No candidate labels were provided for zero-shot classification "
                f"(best similarity {best_score:.4f} is below the threshold)."
            )
        ans = pipe(question, candidate_labels=candidate_labels, multi_label=multi_label)
        ans['query_similarity_score'] = best_score
        return ans

    # data[i] is [instruction, message]; the last element is the stored reply.
    return {"chatbot_response": data[best_doc_id][-1], 'query_similarity_score': best_score}
# Web UI around predict(); the four inputs map positionally onto
# (question, lower_threshold, tags, multi_label) and the result is shown as JSON.
demo = gr.Interface(
    fn=predict,
    inputs=[
        "text",
        gr.Slider(0.0, 1.0),
        "text",
        gr.Checkbox(label='Allow multiple true classes'),
    ],
    outputs="json",
)
demo.launch()
|