File size: 1,543 Bytes
e08cd35
 
 
 
 
 
 
4208ff2
 
e08cd35
 
 
 
 
 
d2422de
 
e08cd35
 
 
f1180fd
e08cd35
 
4208ff2
e08cd35
80fd1b0
e08cd35
 
 
2c6600b
4208ff2
2c6600b
e08cd35
c748858
e08cd35
 
 
 
 
 
a78d024
e08cd35
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import gradio as gr
from transformers import pipeline
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from InstructorEmbedding import INSTRUCTOR

# Alternative zero-shot checkpoint, left disabled:
# pipe = pipeline(model="facebook/bart-large-mnli")

# Zero-shot classifier; predict() uses it when no stored message is similar enough.
pipe = pipeline(
    "zero-shot-classification",
    model="MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7",
)

# Instruction-tuned embedding model shared by the corpus and the incoming queries.
model = INSTRUCTOR("hkunlp/instructor-large")

# Semicolon-separated table; the "description" and "message" columns are read below.
df = pd.read_csv("intent.csv", sep=";")


def _instruction_pairs(frame):
    """Return one ``[instruction, message]`` pair per row of ``frame``.

    The instruction embeds the row's ``description`` value; the second element
    is the row's ``message`` value, which predict() later returns as the answer.
    """
    pairs = []
    for _, row in frame.iterrows():
        instruction = f'Represent the document for retrieval of {row["description"]} information : '
        pairs.append([instruction, row["message"]])
    return pairs


data = _instruction_pairs(df)

# Encoded once at start-up so each request only has to embed the query.
corpus_embeddings = model.encode(data)


def predict(question, lower_threshold, tags, multi_label):
    """Answer ``question`` from the stored messages, or classify it zero-shot.

    The question is embedded with the instructor model and compared by cosine
    similarity against ``corpus_embeddings``. When the best match reaches
    ``lower_threshold`` its stored message is returned; otherwise the question
    is handed to the zero-shot pipeline with the labels parsed from ``tags``.

    Args:
        question: Text to answer or classify.
        lower_threshold: Similarity below which the stored messages are
            ignored and the classifier is used instead. Converted with
            ``float`` before comparison.
        tags: Comma-separated candidate labels for the classifier. Entries
            that are blank after stripping are dropped.
        multi_label: Forwarded unchanged as the pipeline's ``multi_label``
            argument.

    Returns:
        A dict that always carries ``'query_similarity_score'`` (a plain
        ``float``). On a retrieval hit it also has ``'chatbot_response'``.
        Otherwise it is the pipeline's result dict; if no usable label was
        supplied, the classifier is skipped and ``'labels'`` / ``'scores'``
        are empty lists.
    """
    query = [['Represent the question for retrieving supporting documents: ', question]]
    query_embeddings = model.encode(query)

    # A single query is encoded, so row 0 holds its similarity to every stored message.
    scores = cosine_similarity(query_embeddings, corpus_embeddings)[0]
    best_idx = int(np.argmax(scores))
    raw_score = scores[best_idx]
    # Plain Python float so the returned payload does not carry a numpy scalar.
    best_score = float(raw_score)

    if raw_score < float(lower_threshold):
        labels = [label for label in (part.strip() for part in (tags or "").split(",")) if label]
        if not labels:
            # The zero-shot pipeline rejects an empty label list; return an
            # empty classification instead of surfacing that error to the UI.
            return {
                "sequence": question,
                "labels": [],
                "scores": [],
                "query_similarity_score": best_score,
            }
        ans = pipe(question, candidate_labels=labels, multi_label=multi_label)
        ans['query_similarity_score'] = best_score
        return ans

    return {"chatbot_response": data[best_idx][-1], 'query_similarity_score': best_score}





# Web UI around predict(); the inputs are listed in the same order as its
# parameters: question, lower_threshold, tags, multi_label.
demo = gr.Interface(
    fn=predict,
    inputs=[
        "text",
        gr.Slider(0.0, 1.0),
        "text",
        gr.Checkbox(label='Allow multiple true classes'),
    ],
    outputs="json",
)
demo.launch()