| | import gradio as gr |
| | from transformers import pipeline |
| | import numpy as np |
| | import pandas as pd |
| | from sklearn.metrics.pairwise import cosine_similarity |
| | from InstructorEmbedding import INSTRUCTOR |
| |
|
| | |
# Zero-shot classifier that predict() falls back to when no corpus entry is
# similar enough to the question.
pipe = pipeline(
    "zero-shot-classification",
    model="MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7",
)

# Embedding model used for both the corpus and incoming questions.
model = INSTRUCTOR('hkunlp/instructor-large')

# Intent table; the code below reads its "description" and "message" columns.
df = pd.read_csv('intent.csv', delimiter=';')


def _build_corpus(frame):
    """Return one ``[instruction, message]`` pair per row of ``frame``.

    The instruction embeds the row's ``description`` value; the second element
    is the row's ``message`` value, which predict() later returns as the
    chatbot response.
    """
    pairs = []
    for _, row in frame.iterrows():
        instruction = f'Represent the document for retrieval of {row["description"]} information : '
        pairs.append([instruction, row["message"]])
    return pairs


data = _build_corpus(df)

# Encoded once at import time so each request only has to embed the question.
corpus_embeddings = model.encode(data)
| |
|
| |
|
def predict(question, lower_threshold, tags, multi_label):
    """Answer ``question`` from the intent corpus, or classify it as a fallback.

    The question is embedded with the module-level ``model`` and compared with
    ``corpus_embeddings`` by cosine similarity. If the best similarity is below
    ``lower_threshold`` the question is handed to the zero-shot pipeline
    ``pipe`` with the labels parsed from ``tags``; otherwise the message of the
    best-matching corpus entry is returned.

    Args:
        question: Text to answer or classify.
        lower_threshold: Similarity below which the zero-shot fallback is
            used; converted with ``float()`` before the comparison.
        tags: Comma-separated candidate labels for the fallback. Whitespace
            around each label is stripped and blank entries are dropped.
            ``None`` is treated as an empty string.
        multi_label: Forwarded unchanged to the pipeline's ``multi_label``
            argument.

    Returns:
        A dict that always contains ``query_similarity_score`` as a plain
        ``float``. On a corpus match it also contains ``chatbot_response``.
        On the fallback it is the pipeline's result dict with the score added.
        If the fallback is needed but ``tags`` yields no labels, an empty
        classification (``sequence``, empty ``labels`` and ``scores``) is
        returned instead of letting the pipeline raise on an empty label list.
    """
    query = [['Represent the question for retrieving supporting documents: ', question]]
    query_embeddings = model.encode(query)

    # Only one query is encoded, so row 0 holds every corpus similarity.
    similarities = cosine_similarity(query_embeddings, corpus_embeddings)[0]
    retrieved_doc_id = int(np.argmax(similarities))
    # Plain float so the result stays serializable by any JSON encoder;
    # NumPy scalars are rejected by the standard-library one.
    best_score = float(similarities[retrieved_doc_id])

    if best_score < float(lower_threshold):
        candidate_labels = [
            label.strip() for label in (tags or "").split(",") if label.strip()
        ]
        if not candidate_labels:
            # The zero-shot pipeline raises ValueError when given no labels.
            # Return an empty result using the single-sequence output keys
            # described in the transformers docs, so the UI still shows the
            # similarity score.
            return {
                "sequence": question,
                "labels": [],
                "scores": [],
                "query_similarity_score": best_score,
            }
        ans = pipe(question, candidate_labels=candidate_labels, multi_label=multi_label)
        ans['query_similarity_score'] = best_score
        return ans

    return {
        "chatbot_response": data[retrieved_doc_id][-1],
        'query_similarity_score': best_score,
    }
| |
|
| |
|
| |
|
| |
|
| |
|
# Input components, in the same order as predict()'s parameters:
# question, lower_threshold, tags, multi_label.
threshold_slider = gr.Slider(minimum=0.0, maximum=1.0)
multi_label_checkbox = gr.Checkbox(label='Allow multiple true classes')

demo = gr.Interface(
    fn=predict,
    inputs=["text", threshold_slider, "text", multi_label_checkbox],
    outputs="json",
)
demo.launch()
| |
|
| | |