File size: 1,161 Bytes
b39cd72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

df = pd.read_csv('qna.csv',encoding = 'utf-8',delimiter=';')
print(df)
questions = df['question'].tolist()
print(questions)
answers = df['answer'].tolist()
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(questions)

def get_most_similar_question(new_sentence):
    new_tfidf = vectorizer.transform([new_sentence])

    similarities = cosine_similarity(new_tfidf,tfidf_matrix)

    most_similar_index = np.argmax(similarities)

    similarity_percentage = similarities[0, most_similar_index]*100

    return answers[most_similar_index], similarity_percentage

def AnswertheQuestion(new_sentence):
    most_similar_answer, similarity_percentage = get_most_similar_question(new_sentence)
    if similarity_percentage > 70:
        response = {
            'answer': most_similar_answer
        
        }
    else:
        response = {
            'answer': 'Sorry, I am not aware of this information :('
        }

    return response

print(AnswertheQuestion('Who is the Ninad'))