Chatbot_01 / train.py
Ninad077's picture
Upload 4 files
b39cd72 verified
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
df = pd.read_csv('qna.csv',encoding = 'utf-8',delimiter=';')
print(df)
questions = df['question'].tolist()
print(questions)
answers = df['answer'].tolist()
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(questions)
def get_most_similar_question(new_sentence):
new_tfidf = vectorizer.transform([new_sentence])
similarities = cosine_similarity(new_tfidf,tfidf_matrix)
most_similar_index = np.argmax(similarities)
similarity_percentage = similarities[0, most_similar_index]*100
return answers[most_similar_index], similarity_percentage
def AnswertheQuestion(new_sentence):
most_similar_answer, similarity_percentage = get_most_similar_question(new_sentence)
if similarity_percentage > 70:
response = {
'answer': most_similar_answer
}
else:
response = {
'answer': 'Sorry, I am not aware of this information :('
}
return response
print(AnswertheQuestion('Who is the Ninad'))