Ninad077 committed on
Commit
b39cd72
·
verified ·
1 Parent(s): 7b018a5

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +61 -0
  2. qna.csv +5 -0
  3. requirements.txt +0 -0
  4. train.py +40 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+ import numpy as np
6
+
7
+ # Function to load and preprocess the data
8
def load_data(file):
    """Load a semicolon-separated Q&A CSV and clean it for vectorisation.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A ``pandas.DataFrame`` with whitespace-stripped column names. Rows
        with a missing ``question`` or ``answer`` value are removed when
        those columns are present; other columns are left untouched.
    """
    df = pd.read_csv(file, delimiter=";")

    # Headers such as "question ; answer" would otherwise fail the later
    # df['question'] lookup with a KeyError.
    df = df.rename(columns=lambda col: col.strip() if isinstance(col, str) else col)

    # An empty cell is read as NaN, which the TF-IDF vectoriser rejects as an
    # invalid document; discard such incomplete pairs up front.
    required = [col for col in ("question", "answer") if col in df.columns]
    if required:
        df = df.dropna(subset=required).reset_index(drop=True)
    return df
11
+
12
+ # Function to process the input and get the most similar question
13
def get_most_similar_question(new_sentence, questions, answers, vectorizer, tfidf_matrix):
    """Find the stored answer whose question best matches ``new_sentence``.

    Args:
        new_sentence: The user's question text.
        questions: Not used by this function; kept in the signature for
            callers that pass it.
        answers: Sequence of answers, indexed in the same order as the rows
            of ``tfidf_matrix``.
        vectorizer: Vectoriser used to transform ``new_sentence``; it is
            only transformed with here, never fitted.
        tfidf_matrix: TF-IDF matrix of the known questions.

    Returns:
        A ``(answer, similarity_percentage)`` tuple, where the percentage is
        the best cosine similarity multiplied by 100.
    """
    query_vector = vectorizer.transform([new_sentence])

    # One query row in, so the similarity matrix has a single row of scores.
    scores = cosine_similarity(query_vector, tfidf_matrix)[0]
    best_index = np.argmax(scores)

    return answers[best_index], scores[best_index] * 100
23
+
24
+ # Function to generate response
25
def answer_the_question(new_sentence, questions, answers, vectorizer, tfidf_matrix, threshold=70):
    """Build the chatbot response for ``new_sentence``.

    Args:
        new_sentence: The user's question text.
        questions: Known questions, forwarded to ``get_most_similar_question``.
        answers: Answers aligned with the rows of ``tfidf_matrix``.
        vectorizer: Vectoriser used to transform ``new_sentence``.
        tfidf_matrix: TF-IDF matrix of the known questions.
        threshold: Minimum similarity percentage the best match must exceed
            (strictly) for its answer to be returned. Defaults to 70, the
            previously hard-coded cutoff.

    Returns:
        A dict with a single ``'answer'`` key holding either the matched
        answer or a fixed apology message.
    """
    most_similar_answer, similarity_percentage = get_most_similar_question(
        new_sentence, questions, answers, vectorizer, tfidf_matrix
    )

    if similarity_percentage > threshold:
        return {'answer': most_similar_answer}

    # Best match is too weak to trust; decline rather than guess.
    return {'answer': 'Sorry, I am not aware of this information :('}
37
+
38
+ # Streamlit app
39
def main():
    """Render the Streamlit Q&A chatbot page.

    Lets the user upload a CSV, fits a TF-IDF vectoriser on its
    ``question`` column, and answers a typed question with the best
    matching ``answer`` via ``answer_the_question``. Problems with the
    uploaded file are reported with ``st.error`` instead of a traceback.
    """
    st.title("Q&A Chatbot")
    st.write("Upload a CSV file with questions and answers.")

    # Upload CSV file
    uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
    if uploaded_file is None:
        return

    try:
        df = load_data(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the CSV file: {exc}")
        return

    # A wrong delimiter or header would otherwise surface as a bare KeyError.
    missing = [col for col in ("question", "answer") if col not in df.columns]
    if missing:
        st.error(
            f"The CSV file is missing required column(s): {', '.join(missing)}. "
            "Expected a ';'-separated file with 'question' and 'answer' columns."
        )
        return

    # Incomplete rows (NaN cells) cannot be vectorised or answered.
    df = df.dropna(subset=["question", "answer"])
    if df.empty:
        st.error("The CSV file does not contain any question/answer rows.")
        return

    # The vectoriser only accepts text, so coerce e.g. numeric questions.
    questions = df['question'].astype(str).tolist()
    answers = df['answer'].tolist()
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform(questions)
    except ValueError as exc:
        # Raised by fit_transform, e.g. when no usable terms are found.
        st.error(f"Could not build a vocabulary from the questions: {exc}")
        return

    # Ask question
    user_question = st.text_input("Ask your question here:")
    if st.button("Ask"):
        if user_question:
            response = answer_the_question(user_question, questions, answers, vectorizer, tfidf_matrix)
            st.write("Answer:", response['answer'])


if __name__ == "__main__":
    main()
qna.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ question;answer
2
+ who is pm of India;Modi
3
+ Who is Indian prime minister;Modi
4
+ who is the leader of BJP;Modi
5
+ Who is Indian pm;Modi
requirements.txt ADDED
Binary file (4.89 kB). View file
 
train.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sklearn.feature_extraction.text import TfidfVectorizer
3
+ from sklearn.metrics.pairwise import cosine_similarity
4
+ import numpy as np
5
+
6
# Load the semicolon-separated Q&A pairs and echo them for inspection.
df = pd.read_csv("qna.csv", encoding="utf-8", delimiter=";")
print(df)

# Questions are what gets vectorised; answers are looked up by row index.
questions = df["question"].tolist()
print(questions)
answers = df["answer"].tolist()

# Fit TF-IDF on the known questions once; the functions below reuse both
# the fitted vectoriser and the resulting matrix.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(questions)
13
+
14
def get_most_similar_question(new_sentence):
    """Find the stored answer whose question best matches ``new_sentence``.

    Uses the module-level ``vectorizer``, ``tfidf_matrix`` and ``answers``.

    Args:
        new_sentence: The question text to look up.

    Returns:
        A ``(answer, similarity_percentage)`` tuple, where the percentage is
        the best cosine similarity multiplied by 100.
    """
    query_vector = vectorizer.transform([new_sentence])

    # One query row in, so the similarity matrix has a single row of scores.
    scores = cosine_similarity(query_vector, tfidf_matrix)[0]
    best_index = np.argmax(scores)

    return answers[best_index], scores[best_index] * 100
24
+
25
def AnswertheQuestion(new_sentence, threshold=70):
    """Build the chatbot response for ``new_sentence``.

    Args:
        new_sentence: The question text to answer.
        threshold: Minimum similarity percentage the best match must exceed
            (strictly) for its answer to be returned. Defaults to 70, the
            previously hard-coded cutoff.

    Returns:
        A dict with a single ``'answer'`` key holding either the matched
        answer or a fixed apology message.
    """
    most_similar_answer, similarity_percentage = get_most_similar_question(new_sentence)

    if similarity_percentage > threshold:
        return {'answer': most_similar_answer}

    # Best match is too weak to trust; decline rather than guess.
    return {'answer': 'Sorry, I am not aware of this information :('}
38
+
39
# Quick manual check: run one sample question through the matcher and
# print the resulting response dict.
sample_response = AnswertheQuestion('Who is the Ninad')
print(sample_response)
40
+