# File size: 2,068 Bytes
# b39cd72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Read the uploaded Q&A table into a DataFrame
def load_data(file):
    """Parse a semicolon-separated CSV source into a pandas DataFrame.

    Args:
        file: Whatever is handed to ``pandas.read_csv`` as its first
            argument (the caller in this file passes the uploaded file).

    Returns:
        The DataFrame produced by ``pandas.read_csv``; nothing else is done
        to the data here.
    """
    return pd.read_csv(file, sep=";")

# Look up the answer paired with the stored question closest to the input
def get_most_similar_question(new_sentence, questions, answers, vectorizer, tfidf_matrix):
    """Return the answer whose question best matches ``new_sentence``.

    Args:
        new_sentence: The text to match.
        questions: Not used by this function; kept in the signature for callers.
        answers: Sequence indexed by the position of the best-matching row.
        vectorizer: Object whose ``transform`` turns a list of texts into
            vectors comparable with ``tfidf_matrix``.
        tfidf_matrix: Vectors of the stored questions, compared against the
            input with ``cosine_similarity``.

    Returns:
        A ``(answer, similarity_percentage)`` tuple, where the percentage is
        the winning cosine similarity multiplied by 100.
    """
    # A single-item list is transformed, so the query is one row of scores.
    query_vector = vectorizer.transform([new_sentence])
    scores = cosine_similarity(query_vector, tfidf_matrix)

    best_match = np.argmax(scores)
    return answers[best_match], scores[0, best_match] * 100

# Function to generate response
def answer_the_question(new_sentence, questions, answers, vectorizer, tfidf_matrix, *, threshold=70):
    """Build the chatbot response for ``new_sentence``.

    Delegates the lookup to ``get_most_similar_question`` and only accepts
    its answer when the reported similarity percentage is strictly greater
    than ``threshold``.

    Args:
        new_sentence: The user's question.
        questions: Passed through to ``get_most_similar_question``.
        answers: Passed through to ``get_most_similar_question``.
        vectorizer: Passed through to ``get_most_similar_question``.
        tfidf_matrix: Passed through to ``get_most_similar_question``.
        threshold: Minimum similarity percentage (exclusive) required to
            return the matched answer. Defaults to 70, the value that was
            previously hard-coded.

    Returns:
        A dict with a single ``'answer'`` key holding either the matched
        answer or a fixed apology message.
    """
    most_similar_answer, similarity_percentage = get_most_similar_question(
        new_sentence, questions, answers, vectorizer, tfidf_matrix
    )

    if similarity_percentage > threshold:
        return {'answer': most_similar_answer}

    # Match too weak to trust: fall back to the fixed apology.
    return {'answer': 'Sorry, I am not aware of this information :('}

# Streamlit app
def main():
    """Render the Q&A chatbot page.

    Shows a CSV uploader. Once a file is provided, a ``TfidfVectorizer`` is
    fitted on its ``question`` column and the text typed by the user is
    answered through ``answer_the_question`` when the "Ask" button is pressed.

    Problems with the upload (unparseable file, missing columns, no usable
    rows, nothing to index) are reported with ``st.error`` instead of being
    raised as exceptions.
    """
    st.title("Q&A Chatbot")
    st.write("Upload a CSV file with questions and answers.")

    # Upload CSV file
    uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
    if uploaded_file is None:
        return

    try:
        df = load_data(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as err:
        st.error(f"Could not read the CSV file: {err}")
        return

    missing_columns = [name for name in ("question", "answer") if name not in df.columns]
    if missing_columns:
        st.error(
            "The CSV file must be semicolon-separated and contain the columns "
            f"'question' and 'answer'; missing: {', '.join(missing_columns)}"
        )
        return

    # Rows lacking a question or an answer can neither be matched nor returned;
    # dropping them together keeps the two lists aligned by position.
    df = df.dropna(subset=["question", "answer"])
    # The vectorizer needs text, so non-string questions are converted.
    questions = df["question"].astype(str).tolist()
    answers = df["answer"].tolist()
    if not questions:
        st.error("The CSV file does not contain any complete question/answer rows.")
        return

    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform(questions)
    except ValueError as err:
        # Raised e.g. when no usable terms can be extracted from the questions.
        st.error(f"Could not index the questions: {err}")
        return

    # Ask question
    user_question = st.text_input("Ask your question here:")
    if st.button("Ask"):
        if user_question.strip():
            response = answer_the_question(user_question, questions, answers, vectorizer, tfidf_matrix)
            st.write("Answer:", response['answer'])
        else:
            st.warning("Please enter a question first.")

# Call main() only when this file is executed as the main module, so that
# importing it does not start the app.
if __name__ == "__main__":
    main()