import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the conversation dataset; the "name" and "line" columns are read below.
df = pd.read_csv("Chatbot.csv")

# Rows named "User" are treated as questions and rows named "ECO" as answers.
questions = df.loc[df["name"] == "User", "line"].tolist()
answers = df.loc[df["name"] == "ECO", "line"].tolist()

# The two lists are paired purely by position (N-th question <-> N-th answer).
# If the counts differ, a matched question could point past the end of
# `answers`, so keep only the positions that have both a question and an answer.
if len(questions) != len(answers):
    paired_count = min(len(questions), len(answers))
    print(
        "Warning: dataset has {} questions but {} answers; "
        "using only the first {} pairs.".format(
            len(questions), len(answers), paired_count
        )
    )
    questions = questions[:paired_count]
    answers = answers[:paired_count]

# Fit TF-IDF on the questions so user input can be compared against them.
vectorizer = TfidfVectorizer()
question_vectors = vectorizer.fit_transform(questions)

def get_best_response(user_input, threshold=0.6):
    """Return the answer paired with the dataset question closest to *user_input*.

    The input is transformed with the module-level TF-IDF ``vectorizer`` and
    compared against every row of ``question_vectors`` by cosine similarity.

    Args:
        user_input: Text typed by the user.
        threshold: Similarity the best match must exceed for its answer to be
            returned. Defaults to 0.6.

    Returns:
        The entry of ``answers`` at the position of the best-matching
        question, or a fallback message when no question is similar enough
        or when that position has no answer.
    """
    input_vector = vectorizer.transform([user_input])
    # A single input row yields one row of scores, one per dataset question.
    similarities = cosine_similarity(input_vector, question_vectors)[0]

    best_match_idx = int(similarities.argmax())
    best_match_score = similarities[best_match_idx]

    # Answers are matched to questions by position only, so guard against a
    # question index that has no corresponding answer instead of raising.
    if best_match_score > threshold and best_match_idx < len(answers):
        return answers[best_match_idx]
    return "I'm sorry, I don't understand. Can you rephrase?"  # Default response

# Interactive loop: answer questions until the user types 'exit' or input ends.
print("Chatbot is ready! Type 'exit' to quit.")

while True:
    try:
        user_question = input("\nYou: ")
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C or exhausted piped input: quit cleanly rather than
        # ending with a traceback.
        print("\nChatbot: Goodbye!")
        break

    # Ignore surrounding whitespace so inputs such as "exit " still quit.
    if user_question.strip().lower() == "exit":
        print("Chatbot: Goodbye!")
        break

    response = get_best_response(user_question)
    print("Chatbot:", response)