# ChatBot / chatbot_ui1.py — Streamlit mental-health FAQ chatbot.
# (Hugging Face upload metadata removed: user Draco15628, "Uploading 7 files",
#  commit 7c45be9 verified.)
import streamlit as st
import pandas as pd
import torch
from io import BytesIO
from gtts import gTTS
from sentence_transformers import SentenceTransformer, util
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import datetime # Logging
import json # Chat history
from textblob import TextBlob # Sentiment analysis
from langdetect import detect # Language detection
from googletrans import Translator # FIXED: Import Translator
import speech_recognition as sr # Voice recognition
# Load dataset
@st.cache_data
def load_dataset():
    """Load the FAQ CSV and return aligned (questions, answers) lists.

    Returns:
        tuple[list, list]: parallel lists of question and answer strings.
        Both are empty (and a Streamlit error is shown) when the expected
        'Questions'/'Answers' columns are missing.
    """
    df = pd.read_csv("Mental_Health_FAQ.csv")
    if "Questions" not in df.columns or "Answers" not in df.columns:
        st.error("❌ Error: Dataset must contain 'Questions' and 'Answers' columns.")
        return [], []
    # Drop rows where EITHER field is missing so the two lists stay aligned.
    # (Independent dropna() calls per column could pair question i with the
    # wrong answer whenever only one of the two fields is NaN.)
    df = df.dropna(subset=["Questions", "Answers"])
    return df["Questions"].tolist(), df["Answers"].tolist()


questions, answers = load_dataset()
# Load models
@st.cache_resource
def load_models():
    """Load and cache the sentence-embedding model and the BlenderBot seq2seq
    chatbot (model + tokenizer).

    Returns:
        tuple: (embedding_model, chatbot_model, chatbot_tokenizer).
    """
    embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
    chatbot_model_name = "facebook/blenderbot-400M-distill"
    chatbot_model = AutoModelForSeq2SeqLM.from_pretrained(chatbot_model_name)
    chatbot_tokenizer = AutoTokenizer.from_pretrained(chatbot_model_name)
    return embedding_model, chatbot_model, chatbot_tokenizer


embedding_model, chatbot_model, chatbot_tokenizer = load_models()
# Generate embeddings for dataset questions
@st.cache_data
def generate_question_embeddings():
    """Pre-compute embeddings for every FAQ question.

    Returns:
        torch.Tensor | None: stacked question embeddings, or None when the
        FAQ dataset is empty.
    """
    if questions:
        return embedding_model.encode(questions, convert_to_tensor=True)
    return None


question_embeddings = generate_question_embeddings()
def get_best_response(user_input):
    """Return the closest matching FAQ answer, or a BlenderBot reply.

    A cosine-similarity score above 0.7 against the pre-computed question
    embeddings returns the stored FAQ answer; anything below that threshold
    falls through to the generative model.

    Args:
        user_input: the user's (English) message.

    Returns:
        str: the chosen FAQ answer or a generated response.
    """
    if not questions:
        return "Sorry, the FAQ database is empty."

    input_embedding = embedding_model.encode(user_input, convert_to_tensor=True)
    similarities = util.pytorch_cos_sim(input_embedding, question_embeddings)[0].cpu()
    best_match_idx = torch.argmax(similarities).item()
    best_match_score = similarities[best_match_idx].item()
    if best_match_score > 0.7:
        return answers[best_match_idx]

    # No confident FAQ match — fall back to the generative chatbot.
    inputs = chatbot_tokenizer(user_input, return_tensors="pt")
    outputs = chatbot_model.generate(**inputs)
    return chatbot_tokenizer.decode(outputs[0], skip_special_tokens=True)
def analyze_sentiment(text):
    """Classify *text* polarity with TextBlob and return an emoji label.

    Args:
        text: the text to analyze.

    Returns:
        str: "😊 Positive", "😞 Negative", or "😐 Neutral".
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return "😊 Positive"
    if polarity < 0:
        return "😞 Negative"
    return "😐 Neutral"
def text_to_speech(text):
    """Render *text* to an in-memory MP3 via Google TTS.

    Args:
        text: the (English) text to speak.

    Returns:
        BytesIO: MP3 audio buffer, rewound to position 0 so st.audio can
        read it from the start.
    """
    tts = gTTS(text=text, lang="en")
    audio_file = BytesIO()
    tts.write_to_fp(audio_file)
    audio_file.seek(0)
    return audio_file
def transcribe_audio():
    """Capture microphone input and transcribe it with the Google Speech API.

    Returns:
        str: the transcription, or a human-readable error message when the
        speech was unintelligible or the recognition service failed.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        st.write("🎤 Listening... Speak now!")
        audio = recognizer.listen(source)
    # The network call does not need the microphone held open.
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Sorry, I couldn't understand that."
    except sr.RequestError:
        return "Speech recognition service error."
# Translator used to normalize non-English input to English before matching.
# NOTE(review): googletrans >= 4.0 makes Translator.translate() a coroutine —
# confirm the pinned googletrans version exposes the synchronous 3.x API.
translator = Translator()

# ---- Streamlit UI ----
st.title("🧠 Mental Health Chatbot 🤖")
st.write("Ask me anything about mental health!")

st.subheader("💬 Chat with me")
user_input = st.chat_input("Type your message here...")

# Voice input replaces typed input for this script run.
if st.button("🎙️ Use Voice Input"):
    user_input = transcribe_audio()

if user_input:
    # Detect language and translate to English so the English-only models
    # (embeddings, BlenderBot, TextBlob) can handle the message.
    detected_lang = detect(user_input)
    if detected_lang != "en":
        user_input = translator.translate(user_input, dest="en").text

    response = get_best_response(user_input)
    sentiment_result = analyze_sentiment(user_input)
    audio_file = text_to_speech(response)

    st.write(f"**Response:** {response}")
    st.write(f"**Sentiment:** {sentiment_result}")
    st.audio(audio_file, format="audio/mp3", start_time=0)