import streamlit as st
import pandas as pd
import torch
from io import BytesIO
from gtts import gTTS
from sentence_transformers import SentenceTransformer, util
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import datetime  # Logging
import json  # Chat history
from textblob import TextBlob  # Sentiment analysis
from langdetect import detect  # Language detection
from googletrans import Translator  # Translation of non-English input
import speech_recognition as sr  # Voice recognition


# Load dataset
@st.cache_data
def load_dataset():
    """Load the FAQ CSV and return parallel lists of questions and answers.

    Returns:
        tuple[list, list]: (questions, answers), aligned index-for-index.
        Returns ([], []) and shows a Streamlit error when the expected
        columns are missing.
    """
    df = pd.read_csv("Mental_Health_FAQ.csv")
    if "Questions" not in df.columns or "Answers" not in df.columns:
        st.error("❌ Error: Dataset must contain 'Questions' and 'Answers' columns.")
        return [], []
    # FIX: drop a row only when either field is missing. The original code
    # called .dropna() on each column independently, which could misalign
    # the two lists (question i paired with the wrong answer) whenever a
    # row had exactly one of the two fields empty.
    df = df.dropna(subset=["Questions", "Answers"])
    return df["Questions"].tolist(), df["Answers"].tolist()


questions, answers = load_dataset()


# Load models
@st.cache_resource
def load_models():
    """Load and cache the sentence embedder plus the BlenderBot model/tokenizer."""
    embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
    chatbot_model_name = "facebook/blenderbot-400M-distill"
    chatbot_model = AutoModelForSeq2SeqLM.from_pretrained(chatbot_model_name)
    chatbot_tokenizer = AutoTokenizer.from_pretrained(chatbot_model_name)
    return embedding_model, chatbot_model, chatbot_tokenizer


embedding_model, chatbot_model, chatbot_tokenizer = load_models()


# Generate embeddings for dataset questions
@st.cache_data
def generate_question_embeddings():
    """Encode all FAQ questions once; returns a tensor, or None if no questions."""
    if questions:
        return embedding_model.encode(questions, convert_to_tensor=True)
    return None


question_embeddings = generate_question_embeddings()


def get_best_response(user_input):
    """Finds the closest matching FAQ answer or generates a response using BlenderBot.

    Args:
        user_input: The user's message (assumed English at this point).

    Returns:
        str: A canned FAQ answer when cosine similarity exceeds 0.7,
        otherwise a BlenderBot-generated reply.
    """
    if not questions:
        return "Sorry, the FAQ database is empty."
    input_embedding = embedding_model.encode(user_input, convert_to_tensor=True)
    similarities = util.pytorch_cos_sim(input_embedding, question_embeddings)[0].cpu()
    best_match_idx = torch.argmax(similarities).item()
    best_match_score = similarities[best_match_idx].item()
    # High-confidence FAQ hit: return the curated answer directly.
    if best_match_score > 0.7:
        return answers[best_match_idx]
    # Otherwise fall back to the generative model.
    inputs = chatbot_tokenizer(user_input, return_tensors="pt")
    outputs = chatbot_model.generate(**inputs)
    return chatbot_tokenizer.decode(outputs[0], skip_special_tokens=True)


def analyze_sentiment(text):
    """Analyzes sentiment and returns an emoji representation."""
    sentiment = TextBlob(text).sentiment.polarity
    if sentiment > 0:
        return "😊 Positive"
    elif sentiment < 0:
        return "😞 Negative"
    else:
        return "😐 Neutral"


def text_to_speech(text):
    """Converts chatbot response to speech; returns an in-memory MP3 buffer."""
    tts = gTTS(text=text, lang="en")
    audio_file = BytesIO()
    tts.write_to_fp(audio_file)
    audio_file.seek(0)
    return audio_file


def transcribe_audio():
    """Allows user to speak input instead of typing.

    Returns:
        str: The recognized text, or a human-readable error message when
        recognition fails (callers treat the return value as chat input).
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        st.write("🎤 Listening... Speak now!")
        audio = recognizer.listen(source)
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Sorry, I couldn't understand that."
    except sr.RequestError:
        return "Speech recognition service error."
# Initialize Translator
translator = Translator()

# Streamlit UI
st.title("🧠 Mental Health Chatbot 🤖")
st.write("Ask me anything about mental health!")

# User Input
st.subheader("💬 Chat with me")
user_input = st.chat_input("Type your message here...")

# Voice Input Button
if st.button("🎙️ Use Voice Input"):
    user_input = transcribe_audio()

# Process User Input
if user_input:
    # FIX: langdetect raises LangDetectException on short, numeric, or
    # otherwise featureless text (e.g. "ok", "123", emoji) — common chat
    # inputs that previously crashed the app. Fall back to English.
    # Broad except is deliberate: LangDetectException is not imported at
    # the top of this file, and any detection failure should degrade to
    # "assume English" rather than abort the request.
    try:
        detected_lang = detect(user_input)
    except Exception:
        detected_lang = "en"

    # Translate non-English input to English before matching/generation.
    if detected_lang != "en":
        user_input = translator.translate(user_input, dest="en").text

    # Get chatbot response
    response = get_best_response(user_input)

    # Sentiment analysis on the (translated) user message
    sentiment_result = analyze_sentiment(user_input)

    # Convert response to speech
    audio_file = text_to_speech(response)

    # Display chatbot response
    st.write(f"**Response:** {response}")
    st.write(f"**Sentiment:** {sentiment_result}")
    st.audio(audio_file, format="audio/mp3", start_time=0)