import streamlit as st
import pandas as pd
import torch
from io import BytesIO
from gtts import gTTS
from sentence_transformers import SentenceTransformer, util
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import datetime  # Reserved for logging timestamps (not used below)
import json  # Reserved for chat-history persistence (not used below)
from textblob import TextBlob  # Sentiment analysis
from langdetect import detect  # Language detection
from googletrans import Translator  # Translation; assumes a synchronous googletrans release (e.g. 4.0.0rc1)
import speech_recognition as sr  # Voice recognition

# Load dataset
@st.cache_data
def load_dataset():
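    # Assumes Mental_Health_FAQ.csv sits in the app's working directory with 'Questions' and 'Answers' columns.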
    df = pd.read_csv("Mental_Health_FAQ.csv")
    if "Questions" not in df.columns or "Answers" not in df.columns:
        st.error("❌ Error: Dataset must contain 'Questions' and 'Answers' columns.")
        return [], []
    
    return df["Questions"].dropna().tolist(), df["Answers"].dropna().tolist()

questions, answers = load_dataset()

# Load models
@st.cache_resource
def load_models():
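    # Both models are fetched from the Hugging Face Hub on first run; st.cache_resource keeps them loaded across reruns.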
    embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
    chatbot_model_name = "facebook/blenderbot-400M-distill"
    chatbot_model = AutoModelForSeq2SeqLM.from_pretrained(chatbot_model_name)
    chatbot_tokenizer = AutoTokenizer.from_pretrained(chatbot_model_name)
    return embedding_model, chatbot_model, chatbot_tokenizer

embedding_model, chatbot_model, chatbot_tokenizer = load_models()

# Generate embeddings for dataset questions
@st.cache_data
def generate_question_embeddings():
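    # Encoded once per session; st.cache_data serializes the resulting tensor and reuses it on reruns.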
    if questions:
        return embedding_model.encode(questions, convert_to_tensor=True)
    return None

question_embeddings = generate_question_embeddings()

def get_best_response(user_input):
    """Finds the closest matching FAQ answer or generates a response using BlenderBot."""
    if not questions:
        return "Sorry, the FAQ database is empty."

    input_embedding = embedding_model.encode(user_input, convert_to_tensor=True)
    similarities = util.pytorch_cos_sim(input_embedding, question_embeddings)[0].cpu()

    best_match_idx = torch.argmax(similarities).item()
    best_match_score = similarities[best_match_idx].item()

    # Reuse the FAQ answer only when similarity clears the 0.7 cutoff; otherwise fall back to BlenderBot.
    if best_match_score > 0.7:
        return answers[best_match_idx]
    
    # Generate response using BlenderBot
    inputs = chatbot_tokenizer(user_input, return_tensors="pt")
    outputs = chatbot_model.generate(**inputs)
    return chatbot_tokenizer.decode(outputs[0], skip_special_tokens=True)

def analyze_sentiment(text):
    """Analyzes sentiment and returns an emoji representation."""
    sentiment = TextBlob(text).sentiment.polarity
    if sentiment > 0:
        return "😊 Positive"
    elif sentiment < 0:
        return "😞 Negative"
    else:
        return "😐 Neutral"

def text_to_speech(text):
    """Converts chatbot response to speech."""
    tts = gTTS(text=text, lang="en")
    audio_file = BytesIO()
    tts.write_to_fp(audio_file)
    audio_file.seek(0)
    return audio_file

def transcribe_audio():
    """Allows user to speak input instead of typing."""
    recognizer = sr.Recognizer()
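    # sr.Microphone requires PyAudio and records on the machine running Streamlit, so this only works for a local deployment.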
    with sr.Microphone() as source:
        st.write("🎀 Listening... Speak now!")
        audio = recognizer.listen(source)
    
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Sorry, I couldn't understand that."
    except sr.RequestError:
        return "Speech recognition service error."

# Translator instance used to normalize non-English input to English
translator = Translator()

# Streamlit UI
st.title("🧠 Mental Health Chatbot 🤖")
st.write("Ask me anything about mental health!")

# User Input
st.subheader("💬 Chat with me")
user_input = st.chat_input("Type your message here...")

# Voice Input Button
if st.button("🎙️ Use Voice Input"):
    user_input = transcribe_audio()

# Process User Input
if user_input:
    # Detect language using `langdetect`
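    # (langdetect can misclassify or raise LangDetectException on very short inputs; a try/except fallback to "en" may be worth adding)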
    detected_lang = detect(user_input)
    
    if detected_lang != "en":
        user_input = translator.translate(user_input, dest="en").text  # Normalize to English before matching/generation

    # Get chatbot response
    response = get_best_response(user_input)
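    # Note: the reply is produced in English; echoing it back in detected_lang would need another translator.translate call.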

    # Sentiment analysis
    sentiment_result = analyze_sentiment(user_input)

    # Convert response to speech
    audio_file = text_to_speech(response)

    # Display chatbot response
    st.write(f"**Response:** {response}")
    st.write(f"**Sentiment:** {sentiment_result}")
    st.audio(audio_file, format="audio/mp3", start_time=0)