File size: 4,192 Bytes
2399d14
fd6d7e3
1fdaaa3
fd6d7e3
e6cc2e1
 
655daa8
6d92dea
fd6d7e3
 
1745079
ffeb0c8
fd6d7e3
 
 
 
 
 
 
 
 
 
c6f5f0c
fd6d7e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ffeb0c8
fd6d7e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d0985e3
 
fd6d7e3
 
ffeb0c8
 
 
 
 
 
 
 
fd6d7e3
 
 
ffeb0c8
fd6d7e3
 
ffeb0c8
 
 
60817f8
 
fd6d7e3
 
 
 
 
e6cc2e1
1fdaaa3
fd6d7e3
 
 
1ce4f04
fd6d7e3
655daa8
fd6d7e3
 
 
 
 
 
9e3fff7
 
fd6d7e3
9e3fff7
 
fd6d7e3
 
 
 
bd22509
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import gradio as gr
import random
from huggingface_hub import InferenceClient
# import lines go at the top: any libraries I need to import go up here ^^
from sentence_transformers import SentenceTransformer
import torch

# Hugging Face Inference API client for the chat model used by respond().
# (Fixed non-PEP8 space between the callable and its argument list.)
client = InferenceClient('Qwen/Qwen2.5-72B-Instruct')

# Step 1: Load the knowledge-base document that grounds the chatbot's answers.
# NOTE(review): read at import time — a missing file raises FileNotFoundError
# before the app starts; confirm the file ships alongside the script.
with open("Mental health chatbot text.txt", "r", encoding="utf-8") as f:
    mental_health_text = f.read()

# Step 2: Preprocess text into retrieval chunks at several granularities
def preprocess_text(text):
    """Split raw text into deduplicated chunks for embedding retrieval.

    Three granularities are combined so retrieval can match both short
    facts and longer context:
      1. single sentences (naive split on '.', longer than 10 chars),
      2. overlapping 3-sentence windows (stride 2, longer than 20 chars),
      3. paragraphs split on blank lines (longer than 30 chars).

    Args:
        text: Raw document text.

    Returns:
        list[str]: Unique chunks longer than 15 characters, in
        sentence -> window -> paragraph order.
    """
    cleaned_text = text.strip()
    # Sentences are stripped here, so no further stripping is needed below
    # (the original re-stripped already-stripped strings).
    sentences = [s.strip() for s in cleaned_text.split('.') if s.strip()]
    sentence_chunks = [s for s in sentences if len(s) > 10]

    # Overlapping windows: width 3, stride 2, so consecutive windows share
    # one sentence of context.
    combined_chunks = []
    for start in range(0, len(sentences), 2):
        window = '. '.join(sentences[start:start + 3]).strip()
        if len(window) > 20:
            combined_chunks.append(window)

    paragraphs = [p.strip() for p in cleaned_text.split('\n\n') if p.strip()]
    paragraph_chunks = [p for p in paragraphs if len(p) > 30]

    all_chunks = sentence_chunks + combined_chunks + paragraph_chunks

    # Order-preserving dedup with a minimum-length floor.
    seen = set()
    final_chunks = []
    for chunk in all_chunks:
        if chunk not in seen and len(chunk) > 15:
            seen.add(chunk)
            final_chunks.append(chunk)

    print(f"Created {len(final_chunks)} chunks using advanced strategy")
    print(f"Sample chunks: {final_chunks[:3]}")
    return final_chunks

# Build the retrieval chunks once at import time from the loaded document.
cleaned_chunks = preprocess_text(mental_health_text)

# Step 3: Convert chunks into embeddings
# Pretrained sentence-embedding model used for both chunks and queries.
model = SentenceTransformer('all-MiniLM-L6-v2')

def create_embeddings(text_chunks):
    """Encode the chunk list into one tensor of sentence embeddings."""
    encoded = model.encode(text_chunks, convert_to_tensor=True)
    print(f"Embeddings shape: {encoded.shape}")
    return encoded


# Embed every chunk up front so queries only need a single encode call.
chunk_embeddings = create_embeddings(cleaned_chunks)

# Step 4: Retrieve top matching chunks
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return up to top_k chunks most similar to the query.

    Both the query embedding and the chunk embeddings are normalised to
    unit length, so the dot product equals cosine similarity.

    Args:
        query: User query string.
        chunk_embeddings: 2-D tensor of chunk embeddings (chunks x dim).
        text_chunks: Chunk strings aligned with chunk_embeddings rows.
        top_k: Maximum number of chunks to return.

    Returns:
        list[str]: Best-matching chunks, most similar first.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)
    query_norm = query_embedding / query_embedding.norm()
    chunks_norm = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
    similarities = torch.matmul(chunks_norm, query_norm)
    # Robustness fix: torch.topk raises if k exceeds the number of rows,
    # so clamp k to the chunk count (handles small corpora gracefully).
    k = min(top_k, len(text_chunks))
    top_indices = torch.topk(similarities, k=k).indices
    return [text_chunks[i] for i in top_indices]

# Step 5: Relevance checker
def is_mental_health_related(query):
    """Return True if the query mentions any mental-health keyword.

    Matching is case-insensitive substring search against a fixed
    keyword list.

    Args:
        query: User query string.

    Returns:
        bool: True when at least one keyword occurs in the query.
    """
    # BUG FIX: 'PTSD' was stored upper-case but tested against the
    # lower-cased query, so it could never match; keywords must all be
    # lower case for the `in query_lower` test to work.
    mental_health_keywords = [
        'anxiety', 'depression', 'stress', 'mental health', 'therapy', 'counseling',
        'mood', 'emotions', 'feelings', 'wellbeing', 'self-care', 'mindfulness',
        'meditation', 'coping', 'support', 'psychology', 'psychiatry', 'bipolar',
        'trauma', 'ptsd', 'panic', 'worry', 'sad', 'happy', 'angry', 'fear',
        'self-esteem', 'confidence', 'resilience', 'healing', 'recovery',
        'mental', 'emotional', 'psychological', 'behavioral', 'cognitive'
    ]

    query_lower = query.lower()
    return any(keyword in query_lower for keyword in mental_health_keywords)

# Smoke-test retrieval with a few sample queries before wiring up the UI.
queries = [
    "Managing daily stress and anxiety",
    "Building healthy coping mechanisms",
    "Practicing mindfulness and self-care",
]

for query in queries:
    print(f"\nQuery: {query}")
    top = get_top_chunks(query, chunk_embeddings, cleaned_chunks)
    for rank, chunk in enumerate(top, 1):
        print(f"Result {rank}: {chunk}")

def respond(message, history):
    """Gradio chat callback: answer grounded in the retrieved chunks.

    Retrieves the chunks most similar to the user message, injects them
    into the system prompt, replays the prior conversation, and asks the
    hosted model for a completion.
    """
    top_results = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
    print(top_results)

    system_prompt = (
        "You are a friendly chatbot. You give people advice about mental "
        f"health. Base your response on the following information: {top_results}"
    )
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(history or [])
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(messages, max_tokens=100)
    return response['choices'][0]['message']['content'].strip()

# def echo(message, history):
    # return message

# def yes_or_no(message, history):
    # return random.choice(['Yes', 'No', 'Maybe', 'Ask Again'])

# Wire respond() into a Gradio chat UI; ChatInterface shows the running
# conversation and passes it back to respond() as `history` each turn.
chatbot = gr.ChatInterface(respond)

# Start serving the chat interface.
chatbot.launch()