Spaces:
Sleeping
Sleeping
File size: 4,192 Bytes
2399d14 fd6d7e3 1fdaaa3 fd6d7e3 e6cc2e1 655daa8 6d92dea fd6d7e3 1745079 ffeb0c8 fd6d7e3 c6f5f0c fd6d7e3 ffeb0c8 fd6d7e3 d0985e3 fd6d7e3 ffeb0c8 fd6d7e3 ffeb0c8 fd6d7e3 ffeb0c8 60817f8 fd6d7e3 e6cc2e1 1fdaaa3 fd6d7e3 1ce4f04 fd6d7e3 655daa8 fd6d7e3 9e3fff7 fd6d7e3 9e3fff7 fd6d7e3 bd22509 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import gradio as gr
import random
from huggingface_hub import InferenceClient
# import lines go at the top: any libraries I need to import go up here ^^
from sentence_transformers import SentenceTransformer
import torch
# Hosted LLM that generates the chatbot's replies.
client = InferenceClient('Qwen/Qwen2.5-72B-Instruct')

# Step 1: load the knowledge-base document that grounds every answer.
with open("Mental health chatbot text.txt", "r", encoding="utf-8") as f:
    mental_health_text = f.read()
# Step 2: Preprocess text into sentence chunks
def preprocess_text(text):
    """Split raw document text into deduplicated retrieval chunks.

    Three chunking passes are combined: single sentences, overlapping
    windows of up to three sentences, and blank-line-separated paragraphs.
    Exact duplicates and very short fragments are dropped at the end.

    Args:
        text: raw document contents.

    Returns:
        List of unique chunk strings (each longer than 15 characters),
        in sentence → window → paragraph order.
    """
    body = text.strip()

    # Pass 1: individual sentences (naive split on '.'), keep non-trivial ones.
    sentences = [part.strip() for part in body.split('.') if part.strip()]
    sentence_chunks = [s.strip() for s in sentences if len(s.strip()) > 10]

    # Pass 2: windows of up to three sentences, stepping by two (so
    # consecutive windows overlap by one sentence).
    combined_chunks = []
    start = 0
    while start < len(sentences):
        window = '. '.join(sentences[start:start + 3]).strip()
        if len(window) > 20:
            combined_chunks.append(window)
        start += 2

    # Pass 3: whole paragraphs, delimited by blank lines.
    paragraphs = [p.strip() for p in body.split('\n\n') if p.strip()]
    paragraph_chunks = [p for p in paragraphs if len(p) > 30]

    # Merge all passes, skipping exact duplicates and short fragments.
    seen = set()
    final_chunks = []
    for candidate in sentence_chunks + combined_chunks + paragraph_chunks:
        if candidate not in seen and len(candidate) > 15:
            seen.add(candidate)
            final_chunks.append(candidate)

    print(f"Created {len(final_chunks)} chunks using advanced strategy")
    print(f"Sample chunks: {final_chunks[:3]}")
    return final_chunks
# Chunk the source document once at startup; reused for every retrieval below.
cleaned_chunks = preprocess_text(mental_health_text)
# Step 3: Convert chunks into embeddings
# Sentence-embedding model used for both chunk and query encoding.
model = SentenceTransformer('all-MiniLM-L6-v2')
def create_embeddings(text_chunks):
    """Encode every chunk with the module-level sentence-transformer.

    Args:
        text_chunks: list of chunk strings to embed.

    Returns:
        Torch tensor of embeddings (one row per chunk), as produced by
        `model.encode(..., convert_to_tensor=True)`.
    """
    encoded = model.encode(text_chunks, convert_to_tensor=True)
    print(f"Embeddings shape: {encoded.shape}")
    return encoded

# Pre-compute embeddings for the whole knowledge base at startup.
chunk_embeddings = create_embeddings(cleaned_chunks)
# Step 4: Retrieve top matching chunks
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return the text chunks most similar to `query`.

    Similarity is cosine: query and chunk embeddings are L2-normalised and
    compared with a dot product.

    Args:
        query: free-text search string.
        chunk_embeddings: 2-D tensor of pre-computed chunk embeddings,
            one row per entry in `text_chunks`.
        text_chunks: list of chunk strings aligned with `chunk_embeddings`.
        top_k: maximum number of results to return.

    Returns:
        List of up to `top_k` chunk strings, best match first.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)
    query_norm = query_embedding / query_embedding.norm()
    chunks_norm = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
    similarities = torch.matmul(chunks_norm, query_norm)
    # Clamp k: torch.topk raises if asked for more elements than exist,
    # which would crash on a knowledge base smaller than top_k.
    k = min(top_k, len(text_chunks))
    top_indices = torch.topk(similarities, k=k).indices
    return [text_chunks[int(i)] for i in top_indices]
# Step 5: Relevance checker
def is_mental_health_related(query):
    """Heuristic topic filter.

    Args:
        query: user query string.

    Returns:
        True if `query` contains any known mental-health keyword
        (case-insensitive substring match), else False.
    """
    # All keywords must be lowercase because the query is lowercased before
    # matching. Fix: 'PTSD' was uppercase and could never match.
    mental_health_keywords = [
        'anxiety', 'depression', 'stress', 'mental health', 'therapy', 'counseling',
        'mood', 'emotions', 'feelings', 'wellbeing', 'self-care', 'mindfulness',
        'meditation', 'coping', 'support', 'psychology', 'psychiatry', 'bipolar',
        'trauma', 'ptsd', 'panic', 'worry', 'sad', 'happy', 'angry', 'fear',
        'self-esteem', 'confidence', 'resilience', 'healing', 'recovery',
        'mental', 'emotional', 'psychological', 'behavioral', 'cognitive'
    ]
    query_lower = query.lower()
    return any(keyword in query_lower for keyword in mental_health_keywords)
# Smoke-test retrieval with a few sample queries before launching the UI.
queries = [
    "Managing daily stress and anxiety",
    "Building healthy coping mechanisms",
    "Practicing mindfulness and self-care"
]

for sample_query in queries:
    print(f"\nQuery: {sample_query}")
    matches = get_top_chunks(sample_query, chunk_embeddings, cleaned_chunks)
    for rank, match in enumerate(matches, 1):
        print(f"Result {rank}: {match}")
def respond(message, history):
    """Gradio chat handler: retrieve relevant chunks, then ask the LLM.

    NOTE(review): `history` is extended directly into the chat-completion
    message list, which assumes gradio passes OpenAI-style
    {"role": ..., "content": ...} dicts — confirm the installed gradio
    version's ChatInterface history format.

    Args:
        message: latest user message.
        history: prior conversation turns supplied by gradio.

    Returns:
        The assistant's reply text, stripped of surrounding whitespace.
    """
    # Ground the system prompt in the best-matching knowledge-base chunks.
    top_results = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
    print(top_results)
    conversation = [{"role": "system", "content": f"You are a friendly chatbot. You give people advice about mental health. Base your response on the following information: {top_results}"}]
    if history:
        conversation.extend(history)
    conversation.append({"role": "user", "content": message})
    completion = client.chat_completion(conversation, max_tokens=100)
    return completion['choices'][0]['message']['content'].strip()
# Earlier prototype handlers, kept for reference:
# def echo(message, history):
# return message
# def yes_or_no(message, history):
# return random.choice(['Yes', 'No', 'Maybe', 'Ask Again'])
# Chat UI: gradio calls `respond(message, history)` for each user turn.
chatbot = gr.ChatInterface(respond)
# defining my chatbot so that the user can interact and see their conversation history and send new messages
# launch() starts the web server (blocking call).
chatbot.launch()
# hehe test