# NOTE: the lines that were here ("Spaces: / Sleeping") were Hugging Face
# Spaces page-status residue from a web scrape, not part of the program.
# --- Imports (all libraries used by this app) --------------------------------
import gradio as gr
import random
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch

# LLM endpoint used to generate the chatbot's replies.
client = InferenceClient('Qwen/Qwen2.5-72B-Instruct')
# Step 1: Load the raw knowledge base the retriever will search over.
with open("Mental health chatbot text.txt", "r", encoding="utf-8") as source_file:
    mental_health_text = source_file.read()
# Step 2: Preprocess text into retrieval chunks.
def preprocess_text(text):
    """Split raw text into deduplicated chunks for retrieval.

    Three chunking strategies are combined: single sentences, overlapping
    three-sentence windows, and whole paragraphs. Exact duplicates and very
    short chunks are discarded, preserving first-seen order.
    """
    body = text.strip()

    # Individual sentences via a naive split on '.' (empty pieces dropped).
    pieces = [part.strip() for part in body.split('.') if part.strip()]
    single_sentences = [part for part in pieces if len(part) > 10]

    # Windows of up to three sentences, stepping by two (adjacent windows overlap).
    grouped = []
    for start in range(0, len(pieces), 2):
        window = '. '.join(pieces[start:start + 3]).strip()
        if len(window) > 20:
            grouped.append(window)

    # Whole paragraphs, assumed separated by blank lines.
    paragraphs = [block.strip() for block in body.split('\n\n') if block.strip()]
    long_paragraphs = [block for block in paragraphs if len(block) > 30]

    # Merge all strategies; keep the first occurrence only and drop tiny chunks.
    candidates = single_sentences + grouped + long_paragraphs
    seen = set()
    final_chunks = []
    for candidate in candidates:
        if candidate not in seen and len(candidate) > 15:
            seen.add(candidate)
            final_chunks.append(candidate)

    print(f"Created {len(final_chunks)} chunks using advanced strategy")
    print(f"Sample chunks: {final_chunks[:3]}")
    return final_chunks
# Build the chunk list once at startup.
cleaned_chunks = preprocess_text(mental_health_text)

# Step 3: Sentence-embedding model used for both chunks and queries.
model = SentenceTransformer('all-MiniLM-L6-v2')
def create_embeddings(text_chunks):
    """Encode every text chunk into a dense vector; returns a torch tensor."""
    vectors = model.encode(text_chunks, convert_to_tensor=True)
    print(f"Embeddings shape: {vectors.shape}")
    return vectors


# Embed the whole knowledge base once at startup.
chunk_embeddings = create_embeddings(cleaned_chunks)
# Step 4: Retrieve the chunks most relevant to a query.
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return up to ``top_k`` chunks most similar to ``query``.

    Similarity is cosine similarity: both the query embedding and the chunk
    embeddings are L2-normalised, so their dot product is the cosine score.

    query: user text to embed and match.
    chunk_embeddings: 2-D tensor of chunk vectors (one row per chunk).
    text_chunks: chunk strings aligned row-for-row with chunk_embeddings.
    top_k: maximum number of chunks to return.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)
    # Normalise so matmul below computes cosine similarity.
    query_norm = query_embedding / query_embedding.norm()
    chunks_norm = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
    similarities = torch.matmul(chunks_norm, query_norm)
    # Bug fix: torch.topk raises if k exceeds the number of rows, which
    # happens whenever there are fewer chunks than top_k — clamp it.
    k = min(top_k, len(text_chunks))
    top_indices = torch.topk(similarities, k=k).indices
    return [text_chunks[i] for i in top_indices]
# Step 5: Relevance checker (keyword heuristic).
def is_mental_health_related(query):
    """Heuristically decide whether *query* is about mental health.

    Performs a case-insensitive substring match of the query against a fixed
    keyword list. Returns True on the first keyword found.
    """
    mental_health_keywords = [
        'anxiety', 'depression', 'stress', 'mental health', 'therapy', 'counseling',
        'mood', 'emotions', 'feelings', 'wellbeing', 'self-care', 'mindfulness',
        'meditation', 'coping', 'support', 'psychology', 'psychiatry', 'bipolar',
        # Bug fix: the query is lowercased below, so the old uppercase 'PTSD'
        # keyword could never match — it must be lowercase here.
        'trauma', 'ptsd', 'panic', 'worry', 'sad', 'happy', 'angry', 'fear',
        'self-esteem', 'confidence', 'resilience', 'healing', 'recovery',
        'mental', 'emotional', 'psychological', 'behavioral', 'cognitive'
    ]
    query_lower = query.lower()
    # NOTE(review): plain substring matching — short keywords like 'sad' also
    # match inside unrelated words (e.g. 'crusade'); acceptable for a heuristic.
    return any(keyword in query_lower for keyword in mental_health_keywords)
# Quick startup sanity check: run a few sample queries through the retriever
# and print what comes back.
queries = [
    "Managing daily stress and anxiety",
    "Building healthy coping mechanisms",
    "Practicing mindfulness and self-care"
]
for sample_query in queries:
    print(f"\nQuery: {sample_query}")
    matches = get_top_chunks(sample_query, chunk_embeddings, cleaned_chunks)
    for rank, match in enumerate(matches, 1):
        print(f"Result {rank}: {match}")
def respond(message, history):
    """Answer one chat turn: retrieve relevant chunks, then ask the LLM.

    message: the user's latest message.
    history: prior conversation turns supplied by gr.ChatInterface.
    Returns the assistant's reply as a plain string.
    """
    top_results = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
    print(top_results)  # debug: show the retrieved context
    system_message = {
        "role": "system",
        "content": f"You are a friendly chatbot. You give people advice about mental health. Base your response on the following information: {top_results}",
    }
    messages = [system_message]
    # NOTE(review): assumes history entries are already {"role", "content"}
    # dicts — confirm against the ChatInterface message format in use.
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})
    response = client.chat_completion(messages, max_tokens=100)
    return response['choices'][0]['message']['content'].strip()
# Earlier experiments kept for reference:
# def echo(message, history):
#     return message
# def yes_or_no(message, history):
#     return random.choice(['Yes', 'No', 'Maybe', 'Ask Again'])

# Chat UI: lets the user see the conversation history and send new messages.
chatbot = gr.ChatInterface(respond)
chatbot.launch()