"""RAG-style mental-health chatbot.

Loads a reference text file, splits it into chunks, embeds the chunks
with a SentenceTransformer, retrieves the chunks most similar to each
user message, and asks a hosted LLM to answer grounded in that context.
"""

import gradio as gr
import random
import torch
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer

# Hosted LLM used to generate the final chat responses.
client = InferenceClient('Qwen/Qwen2.5-72B-Instruct')

# Step 1: load the reference document.
with open("Mental health chatbot text.txt", "r", encoding="utf-8") as f:
    mental_health_text = f.read()


# Step 2: Preprocess text into sentence chunks
def preprocess_text(text):
    """Split *text* into deduplicated chunks at three granularities.

    Produces single sentences, overlapping windows of up to three
    sentences, and whole paragraphs; then drops duplicates and very
    short fragments while preserving first-seen order.

    Returns a list of chunk strings.
    """
    cleaned_text = text.strip()
    sentences = [s.strip() for s in cleaned_text.split('.') if s.strip()]
    sentence_chunks = [s for s in sentences if len(s) > 10]

    # Overlapping windows of up to 3 sentences, stepping by 2, so
    # adjacent windows share one sentence of context.
    combined_chunks = []
    for i in range(0, len(sentences), 2):
        chunk = '. '.join(sentences[i:i + 3]).strip()
        if len(chunk) > 20:
            combined_chunks.append(chunk)

    paragraphs = [p.strip() for p in cleaned_text.split('\n\n') if p.strip()]
    paragraph_chunks = [p for p in paragraphs if len(p) > 30]

    # Deduplicate across all three granularities, keeping order and
    # discarding tiny fragments.
    all_chunks = sentence_chunks + combined_chunks + paragraph_chunks
    seen = set()
    final_chunks = []
    for chunk in all_chunks:
        if chunk not in seen and len(chunk) > 15:
            seen.add(chunk)
            final_chunks.append(chunk)

    print(f"Created {len(final_chunks)} chunks using advanced strategy")
    print(f"Sample chunks: {final_chunks[:3]}")
    return final_chunks


cleaned_chunks = preprocess_text(mental_health_text)

# Step 3: Convert chunks into embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')


def create_embeddings(text_chunks):
    """Encode *text_chunks* into a (num_chunks, dim) embedding tensor."""
    chunk_embeddings = model.encode(text_chunks, convert_to_tensor=True)
    print(f"Embeddings shape: {chunk_embeddings.shape}")
    return chunk_embeddings


chunk_embeddings = create_embeddings(cleaned_chunks)


# Step 4: Retrieve top matching chunks
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return up to *top_k* chunks most cosine-similar to *query*.

    Both the query embedding and the chunk embeddings are L2-normalized
    so the dot product equals cosine similarity.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)
    query_norm = query_embedding / query_embedding.norm()
    chunks_norm = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
    similarities = torch.matmul(chunks_norm, query_norm)
    # Clamp k: torch.topk raises if k exceeds the number of chunks.
    k = min(top_k, len(text_chunks))
    top_indices = torch.topk(similarities, k=k).indices
    return [text_chunks[i] for i in top_indices]


# Step 5: Relevance checker
def is_mental_health_related(query):
    """Return True if *query* contains any mental-health keyword.

    Matching is case-insensitive substring containment; keywords are
    kept lowercase because the query is lowercased before comparison.
    """
    mental_health_keywords = [
        'anxiety', 'depression', 'stress', 'mental health', 'therapy',
        'counseling', 'mood', 'emotions', 'feelings', 'wellbeing',
        'self-care', 'mindfulness', 'meditation', 'coping', 'support',
        'psychology', 'psychiatry', 'bipolar', 'trauma', 'ptsd',
        'panic', 'worry', 'sad', 'happy', 'angry', 'fear',
        'self-esteem', 'confidence', 'resilience', 'healing',
        'recovery', 'mental', 'emotional', 'psychological',
        'behavioral', 'cognitive'
    ]
    query_lower = query.lower()
    return any(keyword in query_lower for keyword in mental_health_keywords)


# Smoke-test retrieval on a few sample queries before launching the UI.
queries = [
    "Managing daily stress and anxiety",
    "Building healthy coping mechanisms",
    "Practicing mindfulness and self-care"
]
for q in queries:
    print(f"\nQuery: {q}")
    results = get_top_chunks(q, chunk_embeddings, cleaned_chunks)
    for idx, res in enumerate(results, 1):
        print(f"Result {idx}: {res}")


def respond(message, history):
    """Gradio chat handler: retrieve context for *message*, ask the LLM.

    *history* is expected as a list of OpenAI-style role/content dicts
    (see ``type="messages"`` on the ChatInterface below).
    """
    top_results = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
    print(top_results)
    messages = [{"role": "system", "content": f"You are a friendly chatbot. You give people advice about mental health. Base your response on the following information: {top_results}"}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})
    response = client.chat_completion(messages, max_tokens=100)
    return response['choices'][0]['message']['content'].strip()


# type="messages" makes gradio pass `history` as role/content dicts,
# matching what respond() splices into the chat_completion payload.
chatbot = gr.ChatInterface(respond, type="messages")
chatbot.launch()