# Otium_testing / app.py
import gradio as gr
import random
from huggingface_hub import InferenceClient
# import lines go at the top: any libraries I need to import go up here ^^
from sentence_transformers import SentenceTransformer
import torch
# Remote chat backend: completions are served by this hosted Qwen model
# through the Hugging Face Inference API (no local LLM weights needed).
client = InferenceClient ('Qwen/Qwen2.5-72B-Instruct')
# Step 1
# Load the knowledge-base document that grounds the chatbot's answers.
# NOTE(review): the file must sit next to app.py in the Space repo —
# a missing file aborts the whole app at import time.
with open("Mental health chatbot text.txt", "r", encoding="utf-8") as f:
    mental_health_text = f.read()
# Step 2: Preprocess text into sentence chunks
def preprocess_text(text):
    """Split raw text into deduplicated retrieval chunks.

    Produces three chunk granularities and merges them:
      1. single sentences longer than 10 chars,
      2. overlapping windows of up to 3 sentences (step 2) longer than 20 chars,
      3. paragraphs (blank-line separated) longer than 30 chars.

    Args:
        text: The raw document text.

    Returns:
        list[str]: Order-preserving, deduplicated chunks, each > 15 chars.
    """
    cleaned_text = text.strip()
    # Sentence split on '.'; each piece is stripped once here, so the
    # original's repeated re-stripping downstream was redundant.
    sentences = [s.strip() for s in cleaned_text.split('.') if s.strip()]
    sentence_chunks = [s for s in sentences if len(s) > 10]
    # Overlapping 3-sentence windows stepping by 2 keep cross-sentence context.
    combined_chunks = []
    for i in range(0, len(sentences), 2):
        chunk = '. '.join(sentences[i:i + 3])
        if len(chunk) > 20:
            combined_chunks.append(chunk)
    # Paragraph-level chunks preserve larger-scale context.
    paragraphs = [p.strip() for p in cleaned_text.split('\n\n') if p.strip()]
    paragraph_chunks = [p for p in paragraphs if len(p) > 30]
    # Deduplicate while preserving first-seen order; drop very short chunks.
    seen = set()
    final_chunks = []
    for chunk in sentence_chunks + combined_chunks + paragraph_chunks:
        if chunk not in seen and len(chunk) > 15:
            seen.add(chunk)
            final_chunks.append(chunk)
    print(f"Created {len(final_chunks)} chunks using advanced strategy")
    print(f"Sample chunks: {final_chunks[:3]}")
    return final_chunks
# Build the retrieval corpus once at import time.
cleaned_chunks = preprocess_text(mental_health_text)
# Step 3: Convert chunks into embeddings
# Small, CPU-friendly sentence-embedding model used for both the corpus
# and incoming queries (must be the same model for similarity to be valid).
model = SentenceTransformer('all-MiniLM-L6-v2')
def create_embeddings(text_chunks):
    """Encode each text chunk into a dense vector with the shared model.

    Args:
        text_chunks: List of chunk strings to embed.

    Returns:
        A 2-D tensor of embeddings, one row per chunk.
    """
    embeddings = model.encode(text_chunks, convert_to_tensor=True)
    print(f"Embeddings shape: {embeddings.shape}")
    return embeddings
chunk_embeddings = create_embeddings(cleaned_chunks)
# Step 4: Retrieve top matching chunks
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=3):
    """Return the chunks most similar to the query by cosine similarity.

    Args:
        query: The user's query string.
        chunk_embeddings: 2-D tensor of corpus embeddings (one row per chunk).
        text_chunks: The chunk strings aligned row-for-row with the embeddings.
        top_k: Maximum number of chunks to return (default 3).

    Returns:
        list[str]: Up to top_k best-matching chunks, most similar first.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)
    # Cosine similarity = dot product of L2-normalized vectors.
    query_norm = query_embedding / query_embedding.norm()
    chunks_norm = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
    similarities = torch.matmul(chunks_norm, query_norm)
    # Clamp k: torch.topk raises if k exceeds the number of chunks, which
    # would crash on a corpus with fewer than top_k entries.
    k = min(top_k, similarities.shape[0])
    top_indices = torch.topk(similarities, k=k).indices
    return [text_chunks[i] for i in top_indices]
# Step 5: Relevance checker
def is_mental_health_related(query):
    """Heuristic relevance filter for incoming queries.

    Args:
        query: The user's query string.

    Returns:
        bool: True if the lowercased query contains any mental-health keyword.

    NOTE: matching is plain substring containment, so short keywords can
    over-match (e.g. 'mental' inside 'fundamental').
    """
    # All keywords must be lowercase: the query is lowercased before the
    # substring test, so the original uppercase 'PTSD' entry never matched.
    mental_health_keywords = [
        'anxiety', 'depression', 'stress', 'mental health', 'therapy', 'counseling',
        'mood', 'emotions', 'feelings', 'wellbeing', 'self-care', 'mindfulness',
        'meditation', 'coping', 'support', 'psychology', 'psychiatry', 'bipolar',
        'trauma', 'ptsd', 'panic', 'worry', 'sad', 'happy', 'angry', 'fear',
        'self-esteem', 'confidence', 'resilience', 'healing', 'recovery',
        'mental', 'emotional', 'psychological', 'behavioral', 'cognitive'
    ]
    query_lower = query.lower()
    return any(keyword in query_lower for keyword in mental_health_keywords)
# Smoke-test the retriever: run a few sample queries and print the matches.
sample_queries = [
    "Managing daily stress and anxiety",
    "Building healthy coping mechanisms",
    "Practicing mindfulness and self-care",
]
for query in sample_queries:
    print(f"\nQuery: {query}")
    matches = get_top_chunks(query, chunk_embeddings, cleaned_chunks)
    for rank, match in enumerate(matches, 1):
        print(f"Result {rank}: {match}")
def respond(message, history):
    """Chat handler wired into the Gradio interface.

    Retrieves the chunks most relevant to `message`, injects them into a
    system prompt, replays the prior conversation, and asks the remote
    model for a short completion.

    NOTE(review): extending `messages` with `history` assumes Gradio hands
    over history as a list of {"role", "content"} dicts (the
    type="messages" format) — confirm against the ChatInterface setup.

    Args:
        message: Latest user message.
        history: Prior conversation turns supplied by Gradio.

    Returns:
        str: The assistant's reply, stripped of surrounding whitespace.
    """
    context_chunks = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
    print(context_chunks)
    messages = [{
        "role": "system",
        "content": (
            "You are a friendly chatbot. You give people advice about mental health. "
            f"Base your response on the following information: {context_chunks}"
        ),
    }]
    messages.extend(history or [])
    messages.append({"role": "user", "content": message})
    completion = client.chat_completion(messages, max_tokens=100)
    return completion['choices'][0]['message']['content'].strip()
# def echo(message, history):
# return message
# def yes_or_no(message, history):
# return random.choice(['Yes', 'No', 'Maybe', 'Ask Again'])
# type="messages" makes Gradio pass `history` as a list of
# {"role": ..., "content": ...} dicts — the exact shape `respond` extends
# into its chat-completion message list. Without it, Gradio's legacy
# tuple-pair history would corrupt the messages sent to the model.
chatbot = gr.ChatInterface(respond, type="messages")
# defining my chatbot so that the user can interact and see their conversation history and send new messages
chatbot.launch()
#hehe test