# TherapyEnglish / app.py
# Hugging Face Space (uploaded by Raemih — commit 7b3c687, verified)
import subprocess
import sys
# --- THE STABILIZER BLOCK ---
print("πŸ› οΈ Stabilizing environment and fixing Gradio compatibility...")
subprocess.check_call([
sys.executable, "-m", "pip", "install",
"tokenizers==0.20.1",
"transformers==4.45.2",
"huggingface-hub==0.24.7", # THE FIX: Pinning this prevents the HfFolder ImportError
"gradio==4.44.1"
])
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
# Merged fine-tuned weights served by this Space.
MODEL_REPO = "E-motionAssistant/llama-3.2-3b-english-therapy-merged"
# Tokenizer comes from the base instruct repo (assumes it matches the merged
# model's vocabulary — TODO confirm against the fine-tune config).
TOKENIZER_REPO = "unsloth/Llama-3.2-3B-Instruct"
# System message prepended to every conversation in chat().
SYSTEM_PROMPT = "You are an empathetic therapist. Provide supportive, caring responses."
# Module-level cache, populated lazily by load_model().
model = None
tokenizer = None
def load_model():
    """Initialise the tokenizer and CPU model once, caching both in module globals.

    Subsequent calls are no-ops: the cached `model` is reused.
    """
    global model, tokenizer
    if model is not None:
        return  # already initialised — nothing to do

    print(f"📥 Loading tokenizer from {TOKENIZER_REPO}...")
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_REPO)
    # Some tokenizers ship without a pad token; fall back to EOS for padding.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    print(f"📥 Loading model weights (Full Precision for CPU)...")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_REPO,
        torch_dtype=torch.float32,  # CPU kernels don't support 'Half' (float16)
        device_map="cpu",           # explicitly target CPU
        low_cpu_mem_usage=True,
    )
    print("✅ Success: System is online on CPU!")


load_model()
def chat(message, history):
    """Generate one assistant reply for a Gradio ChatInterface turn.

    Args:
        message: The user's latest message.
        history: List of (user_msg, bot_msg) pairs from ChatInterface.

    Returns:
        The generated reply as a plain string ("" for blank input, an
        apology string if generation fails).
    """
    if not message.strip():
        return ""
    try:
        prompt = _build_prompt(message, history, SYSTEM_PROMPT)
        # Tokenize and move to the exact same device as the model.
        inputs = tokenizer(
            prompt, return_tensors="pt", truncation=True, max_length=2048
        ).to(model.device)

        # FIX: stop on both the tokenizer's EOS and Llama-3's <|eot_id|>
        # end-of-turn marker (the prompt format ends each turn with it);
        # stopping on a single eos id can let generation run past the reply.
        terminators = [tokenizer.eos_token_id]
        eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
        if isinstance(eot_id, int) and eot_id >= 0 and eot_id not in terminators:
            terminators.append(eot_id)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                temperature=0.6,  # slightly lower for more stable therapy responses
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=terminators,
            )
        # Decode only the newly generated tokens (skip the echoed prompt).
        input_len = inputs.input_ids.shape[1]
        response = tokenizer.decode(outputs[0][input_len:], skip_special_tokens=True)
        return response.strip()
    except Exception as e:
        # Surface the failure to the user instead of crashing the UI.
        print(f"❌ Generation Error: {e}")
        return f"I'm sorry, I encountered an error: {str(e)}. Please try again."


def _build_prompt(message, history, system_prompt):
    """Assemble a Llama-3 Instruct chat prompt from the last 3 history turns."""
    prompt = (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        f"{system_prompt}<|eot_id|>"
    )
    for user_msg, bot_msg in history[-3:]:
        prompt += (
            f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
            f"<|start_header_id|>assistant<|end_header_id|>\n\n{bot_msg}<|eot_id|>"
        )
    prompt += (
        f"<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )
    return prompt
# Gradio chat UI: wires chat() into a standard chat interface.
demo = gr.ChatInterface(
    fn=chat,
    title="💚 E.motion Therapy Assistant",
    theme=gr.themes.Soft(),          # soft theme to match the supportive tone
    chatbot=gr.Chatbot(height=450),  # fixed-height transcript panel
)

# Standard script entry point (Spaces runs this file directly).
if __name__ == "__main__":
    demo.launch()