import subprocess
import sys


# Pin known-compatible library versions before anything imports them.
# NOTE(review): installing at import time is a notebook/Spaces convention;
# this re-runs (and re-resolves) on every cold start.
print("Stabilizing environment and fixing Gradio compatibility...")
subprocess.check_call([
    sys.executable, "-m", "pip", "install",
    "tokenizers==0.20.1",
    "transformers==4.45.2",
    "huggingface-hub==0.24.7",
    "gradio==4.44.1",
])
|
|
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer


# Fine-tuned therapy model with adapter weights already merged into the base.
MODEL_REPO = "E-motionAssistant/llama-3.2-3b-english-therapy-merged"
# Tokenizer is taken from the original instruct repo — presumably shares the
# merged model's vocabulary; TODO confirm against the model card.
TOKENIZER_REPO = "unsloth/Llama-3.2-3B-Instruct"
# System turn prepended to every conversation prompt built in chat().
SYSTEM_PROMPT = "You are an empathetic therapist. Provide supportive, caring responses."


# Lazily populated by load_model(); module-level so chat() can reach them.
model = None
tokenizer = None
|
def load_model():
    """Load tokenizer and model into the module-level globals.

    Idempotent: once ``model`` is set, subsequent calls are no-ops.
    Loads on CPU in full float32 precision (no GPU assumed).
    """
    global model, tokenizer
    if model is not None:
        return  # already loaded

    print(f"Loading tokenizer from {TOKENIZER_REPO}...")
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_REPO)

    # Llama tokenizers ship without a pad token; reuse EOS so padding works.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Plain string here: the original used an f-string with no placeholders.
    print("Loading model weights (full precision for CPU)...")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_REPO,
        torch_dtype=torch.float32,   # float32: safest dtype for CPU inference
        device_map="cpu",
        low_cpu_mem_usage=True,
    )
    # Original success message was a string literal broken across two lines
    # (mojibake emoji) — repaired into a single valid literal.
    print("Success: System is online on CPU!")


load_model()
|
|
def chat(message, history):
    """Generate one assistant reply for the Gradio ChatInterface.

    Args:
        message: Latest user message (str).
        history: List of (user, assistant) pairs in Gradio 4.x tuple
            format; only the last 3 turns are included as context.

    Returns:
        The model's reply as a plain string; "" for blank input, or a
        user-facing apology string if generation raised.
    """
    # Guard clause: ignore whitespace-only submissions.
    if not message.strip():
        return ""

    try:
        # Hand-build a Llama-3 chat-template prompt: system turn, then up
        # to the last 3 (user, assistant) exchanges, then the new user
        # turn with an open assistant header for the model to complete.
        prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{SYSTEM_PROMPT}<|eot_id|>"
        for user_msg, bot_msg in history[-3:]:
            prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{bot_msg}<|eot_id|>"
        prompt += f"<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"

        # Truncate to 2048 tokens so long chats can't overflow the context.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                temperature=0.6,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                # NOTE(review): assumes this tokenizer's eos_token_id is
                # <|eot_id|> so generation stops at end-of-turn — confirm.
                eos_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens (everything past the prompt).
        input_len = inputs.input_ids.shape[1]
        response = tokenizer.decode(outputs[0][input_len:], skip_special_tokens=True)
        return response.strip()

    except Exception as e:
        # Top-level UI boundary: log and return an apology instead of
        # crashing the Gradio worker. (Original log prefix was mojibake.)
        print(f"Generation Error: {e}")
        return f"I'm sorry, I encountered an error: {str(e)}. Please try again."
|
|
# Wire chat() into Gradio's ready-made chat UI.
# (Original title contained a mojibake emoji character — removed.)
demo = gr.ChatInterface(
    fn=chat,
    title="E.motion Therapy Assistant",
    theme=gr.themes.Soft(),
    chatbot=gr.Chatbot(height=450),
)


if __name__ == "__main__":
    demo.launch()