# HuggingFace Spaces page residue (status: Running) — not part of the program.
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Fine-tuned (merged) Qwen 2.5 3B checkpoint for Tamil therapy responses.
MODEL_REPO = "E-motionAssistant/qwen-2.5-3b-tamil-therapy-merged"
# Tokenizer comes from the base instruct model, not the merged repo.
TOKENIZER_REPO = "Qwen/Qwen2.5-3B-Instruct"
# System prompt injected at the start of every ChatML conversation.
SYSTEM_PROMPT = "You are an empathetic Tamil therapist providing CBT-based support."

# Module-level singletons, populated once by load_model() at import time.
model = None
tokenizer = None
def load_model():
    """Load the tokenizer and model into the module-level singletons.

    Idempotent: does nothing if ``model`` is already loaded. Loads weights in
    full float32 precision pinned to CPU (Spaces free tier has no GPU).
    Side effects only — downloads from the HuggingFace Hub and mutates the
    module globals ``model`` and ``tokenizer``; returns ``None``.
    """
    global model, tokenizer
    if model is None:
        print(f"📥 Loading tokenizer from {TOKENIZER_REPO}...")
        tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_REPO, trust_remote_code=True)
        # Qwen tokenizers may ship without a pad token; generate() needs one.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        # Plain string — the original used an f-string with no placeholders.
        print("📥 Loading model weights (Full Precision for CPU)...")
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_REPO,
            torch_dtype=torch.float32,
            device_map="cpu",
            trust_remote_code=True,
            # Stream weights shard-by-shard to avoid a 2x peak-RAM spike.
            low_cpu_mem_usage=True
        )
        print("✅ Success: Tamil Therapy System is online on CPU!")
| load_model() | |
def chat(message, history):
    """Gradio ChatInterface callback: produce one assistant reply.

    Builds a Qwen ChatML prompt from the system prompt, the last three
    (user, assistant) exchanges of ``history``, and the new ``message``,
    then samples a completion from the global ``model``. Returns the
    decoded reply, or a Tamil apology string if generation fails.
    """
    if not message.strip():
        return ""
    try:
        # Assemble the ChatML prompt as segments and join once.
        segments = [f"<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n"]
        # Only the three most recent exchanges are kept as context.
        for past_user, past_assistant in history[-3:]:
            segments.append(
                f"<|im_start|>user\n{past_user}<|im_end|>\n<|im_start|>assistant\n{past_assistant}<|im_end|>\n"
            )
        segments.append(f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n")
        full_prompt = "".join(segments)

        # Tokenize (capped at 2048 tokens) and place tensors on the model's device.
        encoded = tokenizer(
            full_prompt, return_tensors="pt", truncation=True, max_length=2048
        ).to(model.device)

        with torch.no_grad():
            generated = model.generate(
                **encoded,
                max_new_tokens=256,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        # Slice off the prompt tokens so only the new completion is decoded.
        prompt_length = encoded.input_ids.shape[1]
        reply = tokenizer.decode(generated[0][prompt_length:], skip_special_tokens=True)
        return reply.strip()
    except Exception as e:
        print(f"❌ Generation Error: {e}")
        return f"மன்னிக்கவும், பிழை ஏற்பட்டது: {str(e)}. மீண்டும் முயற்சிக்கவும்."
# Gradio chat UI wired to chat(); history is passed as (user, bot) tuple
# pairs, which is the format chat() unpacks.
demo = gr.ChatInterface(
    fn=chat,
    title="💚 E.motion Tamil Therapy Assistant",
    description="*உங்கள் இரக்கமுள்ள AI துணை - Your compassionate AI companion for mental wellbeing in Tamil*\n\n**Note:** This is an AI assistant, not a replacement for professional therapy.",
    theme=gr.themes.Soft(),
    chatbot=gr.Chatbot(height=450),
)
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 — the standard HF Spaces port.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,     # no public tunnel; Spaces handles exposure
        show_api=False   # hide the auto-generated API docs page
    )