"""Gradio Space: "Mariam", a mental-health support chat assistant.

Streams replies from Hugging Face Inference API chat models, falling back
to plain text-generation models when every chat model fails.
"""

import os
import re

import gradio as gr
from huggingface_hub import InferenceClient

# Load token from either of the two conventional environment variable names.
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN")

print("=" * 50)
if not HF_TOKEN:
    print("❌ HF_TOKEN NOT FOUND")
else:
    # Log only a prefix/suffix so the full secret never reaches the logs.
    print(f"✅ HF_TOKEN Found: {HF_TOKEN[:15]}...{HF_TOKEN[-8:]}")
print("=" * 50)

# Crude off-topic/spam filter: reject messages containing links or
# marketing phrases. Kept as a public string for backward compatibility;
# the compiled form below is what the code actually uses.
OFF_TOPIC_REGEX = r"(http|www|buy now|discount|subscribe|follow me|click here)"
_OFF_TOPIC_PATTERN = re.compile(OFF_TOPIC_REGEX)


def is_safe_to_process(text: str) -> bool:
    """Return True when *text* is long enough and not obviously spam.

    A message is rejected when it is shorter than 2 non-whitespace-trimmed
    characters or matches the off-topic pattern (case-insensitively, via
    lowercasing the input).
    """
    if len(text.strip()) < 2:
        return False
    if _OFF_TOPIC_PATTERN.search(text.lower()):
        return False
    return True


def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    request: gr.Request,
):
    """Generator handler for gr.ChatInterface: yields the cumulative reply text.

    Tries several chat-completion models in order; if all fail, falls back
    to text-generation models. Yields user-facing error guidance when the
    token is missing/invalid or every backend fails.

    Args:
        message: The latest user message.
        history: Prior turns as role/content dicts (Gradio "messages" format).
        system_message: Base system prompt from the UI.
        max_tokens / temperature / top_p: Sampling controls from the UI.
        request: Gradio request; ``?username=...`` query param personalizes
            the reply (defaults to "User").
    """
    user_name = "User"
    if request:
        user_name = request.query_params.get("username", "User")

    # Deflect empty/spammy input with a friendly greeting instead of calling the API.
    if not is_safe_to_process(message):
        yield f"Hello {user_name}, I'm here to support your emotional well-being. How can I help you today?"
        return

    if not HF_TOKEN:
        yield "❌ Configuration Error: HF_TOKEN not found in Space secrets. Please add it in Settings."
        return

    personalized_system = (
        f"{system_message} "
        f"The user's name is {user_name}. Address them naturally by name. "
        f"Be conversational, not formal."
    )

    messages = [{"role": "system", "content": personalized_system}]
    # Keep only the last 10 turns to bound prompt size.
    messages.extend(history[-10:])
    messages.append({"role": "user", "content": message})

    # Create client once
    client = InferenceClient(token=HF_TOKEN)

    # Try chat_completion with these models
    chat_models = [
        "meta-llama/Meta-Llama-3-8B-Instruct",
        "mistralai/Mistral-7B-Instruct-v0.2",
        "HuggingFaceH4/zephyr-7b-beta",
    ]

    # Try chat_completion first
    for model_name in chat_models:
        # BUG FIX: reset per model. Previously ``response`` was initialized
        # once before this loop, so a partially streamed reply from a model
        # that then raised was prepended to the next model's output. (The
        # text_generation fallback below already resets per model.)
        response = ""
        try:
            print(f"\n{'='*50}")
            print(f"🔄 Trying chat_completion: {model_name}")
            print(f"{'='*50}")

            for msg in client.chat_completion(
                messages,
                model=model_name,
                max_tokens=max_tokens,
                stream=True,
                temperature=temperature,
                top_p=top_p,
            ):
                # Delta content can be None on some chunks (e.g. role-only).
                token = msg.choices[0].delta.content or ""
                response += token
                yield response

            print(f"✅ SUCCESS with {model_name}")
            return

        except Exception as e:
            error_str = str(e)
            error_repr = repr(e)
            print(f"❌ Failed with {model_name}")
            print(f"Error str: {error_str}")
            print(f"Error repr: {error_repr}")
            print(f"Error type: {type(e).__name__}")

            # Check for auth errors — retrying other models cannot help, so bail out.
            if any(x in error_str for x in ["401", "403", "Unauthorized", "Forbidden"]):
                print("🚨 AUTHENTICATION ERROR")
                yield (
                    f"🔐 Authentication Problem!\n\n"
                    f"Your token isn't working. Please:\n"
                    f"1. Create a NEW token at https://huggingface.co/settings/tokens\n"
                    f"2. Add it in Space Settings → Secrets as 'HF_TOKEN'\n"
                    f"3. Restart this Space\n\n"
                    f"Error: {error_str}"
                )
                return
            continue

    # If chat_completion failed, try text_generation as fallback
    print("\n" + "=" * 50)
    print("⚠️ All chat models failed, trying text_generation fallback")
    print("=" * 50)

    text_models = [
        "microsoft/phi-2",
        "google/flan-t5-large",
        "bigscience/bloom-560m",
    ]

    # Build a simple prompt from the conversation
    prompt = f"{personalized_system}\n\nUser: {message}\nAssistant:"

    for model_name in text_models:
        try:
            print(f"🔄 Trying text_generation: {model_name}")

            result = client.text_generation(
                prompt,
                model=model_name,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                stream=True,
            )

            response = ""
            for token in result:
                response += token
                yield response

            print(f"✅ SUCCESS with text_generation: {model_name}")
            return

        except Exception as e:
            print(f"❌ Failed text_generation with {model_name}: {repr(e)}")
            continue

    # Everything failed
    yield (
        f"I'm here for you, {user_name}, but I'm experiencing connection issues with the AI service. "
        f"This could be due to:\n\n"
        f"• High demand on Hugging Face servers\n"
        f"• Models are loading (cold start)\n"
        f"• Network connectivity issues\n\n"
        f"Please try again in 30-60 seconds. If this persists, check the Space logs for details."
    )


# Gradio Interface
demo = gr.ChatInterface(
    respond,
    type="messages",
    title="💙 Mariam - Mental Health Support",
    description="A compassionate AI assistant for emotional well-being",
    additional_inputs=[
        gr.Textbox(
            value=(
                "You are Mariam, a compassionate mental health assistant. "
                "Respond directly and naturally to the user. "
                "Be warm, empathetic, and professional. "
                "Never use formal letter formats."
            ),
            label="System Message",
            lines=4,
        ),
        gr.Slider(128, 1024, value=512, label="Max Tokens", step=32),
        gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
    ],
    theme=gr.themes.Soft(primary_hue="blue"),
    cache_examples=False,
)

if __name__ == "__main__":
    demo.launch()