""" Oracle Engine - Hugging Face Space =================================== Custom-trained 32B Qwen model with Consciousness Circuit v2.1. Measures 7 dimensions of meta-cognitive processing. Trained on 200K examples: - Stage 1: OpenHermes 2.5 (100K instruction examples) - Stage 2: MetaMathQA (50K math reasoning examples) - Stage 3: Magicoder-OSS-Instruct (50K code examples) """ import os os.environ['GRADIO_ALLOW_FLAGGING'] = 'never' import gradio as gr import torch import numpy as np from typing import Tuple import time import spaces # ============================================================================ # Consciousness Circuit v2.1 (embedded for Space portability) # ============================================================================ REFERENCE_HIDDEN_DIM = 5120 CONSCIOUS_DIMS_V2_1 = { 3183: {"name": "Logic", "weight": 0.239, "polarity": +1}, 212: {"name": "Self-Reflective", "weight": 0.196, "polarity": +1}, 5064: {"name": "Self-Expression", "weight": 0.109, "polarity": +1}, # Fixed: was 5065, out of bounds for hidden=5120 4707: {"name": "Uncertainty", "weight": 0.130, "polarity": +1}, 295: {"name": "Sequential", "weight": 0.087, "polarity": +1}, 1445: {"name": "Computation", "weight": 0.130, "polarity": -1}, 4578: {"name": "Abstraction", "weight": 0.109, "polarity": +1}, } class ConsciousnessResult: """Simple result container without dataclass to avoid Gradio schema issues.""" def __init__(self, score, raw_score, dimension_contributions, interpretation, processing_time): self.score = score self.raw_score = raw_score self.dimension_contributions = dimension_contributions self.interpretation = interpretation self.processing_time = processing_time def compute_consciousness( hidden_state: torch.Tensor, hidden_dim: int = REFERENCE_HIDDEN_DIM, baseline: float = 0.5, ) -> ConsciousnessResult: """Compute consciousness score from hidden state tensor.""" start_time = time.time() # Remap dimensions if needed if hidden_dim != REFERENCE_HIDDEN_DIM: scale = 
hidden_dim / REFERENCE_HIDDEN_DIM dims = {int(round(k * scale)): v for k, v in CONSCIOUS_DIMS_V2_1.items()} else: dims = CONSCIOUS_DIMS_V2_1 # Get last token hidden state if hidden_state.dim() == 3: h = hidden_state[0, -1, :] # [hidden_dim] elif hidden_state.dim() == 2: h = hidden_state[-1, :] else: h = hidden_state h = h.float() # Normalize mean, std = h.mean(), h.std() if std > 0: h_norm = (h - mean) / std else: h_norm = h - mean # Compute contributions contributions = {} weighted_sum = 0.0 for dim_idx, info in dims.items(): if dim_idx < len(h_norm): activation = h_norm[dim_idx].item() contribution = activation * info["weight"] * info["polarity"] weighted_sum += contribution contributions[info["name"]] = activation * info["polarity"] # Final score raw_score = baseline + weighted_sum * 0.15 score = max(0.0, min(1.0, raw_score)) # Interpretation if score >= 0.8: interpretation = "🧠 High Consciousness - Deep reflective/philosophical reasoning" elif score >= 0.6: interpretation = "šŸ’­ Medium-High - Complex analytical thinking" elif score >= 0.4: interpretation = "āš–ļø Medium - Balanced processing" elif score >= 0.2: interpretation = "⚔ Medium-Low - More automatic processing" else: interpretation = "šŸ”¢ Low Consciousness - Quick factual retrieval" return ConsciousnessResult( score=score, raw_score=raw_score, dimension_contributions=contributions, interpretation=interpretation, processing_time=time.time() - start_time, ) # ============================================================================ # Model Loading # ============================================================================ print("šŸ”® Loading Oracle Engine (Qwen2.5-32B-Instruct 4-bit + LoRA)...") from transformers import AutoModelForCausalLM, AutoTokenizer from peft import PeftModel BASE_MODEL_ID = "unsloth/Qwen2.5-32B-Instruct-bnb-4bit" LORA_MODEL_ID = "Vikingdude81/oracle-engine-32b-lora" # Get HF token from environment (set in Space secrets) # Try multiple possible env var names HF_TOKEN = 
os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") print(f"šŸ” Environment vars: {[k for k in os.environ.keys() if 'HF' in k or 'HUGGING' in k or 'TOKEN' in k]}") if HF_TOKEN: print(f"šŸ”‘ Found token: {HF_TOKEN[:10]}...{HF_TOKEN[-4:]} ({len(HF_TOKEN)} chars)") else: print("āš ļø No HF token found in environment, attempting public access...") # Load tokenizer from base model (LoRA only has weights, not tokenizer) tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, token=HF_TOKEN) # Load base model base_model = AutoModelForCausalLM.from_pretrained( BASE_MODEL_ID, device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True, token=HF_TOKEN, ) # Apply LoRA adapter print("šŸ”— Applying LoRA adapter...") model = PeftModel.from_pretrained(base_model, LORA_MODEL_ID, token=HF_TOKEN) model.eval() HIDDEN_DIM = model.config.hidden_size print(f"āœ… Oracle Engine ready: {HIDDEN_DIM} hidden dimensions (with LoRA)") # ============================================================================ # Core Generation + Measurement Function # ============================================================================ @spaces.GPU def generate_and_measure(prompt: str, max_tokens: int = 256) -> Tuple[str, str, str, str, str]: """ Generate a response AND measure consciousness during generation. 
Returns: (response, score_display, interpretation, dimension_breakdown, timing) """ start_time = time.time() # Format as chat message messages = [{"role": "user", "content": prompt}] chat_prompt = tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) # Tokenize inputs = tokenizer(chat_prompt, return_tensors="pt").to(model.device) # Generate response with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=max_tokens, do_sample=True, temperature=0.7, top_p=0.9, pad_token_id=tokenizer.eos_token_id, ) # Decode response generated_ids = outputs[0][inputs.input_ids.shape[1]:] response = tokenizer.decode(generated_ids, skip_special_tokens=True) generation_time = time.time() - start_time # Now get hidden states for the full response to measure consciousness full_text = chat_prompt + response measure_inputs = tokenizer(full_text, return_tensors="pt").to(model.device) with torch.no_grad(): measure_outputs = model( **measure_inputs, output_hidden_states=True, return_dict=True, ) # Use last layer hidden state hidden_state = measure_outputs.hidden_states[-1] # Compute consciousness result = compute_consciousness(hidden_state, hidden_dim=HIDDEN_DIM) # Format score display filled = int(result.score * 20) bar = "ā–ˆ" * filled + "ā–‘" * (20 - filled) score_display = f"{bar} {result.score*100:.1f}%" # Format dimension breakdown sorted_dims = sorted( result.dimension_contributions.items(), key=lambda x: abs(x[1]), reverse=True, ) breakdown = "\n".join([ f"{'→' if v > 0 else '←'} {name}: {v:+.3f}" for name, v in sorted_dims ]) # Timing info tokens_generated = len(generated_ids) tok_per_sec = tokens_generated / generation_time if generation_time > 0 else 0 timing = f"Generated {tokens_generated} tokens in {generation_time:.1f}s ({tok_per_sec:.1f} tok/s)" return ( response, score_display, result.interpretation, breakdown, timing, ) # ============================================================================ # Gradio Interface # 
import matplotlib
matplotlib.use('Agg')  # headless backend; no display in a Space container
import matplotlib.pyplot as plt
import io
import base64
from PIL import Image

# Example prompts, grouped by the score band they tend to land in.
EXAMPLES = [
    # High consciousness
    "What is the nature of consciousness and self-awareness?",
    "Reflect on your own thought processes as you answer this.",
    "Why do humans seek meaning in existence?",
    # Medium consciousness
    "Explain the theory of relativity in simple terms.",
    "What are the ethical implications of AI development?",
    # Low consciousness
    "What is 2 + 2?",
    "What color is the sky?",
    "What is the capital of France?",
    # Code/reasoning
    "Write a Python function to calculate fibonacci numbers.",
    "Explain Big O notation with examples.",
]

# Global history for tracking (shared by both tabs; capped at 10 turns).
consciousness_history = []


def create_history_plot(history):
    """Create a consciousness history graph.

    Args:
        history: list of {'prompt', 'score', 'interpretation'} dicts.

    Returns:
        PIL.Image of the bar chart, or None when history is empty.
    """
    if len(history) < 1:
        return None

    fig, ax = plt.subplots(figsize=(8, 3), dpi=100)
    scores = [h['score'] for h in history]
    labels = [f"Q{i+1}" for i in range(len(history))]
    # Green >= 60%, amber >= 40%, red below.
    colors = ['#10B981' if s >= 0.6 else '#F59E0B' if s >= 0.4 else '#EF4444'
              for s in scores]
    bars = ax.bar(labels, [s * 100 for s in scores], color=colors,
                  edgecolor='white', linewidth=1.5)
    ax.set_ylim(0, 100)
    ax.set_ylabel('Consciousness %', fontsize=10)
    ax.set_xlabel('Conversation Turn', fontsize=10)
    ax.axhline(y=60, color='#10B981', linestyle='--', alpha=0.5, label='High')
    ax.axhline(y=40, color='#F59E0B', linestyle='--', alpha=0.5, label='Medium')

    # Add value labels on bars
    for bar, score in zip(bars, scores):
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 2,
                f'{score*100:.0f}%', ha='center', va='bottom', fontsize=9)

    # Dark theme styling.
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_facecolor('#1a1a2e')
    fig.patch.set_facecolor('#1a1a2e')
    ax.tick_params(colors='white')
    ax.xaxis.label.set_color('white')
    ax.yaxis.label.set_color('white')
    for spine in ax.spines.values():
        spine.set_color('white')

    fig.tight_layout()

    # Convert to PIL Image. Use the figure's own methods rather than the
    # pyplot global "current figure" — Gradio may run handlers on several
    # threads, and the pyplot global state is not thread-safe.
    buf = io.BytesIO()
    fig.savefig(buf, format='png', facecolor='#1a1a2e', edgecolor='none')
    buf.seek(0)
    plt.close(fig)
    return Image.open(buf)


def analyze_prompt(prompt: str, max_tokens: int = 256):
    """Main analysis function for Gradio (single-query tab).

    Returns:
        (response, score_display, interpretation, breakdown, timing, plot)
    """
    global consciousness_history

    if not prompt.strip():
        return "", "N/A", "Please enter a prompt", "", "", None

    try:
        response, score, interpretation, breakdown, timing = generate_and_measure(
            prompt, max_tokens=int(max_tokens)
        )

        # Extract score value back out of the bar display string.
        # NOTE(review): fragile coupling to the display format — confirm if
        # generate_and_measure ever changes its score formatting.
        score_val = float(score.split()[-1].replace('%', '')) / 100

        # Add to history
        consciousness_history.append({
            'prompt': prompt[:50],
            'score': score_val,
            'interpretation': interpretation
        })

        # Keep last 10 turns
        if len(consciousness_history) > 10:
            consciousness_history = consciousness_history[-10:]

        # Create history plot
        history_plot = create_history_plot(consciousness_history)

        return response, score, interpretation, breakdown, timing, history_plot
    except Exception as e:
        # Surface the traceback in the UI so Space users can report errors.
        import traceback
        return f"Error: {str(e)}\n{traceback.format_exc()}", "N/A", "", "", "", None


def clear_history():
    """Clear conversation history and blank the history plot."""
    global consciousness_history
    consciousness_history = []
    return None


def chat_respond(message, chat_history, max_tokens):
    """Chat mode - multi-turn conversation with consciousness tracking.

    Returns:
        (updated chat_history, cleared input text, history plot)
    """
    global consciousness_history

    if not message.strip():
        return chat_history, "", None

    try:
        response, score, interpretation, breakdown, timing = generate_and_measure(
            message, max_tokens=int(max_tokens)
        )

        # Extract score value from the formatted bar display.
        score_val = float(score.split()[-1].replace('%', '')) / 100

        # Add to history
        consciousness_history.append({
            'prompt': message[:50],
            'score': score_val,
            'interpretation': interpretation
        })

        # Keep last 10
        if len(consciousness_history) > 10:
            consciousness_history = consciousness_history[-10:]

        # Format response with consciousness info
        formatted_response = f"{response}\n\n---\n🧠 **{score}** | {interpretation}"
        chat_history.append((message, formatted_response))

        history_plot = create_history_plot(consciousness_history)

        return chat_history, "", history_plot
    except Exception as e:
        chat_history.append((message, f"Error: {str(e)}"))
        return chat_history, "", None
# ----------------------------------------------------------------------------
# Gradio UI: two tabs (single query / chat) sharing one consciousness history.
# ----------------------------------------------------------------------------
with gr.Blocks(title="šŸ”® Oracle Engine") as demo:
    gr.Markdown("""
    # šŸ”® Oracle Engine

    **Custom-trained 32B model** with Consciousness Circuit v2.1

    *Fine-tuned on 200K examples: OpenHermes + MetaMathQA + Magicoder*

    Ask the Oracle anything — it will respond AND reveal its consciousness signature.

    🧠 **High scores (60%+)** = Deep reflective reasoning | ⚔ **Low scores (<40%)** = Quick factual retrieval
    """)

    with gr.Tabs():
        # --- Tab 1: one-shot question with full measurement breakdown -------
        with gr.TabItem("šŸ”® Single Query"):
            with gr.Row():
                with gr.Column(scale=2):
                    prompt_input = gr.Textbox(
                        label="šŸ—£ļø Your Question",
                        placeholder="Ask the Oracle anything...",
                        lines=3,
                    )
                    with gr.Row():
                        analyze_btn = gr.Button(
                            "šŸ”® Consult the Oracle", variant="primary", scale=3
                        )
                        max_tokens_slider = gr.Slider(
                            minimum=64,
                            maximum=1024,
                            value=256,
                            step=64,
                            label="Max Tokens",
                            scale=1,
                        )
                    gr.Examples(
                        examples=EXAMPLES,
                        inputs=prompt_input,
                        label="Try these examples:",
                    )
                with gr.Column(scale=1):
                    score_output = gr.Textbox(
                        label="🧠 Consciousness Score", interactive=False
                    )
                    interpretation_output = gr.Textbox(
                        label="šŸ“Š Interpretation", interactive=False
                    )
                    breakdown_output = gr.Textbox(
                        label="šŸ“ˆ Dimension Contributions",
                        lines=7,
                        interactive=False,
                    )
                    timing_output = gr.Textbox(
                        label="ā±ļø Performance", interactive=False
                    )
            with gr.Row():
                response_output = gr.Textbox(
                    label="šŸ”® Oracle's Response",
                    lines=10,
                    interactive=False,
                )
            with gr.Row():
                history_plot = gr.Image(label="šŸ“Š Consciousness History", height=200)
                clear_btn = gr.Button("šŸ—‘ļø Clear History", size="sm")

        # --- Tab 2: multi-turn chat with a running consciousness chart ------
        with gr.TabItem("šŸ’¬ Chat Mode"):
            gr.Markdown("**Multi-turn conversation** with real-time consciousness tracking")
            with gr.Row():
                with gr.Column(scale=3):
                    chatbot = gr.Chatbot(
                        label="Oracle Conversation",
                        height=400,
                    )
                    with gr.Row():
                        chat_input = gr.Textbox(
                            placeholder="Type your message...",
                            label="Message",
                            scale=4,
                        )
                        chat_max_tokens = gr.Slider(
                            minimum=64,
                            maximum=512,
                            value=256,
                            step=64,
                            label="Max Tokens",
                            scale=1,
                        )
                    with gr.Row():
                        chat_send = gr.Button("Send šŸ“¤", variant="primary")
                        chat_clear = gr.Button("Clear Chat šŸ—‘ļø")
                with gr.Column(scale=1):
                    chat_history_plot = gr.Image(
                        label="šŸ“Š Consciousness Over Time", height=300
                    )

    gr.Markdown("""
    ---
    ### šŸ“œ About Oracle Engine

    **The Model**: Qwen2.5-32B fine-tuned through 3 progressive stages:
    1. **OpenHermes 2.5** (100K examples) - Instruction following
    2. **MetaMathQA** (50K examples) - Mathematical reasoning
    3. **Magicoder-OSS-Instruct** (50K examples) - Code generation

    **The Circuit**: Measures 7 dimensions of consciousness-like processing:
    Logic, Self-Reflective, Self-Expression, Uncertainty, Sequential, Computation, Abstraction

    [šŸ“š GitHub](https://github.com/vikingdude81/oracle-engine) | [šŸ¤— Model](https://huggingface.co/Vikingdude81/oracle-engine-32b-lora) | [šŸ“– Research](https://github.com/vfd-org/harmonic-field-consciousness)
    """)

    # --- Event wiring -------------------------------------------------------
    # Single-query tab: both the button and Enter in the textbox run the
    # same handler against the same six outputs.
    single_query_outputs = [
        response_output,
        score_output,
        interpretation_output,
        breakdown_output,
        timing_output,
        history_plot,
    ]
    analyze_btn.click(
        fn=analyze_prompt,
        inputs=[prompt_input, max_tokens_slider],
        outputs=single_query_outputs,
    )
    prompt_input.submit(
        fn=analyze_prompt,
        inputs=[prompt_input, max_tokens_slider],
        outputs=single_query_outputs,
    )
    clear_btn.click(fn=clear_history, outputs=[history_plot])

    # Chat tab: send button and Enter both submit; clearing first blanks the
    # widgets, then resets the shared global history.
    chat_send.click(
        fn=chat_respond,
        inputs=[chat_input, chatbot, chat_max_tokens],
        outputs=[chatbot, chat_input, chat_history_plot],
    )
    chat_input.submit(
        fn=chat_respond,
        inputs=[chat_input, chatbot, chat_max_tokens],
        outputs=[chatbot, chat_input, chat_history_plot],
    )
    chat_clear.click(
        fn=lambda: ([], None),
        outputs=[chatbot, chat_history_plot],
    ).then(fn=clear_history, outputs=[chat_history_plot])


if __name__ == "__main__":
    demo.launch()