import gradio as gr
import tensorflow as tf
import numpy as np
from model import TeraV3
import PIL.Image

# --- 1. LOAD SOVEREIGN CORE ---
# Instantiate the network with the hyperparameters hard-coded here
# (presumably the same values used during training -- confirm against the
# training script before changing any of them).
model = TeraV3(vocab_size=100, dim=512, depth=12)
# Run a single forward pass on zero-filled dummy inputs (token ids of shape
# (1, 32) and an image of shape (1, 224, 224, 3)) so the layers create their
# variables before load_weights() tries to populate them.
_ = model(
    tf.zeros((1, 32), dtype=tf.int32),
    vision_inputs=tf.zeros((1, 224, 224, 3), dtype=tf.float32),
    training=False,
)

# Loading is best-effort: the app still starts with freshly initialised
# weights when the checkpoint cannot be loaded. Catch Exception (not a bare
# except) so Ctrl-C / SystemExit still propagate, and report the reason so a
# missing or incompatible checkpoint is not silently hidden.
try:
    model.load_weights('stable.weights.h5')
except Exception as exc:
    print("⚠️ Loading default weights (untrained).")
    print(f"   Reason: {type(exc).__name__}: {exc}")
else:
    print("✅ Sovereign Weights Loaded.")

def predict(text, image):
    """Run one forward pass of the model and return a placeholder reply.

    Args:
        text: The user's query. NOTE: it is currently ignored -- no tokenizer
            is wired in yet, so random token ids are fed to the model instead.
        image: A PIL image to use as visual context, or ``None`` when no
            image was supplied.

    Returns:
        A fixed status string; the model output is not decoded at this stage.
    """
    # Dummy tokenizer for the current architecture state: random ids in
    # [0, 100) with shape (1, 32). In a full deployment, this would use a
    # saved SentencePiece/ByteLevel BPE model applied to `text`.
    text_ids = tf.cast(tf.random.uniform([1, 32], maxval=100), tf.int32)

    vis_in = None
    if image is not None:
        # Force three channels before resizing: grayscale ("L") or RGBA
        # inputs would otherwise produce an array whose last axis is not 3,
        # unlike the (1, 224, 224, 3) input the model was built with.
        rgb = image.convert("RGB").resize((224, 224))
        # Scale 8-bit pixel values to [0, 1] and add a leading batch axis.
        vis_in = np.asarray(rgb, dtype=np.float32) / 255.0
        vis_in = np.expand_dims(vis_in, axis=0)

    # The forward pass is kept so a broken model surfaces as an error here;
    # its output is intentionally discarded until decoding is implemented.
    # NOTE(review): vision_inputs may be None -- confirm TeraV3 accepts that.
    _ = model(text_ids, vision_inputs=vis_in, training=False)

    # Convert logits to a human-readable placeholder for this stage
    # Real-world deployment would involve top-k sampling
    return "[Tera.V3 Sovereign Response]: The neural pathway is active. The interface is processing your multimodal request."

# --- 2. GRADIO INTERFACE ---
# Build the UI declaratively: components created inside a `with` block are
# attached to that container, so creation order below defines the layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🪐 Tera.V3 Sovereign Interface")
    gr.Markdown("Interact with the Dense-Elite multimodal architecture.")
    
    # One row, two columns: inputs in the first column, output in the second.
    with gr.Row():
        with gr.Column():
            txt = gr.Textbox(label="Sovereign Query", placeholder="Type your message...")
            # type='pil' asks Gradio to pass the upload to the callback as a
            # PIL image, which is what predict() resizes.
            img = gr.Image(type='pil', label="Visual Context")
            btn = gr.Button("Execute", variant="primary")
        with gr.Column():
            out = gr.Textbox(label="Tera.V3 Output")
            
    # Clicking the button calls predict(txt value, img value) and writes the
    # returned string into the output textbox.
    btn.click(predict, inputs=[txt, img], outputs=out)

# Start the Gradio server only when this file is executed as a script, not
# when it is imported.
if __name__ == '__main__':
    demo.launch()