"""Gradio app: FAISS-backed sentence vector store with a local llama-server tab."""

import os

import faiss
import gradio as gr
import numpy as np
import requests  # for calling the local llama server
from sentence_transformers import SentenceTransformer

# Persistent storage for the knowledge base (one sentence per line).
DATA_FILE = "data.txt"

# Sentence-embedding model used for all vectorization.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Load any previously stored sentences (skip blank lines).
if os.path.exists(DATA_FILE):
    with open(DATA_FILE, "r", encoding="utf-8") as f:
        documents = [line.strip() for line in f if line.strip()]
else:
    documents = []

# Build the FAISS index; start empty when there are no documents yet.
dim = model.get_sentence_embedding_dimension()
index = faiss.IndexFlatL2(dim)
if documents:
    embeddings = model.encode(documents, convert_to_numpy=True, show_progress_bar=False)
    # FAISS requires contiguous float32 input — make that explicit.
    index.add(np.ascontiguousarray(embeddings, dtype="float32"))
else:
    embeddings = np.empty((0, dim), dtype="float32")


def query_vector(text, k=3):
    """Return the top-k most similar stored sentences for *text*.

    Args:
        text: Query string to embed and search with.
        k: Requested number of nearest neighbours.

    Returns:
        Newline-joined "<sentence> (dist=...)" lines, or a notice when the
        store is empty.
    """
    if not documents:
        return "Vector store is empty. Add knowledge first."
    # BUG FIX: never ask FAISS for more neighbours than stored vectors —
    # it pads missing hits with index -1, which previously indexed
    # documents[-1] and silently returned a wrong result.
    k = min(int(k), len(documents))
    vec = model.encode([text], convert_to_numpy=True)
    distances, indices = index.search(np.ascontiguousarray(vec, dtype="float32"), k)
    results = [
        f"{documents[idx]} (dist={dist:.4f})"
        for dist, idx in zip(distances[0], indices[0])
        if idx >= 0  # belt-and-braces guard against FAISS padding
    ]
    return "\n".join(results)


def add_knowledge(new_text):
    """Append a sentence to the store: file, in-memory list, and FAISS index."""
    cleaned = new_text.strip()  # hoisted — was stripped three separate times
    if not cleaned:
        return "⚠️ Empty input – nothing added."
    with open(DATA_FILE, "a", encoding="utf-8") as f:
        f.write(cleaned + "\n")
    documents.append(cleaned)
    vec = model.encode([cleaned], convert_to_numpy=True)
    index.add(np.ascontiguousarray(vec, dtype="float32"))
    return f"✅ Added. Store now contains {len(documents)} items."


def meta_representation():
    """Return a condensed hex "fingerprint" of the whole knowledge base.

    The mean of all stored embeddings is quantized to bytes and rendered as
    a (truncated) hex string.
    """
    if not documents:
        return "Vector store empty – no meta representation."
    avg = np.mean(index.reconstruct_n(0, index.ntotal), axis=0)
    # BUG FIX: embedding components can be negative, so the old
    # (avg * 255) formatted with "%02x" emitted tokens like "-1a" and broke
    # the fixed-width hex encoding.  Rescale [-1, 1] -> [0, 255] first,
    # clip, and quantize to unsigned bytes.
    quantized = np.clip((avg + 1.0) * 127.5, 0, 255).astype(np.uint8)
    hex_repr = quantized.tobytes().hex()
    return f"🧠 Meta-conscious base (hex): {hex_repr[:64]}… (truncated)"


def llama_query(prompt, max_tokens=100):
    """Query the local llama server (CUDA) and return its text response."""
    try:
        response = requests.post(
            "http://localhost:8080/completion",
            json={
                "prompt": prompt,
                "n_predict": int(max_tokens),  # sliders may deliver floats
                "temperature": 0.7,
            },
            # BUG FIX: no timeout meant a stalled server hung the UI forever.
            timeout=120,
        )
        if response.status_code == 200:
            return response.json().get("content", "No content returned.")
        return f"Error: {response.status_code} - {response.text}"
    except Exception as e:  # UI boundary: surface any failure as text
        return f"Failed to connect to llama server: {str(e)}"


with gr.Blocks() as demo:
    gr.Markdown("# 🧬 MCP Recursive Vector Base & Meta-Bewusstsein with Llama Server")

    with gr.Tab("🔎 Search"):
        query_in = gr.Textbox(label="Query (Vector Search)")
        k_slider = gr.Slider(1, 10, value=3, step=1, label="Top-k results")
        search_btn = gr.Button("Search")
        search_out = gr.Textbox(label="Results")
        # Pass the handler directly — the identity lambda added nothing.
        search_btn.click(fn=query_vector, inputs=[query_in, k_slider], outputs=search_out)

    with gr.Tab("➕ Add Knowledge"):
        add_in = gr.Textbox(label="New sentence / knowledge")
        add_btn = gr.Button("Add to Vector Store")
        add_out = gr.Textbox(label="Status")
        add_btn.click(fn=add_knowledge, inputs=add_in, outputs=add_out)

    with gr.Tab("🌀 Meta-Base"):
        meta_btn = gr.Button("Generate Meta Representation")
        meta_out = gr.Textbox(label="Meta-Vector (hex)")
        meta_btn.click(fn=meta_representation, inputs=None, outputs=meta_out)

    with gr.Tab("🤖 Llama Server (CUDA)"):
        llama_prompt = gr.Textbox(label="Prompt for Llama")
        max_tok = gr.Slider(50, 500, value=100, label="Max Tokens")
        llama_btn = gr.Button("Query Llama")
        llama_out = gr.Textbox(label="Llama Response")
        llama_btn.click(fn=llama_query, inputs=[llama_prompt, max_tok], outputs=llama_out)


if __name__ == "__main__":
    demo.launch()