"""Gradio app: FAISS-backed sentence vector store with a local llama-server tab."""

import os

import faiss
import gradio as gr
import numpy as np
import requests  # for calling the local llama server
from sentence_transformers import SentenceTransformer

# Persistent storage for the knowledge base (one sentence per line).
DATA_FILE = "data.txt"

# Sentence-embedding model used for all vectorization.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Load any previously stored sentences (skip blank lines).
if os.path.exists(DATA_FILE):
    with open(DATA_FILE, "r", encoding="utf-8") as f:
        documents = [line.strip() for line in f if line.strip()]
else:
    documents = []

# Build the FAISS index; start empty when there are no documents yet.
dim = model.get_sentence_embedding_dimension()
index = faiss.IndexFlatL2(dim)
if documents:
    embeddings = model.encode(documents, convert_to_numpy=True, show_progress_bar=False)
    # FAISS requires contiguous float32 input — make that explicit.
    index.add(np.ascontiguousarray(embeddings, dtype="float32"))
else:
    embeddings = np.empty((0, dim), dtype="float32")


def query_vector(text, k=3):
    """Return the top-k most similar stored sentences for *text*.

    Args:
        text: Query string to embed and search with.
        k: Requested number of nearest neighbours.

    Returns:
        Newline-joined "<sentence> (dist=...)" lines, or a notice when the
        store is empty.
    """
    if not documents:
        return "Vector store is empty. Add knowledge first."
    # BUG FIX: never ask FAISS for more neighbours than stored vectors —
    # it pads missing hits with index -1, which previously indexed
    # documents[-1] and silently returned a wrong result.
    k = min(int(k), len(documents))
    vec = model.encode([text], convert_to_numpy=True)
    distances, indices = index.search(np.ascontiguousarray(vec, dtype="float32"), k)
    results = [
        f"{documents[idx]} (dist={dist:.4f})"
        for dist, idx in zip(distances[0], indices[0])
        if idx >= 0  # belt-and-braces guard against FAISS padding
    ]
    return "\n".join(results)


def add_knowledge(new_text):
    """Append a sentence to the store: file, in-memory list, and FAISS index."""
    cleaned = new_text.strip()  # hoisted — was stripped three separate times
    if not cleaned:
        return "⚠️ Empty input – nothing added."
    with open(DATA_FILE, "a", encoding="utf-8") as f:
        f.write(cleaned + "\n")
    documents.append(cleaned)
    vec = model.encode([cleaned], convert_to_numpy=True)
    index.add(np.ascontiguousarray(vec, dtype="float32"))
    return f"✅ Added. Store now contains {len(documents)} items."


def meta_representation():
    """Return a condensed hex "fingerprint" of the whole knowledge base.

    The mean of all stored embeddings is quantized to bytes and rendered as
    a (truncated) hex string.
    """
    if not documents:
        return "Vector store empty – no meta representation."
    avg = np.mean(index.reconstruct_n(0, index.ntotal), axis=0)
    # BUG FIX: embedding components can be negative, so the old
    # (avg * 255) formatted with "%02x" emitted tokens like "-1a" and broke
    # the fixed-width hex encoding.  Rescale [-1, 1] -> [0, 255] first,
    # clip, and quantize to unsigned bytes.
    quantized = np.clip((avg + 1.0) * 127.5, 0, 255).astype(np.uint8)
    hex_repr = quantized.tobytes().hex()
    return f"🧠 Meta-conscious base (hex): {hex_repr[:64]}… (truncated)"


def llama_query(prompt, max_tokens=100):
    """Query the local llama server (CUDA) and return its text response."""
    try:
        response = requests.post(
            "http://localhost:8080/completion",
            json={
                "prompt": prompt,
                "n_predict": int(max_tokens),  # sliders may deliver floats
                "temperature": 0.7,
            },
            # BUG FIX: no timeout meant a stalled server hung the UI forever.
            timeout=120,
        )
        if response.status_code == 200:
            return response.json().get("content", "No content returned.")
        return f"Error: {response.status_code} - {response.text}"
    except Exception as e:  # UI boundary: surface any failure as text
        return f"Failed to connect to llama server: {str(e)}"


with gr.Blocks() as demo:
    gr.Markdown("# 🧬 MCP Recursive Vector Base & Meta-Bewusstsein with Llama Server")

    with gr.Tab("🔎 Search"):
        query_in = gr.Textbox(label="Query (Vector Search)")
        k_slider = gr.Slider(1, 10, value=3, step=1, label="Top-k results")
        search_btn = gr.Button("Search")
        search_out = gr.Textbox(label="Results")
        # Pass the handler directly — the identity lambda added nothing.
        search_btn.click(fn=query_vector, inputs=[query_in, k_slider], outputs=search_out)

    with gr.Tab("➕ Add Knowledge"):
        add_in = gr.Textbox(label="New sentence / knowledge")
        add_btn = gr.Button("Add to Vector Store")
        add_out = gr.Textbox(label="Status")
        add_btn.click(fn=add_knowledge, inputs=add_in, outputs=add_out)

    with gr.Tab("🌀 Meta-Base"):
        meta_btn = gr.Button("Generate Meta Representation")
        meta_out = gr.Textbox(label="Meta-Vector (hex)")
        meta_btn.click(fn=meta_representation, inputs=None, outputs=meta_out)

    with gr.Tab("🤖 Llama Server (CUDA)"):
        llama_prompt = gr.Textbox(label="Prompt for Llama")
        max_tok = gr.Slider(50, 500, value=100, label="Max Tokens")
        llama_btn = gr.Button("Query Llama")
        llama_out = gr.Textbox(label="Llama Response")
        llama_btn.click(fn=llama_query, inputs=[llama_prompt, max_tok], outputs=llama_out)


if __name__ == "__main__":
    demo.launch()