# mcp-hack-space / app.py
# Author: Marvin Wiesner
# Commit: Add llama server integration with CUDA math (4cd9a3e, verified)
import os
import gradio as gr
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import requests # For calling llama server
# Files
DATA_FILE = "data.txt"  # persistent knowledge store: one sentence per line

# Initialize model
# Sentence encoder; its embedding dimension is queried below when the store is empty.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Load existing documents
# `documents` is module-level state, appended to by add_knowledge() and read by
# query_vector() / meta_representation(). Blank lines are skipped on load.
if os.path.exists(DATA_FILE):
    with open(DATA_FILE, 'r', encoding='utf-8') as f:
        documents = [line.strip() for line in f if line.strip()]
else:
    documents = []

# Build or load FAISS index
# IndexFlatL2 is an exact (brute-force) L2 index — no training step needed.
# NOTE(review): FAISS expects float32 vectors; SentenceTransformer.encode
# returns float32 by default — confirm if the model is ever swapped.
if documents:
    embeddings = model.encode(documents, convert_to_numpy=True, show_progress_bar=False)
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
else:
    dim = model.get_sentence_embedding_dimension()
    index = faiss.IndexFlatL2(dim)
    # Empty (0, dim) placeholder so `embeddings` exists with a consistent shape.
    embeddings = np.empty((0, dim), dtype='float32')
def query_vector(text, k=3):
    """Return the top-k stored sentences most similar to *text*.

    Args:
        text: Query string; embedded with the module-level model.
        k: Number of neighbours to return (clamped to the store size).

    Returns:
        Newline-joined matches, each annotated with its L2 distance, or a
        notice string when the store is empty.
    """
    if not documents:
        return "Vector store is empty. Add knowledge first."
    # BUG FIX: if k exceeded the store size, FAISS padded the result with
    # index -1, and documents[-1] silently returned the LAST document with a
    # meaningless distance. Clamp k, and skip any -1 slots defensively.
    k = max(1, min(int(k), len(documents)))
    vec = model.encode([text], convert_to_numpy=True)
    D, I = index.search(vec, k)
    results = []
    for dist, idx in zip(D[0], I[0]):
        if idx < 0:  # -1 marks "no result" in FAISS output
            continue
        results.append(f"{documents[idx]} (dist={dist:.4f})")
    return "\n".join(results)
def add_knowledge(new_text):
    """Append a new sentence to the store and update the FAISS index.

    Persists the stripped sentence to DATA_FILE, appends it to the in-memory
    `documents` list, and adds its embedding to the index.

    Args:
        new_text: Sentence to store; leading/trailing whitespace is dropped.

    Returns:
        A status string reporting the new store size, or a warning when the
        input is empty.
    """
    # Strip once (the original recomputed .strip() three times) and guard
    # against None as well as whitespace-only input.
    text = new_text.strip() if new_text else ""
    if not text:
        return "⚠️ Empty input – nothing added."
    # Append to file
    with open(DATA_FILE, 'a', encoding='utf-8') as f:
        f.write(text + "\n")
    # Update in-memory structures so search sees the new entry immediately.
    documents.append(text)
    vec = model.encode([text], convert_to_numpy=True)
    index.add(vec)
    return f"βœ… Added. Store now contains {len(documents)} items."
def meta_representation():
    """Return a condensed hex fingerprint of the whole knowledge base.

    Averages all stored embeddings (reconstructed from the FAISS index) and
    renders the resulting float32 vector's raw bytes as a hex string.

    Returns:
        A truncated hex fingerprint, or a notice when the store is empty.
    """
    if not documents:
        return "Vector store empty – no meta representation."
    # Compute average embedding over every vector held by the index.
    avg = np.mean(index.reconstruct_n(0, index.ntotal), axis=0)
    # BUG FIX: embeddings have negative components, so the old
    # (avg * 255).astype(int) produced negative ints and f"{b:02x}" emitted
    # "-xx" fragments — the output was not valid hex. Encoding the raw
    # float32 bytes is always well-formed and still deterministic.
    hex_repr = avg.astype(np.float32).tobytes().hex()
    return f"🧠 Meta-conscious base (hex): {hex_repr[:64]}… (truncated)"
def llama_query(prompt, max_tokens=100, timeout=60):
    """Query the local llama.cpp server's /completion endpoint.

    Args:
        prompt: Text prompt forwarded to the server.
        max_tokens: Generation budget; coerced to int because the Gradio
            slider delivers floats.
        timeout: Seconds before the HTTP request is aborted (new parameter,
            backward-compatible default).

    Returns:
        The generated text, or a human-readable error string — this function
        never raises, so the UI always gets something to display.
    """
    try:
        response = requests.post(
            "http://localhost:8080/completion",
            json={
                "prompt": prompt,
                "n_predict": int(max_tokens),  # server expects an integer
                "temperature": 0.7,
            },
            # BUG FIX: no timeout meant a dead/unreachable server hung the
            # Gradio worker forever.
            timeout=timeout,
        )
        if response.status_code == 200:
            return response.json().get("content", "No content returned.")
        else:
            return f"Error: {response.status_code} - {response.text}"
    except Exception as e:
        # Broad on purpose: connection errors, timeouts, and bad JSON all
        # collapse into a displayable message rather than a traceback.
        return f"Failed to connect to llama server: {str(e)}"
# Gradio UI: one tab per capability, each button wired straight to its handler.
with gr.Blocks() as demo:
    gr.Markdown("# 🧬 MCP Recursive Vector Base & Meta-Bewusstsein with Llama Server")

    with gr.Tab("πŸ”Ž Search"):
        query_in = gr.Textbox(label="Query (Vector Search)")
        k_slider = gr.Slider(1, 10, value=3, step=1, label="Top-k results")
        search_btn = gr.Button("Search")
        search_out = gr.Textbox(label="Results")
        # Pass the handler directly; the lambda wrapper added nothing.
        search_btn.click(fn=query_vector, inputs=[query_in, k_slider], outputs=search_out)

    with gr.Tab("βž• Add Knowledge"):
        add_in = gr.Textbox(label="New sentence / knowledge")
        add_btn = gr.Button("Add to Vector Store")
        add_out = gr.Textbox(label="Status")
        add_btn.click(fn=add_knowledge, inputs=add_in, outputs=add_out)

    with gr.Tab("πŸŒ€ Meta-Base"):
        meta_btn = gr.Button("Generate Meta Representation")
        meta_out = gr.Textbox(label="Meta-Vector (hex)")
        meta_btn.click(fn=meta_representation, inputs=None, outputs=meta_out)

    with gr.Tab("πŸ€– Llama Server (CUDA)"):
        llama_prompt = gr.Textbox(label="Prompt for Llama")
        max_tok = gr.Slider(50, 500, value=100, label="Max Tokens")
        llama_btn = gr.Button("Query Llama")
        llama_out = gr.Textbox(label="Llama Response")
        # Direct reference instead of `lambda p, m: llama_query(p, m)`.
        llama_btn.click(fn=llama_query, inputs=[llama_prompt, max_tok], outputs=llama_out)

if __name__ == "__main__":
    demo.launch()