# mcp-hack-space / app.py
# Author: Marvin Wiesner
# Commit: Add llama server integration with CUDA math (4cd9a3e, verified)
import os
import gradio as gr
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import requests # For calling llama server
# Files
DATA_FILE = "data.txt"  # persistent knowledge store: one sentence per line

# Initialize model
# Sentence encoder; its embedding dimension is queried below when the store is empty.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Load existing documents
# `documents` is module-level state, appended to by add_knowledge() and read by
# query_vector() / meta_representation(). Blank lines are skipped on load.
if os.path.exists(DATA_FILE):
    with open(DATA_FILE, 'r', encoding='utf-8') as f:
        documents = [line.strip() for line in f if line.strip()]
else:
    documents = []

# Build or load FAISS index
# IndexFlatL2 is an exact (brute-force) L2 index — no training step needed.
# NOTE(review): FAISS expects float32 vectors; SentenceTransformer.encode
# returns float32 by default — confirm if the model is ever swapped.
if documents:
    embeddings = model.encode(documents, convert_to_numpy=True, show_progress_bar=False)
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
else:
    dim = model.get_sentence_embedding_dimension()
    index = faiss.IndexFlatL2(dim)
    # Empty (0, dim) placeholder so `embeddings` exists with a consistent shape.
    embeddings = np.empty((0, dim), dtype='float32')
def query_vector(text, k=3):
    """Return the top-k stored sentences most similar to *text*.

    Args:
        text: Query string; embedded with the module-level model.
        k: Number of neighbours to return (clamped to the store size).

    Returns:
        Newline-joined matches, each annotated with its L2 distance, or a
        notice string when the store is empty.
    """
    if not documents:
        return "Vector store is empty. Add knowledge first."
    # BUG FIX: if k exceeded the store size, FAISS padded the result with
    # index -1, and documents[-1] silently returned the LAST document with a
    # meaningless distance. Clamp k, and skip any -1 slots defensively.
    k = max(1, min(int(k), len(documents)))
    vec = model.encode([text], convert_to_numpy=True)
    D, I = index.search(vec, k)
    results = []
    for dist, idx in zip(D[0], I[0]):
        if idx < 0:  # -1 marks "no result" in FAISS output
            continue
        results.append(f"{documents[idx]} (dist={dist:.4f})")
    return "\n".join(results)
def add_knowledge(new_text):
    """Append a new sentence to the store and update the FAISS index.

    Persists the stripped sentence to DATA_FILE, appends it to the in-memory
    `documents` list, and adds its embedding to the index.

    Args:
        new_text: Sentence to store; leading/trailing whitespace is dropped.

    Returns:
        A status string reporting the new store size, or a warning when the
        input is empty.
    """
    # Strip once (the original recomputed .strip() three times) and guard
    # against None as well as whitespace-only input.
    text = new_text.strip() if new_text else ""
    if not text:
        return "⚠️ Empty input – nothing added."
    # Append to file
    with open(DATA_FILE, 'a', encoding='utf-8') as f:
        f.write(text + "\n")
    # Update in-memory structures so search sees the new entry immediately.
    documents.append(text)
    vec = model.encode([text], convert_to_numpy=True)
    index.add(vec)
    return f"βœ… Added. Store now contains {len(documents)} items."
def meta_representation():
    """Return a condensed hex fingerprint of the whole knowledge base.

    Averages all stored embeddings (reconstructed from the FAISS index) and
    renders the resulting float32 vector's raw bytes as a hex string.

    Returns:
        A truncated hex fingerprint, or a notice when the store is empty.
    """
    if not documents:
        return "Vector store empty – no meta representation."
    # Compute average embedding over every vector held by the index.
    avg = np.mean(index.reconstruct_n(0, index.ntotal), axis=0)
    # BUG FIX: embeddings have negative components, so the old
    # (avg * 255).astype(int) produced negative ints and f"{b:02x}" emitted
    # "-xx" fragments — the output was not valid hex. Encoding the raw
    # float32 bytes is always well-formed and still deterministic.
    hex_repr = avg.astype(np.float32).tobytes().hex()
    return f"🧠 Meta-conscious base (hex): {hex_repr[:64]}… (truncated)"
def llama_query(prompt, max_tokens=100, timeout=60):
    """Query the local llama.cpp server's /completion endpoint.

    Args:
        prompt: Text prompt forwarded to the server.
        max_tokens: Generation budget; coerced to int because the Gradio
            slider delivers floats.
        timeout: Seconds before the HTTP request is aborted (new parameter,
            backward-compatible default).

    Returns:
        The generated text, or a human-readable error string — this function
        never raises, so the UI always gets something to display.
    """
    try:
        response = requests.post(
            "http://localhost:8080/completion",
            json={
                "prompt": prompt,
                "n_predict": int(max_tokens),  # server expects an integer
                "temperature": 0.7,
            },
            # BUG FIX: no timeout meant a dead/unreachable server hung the
            # Gradio worker forever.
            timeout=timeout,
        )
        if response.status_code == 200:
            return response.json().get("content", "No content returned.")
        else:
            return f"Error: {response.status_code} - {response.text}"
    except Exception as e:
        # Broad on purpose: connection errors, timeouts, and bad JSON all
        # collapse into a displayable message rather than a traceback.
        return f"Failed to connect to llama server: {str(e)}"
# Gradio UI: one tab per capability, each button wired straight to its handler.
with gr.Blocks() as demo:
    gr.Markdown("# 🧬 MCP Recursive Vector Base & Meta-Bewusstsein with Llama Server")

    with gr.Tab("πŸ”Ž Search"):
        query_in = gr.Textbox(label="Query (Vector Search)")
        k_slider = gr.Slider(1, 10, value=3, step=1, label="Top-k results")
        search_btn = gr.Button("Search")
        search_out = gr.Textbox(label="Results")
        # Pass the handler directly; the lambda wrapper added nothing.
        search_btn.click(fn=query_vector, inputs=[query_in, k_slider], outputs=search_out)

    with gr.Tab("βž• Add Knowledge"):
        add_in = gr.Textbox(label="New sentence / knowledge")
        add_btn = gr.Button("Add to Vector Store")
        add_out = gr.Textbox(label="Status")
        add_btn.click(fn=add_knowledge, inputs=add_in, outputs=add_out)

    with gr.Tab("πŸŒ€ Meta-Base"):
        meta_btn = gr.Button("Generate Meta Representation")
        meta_out = gr.Textbox(label="Meta-Vector (hex)")
        meta_btn.click(fn=meta_representation, inputs=None, outputs=meta_out)

    with gr.Tab("πŸ€– Llama Server (CUDA)"):
        llama_prompt = gr.Textbox(label="Prompt for Llama")
        max_tok = gr.Slider(50, 500, value=100, label="Max Tokens")
        llama_btn = gr.Button("Query Llama")
        llama_out = gr.Textbox(label="Llama Response")
        # Direct reference instead of `lambda p, m: llama_query(p, m)`.
        llama_btn.click(fn=llama_query, inputs=[llama_prompt, max_tok], outputs=llama_out)

if __name__ == "__main__":
    demo.launch()