import os
import gradio as gr
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import requests # For calling llama server
# Plain-text knowledge store: one document per non-empty line.
DATA_FILE = "data.txt"

# Sentence-embedding model shared by every handler below.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Read the previously stored documents, skipping blank lines.
documents = []
if os.path.exists(DATA_FILE):
    with open(DATA_FILE, 'r', encoding='utf-8') as f:
        documents = [entry for entry in map(str.strip, f) if entry]

# Embed whatever was loaded; with nothing on disk, ask the model for its
# embedding width so an empty index of the right dimension can be created.
if documents:
    embeddings = model.encode(documents, convert_to_numpy=True, show_progress_bar=False)
    dim = embeddings.shape[1]
else:
    dim = model.get_sentence_embedding_dimension()
    embeddings = np.empty((0, dim), dtype='float32')

# Flat L2 index; vectors are only added when there is something to add.
index = faiss.IndexFlatL2(dim)
if documents:
    index.add(embeddings)
def query_vector(text, k=3):
    """Return the top-k stored sentences most similar to a query.

    Args:
        text: Query string to embed and search for.
        k: Maximum number of neighbours to return. Coerced to ``int`` and
            clamped to the number of vectors currently in the index.

    Returns:
        A newline-separated string with one ``"<sentence> (dist=<d>)"`` entry
        per hit, where ``<d>`` is the distance reported by the FAISS index,
        or a notice if the store is empty.
    """
    if not documents:
        return "Vector store is empty. Add knowledge first."
    # UI sliders can deliver floats, while FAISS expects an integer k.
    # Asking for more neighbours than are stored makes FAISS pad the result
    # with label -1, which would silently index documents[-1]; clamp instead.
    k = max(1, min(int(k), index.ntotal))
    vec = model.encode([text], convert_to_numpy=True)
    distances, labels = index.search(vec, k)
    results = [
        f"{documents[idx]} (dist={dist:.4f})"
        for dist, idx in zip(distances[0], labels[0])
        # Defensive: drop padding labels (-1) and anything out of range.
        if 0 <= idx < len(documents)
    ]
    return "\n".join(results)
def add_knowledge(new_text):
    """Append a new sentence to the store and update the index.

    The text is flattened to a single line first, because the data file holds
    one document per line; embedded line breaks would otherwise be reloaded
    as several separate documents on the next start.

    Args:
        new_text: Sentence to store. ``None``, empty or whitespace-only input
            is rejected.

    Returns:
        A status message for display in the UI.
    """
    # Join non-empty lines with single spaces; spacing inside a line is kept.
    pieces = (part.strip() for part in (new_text or "").splitlines())
    sentence = " ".join(part for part in pieces if part)
    if not sentence:
        return "⚠️ Empty input — nothing added."
    # Encode before touching the file or the in-memory list so that a failing
    # model call cannot leave the documents out of step with the index.
    vec = model.encode([sentence], convert_to_numpy=True)
    # Append to file
    with open(DATA_FILE, 'a', encoding='utf-8') as f:
        f.write(sentence + "\n")
    # Update in-memory structures
    documents.append(sentence)
    index.add(vec)
    return f"✅ Added. Store now contains {len(documents)} items."
def meta_representation():
    """Return a condensed representation of the whole knowledge base.

    All vectors held by the index are averaged, each component of the mean is
    quantised to one unsigned byte, and the first 64 hex characters of that
    byte string are returned.

    Returns:
        A display string containing the truncated hex digest, or a notice if
        the store is empty.
    """
    if not documents:
        return "Vector store empty — no meta representation."
    # Compute average embedding
    stored = index.reconstruct_n(0, index.ntotal)
    avg = np.mean(stored, axis=0)
    # Mean components can be negative, and formatting a negative int with
    # ":02x" yields "-1f"-style tokens, i.e. not valid fixed-width hex.
    # Map [-1, 1] onto 0..255 instead; this assumes roughly unit-scale
    # embeddings, and anything outside that range is clipped.
    quantised = np.clip(np.rint((avg + 1.0) * 127.5), 0, 255).astype(np.uint8)
    # Two hex digits per component, always.
    hex_repr = quantised.tobytes().hex()
    return f"🧠 Meta-conscious base (hex): {hex_repr[:64]}… (truncated)"
def llama_query(prompt, max_tokens=100, timeout=120):
    """Send a completion request to the local llama server.

    Posts to ``http://localhost:8080/completion`` and returns the ``content``
    field of the JSON reply. The function never raises for transport or
    protocol problems; it returns a readable error string instead so the UI
    can display it.

    Args:
        prompt: Prompt text forwarded to the server.
        max_tokens: Upper bound on generated tokens (sent as ``n_predict``).
            Coerced to ``int`` because UI sliders may deliver floats.
        timeout: Seconds to wait for the server before giving up; without a
            timeout a stalled server would block the caller indefinitely.

    Returns:
        The generated text, or a message describing what went wrong.
    """
    try:
        n_predict = int(max_tokens)
    except (TypeError, ValueError):
        return f"Error: max_tokens must be a number, got {max_tokens!r}"

    payload = {
        "prompt": prompt,
        "n_predict": n_predict,
        "temperature": 0.7,
    }
    try:
        response = requests.post(
            "http://localhost:8080/completion", json=payload, timeout=timeout
        )
    except requests.RequestException as e:
        return f"Failed to connect to llama server: {str(e)}"

    if response.status_code != 200:
        return f"Error: {response.status_code} - {response.text}"

    # A 200 reply with a non-JSON or non-object body is a server-side problem,
    # not a connection failure, so it is reported separately.
    try:
        return response.json().get("content", "No content returned.")
    except (ValueError, AttributeError) as e:
        return f"Invalid response from llama server: {str(e)}"
# Gradio front end: one tab per operation, each wiring a button to its handler.
with gr.Blocks() as demo:
    gr.Markdown("# 𧬠MCP Recursive Vector Base & Meta-Bewusstsein with Llama Server")

    # Similarity search over the stored sentences.
    with gr.Tab("π Search"):
        query_in = gr.Textbox(label="Query (Vector Search)")
        k_slider = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Top-k results")
        search_btn = gr.Button("Search")
        search_out = gr.Textbox(label="Results")
        search_btn.click(
            lambda q, k: query_vector(q, k),
            [query_in, k_slider],
            search_out,
        )

    # Append a sentence to the store.
    with gr.Tab("β Add Knowledge"):
        add_in = gr.Textbox(label="New sentence / knowledge")
        add_btn = gr.Button("Add to Vector Store")
        add_out = gr.Textbox(label="Status")
        add_btn.click(add_knowledge, add_in, add_out)

    # Summarise the whole store as a hex string; takes no inputs.
    with gr.Tab("π Meta-Base"):
        meta_btn = gr.Button("Generate Meta Representation")
        meta_out = gr.Textbox(label="Meta-Vector (hex)")
        meta_btn.click(meta_representation, None, meta_out)

    # Forward a prompt to the llama server.
    with gr.Tab("π€ Llama Server (CUDA)"):
        llama_prompt = gr.Textbox(label="Prompt for Llama")
        max_tok = gr.Slider(minimum=50, maximum=500, value=100, label="Max Tokens")
        llama_btn = gr.Button("Query Llama")
        llama_out = gr.Textbox(label="Llama Response")
        llama_btn.click(
            lambda p, m: llama_query(p, m),
            [llama_prompt, max_tok],
            llama_out,
        )

# Start the web app only when executed as a script.
if __name__ == "__main__":
    demo.launch()