Update README: add LSH vocabulary filter to architecture

7c67638 verified 27 days ago

4.66 kB

	"""
	Thin HuggingFace-style wrapper around the webmind-brain engine.

	Usage:
	    from webmind import Brain
	    brain = Brain.from_pretrained("webmind/webmind-brain-v1")
	    brain.ask("capital of france")
	"""

	import os
	import sys
	import json
	import shutil
	from pathlib import Path


	def _find_engine():
	    """Return the directory holding the brain engine source, or None.

	    Three fixed locations are probed first, in order; a location counts only
	    if it contains a ``brain.py`` file. When none of them matches, the
	    ``WEBMIND_ENGINE_PATH`` environment variable is used as a fallback and is
	    returned verbatim as long as the path it names exists.
	    """
	    here = Path(__file__).parent
	    research_tail = ("webmind-research", "papers", "new-gen-ai", "src")
	    search_roots = (
	        here.parent / "engine" / "src",
	        here.parent.parent.joinpath(*research_tail),
	        Path.home().joinpath(*research_tail),
	    )

	    # First fixed location that actually ships brain.py wins.
	    match = next(
	        (root for root in search_roots if (root / "brain.py").exists()),
	        None,
	    )
	    if match is not None:
	        return str(match)

	    # Fallback: explicit location supplied through the environment.
	    override = os.environ.get("WEBMIND_ENGINE_PATH")
	    if not override or not Path(override).exists():
	        return None
	    return override


	class Brain:
	    """HuggingFace-compatible wrapper for webmind-brain.

	    Every public method delegates to an engine object created in
	    ``__init__`` from the ``brain`` module located by ``_find_engine()``.
	    Instances can be used as context managers; leaving the ``with`` block
	    calls ``close()``.
	    """

	    def __init__(self, db_path=None):
	        """Locate the engine source, import it, and create the engine.

	        Args:
	            db_path: Forwarded unchanged to the engine constructor.

	        Raises:
	            ImportError: If no engine source directory can be found.
	        """
	        engine_path = _find_engine()
	        if engine_path is None:
	            raise ImportError(
	                "Could not find webmind-brain engine source. "
	                "Set WEBMIND_ENGINE_PATH to the 'src/' directory, or install from the repo."
	            )
	        # Make the engine directory importable so ``from brain import ...`` resolves.
	        if engine_path not in sys.path:
	            sys.path.insert(0, engine_path)

	        from brain import Brain as BrainEngine
	        self._engine = BrainEngine(db_path=db_path)

	    @staticmethod
	    def _copy_db_files(source_dir, target_dir):
	        """Copy every top-level ``*.db`` file from source_dir into target_dir.

	        Returns the sorted list of file names that were copied.
	        """
	        destination = Path(target_dir)
	        copied = []
	        # "*.db" matches by suffix; a bare ".db" pattern would only match a
	        # file literally named ".db" and skip neurons.db entirely.
	        for db_file in sorted(Path(source_dir).glob("*.db")):
	            shutil.copy2(db_file, destination / db_file.name)
	            copied.append(db_file.name)
	        return copied

	    @classmethod
	    def from_pretrained(cls, model_id: str, db_path: str = None, **kwargs):
	        """
	        Load a brain, optionally downloading from HuggingFace Hub.

	        Resolution order:
	          1. ``model_id`` is a local directory containing ``neurons.db``:
	             use ``db_path`` if given (and non-empty), else that directory.
	          2. ``db_path`` is given: use it as-is, no download is attempted.
	          3. Otherwise use ``~/.cache/webmind/<model_id with "/" -> "--">``;
	             if it has no ``neurons.db`` yet, try to fetch the repo from the
	             Hub and copy its ``*.db`` files into that directory.

	        The download is best-effort: any failure (``huggingface_hub`` not
	        installed, network error, unknown repo, ...) is reported as a
	        ``RuntimeWarning`` and loading continues with whatever is in the
	        cache directory.

	        Args:
	            model_id: HuggingFace model ID (e.g. "webmind/webmind-brain-v1")
	                or local path to a brain database directory.
	            db_path: Override path for the neuron database.
	            **kwargs: Accepted for call compatibility; currently unused.

	        Returns:
	            A new instance constructed with the resolved ``db_path``.
	        """
	        # Case 1: local directory that already holds a database.
	        if os.path.isdir(model_id) and os.path.exists(os.path.join(model_id, "neurons.db")):
	            return cls(db_path=db_path or model_id)

	        # Case 2: caller pinned the database location explicitly.
	        if db_path is not None:
	            return cls(db_path=db_path)

	        # Case 3: per-model cache directory under the user's home.
	        cache_dir = Path.home() / ".cache" / "webmind" / model_id.replace("/", "--")
	        cache_dir.mkdir(parents=True, exist_ok=True)

	        if not (cache_dir / "neurons.db").exists():
	            try:
	                from huggingface_hub import snapshot_download
	                downloaded = snapshot_download(model_id, cache_dir=str(cache_dir.parent))
	                cls._copy_db_files(downloaded, cache_dir)
	            except Exception as exc:
	                # Deliberately non-fatal, but no longer silent: the caller
	                # should know the download was skipped.
	                import warnings
	                warnings.warn(
	                    f"Could not download {model_id!r} from the HuggingFace Hub "
	                    f"({exc!r}); continuing with the contents of {cache_dir}.",
	                    RuntimeWarning,
	                    stacklevel=2,
	                )

	        return cls(db_path=str(cache_dir))

	    def ask(self, question: str) -> dict:
	        """
	        Ask the brain a question.

	        Returns dict with keys:
	            answer: str — the answer text
	            confidence: float — how confident (0-1)
	            strategy: str — how it was answered (convergence/co-occurrence/abstain)
	            trace: str — reasoning trace
	        """
	        return self._engine.ask(question)

	    def generate(self, prompt: str, max_tokens: int = 30, temperature: float = 0.7) -> dict:
	        """
	        Generate fluent text steered by a prompt.

	        Returns dict with keys:
	            text: str — generated text
	            trace: list — per-token generation trace
	            tokens_generated: int
	        """
	        return self._engine.generate(prompt, max_tokens=max_tokens, temperature=temperature)

	    def teach(self, sentence: str, confidence: float = 0.6) -> list:
	        """Teach the brain a new fact. Returns neuron IDs."""
	        return self._engine.teach(sentence, confidence=confidence)

	    def teach_batch(self, sentences: list, confidence: float = 0.6) -> list:
	        """Teach multiple sentences at once."""
	        return self._engine.teach_batch(sentences, confidence=confidence)

	    def flush(self):
	        """Persist all pending teaches to disk."""
	        self._engine.flush()

	    def health(self) -> dict:
	        """Get brain health metrics."""
	        return self._engine.health()

	    def close(self):
	        """Clean shutdown."""
	        self._engine.close()

	    def __enter__(self):
	        """Enter a ``with`` block; the wrapper itself is the managed object."""
	        return self

	    def __exit__(self, *args):
	        """Close the engine on leaving a ``with`` block; exceptions propagate."""
	        self.close()