diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..077e6069891fda5b7fcf5bd049cbcdc4eef455c9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.wasm +*.db diff --git a/BUGFIX-README.md b/BUGFIX-README.md new file mode 100644 index 0000000000000000000000000000000000000000..4a3a1101c71e31bf30055d037f4a8383d4ba0b7e --- /dev/null +++ b/BUGFIX-README.md @@ -0,0 +1,33 @@ +# RuVector — Bug Fixes + +This is a patched version of [ruvector](https://github.com/ruvector/ruvector) with two critical bugs fixed. + +## Bugs Fixed + +### Bug 1: CLI `create` command fails with "Missing field `dimensions`" + +**Symptom:** `npx ruvector create ./db -d 384` fails with `Missing field 'dimensions'` + +**Root Cause:** The CLI passes `{ dimension: 384 }` (singular) to the `VectorDB` constructor, but the native Rust binding (`@ruvector/core`) expects `{ dimensions: 384 }` (plural). + +**Fix:** The `VectorDBWrapper` constructor now normalizes `dimension` → `dimensions` automatically. (`dist/index.js`) + +### Bug 2: JS API insert fails with "Dimension mismatch: expected 384, got 0" + +**Symptom:** `await db.insert([...384 floats...], metadata)` fails with dimension mismatch even though the vector has the correct length. + +**Root Cause:** The `insert()` method only accepted object-style args `insert({vector, metadata})`, but users naturally call it with positional args `insert(vector, metadata)`. When a Float32Array was passed as the first arg, `entry.vector` was `undefined`, creating an empty Float32Array(0). 
+ +**Fix:** Both `insert()` and `search()` now accept positional arguments in addition to object-style: +- `db.insert(vector, metadata)` — positional style (new) +- `db.insert({vector, metadata})` — object style (still works) +- `db.search(vector, k)` — positional style (new) +- `db.search({vector, k})` — object style (still works) + +## Files Modified + +- `dist/index.js` — VectorDBWrapper class (constructor, insert, search methods) + +## Original Repository + +https://github.com/ruvector/ruvector diff --git a/HOOKS.md b/HOOKS.md new file mode 100644 index 0000000000000000000000000000000000000000..7ad692031f517caf9b9358920c343eee42b3bf08 --- /dev/null +++ b/HOOKS.md @@ -0,0 +1,221 @@ +# RuVector Hooks for Claude Code + +Self-learning intelligence hooks that enhance Claude Code with Q-learning, vector memory, and automatic agent routing. + +## Quick Start + +```bash +# Full setup: hooks + pretrain + optimized agents +npx ruvector hooks init --pretrain --build-agents quality + +# Or step by step: +npx ruvector hooks init # Setup hooks +npx ruvector hooks pretrain # Analyze repository +npx ruvector hooks build-agents # Generate agent configs +``` + +## What It Does + +RuVector hooks integrate with Claude Code to provide: + +| Feature | Description | +|---------|-------------| +| **Agent Routing** | Suggests the best agent for each file type based on learned patterns | +| **Co-edit Patterns** | Predicts "likely next files" from git history | +| **Vector Memory** | Semantic recall of project context | +| **Command Analysis** | Risk assessment for bash commands | +| **Self-Learning** | Q-learning improves suggestions over time | + +## Commands + +### Initialization + +```bash +# Full configuration +npx ruvector hooks init + +# With pretrain and agent building +npx ruvector hooks init --pretrain --build-agents security + +# Minimal (basic hooks only) +npx ruvector hooks init --minimal + +# Options +--force # Overwrite existing settings +--minimal # Basic hooks only 
+--pretrain # Run pretrain after init +--build-agents <focus> # Generate optimized agents (quality|speed|security|testing|fullstack) +--no-claude-md # Skip CLAUDE.md creation +--no-permissions # Skip permissions config +--no-env # Skip environment variables +--no-gitignore # Skip .gitignore update +--no-mcp # Skip MCP server config +--no-statusline # Skip status line config +``` + +### Pretrain + +Analyze your repository to bootstrap intelligence: + +```bash +npx ruvector hooks pretrain + +# Options +--depth <n> # Git history depth (default: 100) +--verbose # Show detailed progress +--skip-git # Skip git history analysis +--skip-files # Skip file structure analysis +``` + +**What it learns:** +- File type → Agent mapping (`.rs` → rust-developer) +- Co-edit patterns from git history +- Directory → Agent mapping +- Project context memories + +### Build Agents + +Generate optimized `.claude/agents/` configurations: + +```bash +npx ruvector hooks build-agents --focus quality + +# Focus modes +--focus quality # Code quality, best practices (default) +--focus speed # Rapid development, prototyping +--focus security # OWASP, input validation, encryption +--focus testing # TDD, comprehensive coverage +--focus fullstack # Balanced frontend/backend/database + +# Options +--output <dir> # Output directory (default: .claude/agents) +--format <format> # yaml, json, or md (default: yaml) +--include-prompts # Include system prompts in agent configs +``` + +### Verification & Diagnostics + +```bash +# Check if hooks are working +npx ruvector hooks verify + +# Diagnose and fix issues +npx ruvector hooks doctor +npx ruvector hooks doctor --fix +``` + +### Data Management + +```bash +# View statistics +npx ruvector hooks stats + +# Export intelligence data +npx ruvector hooks export -o backup.json +npx ruvector hooks export --include-all + +# Import intelligence data +npx ruvector hooks import backup.json +npx ruvector hooks import backup.json --merge +``` + +### Memory Operations + +```bash +# Store context 
in vector memory +npx ruvector hooks remember "API uses JWT auth" -t project + +# Semantic search memory +npx ruvector hooks recall "authentication" + +# Route a task to best agent +npx ruvector hooks route "implement user login" +``` + +## Hook Events + +| Event | Trigger | RuVector Action | +|-------|---------|-----------------| +| **PreToolUse** | Before Edit/Write/Bash | Agent routing, file analysis, command risk | +| **PostToolUse** | After Edit/Write/Bash | Q-learning update, pattern recording | +| **SessionStart** | Conversation begins | Load intelligence, display stats | +| **Stop** | Conversation ends | Save learning data | +| **UserPromptSubmit** | User sends message | Context suggestions | +| **PreCompact** | Before context compaction | Preserve important context | +| **Notification** | Any notification | Track events for learning | + +## Generated Files + +After running `hooks init`: + +``` +your-project/ +├── .claude/ +│ ├── settings.json # Hooks configuration +│ ├── statusline.sh # Status bar script +│ └── agents/ # Generated agents (with --build-agents) +│ ├── rust-specialist.yaml +│ ├── typescript-specialist.yaml +│ ├── test-architect.yaml +│ └── project-coordinator.yaml +├── .ruvector/ +│ └── intelligence.json # Learning data +├── CLAUDE.md # Project documentation +└── .gitignore # Updated with .ruvector/ +``` + +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `RUVECTOR_INTELLIGENCE_ENABLED` | `true` | Enable/disable intelligence | +| `RUVECTOR_LEARNING_RATE` | `0.1` | Q-learning rate (0.0-1.0) | +| `RUVECTOR_MEMORY_BACKEND` | `rvlite` | Memory storage backend | +| `INTELLIGENCE_MODE` | `treatment` | A/B testing mode | + +## Example Output + +### Agent Routing +``` +🧠 Intelligence Analysis: + 📁 src/api/routes.ts + 🤖 Recommended: typescript-developer (85% confidence) + → learned from 127 .ts files in repo + 📎 Likely next files: + - src/api/handlers.ts (12 co-edits) + - src/types/api.ts (8 
co-edits) +``` + +### Command Analysis +``` +🧠 Command Analysis: + 📦 Category: rust + 🏷️ Type: test + ✅ Risk: LOW +``` + +## Best Practices + +1. **Run pretrain on existing repos** — Bootstrap intelligence before starting work +2. **Use focus modes** — Match agent generation to your current task +3. **Export before major changes** — Backup learning data +4. **Let it learn** — Intelligence improves with each edit + +## Troubleshooting + +```bash +# Check setup +npx ruvector hooks verify + +# Fix common issues +npx ruvector hooks doctor --fix + +# Reset and reinitialize +npx ruvector hooks init --force --pretrain +``` + +## Links + +- [RuVector GitHub](https://github.com/ruvnet/ruvector) +- [npm Package](https://www.npmjs.com/package/ruvector) +- [Claude Code Documentation](https://docs.anthropic.com/claude-code) diff --git a/PACKAGE_SUMMARY.md b/PACKAGE_SUMMARY.md new file mode 100644 index 0000000000000000000000000000000000000000..6c06da4bfe2d22457f5e5eb70db66267a3a53630 --- /dev/null +++ b/PACKAGE_SUMMARY.md @@ -0,0 +1,409 @@ +# ruvector Package Summary + +## Overview + +The main `ruvector` package provides a unified interface for high-performance vector database operations in Node.js, with automatic platform detection and smart fallback between native (Rust) and WASM implementations. 
+ +## Package Structure + +``` +/workspaces/ruvector/npm/packages/ruvector/ +├── src/ # TypeScript source +│ ├── index.ts # Smart loader with platform detection +│ └── types.ts # TypeScript type definitions +├── dist/ # Compiled JavaScript and types +│ ├── index.js # Main entry point +│ ├── index.d.ts # Type definitions +│ ├── types.js # Compiled types +│ └── types.d.ts # Type definitions +├── bin/ +│ └── cli.js # CLI tool +├── test/ +│ ├── mock-implementation.js # Mock VectorDB for testing +│ ├── standalone-test.js # Package structure tests +│ └── integration.js # Integration tests +├── examples/ +│ ├── api-usage.js # API usage examples +│ └── cli-demo.sh # CLI demonstration +├── package.json # NPM package configuration +├── tsconfig.json # TypeScript configuration +└── README.md # Package documentation +``` + +## Key Features + +### 1. Smart Platform Detection + +The package automatically detects and loads the best available implementation: + +```typescript +// Tries to load in this order: +// 1. @ruvector/core (native Rust, fastest) +// 2. @ruvector/wasm (WebAssembly, universal fallback) + +import { VectorDB, getImplementationType, isNative, isWasm } from 'ruvector'; + +console.log(getImplementationType()); // 'native' or 'wasm' +console.log(isNative()); // true if using native +console.log(isWasm()); // true if using WASM +``` + +### 2. Complete TypeScript Support + +Full type definitions for all APIs: + +```typescript +interface VectorEntry { + id: string; + vector: number[]; + metadata?: Record<string, any>; +} + +interface SearchQuery { + vector: number[]; + k?: number; + filter?: Record<string, any>; + threshold?: number; +} + +interface SearchResult { + id: string; + score: number; + vector: number[]; + metadata?: Record<string, any>; +} + +interface DbOptions { + dimension: number; + metric?: 'cosine' | 'euclidean' | 'dot'; + path?: string; + autoPersist?: boolean; + hnsw?: { + m?: number; + efConstruction?: number; + efSearch?: number; + }; +} +``` + +### 3. 
VectorDB API + +Comprehensive vector database operations: + +```typescript +const db = new VectorDB({ + dimension: 384, + metric: 'cosine' +}); + +// Insert operations +db.insert({ id: 'doc1', vector: [...], metadata: {...} }); +db.insertBatch([...entries]); + +// Search operations +const results = db.search({ + vector: [...], + k: 10, + threshold: 0.7 +}); + +// CRUD operations +const entry = db.get('doc1'); +db.updateMetadata('doc1', { updated: true }); +db.delete('doc1'); + +// Database management +const stats = db.stats(); +db.save('./mydb.vec'); +db.load('./mydb.vec'); +db.buildIndex(); +db.optimize(); +``` + +### 4. CLI Tools + +Command-line interface for database operations: + +```bash +# Create database +ruvector create mydb.vec --dimension 384 --metric cosine + +# Insert vectors +ruvector insert mydb.vec vectors.json --batch-size 1000 + +# Search +ruvector search mydb.vec --vector "[0.1,0.2,...]" --top-k 10 + +# Statistics +ruvector stats mydb.vec + +# Benchmark +ruvector benchmark --num-vectors 10000 --num-queries 1000 + +# Info +ruvector info +``` + +## API Reference + +### Constructor + +```typescript +new VectorDB(options: DbOptions): VectorDB +``` + +### Methods + +- `insert(entry: VectorEntry): void` - Insert single vector +- `insertBatch(entries: VectorEntry[]): void` - Batch insert +- `search(query: SearchQuery): SearchResult[]` - Search similar vectors +- `get(id: string): VectorEntry | null` - Get by ID +- `delete(id: string): boolean` - Delete vector +- `updateMetadata(id: string, metadata: Record<string, any>): void` - Update metadata +- `stats(): DbStats` - Get database statistics +- `save(path?: string): void` - Save to disk +- `load(path: string): void` - Load from disk +- `clear(): void` - Clear all vectors +- `buildIndex(): void` - Build HNSW index +- `optimize(): void` - Optimize database + +### Utility Functions + +- `getImplementationType(): 'native' | 'wasm'` - Get current implementation +- `isNative(): boolean` - Check if using native +- 
`isWasm(): boolean` - Check if using WASM +- `getVersion(): { version: string, implementation: string }` - Get version info + +## Dependencies + +### Production Dependencies + +- `commander` (^11.1.0) - CLI framework +- `chalk` (^4.1.2) - Terminal styling +- `ora` (^5.4.1) - Spinners and progress + +### Optional Dependencies + +- `@ruvector/core` (^0.1.1) - Native Rust bindings (when available) +- `@ruvector/wasm` (^0.1.1) - WebAssembly module (fallback) + +### Dev Dependencies + +- `typescript` (^5.3.3) - TypeScript compiler +- `@types/node` (^20.10.5) - Node.js type definitions + +## Package.json Configuration + +```json +{ + "name": "ruvector", + "version": "0.1.1", + "main": "dist/index.js", + "types": "dist/index.d.ts", + "bin": { + "ruvector": "./bin/cli.js" + }, + "scripts": { + "build": "tsc", + "test": "node test/standalone-test.js" + } +} +``` + +## Build Process + +```bash +# Install dependencies +npm install + +# Build TypeScript +npm run build + +# Run tests +npm test + +# Package for NPM +npm pack +``` + +## Testing + +The package includes comprehensive tests: + +### 1. Standalone Test (`test/standalone-test.js`) + +Tests package structure and API using mock implementation: +- Package structure validation +- TypeScript type definitions +- VectorDB API functionality +- CLI structure +- Smart loader logic + +### 2. Integration Test (`test/integration.js`) + +Tests integration with real implementations when available. + +### 3. Mock Implementation (`test/mock-implementation.js`) + +JavaScript-based VectorDB implementation for testing and demonstration purposes. + +## Examples + +### API Usage (`examples/api-usage.js`) + +Demonstrates: +- Basic CRUD operations +- Batch operations +- Semantic search +- Different distance metrics +- Performance benchmarking +- Persistence + +### CLI Demo (`examples/cli-demo.sh`) + +Bash script demonstrating CLI tools. 
+ +## Usage Examples + +### Simple Vector Search + +```javascript +const { VectorDB } = require('ruvector'); + +const db = new VectorDB({ dimension: 3 }); + +db.insertBatch([ + { id: 'cat', vector: [0.9, 0.1, 0.1], metadata: { animal: 'cat' } }, + { id: 'dog', vector: [0.1, 0.9, 0.1], metadata: { animal: 'dog' } }, + { id: 'tiger', vector: [0.8, 0.2, 0.15], metadata: { animal: 'tiger' } } +]); + +const results = db.search({ + vector: [0.9, 0.1, 0.1], + k: 2 +}); + +console.log(results); +// [ +// { id: 'cat', score: 1.0, ... }, +// { id: 'tiger', score: 0.97, ... } +// ] +``` + +### Semantic Document Search + +```javascript +const db = new VectorDB({ dimension: 768, metric: 'cosine' }); + +// Insert documents with embeddings (from your embedding model) +db.insertBatch([ + { id: 'doc1', vector: embedding1, metadata: { title: 'AI Guide' } }, + { id: 'doc2', vector: embedding2, metadata: { title: 'Web Dev' } } +]); + +// Search with query embedding +const results = db.search({ + vector: queryEmbedding, + k: 10, + threshold: 0.7 +}); +``` + +### Persistence + +```javascript +const db = new VectorDB({ + dimension: 384, + path: './vectors.db', + autoPersist: true +}); + +// Changes automatically saved +db.insert({ id: 'doc1', vector: [...] }); + +// Or manual save +db.save('./backup.db'); + +// Load from disk +db.load('./vectors.db'); +``` + +## Performance Characteristics + +### Mock Implementation (JavaScript) +- Insert: ~1M vectors/sec (batch) +- Search: ~400 queries/sec (1000 vectors, k=10) + +### Native Implementation (Rust) +- Insert: ~10M+ vectors/sec (batch) +- Search: ~100K+ queries/sec with HNSW index +- 150x faster than pgvector + +### WASM Implementation +- Insert: ~1M+ vectors/sec (batch) +- Search: ~10K+ queries/sec with HNSW index +- ~10x faster than pure JavaScript + +## Integration with Other Packages + +This package serves as the main interface and coordinates between: + +1. 
**@ruvector/core** - Native Rust bindings (napi-rs) + - Platform-specific native modules + - Maximum performance + - Optional dependency + +2. **@ruvector/wasm** - WebAssembly module + - Universal compatibility + - Near-native performance + - Fallback implementation + +## Error Handling + +The package provides clear error messages when implementations are unavailable: + +``` +Failed to load ruvector: Neither native nor WASM implementation available. +Native error: Cannot find module '@ruvector/core' +WASM error: Cannot find module '@ruvector/wasm' +``` + +## Environment Variables + +- `RUVECTOR_DEBUG=1` - Enable debug logging for implementation loading + +## Next Steps + +To complete the package ecosystem: + +1. **Create @ruvector/core** + - napi-rs bindings to Rust code + - Platform-specific builds (Linux, macOS, Windows) + - Native module packaging + +2. **Create @ruvector/wasm** + - wasm-pack build from Rust code + - WebAssembly module + - Universal compatibility layer + +3. **Update Dependencies** + - Add @ruvector/core as optionalDependency + - Add @ruvector/wasm as dependency + - Configure proper fallback chain + +4. 
**Publishing** + - Publish all three packages to npm + - Set up CI/CD for builds + - Create platform-specific releases + +## Version + +Current version: **0.1.1** + +## License + +MIT + +## Repository + +https://github.com/ruvnet/ruvector diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..63f4f8a91022a34b4d6b4bcebb50a7c96dba784a --- /dev/null +++ b/README.md @@ -0,0 +1,2228 @@ +# ruvector + +[![npm version](https://badge.fury.io/js/ruvector.svg)](https://www.npmjs.com/package/ruvector) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) +[![Node Version](https://img.shields.io/node/v/ruvector)](https://nodejs.org) +[![Downloads](https://img.shields.io/npm/dm/ruvector)](https://www.npmjs.com/package/ruvector) +[![Build Status](https://img.shields.io/badge/build-passing-brightgreen.svg)](https://github.com/ruvnet/ruvector) +[![Performance](https://img.shields.io/badge/latency-<0.5ms-green.svg)](https://github.com/ruvnet/ruvector) +[![GitHub Stars](https://img.shields.io/github/stars/ruvnet/ruvector?style=social)](https://github.com/ruvnet/ruvector) + +**The fastest vector database for Node.js—built in Rust, runs everywhere** + +Ruvector is a next-generation vector database that brings **enterprise-grade semantic search** to Node.js applications. Unlike cloud-only solutions or Python-first databases, Ruvector is designed specifically for JavaScript/TypeScript developers who need **blazing-fast vector similarity search** without the complexity of external services. + +> 🚀 **Sub-millisecond queries** • 🎯 **52,000+ inserts/sec** • 💾 **~50 bytes per vector** • 🌍 **Runs anywhere** + +Built by [rUv](https://ruv.io) with production-grade Rust performance and intelligent platform detection—**automatically uses native bindings when available, falls back to WebAssembly when needed**. 
+ +🌐 **[Visit ruv.io](https://ruv.io)** | 📦 **[GitHub](https://github.com/ruvnet/ruvector)** | 📚 **[Documentation](https://github.com/ruvnet/ruvector/tree/main/docs)** + +--- + +## 🧠 Claude Code Intelligence v2.0 + +**Self-learning intelligence for Claude Code** — RuVector provides optimized hooks with ONNX embeddings, AST analysis, and coverage-aware routing. + +```bash +# One-command setup with pretrain and agent generation +npx ruvector hooks init --pretrain --build-agents quality +``` + +### Core Features +- 🎯 **Smart Agent Routing** — Q-learning optimized suggestions with 80%+ accuracy +- 📚 **9-Phase Pretrain** — AST, diff, coverage, neural, and graph analysis +- 🤖 **Agent Builder** — Generates optimized `.claude/agents/` configs +- 🔗 **Co-edit Patterns** — Learns file relationships from git history +- 💾 **Vector Memory** — HNSW-indexed semantic recall (150x faster) + +### New in v2.0 +- ⚡ **ONNX WASM Embeddings** — all-MiniLM-L6-v2 (384d) runs locally, no API needed +- 🌳 **AST Analysis** — Symbol extraction, complexity metrics, import graphs +- 📊 **Diff Embeddings** — Semantic change classification with risk scoring +- 🧪 **Coverage Routing** — Test coverage-aware agent selection +- 🔍 **Graph Algorithms** — MinCut boundaries, Louvain communities, Spectral clustering +- 🛡️ **Security Scanning** — Parallel vulnerability pattern detection +- 🎯 **RAG Context** — Semantic retrieval with HNSW indexing + +### Performance +| Backend | Read Time | Speedup | +|---------|-----------|---------| +| ONNX inference | ~400ms | baseline | +| HNSW search | ~0.045ms | 8,800x | +| Memory cache | ~0.01ms | **40,000x** | + +📖 **[Full Hooks Documentation →](https://github.com/ruvnet/ruvector/blob/main/npm/packages/ruvector/HOOKS.md)** + +### MCP Server Integration + +RuVector includes an MCP server for Claude Code with 30+ tools: + +```bash +# Add to Claude Code +claude mcp add ruvector -- npx ruvector mcp start +``` + +**Available MCP Tools:** +- `hooks_route`, 
`hooks_route_enhanced` — Agent routing with signals +- `hooks_ast_analyze`, `hooks_ast_complexity` — Code structure analysis +- `hooks_diff_analyze`, `hooks_diff_classify` — Change classification +- `hooks_coverage_route`, `hooks_coverage_suggest` — Test-aware routing +- `hooks_graph_mincut`, `hooks_graph_cluster` — Code boundaries +- `hooks_security_scan` — Vulnerability detection +- `hooks_rag_context` — Semantic context retrieval +- `hooks_attention_info`, `hooks_gnn_info` — Neural capabilities + +--- + +## 🌟 Why Ruvector? + +### The Problem with Existing Vector Databases + +Most vector databases force you to choose between three painful trade-offs: + +1. **Cloud-Only Services** (Pinecone, Weaviate Cloud) - Expensive, vendor lock-in, latency issues, API rate limits +2. **Python-First Solutions** (ChromaDB, Faiss) - Poor Node.js support, require separate Python processes +3. **Self-Hosted Complexity** (Milvus, Qdrant) - Heavy infrastructure, Docker orchestration, operational overhead + +**Ruvector eliminates these trade-offs.** + +### The Ruvector Advantage + +Ruvector is purpose-built for **modern JavaScript/TypeScript applications** that need vector search: + +🎯 **Native Node.js Integration** +- Drop-in npm package—no Docker, no Python, no external services +- Full TypeScript support with complete type definitions +- Automatic platform detection with native Rust bindings +- Seamless WebAssembly fallback for universal compatibility + +⚡ **Production-Grade Performance** +- **52,000+ inserts/second** with native Rust (10x faster than Python alternatives) +- **<0.5ms query latency** with HNSW indexing and SIMD optimizations +- **~50 bytes per vector** with advanced memory optimization +- Scales from edge devices to millions of vectors + +🧠 **Built for AI Applications** +- Optimized for LLM embeddings (OpenAI, Cohere, Hugging Face) +- Perfect for RAG (Retrieval-Augmented Generation) systems +- Agent memory and semantic caching +- Real-time recommendation engines + 
+🌍 **Universal Deployment** +- **Linux, macOS, Windows** with native performance +- **Browser support** via WebAssembly (experimental) +- **Edge computing** and serverless environments +- **Alpine Linux** and non-glibc systems supported + +💰 **Zero Operational Costs** +- No cloud API fees or usage limits +- No infrastructure to manage +- No separate database servers +- Open source MIT license + +### Key Advantages + +- ⚡ **Blazing Fast**: <0.5ms p50 latency with native Rust, 10-50ms with WASM fallback +- 🎯 **Automatic Platform Detection**: Uses native when available, falls back to WASM seamlessly +- 🧠 **AI-Native**: Built specifically for embeddings, RAG, semantic search, and agent memory +- 🔧 **CLI Tools Included**: Full command-line interface for database management +- 🌍 **Universal Deployment**: Works on all platforms—Linux, macOS, Windows, even browsers +- 💾 **Memory Efficient**: ~50 bytes per vector with advanced quantization +- 🚀 **Production Ready**: Battle-tested algorithms with comprehensive benchmarks +- 🔓 **Open Source**: MIT licensed, community-driven + +## 🚀 Quick Start Tutorial + +### Step 1: Installation + +Install Ruvector with a single npm command: + +```bash +npm install ruvector +``` + +**What happens during installation:** +- npm automatically detects your platform (Linux, macOS, Windows) +- Downloads the correct native binary for maximum performance +- Falls back to WebAssembly if native binaries aren't available +- No additional setup, Docker, or external services required + +**Windows Installation (without build tools):** +```bash +# Skip native compilation, use WASM fallback +npm install ruvector --ignore-scripts + +# The ONNX WASM runtime (7.4MB) works without build tools +# Memory cache provides 40,000x speedup over inference +``` + +**Verify installation:** +```bash +npx ruvector info +``` + +You should see your platform and implementation type (native Rust or WASM fallback). 
+ +### Step 2: Your First Vector Database + +Let's create a simple vector database and perform basic operations. This example walks through the core workflow: create a database, insert vectors, search for similar vectors, retrieve an entry by ID, and delete it: + +```javascript +const { VectorDb } = require('ruvector'); + +async function tutorial() { + // Step 2.1: Create a new vector database + // The 'dimensions' parameter must match your embedding model + // Common sizes: 128, 384 (sentence-transformers), 768 (BERT), 1536 (OpenAI) + const db = new VectorDb({ + dimensions: 128, // Vector size - MUST match your embeddings + maxElements: 10000, // Maximum vectors (can grow automatically) + storagePath: './my-vectors.db' // Persist to disk (omit for in-memory) + }); + + console.log('✅ Database created successfully'); + + // Step 2.2: Insert vectors + // In real applications, these would come from an embedding model + const documents = [ + { id: 'doc1', text: 'Artificial intelligence and machine learning' }, + { id: 'doc2', text: 'Deep learning neural networks' }, + { id: 'doc3', text: 'Natural language processing' }, + ]; + + for (const doc of documents) { + // Generate random vector for demonstration + // In production: use OpenAI, Cohere, or sentence-transformers + const vector = new Float32Array(128).map(() => Math.random()); + + await db.insert({ + id: doc.id, + vector: vector, + metadata: { + text: doc.text, + timestamp: Date.now(), + category: 'AI' + } + }); + + console.log(`✅ Inserted: ${doc.id}`); + } + + // Step 2.3: Search for similar vectors + // Create a query vector (in production, this would be from your search query) + const queryVector = new Float32Array(128).map(() => Math.random()); + + const results = await db.search({ + vector: queryVector, + k: 5, // Return top 5 most similar vectors + threshold: 0.7 // Only return results with similarity > 0.7 + }); + + console.log('\n🔍 Search Results:'); + results.forEach((result, index) => { + console.log(`${index + 1}. 
${result.id} - Score: ${result.score.toFixed(3)}`); + console.log(` Text: ${result.metadata.text}`); + }); + + // Step 2.4: Retrieve a specific vector + const retrieved = await db.get('doc1'); + if (retrieved) { + console.log('\n📄 Retrieved document:', retrieved.metadata.text); + } + + // Step 2.5: Get database statistics + const count = await db.len(); + console.log(`\n📊 Total vectors in database: ${count}`); + + // Step 2.6: Delete a vector + const deleted = await db.delete('doc1'); + console.log(`\n🗑️ Deleted doc1: ${deleted ? 'Success' : 'Not found'}`); + + // Final count + const finalCount = await db.len(); + console.log(`📊 Final count: ${finalCount}`); +} + +// Run the tutorial +tutorial().catch(console.error); +``` + +**Expected Output:** +``` +✅ Database created successfully +✅ Inserted: doc1 +✅ Inserted: doc2 +✅ Inserted: doc3 + +🔍 Search Results: +1. doc2 - Score: 0.892 + Text: Deep learning neural networks +2. doc1 - Score: 0.856 + Text: Artificial intelligence and machine learning +3. doc3 - Score: 0.801 + Text: Natural language processing + +📄 Retrieved document: Artificial intelligence and machine learning + +📊 Total vectors in database: 3 + +🗑️ Deleted doc1: Success +📊 Final count: 2 +``` + +### Step 3: TypeScript Tutorial + +Ruvector provides full TypeScript support with complete type safety. 
Here's how to use it: + +```typescript +import { VectorDb, VectorEntry, SearchQuery, SearchResult } from 'ruvector'; + +// Step 3.1: Define your custom metadata type +interface DocumentMetadata { + title: string; + content: string; + author: string; + date: Date; + tags: string[]; +} + +async function typescriptTutorial() { + // Step 3.2: Create typed database + const db = new VectorDb<DocumentMetadata>({ + dimensions: 384, // sentence-transformers/all-MiniLM-L6-v2 + maxElements: 10000, + storagePath: './typed-vectors.db' + }); + + // Step 3.3: Type-safe vector entry + const entry: VectorEntry<DocumentMetadata> = { + id: 'article-001', + vector: new Float32Array(384), // Your embedding here + metadata: { + title: 'Introduction to Vector Databases', + content: 'Vector databases enable semantic search...', + author: 'Jane Doe', + date: new Date('2024-01-15'), + tags: ['database', 'AI', 'search'] + } + }; + + // Step 3.4: Insert with type checking + await db.insert(entry); + console.log('✅ Inserted typed document'); + + // Step 3.5: Type-safe search + const query: SearchQuery = { + vector: new Float32Array(384), + k: 10, + threshold: 0.8 + }; + + // Step 3.6: Fully typed results + const results: SearchResult<DocumentMetadata>[] = await db.search(query); + + // TypeScript knows the exact shape of metadata + results.forEach(result => { + console.log(`Title: ${result.metadata.title}`); + console.log(`Author: ${result.metadata.author}`); + console.log(`Tags: ${result.metadata.tags.join(', ')}`); + console.log(`Similarity: ${result.score.toFixed(3)}\n`); + }); + + // Step 3.7: Type-safe retrieval + const doc = await db.get('article-001'); + if (doc) { + // TypeScript autocomplete works perfectly here + const publishYear = doc.metadata.date.getFullYear(); + console.log(`Published in ${publishYear}`); + } +} + +typescriptTutorial().catch(console.error); +``` + +**TypeScript Benefits:** +- ✅ Full autocomplete for all methods and properties +- ✅ Compile-time type checking prevents errors +- ✅ IDE IntelliSense shows documentation 
+- ✅ Custom metadata types for your use case +- ✅ No `any` types - fully typed throughout + +## 🎯 Platform Detection + +Ruvector automatically detects the best implementation for your platform: + +```javascript +const { getImplementationType, isNative, isWasm } = require('ruvector'); + +console.log(getImplementationType()); // 'native' or 'wasm' +console.log(isNative()); // true if using native Rust +console.log(isWasm()); // true if using WebAssembly fallback + +// Performance varies by implementation: +// Native (Rust): <0.5ms latency, 50K+ ops/sec +// WASM fallback: 10-50ms latency, ~1K ops/sec +``` + +## 🔧 CLI Tools + +Ruvector includes a full command-line interface for database management: + +### Create Database + +```bash +# Create a new vector database +npx ruvector create mydb.vec --dimensions 384 --metric cosine + +# Options: +# --dimensions, -d Vector dimensionality (required) +# --metric, -m Distance metric (cosine, euclidean, dot) +# --max-elements Maximum number of vectors (default: 10000) +``` + +### Insert Vectors + +```bash +# Insert vectors from JSON file +npx ruvector insert mydb.vec vectors.json + +# JSON format: +# [ +# { "id": "doc1", "vector": [0.1, 0.2, ...], "metadata": {...} }, +# { "id": "doc2", "vector": [0.3, 0.4, ...], "metadata": {...} } +# ] +``` + +### Search Vectors + +```bash +# Search for similar vectors +npx ruvector search mydb.vec --vector "[0.1,0.2,0.3,...]" --top-k 10 + +# Options: +# --vector, -v Query vector (JSON array) +# --top-k, -k Number of results (default: 10) +# --threshold Minimum similarity score +``` + +### Database Statistics + +```bash +# Show database statistics +npx ruvector stats mydb.vec + +# Output: +# Total vectors: 10,000 +# Dimensions: 384 +# Metric: cosine +# Memory usage: ~500 KB +# Index type: HNSW +``` + +### Benchmarking + +```bash +# Run performance benchmark +npx ruvector benchmark --num-vectors 10000 --num-queries 1000 + +# Options: +# --num-vectors Number of vectors to insert +# --num-queries 
Number of search queries +# --dimensions Vector dimensionality (default: 128) +``` + +### System Information + +```bash +# Show platform and implementation info +npx ruvector info + +# Output: +# Platform: linux-x64-gnu +# Implementation: native (Rust) +# GNN Module: Available +# Node.js: v18.17.0 +# Performance: <0.5ms p50 latency +``` + +### Install Optional Packages + +Ruvector supports optional packages that extend functionality. Use the `install` command to add them: + +```bash +# List available packages +npx ruvector install + +# Output: +# Available Ruvector Packages: +# +# gnn not installed +# Graph Neural Network layers, tensor compression, differentiable search +# npm: @ruvector/gnn +# +# core ✓ installed +# Core vector database with native Rust bindings +# npm: @ruvector/core + +# Install specific package +npx ruvector install gnn + +# Install all optional packages +npx ruvector install --all + +# Interactive selection +npx ruvector install -i +``` + +The install command auto-detects your package manager (npm, yarn, pnpm, bun). + +### GNN Commands + +Ruvector includes Graph Neural Network (GNN) capabilities for advanced tensor compression and differentiable search. 
+ +#### GNN Info + +```bash +# Show GNN module information +npx ruvector gnn info + +# Output: +# GNN Module Information +# Status: Available +# Platform: linux +# Architecture: x64 +# +# Available Features: +# • RuvectorLayer - GNN layer with multi-head attention +# • TensorCompress - Adaptive tensor compression (5 levels) +# • differentiableSearch - Soft attention-based search +# • hierarchicalForward - Multi-layer GNN processing +``` + +#### GNN Layer + +```bash +# Create and test a GNN layer +npx ruvector gnn layer -i 128 -h 256 --test + +# Options: +# -i, --input-dim Input dimension (required) +# -h, --hidden-dim Hidden dimension (required) +# -a, --heads Number of attention heads (default: 4) +# -d, --dropout Dropout rate (default: 0.1) +# --test Run a test forward pass +# -o, --output Save layer config to JSON file +``` + +#### GNN Compress + +```bash +# Compress embeddings using adaptive tensor compression +npx ruvector gnn compress -f embeddings.json -l pq8 -o compressed.json + +# Options: +# -f, --file Input JSON file with embeddings (required) +# -l, --level Compression level: none|half|pq8|pq4|binary (default: auto) +# -a, --access-freq Access frequency for auto compression (default: 0.5) +# -o, --output Output file for compressed data + +# Compression levels: +# none (freq > 0.8) - Full precision, hot data +# half (freq > 0.4) - ~50% savings, warm data +# pq8 (freq > 0.1) - ~8x compression, cool data +# pq4 (freq > 0.01) - ~16x compression, cold data +# binary (freq <= 0.01) - ~32x compression, archive +``` + +#### GNN Search + +```bash +# Differentiable search with soft attention +npx ruvector gnn search -q "[1.0,0.0,0.0]" -c candidates.json -k 5 + +# Options: +# -q, --query Query vector as JSON array (required) +# -c, --candidates Candidates file - JSON array of vectors (required) +# -k, --top-k Number of results (default: 5) +# -t, --temperature Softmax temperature (default: 1.0) +``` + +### Attention Commands + +Ruvector includes high-performance 
attention mechanisms for transformer-based operations, hyperbolic embeddings, and graph attention. + +```bash +# Install the attention module (optional) +npm install @ruvector/attention +``` + +#### Attention Mechanisms Reference + +| Mechanism | Type | Complexity | When to Use | +|-----------|------|------------|-------------| +| **DotProductAttention** | Core | O(n²) | Standard scaled dot-product attention for transformers | +| **MultiHeadAttention** | Core | O(n²) | Parallel attention heads for capturing different relationships | +| **FlashAttention** | Core | O(n²) IO-optimized | Memory-efficient attention for long sequences | +| **HyperbolicAttention** | Core | O(n²) | Hierarchical data, tree-like structures, taxonomies | +| **LinearAttention** | Core | O(n) | Very long sequences where O(n²) is prohibitive | +| **MoEAttention** | Core | O(n*k) | Mixture of Experts routing, specialized attention | +| **GraphRoPeAttention** | Graph | O(n²) | Graph data with rotary position embeddings | +| **EdgeFeaturedAttention** | Graph | O(n²) | Graphs with rich edge features/attributes | +| **DualSpaceAttention** | Graph | O(n²) | Combined Euclidean + hyperbolic representation | +| **LocalGlobalAttention** | Graph | O(n*k) | Large graphs with local + global context | + +#### Attention Info + +```bash +# Show attention module information +npx ruvector attention info + +# Output: +# Attention Module Information +# Status: Available +# Version: 0.1.0 +# Platform: linux +# Architecture: x64 +# +# Core Attention Mechanisms: +# • DotProductAttention - Scaled dot-product attention +# • MultiHeadAttention - Multi-head self-attention +# • FlashAttention - Memory-efficient IO-aware attention +# • HyperbolicAttention - Poincaré ball attention +# • LinearAttention - O(n) linear complexity attention +# • MoEAttention - Mixture of Experts attention +``` + +#### Attention List + +```bash +# List all available attention mechanisms +npx ruvector attention list + +# With verbose details +npx 
ruvector attention list -v +``` + +#### Attention Benchmark + +```bash +# Benchmark attention mechanisms +npx ruvector attention benchmark -d 256 -n 100 -i 100 + +# Options: +# -d, --dimension Vector dimension (default: 256) +# -n, --num-vectors Number of vectors (default: 100) +# -i, --iterations Benchmark iterations (default: 100) +# -t, --types Attention types to benchmark (default: dot,flash,linear) + +# Example output: +# Dimension: 256 +# Vectors: 100 +# Iterations: 100 +# +# dot: 0.012ms/op (84,386 ops/sec) +# flash: 0.012ms/op (82,844 ops/sec) +# linear: 0.066ms/op (15,259 ops/sec) +``` + +#### Hyperbolic Operations + +```bash +# Calculate Poincaré distance between two points +npx ruvector attention hyperbolic -a distance -v "[0.1,0.2,0.3]" -b "[0.4,0.5,0.6]" + +# Project vector to Poincaré ball +npx ruvector attention hyperbolic -a project -v "[1.5,2.0,0.8]" + +# Möbius addition in hyperbolic space +npx ruvector attention hyperbolic -a mobius-add -v "[0.1,0.2]" -b "[0.3,0.4]" + +# Exponential map (tangent space → Poincaré ball) +npx ruvector attention hyperbolic -a exp-map -v "[0.1,0.2,0.3]" + +# Options: +# -a, --action Action: distance|project|mobius-add|exp-map|log-map +# -v, --vector Input vector as JSON array (required) +# -b, --vector-b Second vector for binary operations +# -c, --curvature Poincaré ball curvature (default: 1.0) +``` + +#### When to Use Each Attention Type + +| Use Case | Recommended Attention | Reason | +|----------|----------------------|--------| +| **Standard NLP/Transformers** | MultiHeadAttention | Industry standard, well-tested | +| **Long Documents (>4K tokens)** | FlashAttention or LinearAttention | Memory efficient | +| **Hierarchical Classification** | HyperbolicAttention | Captures tree-like structures | +| **Knowledge Graphs** | GraphRoPeAttention | Position-aware graph attention | +| **Multi-Relational Graphs** | EdgeFeaturedAttention | Leverages edge attributes | +| **Taxonomy/Ontology Search** | DualSpaceAttention | 
Best of both Euclidean + hyperbolic | +| **Large-Scale Graphs** | LocalGlobalAttention | Efficient local + global context | +| **Model Routing/MoE** | MoEAttention | Expert selection and routing | + +### ⚡ ONNX WASM Embeddings (v2.0) + +RuVector includes a WebAssembly (WASM) ONNX runtime for local embeddings - no Python, no API calls, no build tools required. + +```bash +# Embeddings work out of the box +npx ruvector hooks remember "important context" -t project +npx ruvector hooks recall "context query" +npx ruvector hooks rag-context "how does auth work" +``` + +**Model**: all-MiniLM-L6-v2 (384 dimensions, 23MB) +- Downloads automatically on first use +- Cached in `.ruvector/models/` +- SIMD-accelerated when available + +**Performance:** +| Operation | Time | Notes | +|-----------|------|-------| +| Model load | ~2s | First use only | +| Embedding | ~50ms | Per text chunk | +| HNSW search | 0.045ms | 150x faster than brute force | +| Cache hit | 0.01ms | ~5,000x faster than a ~50ms embedding | + +**Fallback Chain:** +1. Native SQLite → best persistence +2. WASM SQLite → cross-platform +3. Memory Cache → fastest (no persistence) + +### 🧠 Self-Learning Hooks v2.0 + +Ruvector includes **self-learning intelligence hooks** for Claude Code integration with ONNX embeddings, AST analysis, and coverage-aware routing. + +#### Initialize Hooks + +```bash +# Initialize hooks in your project +npx ruvector hooks init + +# Options: +# --force Overwrite existing configuration +# --minimal Minimal configuration (no optional hooks) +# --pretrain Initialize + pretrain from git history +# --build-agents quality Generate optimized agent configs +``` + +This creates `.claude/settings.json` with pre-configured hooks and `CLAUDE.md` with comprehensive documentation.
+ +#### Session Management + +```bash +# Start a session (load intelligence data) +npx ruvector hooks session-start + +# End a session (save learned patterns) +npx ruvector hooks session-end +``` + +#### Pre/Post Edit Hooks + +```bash +# Before editing a file - get agent recommendations +npx ruvector hooks pre-edit src/index.ts +# Output: 🤖 Recommended: typescript-developer (85% confidence) + +# After editing - record success/failure for learning +npx ruvector hooks post-edit src/index.ts --success +npx ruvector hooks post-edit src/index.ts --error "Type error on line 42" +``` + +#### Pre/Post Command Hooks + +```bash +# Before running a command - risk analysis +npx ruvector hooks pre-command "npm test" +# Output: ✅ Risk: LOW, Category: test + +# After running - record outcome +npx ruvector hooks post-command "npm test" --success +npx ruvector hooks post-command "npm test" --error "3 tests failed" +``` + +#### Agent Routing + +```bash +# Get agent recommendation for a task +npx ruvector hooks route "fix the authentication bug in login.ts" +# Output: 🤖 Recommended: security-specialist (92% confidence) + +npx ruvector hooks route "add unit tests for the API" +# Output: 🤖 Recommended: tester (88% confidence) +``` + +#### Memory Operations + +```bash +# Store context in vector memory +npx ruvector hooks remember "API uses JWT tokens with 1h expiry" --type decision +npx ruvector hooks remember "Database schema in docs/schema.md" --type reference + +# Semantic search memory +npx ruvector hooks recall "authentication mechanism" +# Returns relevant stored memories +``` + +#### Context Suggestions + +```bash +# Get relevant context for current task +npx ruvector hooks suggest-context +# Output: Based on recent files, suggests relevant context +``` + +#### Intelligence Statistics + +```bash +# Show learned patterns and statistics +npx ruvector hooks stats + +# Output: +# Patterns: 156 learned +# Success rate: 87% +# Top agents: rust-developer, tester, reviewer +# Memory 
entries: 42 +``` + +#### Swarm Recommendations + +```bash +# Get agent recommendation for task type +npx ruvector hooks swarm-recommend "code-review" +# Output: Recommended agents for code review task +``` + +#### AST Analysis (v2.0) + +```bash +# Analyze file structure, symbols, imports, complexity +npx ruvector hooks ast-analyze src/index.ts --json + +# Get complexity metrics for multiple files +npx ruvector hooks ast-complexity src/*.ts --threshold 15 +# Flags files exceeding cyclomatic complexity threshold +``` + +#### Diff & Risk Analysis (v2.0) + +```bash +# Analyze commit with semantic embeddings and risk scoring +npx ruvector hooks diff-analyze HEAD +# Output: risk score, category, affected files + +# Classify change type (feature, bugfix, refactor, docs, test) +npx ruvector hooks diff-classify + +# Find similar past commits via embeddings +npx ruvector hooks diff-similar -k 5 + +# Git churn analysis (hot spots) +npx ruvector hooks git-churn --days 30 +``` + +#### Coverage-Aware Routing (v2.0) + +```bash +# Get coverage-aware routing for a file +npx ruvector hooks coverage-route src/api.ts +# Output: agent weights based on test coverage + +# Suggest tests for files based on coverage gaps +npx ruvector hooks coverage-suggest src/*.ts +``` + +#### Graph Analysis (v2.0) + +```bash +# Find optimal code boundaries (MinCut algorithm) +npx ruvector hooks graph-mincut src/*.ts + +# Detect code communities (Louvain/Spectral clustering) +npx ruvector hooks graph-cluster src/*.ts --method louvain +``` + +#### Security & RAG (v2.0) + +```bash +# Parallel security vulnerability scan +npx ruvector hooks security-scan src/*.ts + +# RAG-enhanced context retrieval +npx ruvector hooks rag-context "how does auth work" + +# Enhanced routing with all signals +npx ruvector hooks route-enhanced "fix bug" --file src/api.ts +``` + +#### Hooks Configuration + +The hooks integrate with Claude Code via `.claude/settings.json`: + +```json +{ + "env": { + 
"RUVECTOR_INTELLIGENCE_ENABLED": "true", + "RUVECTOR_LEARNING_RATE": "0.1", + "RUVECTOR_AST_ENABLED": "true", + "RUVECTOR_DIFF_EMBEDDINGS": "true", + "RUVECTOR_COVERAGE_ROUTING": "true", + "RUVECTOR_GRAPH_ALGORITHMS": "true", + "RUVECTOR_SECURITY_SCAN": "true" + }, + "hooks": { + "PreToolUse": [ + { + "matcher": "Edit|Write|MultiEdit", + "hooks": [{ "type": "command", "command": "npx ruvector hooks pre-edit \"$TOOL_INPUT_file_path\"" }] + }, + { + "matcher": "Bash", + "hooks": [{ "type": "command", "command": "npx ruvector hooks pre-command \"$TOOL_INPUT_command\"" }] + } + ], + "PostToolUse": [ + { + "matcher": "Edit|Write|MultiEdit", + "hooks": [{ "type": "command", "command": "npx ruvector hooks post-edit \"$TOOL_INPUT_file_path\"" }] + } + ], + "SessionStart": [{ "hooks": [{ "type": "command", "command": "npx ruvector hooks session-start" }] }], + "Stop": [{ "hooks": [{ "type": "command", "command": "npx ruvector hooks session-end" }] }] + } +} +``` + +#### How Self-Learning Works + +1. **Pattern Recording**: Every edit and command is recorded with context +2. **Q-Learning**: Success/failure updates agent routing weights +3. **AST Analysis**: Code complexity informs agent selection +4. **Diff Embeddings**: Change patterns improve risk assessment +5. **Coverage Routing**: Test coverage guides testing priorities +6. **Vector Memory**: Decisions and references stored for semantic recall (HNSW indexed) +7. 
**Continuous Improvement**: The more you use it, the smarter it gets + +## 📊 Performance Benchmarks + +Tested on AMD Ryzen 9 5950X, 128-dimensional vectors: + +### Native Performance (Rust) + +| Operation | Throughput | Latency (p50) | Latency (p99) | +|-----------|------------|---------------|---------------| +| Insert | 52,341 ops/sec | 0.019 ms | 0.045 ms | +| Search (k=10) | 11,234 ops/sec | 0.089 ms | 0.156 ms | +| Search (k=100) | 8,932 ops/sec | 0.112 ms | 0.203 ms | +| Delete | 45,678 ops/sec | 0.022 ms | 0.051 ms | + +**Memory Usage**: ~50 bytes per 128-dim vector (including index) + +### Comparison with Alternatives + +| Database | Insert (ops/sec) | Search (ops/sec) | Memory per Vector | Node.js | Browser | +|----------|------------------|------------------|-------------------|---------|---------| +| **Ruvector (Native)** | **52,341** | **11,234** | **50 bytes** | ✅ | ❌ | +| **Ruvector (WASM)** | **~1,000** | **~100** | **50 bytes** | ✅ | ✅ | +| Faiss (HNSW) | 38,200 | 9,800 | 68 bytes | ❌ | ❌ | +| Hnswlib | 41,500 | 10,200 | 62 bytes | ✅ | ❌ | +| ChromaDB | ~1,000 | ~20 | 150 bytes | ✅ | ❌ | + +*Benchmarks measured with 100K vectors, 128 dimensions, k=10* + +## 🔍 Comparison with Other Vector Databases + +Comprehensive comparison of Ruvector against popular vector database solutions: + +| Feature | Ruvector | Pinecone | Qdrant | Weaviate | Milvus | ChromaDB | Faiss | +|---------|----------|----------|--------|----------|--------|----------|-------| +| **Deployment** | +| Installation | `npm install` ✅ | Cloud API ☁️ | Docker 🐳 | Docker 🐳 | Docker/K8s 🐳 | `pip install` 🐍 | `pip install` 🐍 | +| Node.js Native | ✅ First-class | ❌ API only | ⚠️ HTTP API | ⚠️ HTTP API | ⚠️ HTTP API | ❌ Python | ❌ Python | +| Setup Time | < 1 minute | 5-10 minutes | 10-30 minutes | 15-30 minutes | 30-60 minutes | 5 minutes | 5 minutes | +| Infrastructure | None required | Managed cloud | Self-hosted | Self-hosted | Self-hosted | Embedded | Embedded | +| **Performance** | +| 
Query Latency (p50) | **<0.5ms** | ~2-5ms | ~1-2ms | ~2-3ms | ~3-5ms | ~50ms | ~1ms | +| Insert Throughput | **52,341 ops/sec** | ~10,000 ops/sec | ~20,000 ops/sec | ~15,000 ops/sec | ~25,000 ops/sec | ~1,000 ops/sec | ~40,000 ops/sec | +| Memory per Vector (128d) | **50 bytes** | ~80 bytes | 62 bytes | ~100 bytes | ~70 bytes | 150 bytes | 68 bytes | +| Recall @ k=10 | 95%+ | 93% | 94% | 92% | 96% | 85% | 97% | +| **Platform Support** | +| Linux | ✅ Native | ☁️ API | ✅ Docker | ✅ Docker | ✅ Docker | ✅ Python | ✅ Python | +| macOS | ✅ Native | ☁️ API | ✅ Docker | ✅ Docker | ✅ Docker | ✅ Python | ✅ Python | +| Windows | ✅ Native | ☁️ API | ✅ Docker | ✅ Docker | ⚠️ WSL2 | ✅ Python | ✅ Python | +| Browser/WASM | ✅ Yes | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | +| ARM64 | ✅ Native | ☁️ API | ✅ Yes | ✅ Yes | ⚠️ Limited | ✅ Yes | ✅ Yes | +| Alpine Linux | ✅ WASM | ☁️ API | ⚠️ Build from source | ⚠️ Build from source | ❌ No | ✅ Yes | ✅ Yes | +| **Features** | +| Distance Metrics | Cosine, L2, Dot | Cosine, L2, Dot | 11 metrics | 10 metrics | 8 metrics | L2, Cosine, IP | L2, IP, Cosine | +| Filtering | ✅ Metadata | ✅ Advanced | ✅ Advanced | ✅ Advanced | ✅ Advanced | ✅ Basic | ❌ Limited | +| Persistence | ✅ File-based | ☁️ Managed | ✅ Disk | ✅ Disk | ✅ Disk | ✅ DuckDB | ❌ Memory | +| Indexing | HNSW | Proprietary | HNSW | HNSW | IVF/HNSW | HNSW | IVF/HNSW | +| Quantization | ✅ PQ | ✅ Yes | ✅ Scalar | ✅ PQ | ✅ PQ/SQ | ❌ No | ✅ PQ | +| Batch Operations | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Yes | +| **Developer Experience** | +| TypeScript Types | ✅ Full | ✅ Generated | ⚠️ Community | ⚠️ Community | ⚠️ Community | ⚠️ Partial | ❌ No | +| Documentation | ✅ Excellent | ✅ Excellent | ✅ Good | ✅ Good | ✅ Good | ✅ Good | ⚠️ Technical | +| Examples | ✅ Many | ✅ Many | ✅ Good | ✅ Good | ✅ Many | ✅ Good | ⚠️ Limited | +| CLI Tools | ✅ Included | ⚠️ Limited | ✅ Yes | ✅ Yes | ✅ Yes | ⚠️ Basic | ❌ No | +| **Operations** | +| Monitoring | ✅ Metrics | ✅ Dashboard | ✅ 
Prometheus | ✅ Prometheus | ✅ Prometheus | ⚠️ Basic | ❌ No | +| Backups | ✅ File copy | ☁️ Automatic | ✅ Snapshots | ✅ Snapshots | ✅ Snapshots | ✅ File copy | ❌ Manual | +| High Availability | ⚠️ App-level | ✅ Built-in | ✅ Clustering | ✅ Clustering | ✅ Clustering | ❌ No | ❌ No | +| Auto-Scaling | ⚠️ App-level | ✅ Automatic | ⚠️ Manual | ⚠️ Manual | ⚠️ K8s HPA | ❌ No | ❌ No | +| **Cost** | +| Pricing Model | Free (MIT) | Pay-per-use | Free (Apache) | Free (BSD) | Free (Apache) | Free (Apache) | Free (MIT) | +| Monthly Cost (1M vectors) | **$0** | ~$70-200 | ~$20-50 (infra) | ~$30-60 (infra) | ~$50-100 (infra) | $0 | $0 | +| Monthly Cost (10M vectors) | **$0** | ~$500-1000 | ~$100-200 (infra) | ~$150-300 (infra) | ~$200-400 (infra) | $0 | $0 | +| API Rate Limits | None | Yes | None | None | None | None | None | +| **Use Cases** | +| RAG Systems | ✅ Excellent | ✅ Excellent | ✅ Excellent | ✅ Excellent | ✅ Excellent | ✅ Good | ⚠️ Limited | +| Serverless | ✅ Perfect | ✅ Good | ❌ No | ❌ No | ❌ No | ⚠️ Possible | ⚠️ Possible | +| Edge Computing | ✅ Excellent | ❌ No | ❌ No | ❌ No | ❌ No | ❌ No | ⚠️ Possible | +| Production Scale (100M+) | ⚠️ Single node | ✅ Yes | ✅ Yes | ✅ Yes | ✅ Excellent | ⚠️ Limited | ⚠️ Manual | +| Embedded Apps | ✅ Excellent | ❌ No | ❌ No | ❌ No | ❌ No | ⚠️ Possible | ✅ Good | + +### When to Choose Ruvector + +✅ **Perfect for:** +- **Node.js/TypeScript applications** needing embedded vector search +- **Serverless and edge computing** where external services aren't practical +- **Rapid prototyping and development** with minimal setup time +- **RAG systems** with LangChain, LlamaIndex, or custom implementations +- **Cost-sensitive projects** that can't afford cloud API pricing +- **Offline-first applications** requiring local vector search +- **Browser-based AI** with WebAssembly fallback +- **Small to medium scale** (up to 10M vectors per instance) + +⚠️ **Consider alternatives for:** +- **Massive scale (100M+ vectors)** - Consider Pinecone, Milvus, or 
Qdrant clusters +- **Multi-tenancy requirements** - Weaviate or Qdrant offer better isolation +- **Distributed systems** - Milvus provides better horizontal scaling +- **Zero-ops cloud solution** - Pinecone handles all infrastructure + +### Why Choose Ruvector Over... + +**vs Pinecone:** +- ✅ No API costs (save $1000s/month) +- ✅ No network latency (10x faster queries) +- ✅ No vendor lock-in +- ✅ Works offline and in restricted environments +- ❌ No managed multi-region clusters + +**vs ChromaDB:** +- ✅ 50x faster queries (native Rust vs Python) +- ✅ True Node.js support (not HTTP API) +- ✅ Better TypeScript integration +- ✅ Lower memory usage +- ❌ Smaller ecosystem and community + +**vs Qdrant:** +- ✅ Zero infrastructure setup +- ✅ Embedded in your app (no Docker) +- ✅ Better for serverless environments +- ✅ Native Node.js bindings +- ❌ No built-in clustering or HA + +**vs Faiss:** +- ✅ Full Node.js support (Faiss is a C++ library with official Python bindings only, no official Node.js bindings) +- ✅ Easier API and better developer experience +- ✅ Built-in persistence and metadata +- ⚠️ Slightly lower recall at same performance + +## 🎯 Real-World Tutorials + +### Tutorial 1: Building a RAG System with OpenAI + +**What you'll learn:** Create a production-ready Retrieval-Augmented Generation system that enhances LLM responses with relevant context from your documents.
+ +**Prerequisites:** +```bash +npm install ruvector openai +export OPENAI_API_KEY="your-api-key-here" +``` + +**Complete Implementation:** + +```javascript +const { VectorDb } = require('ruvector'); +const OpenAI = require('openai'); + +class RAGSystem { + constructor() { + // Initialize OpenAI client + this.openai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY + }); + + // Create vector database for OpenAI embeddings + // text-embedding-ada-002 produces 1536-dimensional vectors + this.db = new VectorDb({ + dimensions: 1536, + maxElements: 100000, + storagePath: './rag-knowledge-base.db' + }); + + console.log('✅ RAG System initialized'); + } + + // Step 1: Index your knowledge base + async indexDocuments(documents) { + console.log(`📚 Indexing ${documents.length} documents...`); + + for (let i = 0; i < documents.length; i++) { + const doc = documents[i]; + + // Generate embedding for the document + const response = await this.openai.embeddings.create({ + model: 'text-embedding-ada-002', + input: doc.content + }); + + // Store in vector database + await this.db.insert({ + id: doc.id || `doc_${i}`, + vector: new Float32Array(response.data[0].embedding), + metadata: { + title: doc.title, + content: doc.content, + source: doc.source, + date: doc.date || new Date().toISOString() + } + }); + + console.log(` ✅ Indexed: ${doc.title}`); + } + + const count = await this.db.len(); + console.log(`\n✅ Indexed ${count} documents total`); + } + + // Step 2: Retrieve relevant context for a query + async retrieveContext(query, k = 3) { + console.log(`🔍 Searching for: "${query}"`); + + // Generate embedding for the query + const response = await this.openai.embeddings.create({ + model: 'text-embedding-ada-002', + input: query + }); + + // Search for similar documents + const results = await this.db.search({ + vector: new Float32Array(response.data[0].embedding), + k: k, + threshold: 0.7 // Only use highly relevant results + }); + + console.log(`📄 Found ${results.length} 
relevant documents\n`); + + return results.map(r => ({ + content: r.metadata.content, + title: r.metadata.title, + score: r.score + })); + } + + // Step 3: Generate answer with retrieved context + async answer(question) { + // Retrieve relevant context + const context = await this.retrieveContext(question, 3); + + if (context.length === 0) { + return { answer: "I don't have enough information to answer that question.", sources: [] }; + } + + // Build prompt with context + const contextText = context + .map((doc, i) => `[${i + 1}] ${doc.title}\n${doc.content}`) + .join('\n\n'); + + const prompt = `Answer the question based on the following context. If the context doesn't contain the answer, say so. + +Context: +${contextText} + +Question: ${question} + +Answer:`; + + console.log('🤖 Generating answer...\n'); + + // Generate completion + const completion = await this.openai.chat.completions.create({ + model: 'gpt-4', + messages: [ + { role: 'system', content: 'You are a helpful assistant that answers questions based on provided context.' }, + { role: 'user', content: prompt } + ], + temperature: 0.3 // Lower temperature for more factual responses + }); + + return { + answer: completion.choices[0].message.content, + sources: context.map(c => c.title) + }; + } +} + +// Example Usage +async function main() { + const rag = new RAGSystem(); + + // Step 1: Index your knowledge base + const documents = [ + { + id: 'doc1', + title: 'Ruvector Introduction', + content: 'Ruvector is a high-performance vector database for Node.js built in Rust. It provides sub-millisecond query latency and supports over 52,000 inserts per second.', + source: 'documentation' + }, + { + id: 'doc2', + title: 'Vector Databases Explained', + content: 'Vector databases store data as high-dimensional vectors, enabling semantic similarity search.
They are essential for AI applications like RAG systems and recommendation engines.', + source: 'blog' + }, + { + id: 'doc3', + title: 'HNSW Algorithm', + content: 'Hierarchical Navigable Small World (HNSW) is a graph-based algorithm for approximate nearest neighbor search. It provides excellent recall with low latency.', + source: 'research' + } + ]; + + await rag.indexDocuments(documents); + + // Step 2: Ask questions + console.log('\n' + '='.repeat(60) + '\n'); + + const result = await rag.answer('What is Ruvector and what are its performance characteristics?'); + + console.log('📝 Answer:', result.answer); + console.log('\n📚 Sources:', result.sources.join(', ')); +} + +main().catch(console.error); +``` + +**Expected Output:** +``` +✅ RAG System initialized +📚 Indexing 3 documents... + ✅ Indexed: Ruvector Introduction + ✅ Indexed: Vector Databases Explained + ✅ Indexed: HNSW Algorithm + +✅ Indexed 3 documents total + +============================================================ + +🔍 Searching for: "What is Ruvector and what are its performance characteristics?" +📄 Found 2 relevant documents + +🤖 Generating answer... + +📝 Answer: Ruvector is a high-performance vector database built in Rust for Node.js applications. Its key performance characteristics include: +- Sub-millisecond query latency +- Over 52,000 inserts per second +- Optimized for semantic similarity search + +📚 Sources: Ruvector Introduction, Vector Databases Explained +``` + +**Production Tips:** +- ✅ Use batch embedding for better throughput (OpenAI supports up to 2048 texts) +- ✅ Implement caching for frequently asked questions +- ✅ Add error handling for API rate limits +- ✅ Monitor token usage and costs +- ✅ Regularly update your knowledge base + +--- + +### Tutorial 2: Semantic Search Engine + +**What you'll learn:** Build a semantic search engine that understands meaning, not just keywords. 
+ +**Prerequisites:** +```bash +npm install ruvector @xenova/transformers +``` + +**Complete Implementation:** + +```javascript +const { VectorDb } = require('ruvector'); +const { pipeline } = require('@xenova/transformers'); + +class SemanticSearchEngine { + constructor() { + this.db = null; + this.embedder = null; + } + + // Step 1: Initialize the embedding model + async initialize() { + console.log('🚀 Initializing semantic search engine...'); + + // Load sentence-transformers model (runs locally, no API needed!) + console.log('📥 Loading embedding model...'); + this.embedder = await pipeline( + 'feature-extraction', + 'Xenova/all-MiniLM-L6-v2' + ); + + // Create vector database (384 dimensions for all-MiniLM-L6-v2) + this.db = new VectorDb({ + dimensions: 384, + maxElements: 50000, + storagePath: './semantic-search.db' + }); + + console.log('✅ Search engine ready!\n'); + } + + // Step 2: Generate embeddings + async embed(text) { + const output = await this.embedder(text, { + pooling: 'mean', + normalize: true + }); + + // Convert to Float32Array + return new Float32Array(output.data); + } + + // Step 3: Index documents + async indexDocuments(documents) { + console.log(`📚 Indexing ${documents.length} documents...`); + + for (const doc of documents) { + const vector = await this.embed(doc.content); + + await this.db.insert({ + id: doc.id, + vector: vector, + metadata: { + title: doc.title, + content: doc.content, + category: doc.category, + url: doc.url + } + }); + + console.log(` ✅ ${doc.title}`); + } + + const count = await this.db.len(); + console.log(`\n✅ Indexed ${count} documents\n`); + } + + // Step 4: Semantic search + async search(query, options = {}) { + const { + k = 5, + category = null, + threshold = 0.3 + } = options; + + console.log(`🔍 Searching for: "${query}"`); + + // Generate query embedding + const queryVector = await this.embed(query); + + // Search vector database + const results = await this.db.search({ + vector: queryVector, + k: k * 2, // 
Get more results for filtering + threshold: threshold + }); + + // Filter by category if specified + let filtered = results; + if (category) { + filtered = results.filter(r => r.metadata.category === category); + } + + // Return top k after filtering + const final = filtered.slice(0, k); + + console.log(`📄 Found ${final.length} results\n`); + + return final.map(r => ({ + id: r.id, + title: r.metadata.title, + content: r.metadata.content, + category: r.metadata.category, + score: r.score, + url: r.metadata.url + })); + } + + // Step 5: Find similar documents + async findSimilar(documentId, k = 5) { + const doc = await this.db.get(documentId); + + if (!doc) { + throw new Error(`Document ${documentId} not found`); + } + + const results = await this.db.search({ + vector: doc.vector, + k: k + 1 // +1 because the document itself will be included + }); + + // Remove the document itself from results + return results + .filter(r => r.id !== documentId) + .slice(0, k); + } +} + +// Example Usage +async function main() { + const engine = new SemanticSearchEngine(); + await engine.initialize(); + + // Sample documents (in production, load from your database) + const documents = [ + { + id: '1', + title: 'Understanding Neural Networks', + content: 'Neural networks are computing systems inspired by biological neural networks. 
They learn to perform tasks by considering examples.', + category: 'AI', + url: '/docs/neural-networks' + }, + { + id: '2', + title: 'Introduction to Machine Learning', + content: 'Machine learning is a subset of artificial intelligence that provides systems the ability to learn and improve from experience.', + category: 'AI', + url: '/docs/machine-learning' + }, + { + id: '3', + title: 'Web Development Best Practices', + content: 'Modern web development involves responsive design, performance optimization, and accessibility considerations.', + category: 'Web', + url: '/docs/web-dev' + }, + { + id: '4', + title: 'Deep Learning Applications', + content: 'Deep learning has revolutionized computer vision, natural language processing, and speech recognition.', + category: 'AI', + url: '/docs/deep-learning' + } + ]; + + // Index documents + await engine.indexDocuments(documents); + + // Example 1: Basic semantic search + console.log('Example 1: Basic Search\n' + '='.repeat(60)); + const results1 = await engine.search('AI and neural nets'); + results1.forEach((result, i) => { + console.log(`${i + 1}. ${result.title} (Score: ${result.score.toFixed(3)})`); + console.log(` ${result.content.slice(0, 80)}...`); + console.log(` Category: ${result.category}\n`); + }); + + // Example 2: Category-filtered search + console.log('\nExample 2: Category-Filtered Search\n' + '='.repeat(60)); + const results2 = await engine.search('learning algorithms', { + category: 'AI', + k: 3 + }); + results2.forEach((result, i) => { + console.log(`${i + 1}. ${result.title} (Score: ${result.score.toFixed(3)})`); + }); + + // Example 3: Find similar documents + console.log('\n\nExample 3: Find Similar Documents\n' + '='.repeat(60)); + const similar = await engine.findSimilar('1', 2); + console.log('Documents similar to "Understanding Neural Networks":'); + similar.forEach((doc, i) => { + console.log(`${i + 1}. 
${doc.metadata.title} (Score: ${doc.score.toFixed(3)})`); + }); +} + +main().catch(console.error); +``` + +**Key Features:** +- ✅ Runs completely locally (no API keys needed) +- ✅ Understands semantic meaning, not just keywords +- ✅ Category filtering for better results +- ✅ "Find similar" functionality +- ✅ Fast: ~10ms query latency + +--- + +### Tutorial 3: AI Agent Memory System + +**What you'll learn:** Implement a memory system for AI agents that remembers past experiences and learns from them. + +**Complete Implementation:** + +```javascript +const { VectorDb } = require('ruvector'); + +class AgentMemory { + constructor(agentId) { + this.agentId = agentId; + + // Create separate databases for different memory types + this.episodicMemory = new VectorDb({ + dimensions: 768, + storagePath: `./memory/${agentId}-episodic.db` + }); + + this.semanticMemory = new VectorDb({ + dimensions: 768, + storagePath: `./memory/${agentId}-semantic.db` + }); + + console.log(`🧠 Memory system initialized for agent: ${agentId}`); + } + + // Step 1: Store an experience (episodic memory) + async storeExperience(experience) { + const { + state, + action, + result, + reward, + embedding + } = experience; + + const experienceId = `exp_${Date.now()}_${Math.random()}`; + + await this.episodicMemory.insert({ + id: experienceId, + vector: new Float32Array(embedding), + metadata: { + state: state, + action: action, + result: result, + reward: reward, + timestamp: Date.now(), + type: 'episodic' + } + }); + + console.log(`💾 Stored experience: ${action} -> ${result} (reward: ${reward})`); + return experienceId; + } + + // Step 2: Store learned knowledge (semantic memory) + async storeKnowledge(knowledge) { + const { + concept, + description, + embedding, + confidence = 1.0 + } = knowledge; + + const knowledgeId = `know_${Date.now()}`; + + await this.semanticMemory.insert({ + id: knowledgeId, + vector: new Float32Array(embedding), + metadata: { + concept: concept, + description: description, + 
confidence: confidence, + learned: Date.now(), + uses: 0, + type: 'semantic' + } + }); + + console.log(`📚 Learned: ${concept}`); + return knowledgeId; + } + + // Step 3: Recall similar experiences + async recallExperiences(currentState, k = 5) { + console.log(`🔍 Recalling similar experiences...`); + + const results = await this.episodicMemory.search({ + vector: new Float32Array(currentState.embedding), + k: k, + threshold: 0.6 // Only recall reasonably similar experiences + }); + + // Sort by reward to prioritize successful experiences + const sorted = results.sort((a, b) => b.metadata.reward - a.metadata.reward); + + console.log(`📝 Recalled ${sorted.length} relevant experiences`); + + return sorted.map(r => ({ + state: r.metadata.state, + action: r.metadata.action, + result: r.metadata.result, + reward: r.metadata.reward, + similarity: r.score + })); + } + + // Step 4: Query knowledge base + async queryKnowledge(query, k = 3) { + const results = await this.semanticMemory.search({ + vector: new Float32Array(query.embedding), + k: k + }); + + // Update usage statistics + for (const result of results) { + const knowledge = await this.semanticMemory.get(result.id); + if (knowledge) { + knowledge.metadata.uses += 1; + // In production, update the entry + } + } + + return results.map(r => ({ + concept: r.metadata.concept, + description: r.metadata.description, + confidence: r.metadata.confidence, + relevance: r.score + })); + } + + // Step 5: Reflect and learn from experiences + async reflect() { + console.log('\n🤔 Reflecting on experiences...'); + + // Get all experiences + const totalExperiences = await this.episodicMemory.len(); + console.log(`📊 Total experiences: ${totalExperiences}`); + + // Analyze success rate + // In production, you'd aggregate experiences and extract patterns + console.log('💡 Analysis complete'); + + return { + totalExperiences: totalExperiences, + knowledgeItems: await this.semanticMemory.len() + }; + } + + // Step 6: Get memory statistics + 
async getStats() { + return { + episodicMemorySize: await this.episodicMemory.len(), + semanticMemorySize: await this.semanticMemory.len(), + agentId: this.agentId + }; + } +} + +// Example Usage: Simulated agent learning to navigate +async function main() { + const agent = new AgentMemory('agent-001'); + + // Simulate embedding function (in production, use a real model) + function embed(text) { + return Array(768).fill(0).map(() => Math.random()); + } + + console.log('\n' + '='.repeat(60)); + console.log('PHASE 1: Learning from experiences'); + console.log('='.repeat(60) + '\n'); + + // Store some experiences + await agent.storeExperience({ + state: { location: 'room1', goal: 'room3' }, + action: 'move_north', + result: 'reached room2', + reward: 0.5, + embedding: embed('navigating from room1 to room2') + }); + + await agent.storeExperience({ + state: { location: 'room2', goal: 'room3' }, + action: 'move_east', + result: 'reached room3', + reward: 1.0, + embedding: embed('navigating from room2 to room3') + }); + + await agent.storeExperience({ + state: { location: 'room1', goal: 'room3' }, + action: 'move_south', + result: 'hit wall', + reward: -0.5, + embedding: embed('failed navigation attempt') + }); + + // Store learned knowledge + await agent.storeKnowledge({ + concept: 'navigation_strategy', + description: 'Moving north then east is efficient for reaching room3 from room1', + embedding: embed('navigation strategy knowledge'), + confidence: 0.9 + }); + + console.log('\n' + '='.repeat(60)); + console.log('PHASE 2: Applying memory'); + console.log('='.repeat(60) + '\n'); + + // Agent encounters a similar situation + const currentState = { + location: 'room1', + goal: 'room3', + embedding: embed('navigating from room1 to room3') + }; + + // Recall relevant experiences + const experiences = await agent.recallExperiences(currentState, 3); + + console.log('\n📖 Recalled experiences:'); + experiences.forEach((exp, i) => { + console.log(`${i + 1}. 
Action: ${exp.action} | Result: ${exp.result} | Reward: ${exp.reward} | Similarity: ${exp.similarity.toFixed(3)}`); + }); + + // Query relevant knowledge + const knowledge = await agent.queryKnowledge({ + embedding: embed('how to navigate efficiently') + }, 2); + + console.log('\n📚 Relevant knowledge:'); + knowledge.forEach((k, i) => { + console.log(`${i + 1}. ${k.concept}: ${k.description} (confidence: ${k.confidence})`); + }); + + console.log('\n' + '='.repeat(60)); + console.log('PHASE 3: Reflection'); + console.log('='.repeat(60) + '\n'); + + // Reflect on learning + const stats = await agent.reflect(); + const memoryStats = await agent.getStats(); + + console.log('\n📊 Memory Statistics:'); + console.log(` Episodic memories: ${memoryStats.episodicMemorySize}`); + console.log(` Semantic knowledge: ${memoryStats.semanticMemorySize}`); + console.log(` Agent ID: ${memoryStats.agentId}`); +} + +main().catch(console.error); +``` + +**Expected Output:** +``` +🧠 Memory system initialized for agent: agent-001 + +============================================================ +PHASE 1: Learning from experiences +============================================================ + +💾 Stored experience: move_north -> reached room2 (reward: 0.5) +💾 Stored experience: move_east -> reached room3 (reward: 1) +💾 Stored experience: move_south -> hit wall (reward: -0.5) +📚 Learned: navigation_strategy + +============================================================ +PHASE 2: Applying memory +============================================================ + +🔍 Recalling similar experiences... +📝 Recalled 3 relevant experiences + +📖 Recalled experiences: +1. Action: move_east | Result: reached room3 | Reward: 1 | Similarity: 0.892 +2. Action: move_north | Result: reached room2 | Reward: 0.5 | Similarity: 0.876 +3. Action: move_south | Result: hit wall | Reward: -0.5 | Similarity: 0.654 + +📚 Relevant knowledge: +1. 
navigation_strategy: Moving north then east is efficient for reaching room3 from room1 (confidence: 0.9) + +============================================================ +PHASE 3: Reflection +============================================================ + +🤔 Reflecting on experiences... +📊 Total experiences: 3 +💡 Analysis complete + +📊 Memory Statistics: + Episodic memories: 3 + Semantic knowledge: 1 + Agent ID: agent-001 +``` + +**Use Cases:** +- ✅ Reinforcement learning agents +- ✅ Chatbot conversation history +- ✅ Game AI that learns from gameplay +- ✅ Personal assistant memory +- ✅ Robotic navigation systems + +## 🏗️ API Reference + +### Constructor + +```typescript +new VectorDb(options: { + dimensions: number; // Vector dimensionality (required) + maxElements?: number; // Max vectors (default: 10000) + storagePath?: string; // Persistent storage path + ef_construction?: number; // HNSW construction parameter (default: 200) + m?: number; // HNSW M parameter (default: 16) + distanceMetric?: string; // 'cosine', 'euclidean', or 'dot' (default: 'cosine') +}) +``` + +### Methods + +#### `insert(entry: VectorEntry): Promise<string>` +Insert a vector into the database and resolve with its ID. + +```javascript +const id = await db.insert({ + id: 'doc_1', + vector: new Float32Array([0.1, 0.2, 0.3, ...]), + metadata: { title: 'Document 1' } +}); +``` + +#### `search(query: SearchQuery): Promise<SearchResult[]>` +Search for similar vectors. + +```javascript +const results = await db.search({ + vector: new Float32Array([0.1, 0.2, 0.3, ...]), + k: 10, + threshold: 0.7 +}); +``` + +#### `get(id: string): Promise<VectorEntry | null>` +Retrieve a vector by ID. + +```javascript +const entry = await db.get('doc_1'); +if (entry) { + console.log(entry.vector, entry.metadata); +} +``` + +#### `delete(id: string): Promise<boolean>` +Remove a vector from the database. + +```javascript +const deleted = await db.delete('doc_1'); +console.log(deleted ? 'Deleted' : 'Not found'); +``` + +#### `len(): Promise<number>` +Get the total number of vectors. 
+ +```javascript +const count = await db.len(); +console.log(`Total vectors: ${count}`); +``` + +## 🎨 Advanced Configuration + +### HNSW Parameters + +```javascript +const db = new VectorDb({ + dimensions: 384, + maxElements: 1000000, + ef_construction: 200, // Higher = better recall, slower build + m: 16, // Higher = better recall, more memory + storagePath: './large-db.db' +}); +``` + +**Parameter Guidelines:** +- `ef_construction`: 100-400 (higher = better recall, slower indexing) +- `m`: 8-64 (higher = better recall, more memory) +- Default values work well for most use cases + +### Distance Metrics + +```javascript +// Cosine similarity (default, best for normalized vectors) +const db1 = new VectorDb({ + dimensions: 128, + distanceMetric: 'cosine' +}); + +// Euclidean distance (L2, best for spatial data) +const db2 = new VectorDb({ + dimensions: 128, + distanceMetric: 'euclidean' +}); + +// Dot product (best for pre-normalized vectors) +const db3 = new VectorDb({ + dimensions: 128, + distanceMetric: 'dot' +}); +``` + +### Persistence + +```javascript +// Auto-save to disk +const persistent = new VectorDb({ + dimensions: 128, + storagePath: './persistent.db' +}); + +// In-memory only (faster, but data lost on exit) +const temporary = new VectorDb({ + dimensions: 128 + // No storagePath = in-memory +}); +``` + +## 📦 Platform Support + +Automatically installs the correct implementation for: + +### Native (Rust) - Best Performance +- **Linux**: x64, ARM64 (GNU libc) +- **macOS**: x64 (Intel), ARM64 (Apple Silicon) +- **Windows**: x64 (MSVC) + +Performance: **<0.5ms latency**, **50K+ ops/sec** + +### WASM Fallback - Universal Compatibility +- Any platform where native module isn't available +- Browser environments (experimental) +- Alpine Linux (musl) and other non-glibc systems + +Performance: **10-50ms latency**, **~1K ops/sec** + +**Node.js 18+ required** for all platforms. 
+ +## 🔧 Building from Source + +If you need to rebuild the native module: + +```bash +# Install Rust toolchain +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh + +# Clone repository +git clone https://github.com/ruvnet/ruvector.git +cd ruvector + +# Build native module +cd npm/packages/core +npm run build:napi + +# Build wrapper package +cd ../ruvector +npm install +npm run build + +# Run tests +npm test +``` + +**Requirements:** +- Rust 1.77+ +- Node.js 18+ +- Cargo + +## 🌍 Ecosystem + +### Related Packages + +- **[ruvector-core](https://www.npmjs.com/package/ruvector-core)** - Core native bindings (lower-level API) +- **[ruvector-wasm](https://www.npmjs.com/package/ruvector-wasm)** - WebAssembly implementation for browsers +- **[ruvector-cli](https://www.npmjs.com/package/ruvector-cli)** - Standalone CLI tools +- **[@ruvector/rvf](https://www.npmjs.com/package/@ruvector/rvf)** - RVF cognitive container SDK +- **[@ruvector/rvf-wasm](https://www.npmjs.com/package/@ruvector/rvf-wasm)** - RVF WASM build for browsers, Deno, and edge +- **[rvlite](https://www.npmjs.com/package/rvlite)** - Lightweight vector database with SQL, SPARQL, and Cypher + +### Platform-Specific Packages (auto-installed) + +- **[ruvector-core-linux-x64-gnu](https://www.npmjs.com/package/ruvector-core-linux-x64-gnu)** +- **[ruvector-core-linux-arm64-gnu](https://www.npmjs.com/package/ruvector-core-linux-arm64-gnu)** +- **[ruvector-core-darwin-x64](https://www.npmjs.com/package/ruvector-core-darwin-x64)** +- **[ruvector-core-darwin-arm64](https://www.npmjs.com/package/ruvector-core-darwin-arm64)** +- **[ruvector-core-win32-x64-msvc](https://www.npmjs.com/package/ruvector-core-win32-x64-msvc)** + +--- + +## RVF Cognitive Containers + +Ruvector integrates with [RVF (RuVector Format)](https://github.com/ruvnet/ruvector/tree/main/crates/rvf) — a universal binary substrate that stores vectors, models, graphs, compute kernels, and attestation in a single `.rvf` file. 
+ +### Enable RVF Backend + +```bash +# Install the optional RVF package +npm install @ruvector/rvf + +# Set backend via environment variable +export RUVECTOR_BACKEND=rvf + +# Or detect automatically (native -> rvf -> wasm fallback) +npx ruvector info +``` + +```typescript +import { getImplementationType, isRvf } from 'ruvector'; + +console.log(getImplementationType()); // 'native' | 'rvf' | 'wasm' +console.log(isRvf()); // true if RVF backend is active +``` + +### RVF CLI Commands + +8 RVF-specific subcommands are available through the ruvector CLI: + +```bash +# Create an RVF store +npx ruvector rvf create mydb.rvf -d 384 --metric cosine + +# Ingest vectors from JSON +npx ruvector rvf ingest mydb.rvf --input vectors.json --format json + +# Query nearest neighbors +npx ruvector rvf query mydb.rvf --vector "[0.1,0.2,...]" --k 10 + +# File status and segment listing +npx ruvector rvf status mydb.rvf +npx ruvector rvf segments mydb.rvf + +# COW branching — derive a child file +npx ruvector rvf derive mydb.rvf --output child.rvf + +# Compact and reclaim space +npx ruvector rvf compact mydb.rvf + +# Export to JSON +npx ruvector rvf export mydb.rvf --output dump.json +``` + +### RVF Platform Support + +| Platform | Runtime | Backend | +|----------|---------|---------| +| Linux x86_64 / aarch64 | Node.js 18+ | Native (N-API) | +| macOS x86_64 / arm64 | Node.js 18+ | Native (N-API) | +| Windows x86_64 | Node.js 18+ | Native (N-API) | +| Any | Deno | WASM (`@ruvector/rvf-wasm`) | +| Any | Browser | WASM (`@ruvector/rvf-wasm`) | +| Any | Cloudflare Workers | WASM (`@ruvector/rvf-wasm`) | + +### Download Example .rvf Files + +45 pre-built example files are available (~11 MB total): + +```bash +# Download a specific example +curl -LO https://raw.githubusercontent.com/ruvnet/ruvector/main/examples/rvf/output/basic_store.rvf + +# Popular examples: +# basic_store.rvf (152 KB) — 1,000 vectors, dim 128 +# semantic_search.rvf (755 KB) — Semantic search with HNSW +# rag_pipeline.rvf 
(303 KB) — RAG pipeline embeddings +# agent_memory.rvf (32 KB) — AI agent memory store +# self_booting.rvf (31 KB) — Self-booting with kernel +# progressive_index.rvf (2.5 MB) — Large-scale HNSW index + +# Generate all examples locally +cd crates/rvf && cargo run --example generate_all +``` + +Full catalog: [examples/rvf/output/](https://github.com/ruvnet/ruvector/tree/main/examples/rvf/output) + +### Working Examples: Cognitive Containers + +#### Self-Booting Microservice + +A single `.rvf` file that contains vectors AND a bootable Linux kernel: + +```bash +# Build and run the self-booting example +cd crates/rvf && cargo run --example self_booting +# Output: +# Ingested 50 vectors (128 dims) +# Pre-kernel query: top-5 results OK (nearest ID=25) +# Kernel: 4,640 bytes embedded (x86_64, Hermit) +# Witness chain: 5 entries, all verified +# File: bootable.rvf (31 KB) — data + runtime in one file +``` + +```rust +// The pattern: vectors + kernel + witness in one file +let mut store = RvfStore::create("bootable.rvf", options)?; +store.ingest_batch(&vectors, &ids, None)?; +store.embed_kernel(KernelArch::X86_64 as u8, KernelType::Hermit as u8, + 0x0018, &kernel_image, 8080, Some("console=ttyS0 quiet"))?; +// Result: drop on a VM and it boots as a query service +``` + +#### Linux Microkernel Distribution + +20-package Linux distro with SSH keys and kernel in a single file: + +```bash +cd crates/rvf && cargo run --example linux_microkernel +# Output: +# Installed 20 packages as vector embeddings +# Kernel embedded: Linux x86_64 (4,640 bytes) +# SSH keys: Ed25519, signed and verified +# Witness chain: 22 entries (1 per package + kernel + SSH) +# File: microkernel.rvf (14 KB) — immutable bootable system +``` + +Features: package search by embedding similarity, Ed25519 signed SSH keys, witness-audited installs, COW-derived child images for atomic updates. 
+ +#### Claude Code AI Appliance + +A sealed, bootable AI development environment: + +```bash +cd crates/rvf && cargo run --example claude_code_appliance +# Output: +# 20 dev packages (rust, node, python, docker, ...) +# Kernel: Linux x86_64 with SSH on port 2222 +# eBPF: XDP distance program for fast-path lookups +# Witness chain: 6 entries, all verified +# Crypto: Ed25519 signature +# File: claude_code_appliance.rvf (17 KB) +``` + +#### CLI Full Lifecycle + +```bash +# Create → Ingest → Query → Derive → Inspect +rvf create vectors.rvf --dimension 384 +rvf ingest vectors.rvf --input data.json --format json +rvf query vectors.rvf --vector "0.1,0.2,..." --k 10 +rvf derive vectors.rvf child.rvf --type filter +rvf inspect vectors.rvf + +# Embed kernel and launch as microVM +rvf embed-kernel vectors.rvf --image bzImage +rvf launch vectors.rvf --port 8080 + +# Verify tamper-evident witness chain +rvf verify-witness vectors.rvf +rvf verify-attestation vectors.rvf +``` + +#### Integration Tests (46 passing) + +```bash +cd crates/rvf +cargo test --workspace +# attestation .............. 6 passed +# crypto ................... 10 passed +# computational_container .. 8 passed +# cow_branching ............ 8 passed +# cross_platform ........... 6 passed +# lineage .................. 4 passed +# smoke .................... 4 passed +# Total: 46/46 passed +``` + +## 🐛 Troubleshooting + +### Native Module Not Loading + +If you see "Cannot find module 'ruvector-core-*'": + +```bash +# Reinstall with optional dependencies +npm install --include=optional ruvector + +# Verify platform +npx ruvector info + +# Check Node.js version (18+ required) +node --version +``` + +### WASM Fallback Performance + +If you're using WASM fallback and need better performance: + +1. **Install native toolchain** for your platform +2. **Rebuild native module**: `npm rebuild ruvector` +3. 
**Verify native**: `npx ruvector info` should show "native (Rust)" + +### Platform Compatibility + +- **Alpine Linux**: Uses WASM fallback (musl not supported) +- **Windows ARM**: Not yet supported, uses WASM fallback +- **Node.js < 18**: Not supported, upgrade to Node.js 18+ + +## 📚 Documentation + +- 🏠 [Homepage](https://ruv.io) +- 📦 [GitHub Repository](https://github.com/ruvnet/ruvector) +- 📚 [Full Documentation](https://github.com/ruvnet/ruvector/tree/main/docs) +- 🚀 [Getting Started Guide](https://github.com/ruvnet/ruvector/blob/main/docs/guide/GETTING_STARTED.md) +- 📖 [API Reference](https://github.com/ruvnet/ruvector/blob/main/docs/api/NODEJS_API.md) +- 🎯 [Performance Tuning](https://github.com/ruvnet/ruvector/blob/main/docs/optimization/PERFORMANCE_TUNING_GUIDE.md) +- 🐛 [Issue Tracker](https://github.com/ruvnet/ruvector/issues) +- 💬 [Discussions](https://github.com/ruvnet/ruvector/discussions) + +## 🤝 Contributing + +We welcome contributions! See [CONTRIBUTING.md](https://github.com/ruvnet/ruvector/blob/main/docs/development/CONTRIBUTING.md) for guidelines. + +### Quick Start + +1. Fork the repository +2. Create a feature branch: `git checkout -b feature/amazing-feature` +3. Commit changes: `git commit -m 'Add amazing feature'` +4. Push to branch: `git push origin feature/amazing-feature` +5. Open a Pull Request + +## 🌐 Community & Support + +- **GitHub**: [github.com/ruvnet/ruvector](https://github.com/ruvnet/ruvector) - ⭐ Star and follow +- **Discord**: [Join our community](https://discord.gg/ruvnet) - Chat with developers +- **Twitter**: [@ruvnet](https://twitter.com/ruvnet) - Follow for updates +- **Issues**: [Report bugs](https://github.com/ruvnet/ruvector/issues) + +### Enterprise Support + +Need custom development or consulting? + +📧 [enterprise@ruv.io](mailto:enterprise@ruv.io) + +## 📜 License + +**MIT License** - see [LICENSE](https://github.com/ruvnet/ruvector/blob/main/LICENSE) for details. + +Free for commercial and personal use. 
+ +## 🙏 Acknowledgments + +Built with battle-tested technologies: + +- **HNSW**: Hierarchical Navigable Small World graphs +- **SIMD**: Hardware-accelerated vector operations via simsimd +- **Rust**: Memory-safe, zero-cost abstractions +- **NAPI-RS**: High-performance Node.js bindings +- **WebAssembly**: Universal browser compatibility + +--- + +
+ +**Built with ❤️ by [rUv](https://ruv.io)** + +[![npm](https://img.shields.io/npm/v/ruvector.svg)](https://www.npmjs.com/package/ruvector) +[![GitHub Stars](https://img.shields.io/github/stars/ruvnet/ruvector?style=social)](https://github.com/ruvnet/ruvector) +[![Twitter](https://img.shields.io/twitter/follow/ruvnet?style=social)](https://twitter.com/ruvnet) + +**[Get Started](https://github.com/ruvnet/ruvector/blob/main/docs/guide/GETTING_STARTED.md)** • **[Documentation](https://github.com/ruvnet/ruvector/tree/main/docs)** • **[API Reference](https://github.com/ruvnet/ruvector/blob/main/docs/api/NODEJS_API.md)** • **[Contributing](https://github.com/ruvnet/ruvector/blob/main/docs/development/CONTRIBUTING.md)** + +
diff --git a/bin/cli.js b/bin/cli.js new file mode 100755 index 0000000000000000000000000000000000000000..9bf22a4e6eafba340741a09fed1fdc6c00491c36 --- /dev/null +++ b/bin/cli.js @@ -0,0 +1,7356 @@ +#!/usr/bin/env node + +// Signal CLI context (disables parallel workers - hooks are short-lived) +process.env.RUVECTOR_CLI = '1'; + +const { Command } = require('commander'); +const chalk = require('chalk'); +const ora = require('ora'); +const fs = require('fs'); +const path = require('path'); + +// Lazy load ruvector (only when needed, not for install/help commands) +let VectorDB, getVersion, getImplementationType; +let ruvectorLoaded = false; + +function loadRuvector() { + if (ruvectorLoaded) return true; + try { + const ruvector = require('../dist/index.js'); + VectorDB = ruvector.VectorDB; + getVersion = ruvector.getVersion; + getImplementationType = ruvector.getImplementationType; + ruvectorLoaded = true; + return true; + } catch (e) { + return false; + } +} + +function requireRuvector() { + if (!loadRuvector()) { + console.error(chalk.red('Error: Failed to load ruvector. 
Please run: npm run build')); + console.error(chalk.yellow('Or install the package: npm install ruvector')); + process.exit(1); + } +} + +// Import GNN (optional - graceful fallback if not available) +let RuvectorLayer, TensorCompress, differentiableSearch, getCompressionLevel, hierarchicalForward; +let gnnAvailable = false; +try { + const gnn = require('@ruvector/gnn'); + RuvectorLayer = gnn.RuvectorLayer; + TensorCompress = gnn.TensorCompress; + differentiableSearch = gnn.differentiableSearch; + getCompressionLevel = gnn.getCompressionLevel; + hierarchicalForward = gnn.hierarchicalForward; + gnnAvailable = true; +} catch (e) { + // GNN not available - commands will show helpful message +} + +// Import Attention (optional - graceful fallback if not available) +let DotProductAttention, MultiHeadAttention, HyperbolicAttention, FlashAttention, LinearAttention, MoEAttention; +let GraphRoPeAttention, EdgeFeaturedAttention, DualSpaceAttention, LocalGlobalAttention; +let benchmarkAttention, computeAttentionAsync, batchAttentionCompute, parallelAttentionCompute; +let expMap, logMap, mobiusAddition, poincareDistance, projectToPoincareBall; +let attentionInfo, attentionVersion; +let attentionAvailable = false; +try { + const attention = require('@ruvector/attention'); + // Core mechanisms + DotProductAttention = attention.DotProductAttention; + MultiHeadAttention = attention.MultiHeadAttention; + HyperbolicAttention = attention.HyperbolicAttention; + FlashAttention = attention.FlashAttention; + LinearAttention = attention.LinearAttention; + MoEAttention = attention.MoEAttention; + // Graph attention + GraphRoPeAttention = attention.GraphRoPeAttention; + EdgeFeaturedAttention = attention.EdgeFeaturedAttention; + DualSpaceAttention = attention.DualSpaceAttention; + LocalGlobalAttention = attention.LocalGlobalAttention; + // Utilities + benchmarkAttention = attention.benchmarkAttention; + computeAttentionAsync = attention.computeAttentionAsync; + batchAttentionCompute = 
attention.batchAttentionCompute; + parallelAttentionCompute = attention.parallelAttentionCompute; + // Hyperbolic math + expMap = attention.expMap; + logMap = attention.logMap; + mobiusAddition = attention.mobiusAddition; + poincareDistance = attention.poincareDistance; + projectToPoincareBall = attention.projectToPoincareBall; + // Meta + attentionInfo = attention.info; + attentionVersion = attention.version; + attentionAvailable = true; +} catch (e) { + // Attention not available - commands will show helpful message +} + +const program = new Command(); + +// Get package version from package.json +const packageJson = require('../package.json'); + +// Version and description (lazy load implementation info) +program + .name('ruvector') + .description(`${chalk.cyan('ruvector')} - High-performance vector database CLI`) + .version(packageJson.version); + +// Create database +program + .command('create ') + .description('Create a new vector database') + .option('-d, --dimension ', 'Vector dimension', '384') + .option('-m, --metric ', 'Distance metric (cosine|euclidean|dot)', 'cosine') + .action((dbPath, options) => { + requireRuvector(); + const spinner = ora('Creating database...').start(); + + try { + const dimension = parseInt(options.dimension); + const db = new VectorDB({ + dimension, + metric: options.metric, + path: dbPath, + autoPersist: true + }); + + db.save(dbPath); + spinner.succeed(chalk.green(`Database created: ${dbPath}`)); + console.log(chalk.gray(` Dimension: ${dimension}`)); + console.log(chalk.gray(` Metric: ${options.metric}`)); + console.log(chalk.gray(` Implementation: ${getImplementationType()}`)); + } catch (error) { + spinner.fail(chalk.red('Failed to create database')); + console.error(chalk.red(error.message)); + process.exit(1); + } + }); + +// Insert vectors +program + .command('insert ') + .description('Insert vectors from JSON file') + .option('-b, --batch-size ', 'Batch size for insertion', '1000') + .action((dbPath, file, options) => { + 
requireRuvector(); + const spinner = ora('Loading database...').start(); + + try { + // Read database metadata to get dimension + let dimension = 384; // default + if (fs.existsSync(dbPath)) { + const dbData = fs.readFileSync(dbPath, 'utf8'); + const parsed = JSON.parse(dbData); + dimension = parsed.dimension || 384; + } + + const db = new VectorDB({ dimension }); + + if (fs.existsSync(dbPath)) { + db.load(dbPath); + } + + spinner.text = 'Reading vectors...'; + const data = JSON.parse(fs.readFileSync(file, 'utf8')); + const vectors = Array.isArray(data) ? data : [data]; + + spinner.text = `Inserting ${vectors.length} vectors...`; + const batchSize = parseInt(options.batchSize); + + for (let i = 0; i < vectors.length; i += batchSize) { + const batch = vectors.slice(i, i + batchSize); + db.insertBatch(batch); + spinner.text = `Inserted ${Math.min(i + batchSize, vectors.length)}/${vectors.length} vectors...`; + } + + db.save(dbPath); + spinner.succeed(chalk.green(`Inserted ${vectors.length} vectors`)); + + const stats = db.stats(); + console.log(chalk.gray(` Total vectors: ${stats.count}`)); + } catch (error) { + spinner.fail(chalk.red('Failed to insert vectors')); + console.error(chalk.red(error.message)); + process.exit(1); + } + }); + +// Search vectors +program + .command('search ') + .description('Search for similar vectors') + .requiredOption('-v, --vector ', 'Query vector as JSON array') + .option('-k, --top-k ', 'Number of results', '10') + .option('-t, --threshold ', 'Similarity threshold', '0.0') + .option('-f, --filter ', 'Metadata filter as JSON') + .action((dbPath, options) => { + requireRuvector(); + const spinner = ora('Loading database...').start(); + + try { + // Read database metadata + const dbData = fs.readFileSync(dbPath, 'utf8'); + const parsed = JSON.parse(dbData); + const dimension = parsed.dimension || 384; + + const db = new VectorDB({ dimension }); + db.load(dbPath); + + spinner.text = 'Searching...'; + + const vector = 
JSON.parse(options.vector); + const query = { + vector, + k: parseInt(options.topK), + threshold: parseFloat(options.threshold) + }; + + if (options.filter) { + query.filter = JSON.parse(options.filter); + } + + const results = db.search(query); + spinner.succeed(chalk.green(`Found ${results.length} results`)); + + console.log(chalk.cyan('\nSearch Results:')); + results.forEach((result, i) => { + console.log(chalk.white(`\n${i + 1}. ID: ${result.id}`)); + console.log(chalk.yellow(` Score: ${result.score.toFixed(4)}`)); + if (result.metadata) { + console.log(chalk.gray(` Metadata: ${JSON.stringify(result.metadata)}`)); + } + }); + } catch (error) { + spinner.fail(chalk.red('Failed to search')); + console.error(chalk.red(error.message)); + process.exit(1); + } + }); + +// Show stats +program + .command('stats ') + .description('Show database statistics') + .action((dbPath) => { + requireRuvector(); + const spinner = ora('Loading database...').start(); + + try { + const dbData = fs.readFileSync(dbPath, 'utf8'); + const parsed = JSON.parse(dbData); + const dimension = parsed.dimension || 384; + + const db = new VectorDB({ dimension }); + db.load(dbPath); + + const stats = db.stats(); + spinner.succeed(chalk.green('Database statistics')); + + console.log(chalk.cyan('\nDatabase Stats:')); + console.log(chalk.white(` Vector Count: ${chalk.yellow(stats.count)}`)); + console.log(chalk.white(` Dimension: ${chalk.yellow(stats.dimension)}`)); + console.log(chalk.white(` Metric: ${chalk.yellow(stats.metric)}`)); + console.log(chalk.white(` Implementation: ${chalk.yellow(getImplementationType())}`)); + + if (stats.memoryUsage) { + const mb = (stats.memoryUsage / (1024 * 1024)).toFixed(2); + console.log(chalk.white(` Memory Usage: ${chalk.yellow(mb + ' MB')}`)); + } + + const fileStats = fs.statSync(dbPath); + const fileMb = (fileStats.size / (1024 * 1024)).toFixed(2); + console.log(chalk.white(` File Size: ${chalk.yellow(fileMb + ' MB')}`)); + } catch (error) { + 
spinner.fail(chalk.red('Failed to load database')); + console.error(chalk.red(error.message)); + process.exit(1); + } + }); + +// Benchmark +program + .command('benchmark') + .description('Run performance benchmarks') + .option('-d, --dimension ', 'Vector dimension', '384') + .option('-n, --num-vectors ', 'Number of vectors', '10000') + .option('-q, --num-queries ', 'Number of queries', '1000') + .action((options) => { + requireRuvector(); + console.log(chalk.cyan('\nruvector Performance Benchmark')); + console.log(chalk.gray(`Implementation: ${getImplementationType()}\n`)); + + const dimension = parseInt(options.dimension); + const numVectors = parseInt(options.numVectors); + const numQueries = parseInt(options.numQueries); + + let spinner = ora('Creating database...').start(); + + try { + const db = new VectorDB({ dimension, metric: 'cosine' }); + spinner.succeed(); + + // Insert benchmark + spinner = ora(`Inserting ${numVectors} vectors...`).start(); + const insertStart = Date.now(); + + const vectors = []; + for (let i = 0; i < numVectors; i++) { + vectors.push({ + id: `vec_${i}`, + vector: Array.from({ length: dimension }, () => Math.random()), + metadata: { index: i, batch: Math.floor(i / 1000) } + }); + } + + db.insertBatch(vectors); + const insertTime = Date.now() - insertStart; + const insertRate = (numVectors / (insertTime / 1000)).toFixed(0); + + spinner.succeed(chalk.green(`Inserted ${numVectors} vectors in ${insertTime}ms`)); + console.log(chalk.gray(` Rate: ${chalk.yellow(insertRate)} vectors/sec`)); + + // Search benchmark + spinner = ora(`Running ${numQueries} searches...`).start(); + const searchStart = Date.now(); + + for (let i = 0; i < numQueries; i++) { + const query = { + vector: Array.from({ length: dimension }, () => Math.random()), + k: 10 + }; + db.search(query); + } + + const searchTime = Date.now() - searchStart; + const searchRate = (numQueries / (searchTime / 1000)).toFixed(0); + const avgLatency = (searchTime / numQueries).toFixed(2); 
+ + spinner.succeed(chalk.green(`Completed ${numQueries} searches in ${searchTime}ms`)); + console.log(chalk.gray(` Rate: ${chalk.yellow(searchRate)} queries/sec`)); + console.log(chalk.gray(` Avg Latency: ${chalk.yellow(avgLatency)}ms`)); + + // Stats + const stats = db.stats(); + console.log(chalk.cyan('\nFinal Stats:')); + console.log(chalk.white(` Vector Count: ${chalk.yellow(stats.count)}`)); + console.log(chalk.white(` Dimension: ${chalk.yellow(stats.dimension)}`)); + console.log(chalk.white(` Implementation: ${chalk.yellow(getImplementationType())}`)); + + } catch (error) { + spinner.fail(chalk.red('Benchmark failed')); + console.error(chalk.red(error.message)); + process.exit(1); + } + }); + +// Info command +program + .command('info') + .description('Show ruvector information') + .action(() => { + console.log(chalk.cyan('\nruvector Information')); + console.log(chalk.white(` CLI Version: ${chalk.yellow(packageJson.version)}`)); + + // Try to load ruvector for implementation info + if (loadRuvector()) { + const version = typeof getVersion === 'function' ? getVersion() : 'unknown'; + const impl = typeof getImplementationType === 'function' ? getImplementationType() : 'native'; + console.log(chalk.white(` Core Version: ${chalk.yellow(version)}`)); + console.log(chalk.white(` Implementation: ${chalk.yellow(impl)}`)); + } else { + console.log(chalk.white(` Core: ${chalk.gray('Not loaded (install @ruvector/core)')}`)); + } + + console.log(chalk.white(` GNN Module: ${gnnAvailable ? 
chalk.green('Available') : chalk.gray('Not installed')}`)); + console.log(chalk.white(` Node Version: ${chalk.yellow(process.version)}`)); + console.log(chalk.white(` Platform: ${chalk.yellow(process.platform)}`)); + console.log(chalk.white(` Architecture: ${chalk.yellow(process.arch)}`)); + + if (!gnnAvailable) { + console.log(chalk.gray('\n Install GNN with: npx ruvector install gnn')); + } + }); + +// ============================================================================= +// Install Command +// ============================================================================= + +program + .command('install [packages...]') + .description('Install optional ruvector packages') + .option('-a, --all', 'Install all optional packages') + .option('-l, --list', 'List available packages') + .option('-i, --interactive', 'Interactive package selection') + .action(async (packages, options) => { + const { execSync } = require('child_process'); + + // Available optional packages - all ruvector npm packages + const availablePackages = { + // Core packages + core: { + name: '@ruvector/core', + description: 'Core vector database with native Rust bindings (HNSW, SIMD)', + installed: true, // Always installed with ruvector + category: 'core' + }, + gnn: { + name: '@ruvector/gnn', + description: 'Graph Neural Network layers, tensor compression, differentiable search', + installed: gnnAvailable, + category: 'core' + }, + 'graph-node': { + name: '@ruvector/graph-node', + description: 'Native Node.js bindings for hypergraph database with Cypher queries', + installed: false, + category: 'core' + }, + 'agentic-synth': { + name: '@ruvector/agentic-synth', + description: 'Synthetic data generator for AI/ML training, RAG, and agentic workflows', + installed: false, + category: 'tools' + }, + extensions: { + name: 'ruvector-extensions', + description: 'Advanced features: embeddings, UI, exports, temporal tracking, persistence', + installed: false, + category: 'tools' + }, + // 
Platform-specific native bindings for @ruvector/core + 'node-linux-x64': { + name: '@ruvector/node-linux-x64-gnu', + description: 'Linux x64 native bindings for @ruvector/core', + installed: false, + category: 'platform' + }, + 'node-linux-arm64': { + name: '@ruvector/node-linux-arm64-gnu', + description: 'Linux ARM64 native bindings for @ruvector/core', + installed: false, + category: 'platform' + }, + 'node-darwin-x64': { + name: '@ruvector/node-darwin-x64', + description: 'macOS Intel x64 native bindings for @ruvector/core', + installed: false, + category: 'platform' + }, + 'node-darwin-arm64': { + name: '@ruvector/node-darwin-arm64', + description: 'macOS Apple Silicon native bindings for @ruvector/core', + installed: false, + category: 'platform' + }, + 'node-win32-x64': { + name: '@ruvector/node-win32-x64-msvc', + description: 'Windows x64 native bindings for @ruvector/core', + installed: false, + category: 'platform' + }, + // Platform-specific native bindings for @ruvector/gnn + 'gnn-linux-x64': { + name: '@ruvector/gnn-linux-x64-gnu', + description: 'Linux x64 native bindings for @ruvector/gnn', + installed: false, + category: 'platform' + }, + 'gnn-linux-arm64': { + name: '@ruvector/gnn-linux-arm64-gnu', + description: 'Linux ARM64 native bindings for @ruvector/gnn', + installed: false, + category: 'platform' + }, + 'gnn-darwin-x64': { + name: '@ruvector/gnn-darwin-x64', + description: 'macOS Intel x64 native bindings for @ruvector/gnn', + installed: false, + category: 'platform' + }, + 'gnn-darwin-arm64': { + name: '@ruvector/gnn-darwin-arm64', + description: 'macOS Apple Silicon native bindings for @ruvector/gnn', + installed: false, + category: 'platform' + }, + 'gnn-win32-x64': { + name: '@ruvector/gnn-win32-x64-msvc', + description: 'Windows x64 native bindings for @ruvector/gnn', + installed: false, + category: 'platform' + }, + // Legacy/standalone packages + 'ruvector-core': { + name: 'ruvector-core', + description: 'Standalone vector database 
(legacy, use @ruvector/core instead)', + installed: false, + category: 'legacy' + } + }; + + // Check which packages are actually installed + for (const [key, pkg] of Object.entries(availablePackages)) { + if (key !== 'core' && key !== 'gnn') { + try { + require.resolve(pkg.name); + pkg.installed = true; + } catch (e) { + pkg.installed = false; + } + } + } + + // List packages + if (options.list || (packages.length === 0 && !options.all && !options.interactive)) { + console.log(chalk.cyan('\n═══════════════════════════════════════════════════════════════')); + console.log(chalk.cyan(' Ruvector Packages')); + console.log(chalk.cyan('═══════════════════════════════════════════════════════════════\n')); + + const categories = { + core: { title: '📦 Core Packages', packages: [] }, + tools: { title: '🔧 Tools & Extensions', packages: [] }, + platform: { title: '🖥️ Platform Bindings', packages: [] }, + legacy: { title: '📜 Legacy Packages', packages: [] } + }; + + // Group by category + Object.entries(availablePackages).forEach(([key, pkg]) => { + if (categories[pkg.category]) { + categories[pkg.category].packages.push({ key, ...pkg }); + } + }); + + // Display by category + for (const [catKey, cat] of Object.entries(categories)) { + if (cat.packages.length === 0) continue; + + console.log(chalk.cyan(`${cat.title}`)); + console.log(chalk.gray('─'.repeat(60))); + + cat.packages.forEach(pkg => { + const status = pkg.installed ? chalk.green('✓') : chalk.gray('○'); + const statusText = pkg.installed ? 
chalk.green('installed') : chalk.gray('available'); + console.log(chalk.white(` ${status} ${chalk.yellow(pkg.key.padEnd(18))} ${statusText}`)); + console.log(chalk.gray(` ${pkg.description}`)); + console.log(chalk.gray(` npm: ${chalk.white(pkg.name)}\n`)); + }); + } + + console.log(chalk.cyan('═══════════════════════════════════════════════════════════════')); + console.log(chalk.cyan('Usage:')); + console.log(chalk.white(' npx ruvector install gnn # Install GNN package')); + console.log(chalk.white(' npx ruvector install graph-node # Install graph database')); + console.log(chalk.white(' npx ruvector install agentic-synth # Install data generator')); + console.log(chalk.white(' npx ruvector install --all # Install all core packages')); + console.log(chalk.white(' npx ruvector install -i # Interactive selection')); + console.log(chalk.gray('\n Note: Platform bindings are auto-detected by @ruvector/core')); + return; + } + + // Interactive mode + if (options.interactive) { + const readline = require('readline'); + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout + }); + + console.log(chalk.cyan('\nSelect packages to install:\n')); + + const notInstalled = Object.entries(availablePackages) + .filter(([_, pkg]) => !pkg.installed); + + if (notInstalled.length === 0) { + console.log(chalk.green('All packages are already installed!')); + rl.close(); + return; + } + + notInstalled.forEach(([key, pkg], i) => { + console.log(chalk.white(` ${i + 1}. ${chalk.yellow(key)} - ${pkg.description}`)); + }); + console.log(chalk.white(` ${notInstalled.length + 1}. ${chalk.yellow('all')} - Install all packages`)); + console.log(chalk.white(` 0. 
${chalk.gray('cancel')} - Exit without installing`)); + + rl.question(chalk.cyan('\nEnter selection (comma-separated for multiple): '), (answer) => { + rl.close(); + + const selections = answer.split(',').map(s => s.trim()); + let toInstall = []; + + for (const sel of selections) { + if (sel === '0' || sel.toLowerCase() === 'cancel') { + console.log(chalk.yellow('Installation cancelled.')); + return; + } + if (sel === String(notInstalled.length + 1) || sel.toLowerCase() === 'all') { + toInstall = notInstalled.map(([_, pkg]) => pkg.name); + break; + } + const idx = parseInt(sel) - 1; + if (idx >= 0 && idx < notInstalled.length) { + toInstall.push(notInstalled[idx][1].name); + } + } + + if (toInstall.length === 0) { + console.log(chalk.yellow('No valid packages selected.')); + return; + } + + installPackages(toInstall); + }); + return; + } + + // Install all (core + tools only, not platform-specific or legacy) + if (options.all) { + const toInstall = Object.values(availablePackages) + .filter(pkg => !pkg.installed && (pkg.category === 'core' || pkg.category === 'tools')) + .map(pkg => pkg.name); + + if (toInstall.length === 0) { + console.log(chalk.green('All core packages are already installed!')); + return; + } + + console.log(chalk.cyan(`Installing ${toInstall.length} packages...`)); + installPackages(toInstall); + return; + } + + // Install specific packages + const toInstall = []; + for (const pkg of packages) { + const key = pkg.toLowerCase().replace('@ruvector/', ''); + if (availablePackages[key]) { + if (availablePackages[key].installed) { + console.log(chalk.yellow(`${availablePackages[key].name} is already installed`)); + } else { + toInstall.push(availablePackages[key].name); + } + } else { + console.log(chalk.red(`Unknown package: ${pkg}`)); + console.log(chalk.gray(`Available: ${Object.keys(availablePackages).join(', ')}`)); + } + } + + if (toInstall.length > 0) { + installPackages(toInstall); + } + + function installPackages(pkgs) { + const spinner = 
ora(`Installing ${pkgs.join(', ')}...`).start(); + + try { + // Detect package manager + let pm = 'npm'; + if (fs.existsSync('yarn.lock')) pm = 'yarn'; + else if (fs.existsSync('pnpm-lock.yaml')) pm = 'pnpm'; + else if (fs.existsSync('bun.lockb')) pm = 'bun'; + + const cmd = pm === 'yarn' ? `yarn add ${pkgs.join(' ')}` + : pm === 'pnpm' ? `pnpm add ${pkgs.join(' ')}` + : pm === 'bun' ? `bun add ${pkgs.join(' ')}` + : `npm install ${pkgs.join(' ')}`; + + execSync(cmd, { stdio: 'pipe' }); + + spinner.succeed(chalk.green(`Installed: ${pkgs.join(', ')}`)); + console.log(chalk.cyan('\nRun "npx ruvector info" to verify installation.')); + } catch (error) { + spinner.fail(chalk.red('Installation failed')); + console.error(chalk.red(error.message)); + console.log(chalk.yellow(`\nTry manually: npm install ${pkgs.join(' ')}`)); + process.exit(1); + } + } + }); + +// ============================================================================= +// GNN Commands +// ============================================================================= + +// Helper to check GNN availability +function requireGnn() { + if (!gnnAvailable) { + console.error(chalk.red('Error: GNN module not available.')); + console.error(chalk.yellow('Install it with: npm install @ruvector/gnn')); + process.exit(1); + } +} + +// GNN parent command +const gnnCmd = program + .command('gnn') + .description('Graph Neural Network operations'); + +// GNN Layer command +gnnCmd + .command('layer') + .description('Create and test a GNN layer') + .requiredOption('-i, --input-dim ', 'Input dimension') + .requiredOption('-h, --hidden-dim ', 'Hidden dimension') + .option('-a, --heads ', 'Number of attention heads', '4') + .option('-d, --dropout ', 'Dropout rate', '0.1') + .option('--test', 'Run a test forward pass') + .option('-o, --output ', 'Save layer config to JSON file') + .action((options) => { + requireGnn(); + const spinner = ora('Creating GNN layer...').start(); + + try { + const inputDim = 
parseInt(options.inputDim); + const hiddenDim = parseInt(options.hiddenDim); + const heads = parseInt(options.heads); + const dropout = parseFloat(options.dropout); + + const layer = new RuvectorLayer(inputDim, hiddenDim, heads, dropout); + spinner.succeed(chalk.green('GNN Layer created')); + + console.log(chalk.cyan('\nLayer Configuration:')); + console.log(chalk.white(` Input Dim: ${chalk.yellow(inputDim)}`)); + console.log(chalk.white(` Hidden Dim: ${chalk.yellow(hiddenDim)}`)); + console.log(chalk.white(` Heads: ${chalk.yellow(heads)}`)); + console.log(chalk.white(` Dropout: ${chalk.yellow(dropout)}`)); + + if (options.test) { + spinner.start('Running test forward pass...'); + + // Create test data + const nodeEmbedding = Array.from({ length: inputDim }, () => Math.random()); + const neighborEmbeddings = [ + Array.from({ length: inputDim }, () => Math.random()), + Array.from({ length: inputDim }, () => Math.random()) + ]; + const edgeWeights = [0.6, 0.4]; + + const output = layer.forward(nodeEmbedding, neighborEmbeddings, edgeWeights); + spinner.succeed(chalk.green('Forward pass completed')); + + console.log(chalk.cyan('\nTest Results:')); + console.log(chalk.white(` Input shape: ${chalk.yellow(`[${inputDim}]`)}`)); + console.log(chalk.white(` Output shape: ${chalk.yellow(`[${output.length}]`)}`)); + console.log(chalk.white(` Output sample: ${chalk.gray(`[${output.slice(0, 4).map(v => v.toFixed(4)).join(', ')}...]`)}`)); + } + + if (options.output) { + const config = layer.toJson(); + fs.writeFileSync(options.output, config); + console.log(chalk.green(`\nLayer config saved to: ${options.output}`)); + } + } catch (error) { + spinner.fail(chalk.red('Failed to create GNN layer')); + console.error(chalk.red(error.message)); + process.exit(1); + } + }); + +// GNN Compress command +gnnCmd + .command('compress') + .description('Compress embeddings using adaptive tensor compression') + .requiredOption('-f, --file ', 'Input JSON file with embeddings') + .option('-l, 
--level ', 'Compression level (none|half|pq8|pq4|binary)', 'auto') + .option('-a, --access-freq ', 'Access frequency for auto compression (0.0-1.0)', '0.5') + .option('-o, --output ', 'Output file for compressed data') + .action((options) => { + requireGnn(); + const spinner = ora('Loading embeddings...').start(); + + try { + const data = JSON.parse(fs.readFileSync(options.file, 'utf8')); + const embeddings = Array.isArray(data) ? data : [data]; + + spinner.text = 'Compressing embeddings...'; + const compressor = new TensorCompress(); + const accessFreq = parseFloat(options.accessFreq); + + const results = []; + let totalOriginalSize = 0; + let totalCompressedSize = 0; + + for (const embedding of embeddings) { + const vec = embedding.vector || embedding; + totalOriginalSize += vec.length * 4; // float32 = 4 bytes + + let compressed; + if (options.level === 'auto') { + compressed = compressor.compress(vec, accessFreq); + } else { + const levelConfig = { levelType: options.level }; + if (options.level === 'pq8') { + levelConfig.subvectors = 8; + levelConfig.centroids = 256; + } else if (options.level === 'pq4') { + levelConfig.subvectors = 8; + } + compressed = compressor.compressWithLevel(vec, levelConfig); + } + + totalCompressedSize += compressed.length; + results.push({ + id: embedding.id, + compressed + }); + } + + const ratio = (totalOriginalSize / totalCompressedSize).toFixed(2); + const savings = ((1 - totalCompressedSize / totalOriginalSize) * 100).toFixed(1); + + spinner.succeed(chalk.green(`Compressed ${embeddings.length} embeddings`)); + + console.log(chalk.cyan('\nCompression Results:')); + console.log(chalk.white(` Embeddings: ${chalk.yellow(embeddings.length)}`)); + console.log(chalk.white(` Level: ${chalk.yellow(options.level === 'auto' ? 
`auto (${getCompressionLevel(accessFreq)})` : options.level)}`)); + console.log(chalk.white(` Original: ${chalk.yellow((totalOriginalSize / 1024).toFixed(2) + ' KB')}`)); + console.log(chalk.white(` Compressed: ${chalk.yellow((totalCompressedSize / 1024).toFixed(2) + ' KB')}`)); + console.log(chalk.white(` Ratio: ${chalk.yellow(ratio + 'x')}`)); + console.log(chalk.white(` Savings: ${chalk.yellow(savings + '%')}`)); + + if (options.output) { + fs.writeFileSync(options.output, JSON.stringify(results, null, 2)); + console.log(chalk.green(`\nCompressed data saved to: ${options.output}`)); + } + } catch (error) { + spinner.fail(chalk.red('Failed to compress embeddings')); + console.error(chalk.red(error.message)); + process.exit(1); + } + }); + +// GNN Search command +gnnCmd + .command('search') + .description('Differentiable search with soft attention') + .requiredOption('-q, --query ', 'Query vector as JSON array') + .requiredOption('-c, --candidates ', 'Candidates file (JSON array of vectors)') + .option('-k, --top-k ', 'Number of results', '5') + .option('-t, --temperature ', 'Softmax temperature (lower=sharper)', '1.0') + .action((options) => { + requireGnn(); + const spinner = ora('Loading candidates...').start(); + + try { + const query = JSON.parse(options.query); + const candidatesData = JSON.parse(fs.readFileSync(options.candidates, 'utf8')); + const candidates = candidatesData.map(c => c.vector || c); + const k = parseInt(options.topK); + const temperature = parseFloat(options.temperature); + + spinner.text = 'Running differentiable search...'; + const result = differentiableSearch(query, candidates, k, temperature); + + spinner.succeed(chalk.green(`Found top-${k} results`)); + + console.log(chalk.cyan('\nSearch Results:')); + console.log(chalk.white(` Query dim: ${chalk.yellow(query.length)}`)); + console.log(chalk.white(` Candidates: ${chalk.yellow(candidates.length)}`)); + console.log(chalk.white(` Temperature: ${chalk.yellow(temperature)}`)); + + 
console.log(chalk.cyan('\nTop-K Results:')); + for (let i = 0; i < result.indices.length; i++) { + const idx = result.indices[i]; + const weight = result.weights[i]; + const id = candidatesData[idx]?.id || `candidate_${idx}`; + console.log(chalk.white(` ${i + 1}. ${chalk.yellow(id)} (index: ${idx})`)); + console.log(chalk.gray(` Weight: ${weight.toFixed(6)}`)); + } + } catch (error) { + spinner.fail(chalk.red('Failed to run search')); + console.error(chalk.red(error.message)); + process.exit(1); + } + }); + +// GNN Info command +gnnCmd + .command('info') + .description('Show GNN module information') + .action(() => { + if (!gnnAvailable) { + console.log(chalk.yellow('\nGNN Module: Not installed')); + console.log(chalk.white('Install with: npm install @ruvector/gnn')); + return; + } + + console.log(chalk.cyan('\nGNN Module Information')); + console.log(chalk.white(` Status: ${chalk.green('Available')}`)); + console.log(chalk.white(` Platform: ${chalk.yellow(process.platform)}`)); + console.log(chalk.white(` Architecture: ${chalk.yellow(process.arch)}`)); + + console.log(chalk.cyan('\nAvailable Features:')); + console.log(chalk.white(` • RuvectorLayer - GNN layer with multi-head attention`)); + console.log(chalk.white(` • TensorCompress - Adaptive tensor compression (5 levels)`)); + console.log(chalk.white(` • differentiableSearch - Soft attention-based search`)); + console.log(chalk.white(` • hierarchicalForward - Multi-layer GNN processing`)); + + console.log(chalk.cyan('\nCompression Levels:')); + console.log(chalk.gray(` none (freq > 0.8) - Full precision, hot data`)); + console.log(chalk.gray(` half (freq > 0.4) - ~50% savings, warm data`)); + console.log(chalk.gray(` pq8 (freq > 0.1) - ~8x compression, cool data`)); + console.log(chalk.gray(` pq4 (freq > 0.01) - ~16x compression, cold data`)); + console.log(chalk.gray(` binary (freq <= 0.01) - ~32x compression, archive`)); + }); + +// ============================================================================= 
+// Attention Commands +// ============================================================================= + +// Helper to require attention module +function requireAttention() { + if (!attentionAvailable) { + console.error(chalk.red('Error: @ruvector/attention is not installed')); + console.error(chalk.yellow('Install it with: npm install @ruvector/attention')); + process.exit(1); + } +} + +// Attention parent command +const attentionCmd = program + .command('attention') + .description('High-performance attention mechanism operations'); + +// Attention compute command - run attention on input vectors +attentionCmd + .command('compute') + .description('Compute attention over input vectors') + .requiredOption('-q, --query ', 'Query vector as JSON array') + .requiredOption('-k, --keys ', 'Keys file (JSON array of vectors)') + .option('-v, --values ', 'Values file (JSON array of vectors, defaults to keys)') + .option('-t, --type ', 'Attention type (dot|multi-head|flash|hyperbolic|linear)', 'dot') + .option('-h, --heads ', 'Number of attention heads (for multi-head)', '4') + .option('-d, --head-dim ', 'Head dimension (for multi-head)', '64') + .option('--curvature ', 'Curvature for hyperbolic attention', '1.0') + .option('-o, --output ', 'Output file for results') + .action((options) => { + requireAttention(); + const spinner = ora('Loading keys...').start(); + + try { + const query = JSON.parse(options.query); + const keysData = JSON.parse(fs.readFileSync(options.keys, 'utf8')); + const keys = keysData.map(k => k.vector || k); + + let values = keys; + if (options.values) { + const valuesData = JSON.parse(fs.readFileSync(options.values, 'utf8')); + values = valuesData.map(v => v.vector || v); + } + + spinner.text = `Computing ${options.type} attention...`; + + let result; + let attentionWeights; + + switch (options.type) { + case 'dot': { + const attn = new DotProductAttention(); + const queryMat = [query]; + const output = attn.forward(queryMat, keys, values); + result 
= output[0]; + attentionWeights = attn.getLastWeights ? attn.getLastWeights()[0] : null; + break; + } + case 'multi-head': { + const numHeads = parseInt(options.heads); + const headDim = parseInt(options.headDim); + const attn = new MultiHeadAttention(query.length, numHeads, headDim); + const queryMat = [query]; + const output = attn.forward(queryMat, keys, values); + result = output[0]; + break; + } + case 'flash': { + const attn = new FlashAttention(query.length); + const queryMat = [query]; + const output = attn.forward(queryMat, keys, values); + result = output[0]; + break; + } + case 'hyperbolic': { + const curvature = parseFloat(options.curvature); + const attn = new HyperbolicAttention(query.length, curvature); + const queryMat = [query]; + const output = attn.forward(queryMat, keys, values); + result = output[0]; + break; + } + case 'linear': { + const attn = new LinearAttention(query.length); + const queryMat = [query]; + const output = attn.forward(queryMat, keys, values); + result = output[0]; + break; + } + default: + throw new Error(`Unknown attention type: ${options.type}`); + } + + spinner.succeed(chalk.green(`Attention computed (${options.type})`)); + + console.log(chalk.cyan('\nAttention Results:')); + console.log(chalk.white(` Type: ${chalk.yellow(options.type)}`)); + console.log(chalk.white(` Query dim: ${chalk.yellow(query.length)}`)); + console.log(chalk.white(` Num keys: ${chalk.yellow(keys.length)}`)); + console.log(chalk.white(` Output dim: ${chalk.yellow(result.length)}`)); + console.log(chalk.white(` Output: ${chalk.gray(`[${result.slice(0, 4).map(v => v.toFixed(4)).join(', ')}...]`)}`)); + + if (attentionWeights) { + console.log(chalk.cyan('\nAttention Weights:')); + attentionWeights.slice(0, 5).forEach((w, i) => { + console.log(chalk.gray(` Key ${i}: ${w.toFixed(4)}`)); + }); + if (attentionWeights.length > 5) { + console.log(chalk.gray(` ... 
and ${attentionWeights.length - 5} more`)); + } + } + + if (options.output) { + const outputData = { result, attentionWeights }; + fs.writeFileSync(options.output, JSON.stringify(outputData, null, 2)); + console.log(chalk.green(`\nResults saved to: ${options.output}`)); + } + } catch (error) { + spinner.fail(chalk.red('Failed to compute attention')); + console.error(chalk.red(error.message)); + process.exit(1); + } + }); + +// Attention benchmark command +attentionCmd + .command('benchmark') + .description('Benchmark attention mechanisms') + .option('-d, --dimension ', 'Vector dimension', '256') + .option('-n, --num-vectors ', 'Number of vectors', '100') + .option('-i, --iterations ', 'Benchmark iterations', '100') + .option('-t, --types ', 'Attention types to benchmark (comma-separated)', 'dot,flash,linear') + .action((options) => { + requireAttention(); + const spinner = ora('Setting up benchmark...').start(); + + try { + const dim = parseInt(options.dimension); + const numVectors = parseInt(options.numVectors); + const iterations = parseInt(options.iterations); + const types = options.types.split(',').map(t => t.trim()); + + // Generate random test data + spinner.text = 'Generating test data...'; + const query = Array.from({ length: dim }, () => Math.random()); + const keys = Array.from({ length: numVectors }, () => + Array.from({ length: dim }, () => Math.random()) + ); + + console.log(chalk.cyan('\n═══════════════════════════════════════════════════════════════')); + console.log(chalk.cyan(' Attention Mechanism Benchmark')); + console.log(chalk.cyan('═══════════════════════════════════════════════════════════════\n')); + + console.log(chalk.white(` Dimension: ${chalk.yellow(dim)}`)); + console.log(chalk.white(` Vectors: ${chalk.yellow(numVectors)}`)); + console.log(chalk.white(` Iterations: ${chalk.yellow(iterations)}`)); + console.log(''); + + const results = []; + + // Convert to Float32Arrays for compute() + const queryF32 = new Float32Array(query); + const 
keysF32 = keys.map(k => new Float32Array(k)); + + for (const type of types) { + spinner.text = `Benchmarking ${type} attention...`; + spinner.start(); + + let attn; + try { + switch (type) { + case 'dot': + attn = new DotProductAttention(dim); + break; + case 'flash': + attn = new FlashAttention(dim, 64); // dim, block_size + break; + case 'linear': + attn = new LinearAttention(dim, 64); // dim, num_features + break; + case 'hyperbolic': + attn = new HyperbolicAttention(dim, 1.0); + break; + case 'multi-head': + attn = new MultiHeadAttention(dim, 4); // dim, num_heads + break; + default: + console.log(chalk.yellow(` Skipping unknown type: ${type}`)); + continue; + } + } catch (e) { + console.log(chalk.yellow(` ${type}: not available (${e.message})`)); + continue; + } + + // Warm up + for (let i = 0; i < 5; i++) { + try { + attn.compute(queryF32, keysF32, keysF32); + } catch (e) { + // Some mechanisms may fail warmup + } + } + + // Benchmark + const start = process.hrtime.bigint(); + for (let i = 0; i < iterations; i++) { + attn.compute(queryF32, keysF32, keysF32); + } + const end = process.hrtime.bigint(); + const totalMs = Number(end - start) / 1_000_000; + const avgMs = totalMs / iterations; + const opsPerSec = 1000 / avgMs; + + results.push({ type, avgMs, opsPerSec }); + spinner.succeed(chalk.green(`${type}: ${avgMs.toFixed(3)} ms/op (${opsPerSec.toFixed(1)} ops/sec)`)); + } + + // Summary + if (results.length > 0) { + console.log(chalk.cyan('\n═══════════════════════════════════════════════════════════════')); + console.log(chalk.cyan(' Summary')); + console.log(chalk.cyan('═══════════════════════════════════════════════════════════════\n')); + + const fastest = results.reduce((a, b) => a.avgMs < b.avgMs ? 
a : b); + console.log(chalk.green(` Fastest: ${fastest.type} (${fastest.avgMs.toFixed(3)} ms/op)\n`)); + + console.log(chalk.white(' Relative Performance:')); + for (const r of results) { + const relPerf = (fastest.avgMs / r.avgMs * 100).toFixed(1); + const bar = '█'.repeat(Math.round(relPerf / 5)); + console.log(chalk.white(` ${r.type.padEnd(12)} ${chalk.cyan(bar)} ${relPerf}%`)); + } + } + } catch (error) { + spinner.fail(chalk.red('Benchmark failed')); + console.error(chalk.red(error.message)); + process.exit(1); + } + }); + +// Hyperbolic math command +attentionCmd + .command('hyperbolic') + .description('Hyperbolic geometry operations') + .requiredOption('-a, --action ', 'Action: exp-map|log-map|distance|project|mobius-add') + .requiredOption('-v, --vector ', 'Input vector(s) as JSON') + .option('-b, --vector-b ', 'Second vector for binary operations') + .option('-c, --curvature ', 'Poincaré ball curvature', '1.0') + .option('-o, --origin ', 'Origin point for exp/log maps') + .action((options) => { + requireAttention(); + + try { + const vecArray = JSON.parse(options.vector); + const vec = new Float32Array(vecArray); + const curvature = parseFloat(options.curvature); + + let result; + let description; + + switch (options.action) { + case 'exp-map': { + const originArray = options.origin ? JSON.parse(options.origin) : Array(vec.length).fill(0); + const origin = new Float32Array(originArray); + result = expMap(origin, vec, curvature); + description = 'Exponential map (tangent → Poincaré ball)'; + break; + } + case 'log-map': { + const originArray = options.origin ? 
JSON.parse(options.origin) : Array(vec.length).fill(0); + const origin = new Float32Array(originArray); + result = logMap(origin, vec, curvature); + description = 'Logarithmic map (Poincaré ball → tangent)'; + break; + } + case 'distance': { + if (!options.vectorB) { + throw new Error('--vector-b required for distance calculation'); + } + const vecBArray = JSON.parse(options.vectorB); + const vecB = new Float32Array(vecBArray); + result = poincareDistance(vec, vecB, curvature); + description = 'Poincaré distance'; + break; + } + case 'project': { + result = projectToPoincareBall(vec, curvature); + description = 'Project to Poincaré ball'; + break; + } + case 'mobius-add': { + if (!options.vectorB) { + throw new Error('--vector-b required for Möbius addition'); + } + const vecBArray = JSON.parse(options.vectorB); + const vecB = new Float32Array(vecBArray); + result = mobiusAddition(vec, vecB, curvature); + description = 'Möbius addition'; + break; + } + default: + throw new Error(`Unknown action: ${options.action}`); + } + + console.log(chalk.cyan('\nHyperbolic Operation:')); + console.log(chalk.white(` Action: ${chalk.yellow(description)}`)); + console.log(chalk.white(` Curvature: ${chalk.yellow(curvature)}`)); + + if (typeof result === 'number') { + console.log(chalk.white(` Result: ${chalk.green(result.toFixed(6))}`)); + } else { + const resultArray = Array.from(result); + console.log(chalk.white(` Input dim: ${chalk.yellow(vec.length)}`)); + console.log(chalk.white(` Output dim: ${chalk.yellow(resultArray.length)}`)); + console.log(chalk.white(` Result: ${chalk.gray(`[${resultArray.slice(0, 5).map(v => v.toFixed(4)).join(', ')}...]`)}`)); + + // Compute norm to verify it's in the ball + const norm = Math.sqrt(resultArray.reduce((sum, x) => sum + x * x, 0)); + console.log(chalk.white(` Norm: ${chalk.yellow(norm.toFixed(6))} ${norm < 1 ? 
chalk.green('(inside ball)') : chalk.red('(outside ball)')}`)); + } + } catch (error) { + console.error(chalk.red('Hyperbolic operation failed:'), error.message); + process.exit(1); + } + }); + +// Attention info command +attentionCmd + .command('info') + .description('Show attention module information') + .action(() => { + if (!attentionAvailable) { + console.log(chalk.yellow('\nAttention Module: Not installed')); + console.log(chalk.white('Install with: npm install @ruvector/attention')); + return; + } + + console.log(chalk.cyan('\nAttention Module Information')); + console.log(chalk.white(` Status: ${chalk.green('Available')}`)); + console.log(chalk.white(` Version: ${chalk.yellow(attentionVersion ? attentionVersion() : 'unknown')}`)); + console.log(chalk.white(` Platform: ${chalk.yellow(process.platform)}`)); + console.log(chalk.white(` Architecture: ${chalk.yellow(process.arch)}`)); + + console.log(chalk.cyan('\nCore Attention Mechanisms:')); + console.log(chalk.white(` • DotProductAttention - Scaled dot-product attention`)); + console.log(chalk.white(` • MultiHeadAttention - Multi-head self-attention`)); + console.log(chalk.white(` • FlashAttention - Memory-efficient IO-aware attention`)); + console.log(chalk.white(` • HyperbolicAttention - Poincaré ball attention`)); + console.log(chalk.white(` • LinearAttention - O(n) linear complexity attention`)); + console.log(chalk.white(` • MoEAttention - Mixture of Experts attention`)); + + console.log(chalk.cyan('\nGraph Attention:')); + console.log(chalk.white(` • GraphRoPeAttention - Rotary position embeddings for graphs`)); + console.log(chalk.white(` • EdgeFeaturedAttention - Edge feature-enhanced attention`)); + console.log(chalk.white(` • DualSpaceAttention - Euclidean + hyperbolic dual space`)); + console.log(chalk.white(` • LocalGlobalAttention - Local-global graph attention`)); + + console.log(chalk.cyan('\nHyperbolic Math:')); + console.log(chalk.white(` • expMap, logMap - Exponential/logarithmic maps`)); + 
console.log(chalk.white(` • mobiusAddition - Möbius addition in Poincaré ball`)); + console.log(chalk.white(` • poincareDistance - Hyperbolic distance metric`)); + console.log(chalk.white(` • projectToPoincareBall - Project vectors to ball`)); + + console.log(chalk.cyan('\nTraining Utilities:')); + console.log(chalk.white(` • AdamOptimizer, AdamWOptimizer, SgdOptimizer`)); + console.log(chalk.white(` • InfoNceLoss, LocalContrastiveLoss`)); + console.log(chalk.white(` • CurriculumScheduler, TemperatureAnnealing`)); + console.log(chalk.white(` • HardNegativeMiner, InBatchMiner`)); + }); + +// Attention list command - list available mechanisms +attentionCmd + .command('list') + .description('List all available attention mechanisms') + .option('-v, --verbose', 'Show detailed information') + .action((options) => { + console.log(chalk.cyan('\n═══════════════════════════════════════════════════════════════')); + console.log(chalk.cyan(' Available Attention Mechanisms')); + console.log(chalk.cyan('═══════════════════════════════════════════════════════════════\n')); + + const mechanisms = [ + { name: 'DotProductAttention', type: 'core', complexity: 'O(n²)', available: !!DotProductAttention }, + { name: 'MultiHeadAttention', type: 'core', complexity: 'O(n²)', available: !!MultiHeadAttention }, + { name: 'FlashAttention', type: 'core', complexity: 'O(n²) IO-optimized', available: !!FlashAttention }, + { name: 'HyperbolicAttention', type: 'core', complexity: 'O(n²)', available: !!HyperbolicAttention }, + { name: 'LinearAttention', type: 'core', complexity: 'O(n)', available: !!LinearAttention }, + { name: 'MoEAttention', type: 'core', complexity: 'O(n*k)', available: !!MoEAttention }, + { name: 'GraphRoPeAttention', type: 'graph', complexity: 'O(n²)', available: !!GraphRoPeAttention }, + { name: 'EdgeFeaturedAttention', type: 'graph', complexity: 'O(n²)', available: !!EdgeFeaturedAttention }, + { name: 'DualSpaceAttention', type: 'graph', complexity: 'O(n²)', available: 
!!DualSpaceAttention }, + { name: 'LocalGlobalAttention', type: 'graph', complexity: 'O(n*k)', available: !!LocalGlobalAttention }, + ]; + + console.log(chalk.white(' Core Attention:')); + mechanisms.filter(m => m.type === 'core').forEach(m => { + const status = m.available ? chalk.green('✓') : chalk.red('✗'); + console.log(chalk.white(` ${status} ${m.name.padEnd(22)} ${chalk.gray(m.complexity)}`)); + }); + + console.log(chalk.white('\n Graph Attention:')); + mechanisms.filter(m => m.type === 'graph').forEach(m => { + const status = m.available ? chalk.green('✓') : chalk.red('✗'); + console.log(chalk.white(` ${status} ${m.name.padEnd(22)} ${chalk.gray(m.complexity)}`)); + }); + + if (!attentionAvailable) { + console.log(chalk.yellow('\n Note: @ruvector/attention not installed')); + console.log(chalk.white(' Install with: npm install @ruvector/attention')); + } + + if (options.verbose) { + console.log(chalk.cyan('\n Usage Examples:')); + console.log(chalk.gray(' # Compute dot-product attention')); + console.log(chalk.white(' npx ruvector attention compute -q "[1,2,3]" -k keys.json -t dot')); + console.log(chalk.gray('\n # Benchmark attention mechanisms')); + console.log(chalk.white(' npx ruvector attention benchmark -d 256 -n 100')); + console.log(chalk.gray('\n # Hyperbolic distance')); + console.log(chalk.white(' npx ruvector attention hyperbolic -a distance -v "[0.1,0.2]" -b "[0.3,0.4]"')); + } + }); + +// ============================================================================= +// Doctor Command - Check system health and dependencies +// ============================================================================= + +program + .command('doctor') + .description('Check system health and dependencies') + .option('-v, --verbose', 'Show detailed information') + .action(async (options) => { + const { execSync } = require('child_process'); + + console.log(chalk.cyan('\n═══════════════════════════════════════════════════════════════')); + console.log(chalk.cyan(' 
RuVector Doctor')); + console.log(chalk.cyan('═══════════════════════════════════════════════════════════════\n')); + + let issues = 0; + let warnings = 0; + + // Helper functions + const check = (name, condition, fix) => { + if (condition) { + console.log(chalk.green(` ✓ ${name}`)); + return true; + } else { + console.log(chalk.red(` ✗ ${name}`)); + if (fix) console.log(chalk.gray(` Fix: ${fix}`)); + issues++; + return false; + } + }; + + const warn = (name, condition, suggestion) => { + if (condition) { + console.log(chalk.green(` ✓ ${name}`)); + return true; + } else { + console.log(chalk.yellow(` ! ${name}`)); + if (suggestion) console.log(chalk.gray(` Suggestion: ${suggestion}`)); + warnings++; + return false; + } + }; + + const getVersion = (cmd) => { + try { + return execSync(cmd, { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }).trim(); + } catch (e) { + return null; + } + }; + + // System Information + console.log(chalk.cyan('System Information:')); + console.log(chalk.white(` Platform: ${chalk.yellow(process.platform)}`)); + console.log(chalk.white(` Architecture: ${chalk.yellow(process.arch)}`)); + console.log(chalk.white(` Node.js: ${chalk.yellow(process.version)}`)); + console.log(''); + + // Node.js Checks + console.log(chalk.cyan('Node.js Environment:')); + const nodeVersion = parseInt(process.version.slice(1).split('.')[0]); + check('Node.js >= 14', nodeVersion >= 14, 'Upgrade Node.js: https://nodejs.org'); + + const npmVersion = getVersion('npm --version'); + if (npmVersion) { + console.log(chalk.green(` ✓ npm ${npmVersion}`)); + } else { + check('npm installed', false, 'Install npm or reinstall Node.js'); + } + console.log(''); + + // RuVector Packages + console.log(chalk.cyan('RuVector Packages:')); + + // Check @ruvector/core + let coreAvailable = false; + try { + require.resolve('@ruvector/core'); + coreAvailable = true; + console.log(chalk.green(` ✓ @ruvector/core installed`)); + } catch (e) { + console.log(chalk.yellow(` ! 
@ruvector/core not found (using WASM fallback)`)); + warnings++; + } + + // Check if native binding works + if (coreAvailable && loadRuvector()) { + const version = typeof getVersion === 'function' ? getVersion() : null; + const impl = typeof getImplementationType === 'function' ? getImplementationType() : 'native'; + const versionStr = version ? `, v${version}` : ''; + console.log(chalk.green(` ✓ Native binding working (${impl}${versionStr})`)); + } else if (coreAvailable) { + console.log(chalk.yellow(` ! Native binding failed to load`)); + warnings++; + } + + // Check @ruvector/gnn + if (gnnAvailable) { + console.log(chalk.green(` ✓ @ruvector/gnn installed`)); + } else { + console.log(chalk.gray(` ○ @ruvector/gnn not installed (optional)`)); + } + + // Check @ruvector/attention + if (attentionAvailable) { + console.log(chalk.green(` ✓ @ruvector/attention installed`)); + } else { + console.log(chalk.gray(` ○ @ruvector/attention not installed (optional)`)); + } + + // Check @ruvector/graph-node + try { + require.resolve('@ruvector/graph-node'); + console.log(chalk.green(` ✓ @ruvector/graph-node installed`)); + } catch (e) { + console.log(chalk.gray(` ○ @ruvector/graph-node not installed (optional)`)); + } + console.log(''); + + // Rust Toolchain (optional for development) + console.log(chalk.cyan('Rust Toolchain (optional):')); + + const rustVersion = getVersion('rustc --version'); + if (rustVersion) { + console.log(chalk.green(` ✓ ${rustVersion}`)); + } else { + console.log(chalk.gray(` ○ Rust not installed (only needed for development)`)); + } + + const cargoVersion = getVersion('cargo --version'); + if (cargoVersion) { + console.log(chalk.green(` ✓ ${cargoVersion}`)); + } else if (rustVersion) { + console.log(chalk.yellow(` ! 
cargo not found`)); + warnings++; + } + console.log(''); + + // Build Tools (optional) + if (options.verbose) { + console.log(chalk.cyan('Build Tools (for native compilation):')); + + const hasGcc = getVersion('gcc --version'); + const hasClang = getVersion('clang --version'); + const hasCc = getVersion('cc --version'); + + if (hasGcc || hasClang || hasCc) { + console.log(chalk.green(` ✓ C compiler available`)); + } else { + console.log(chalk.gray(` ○ No C compiler found (only needed for building from source)`)); + } + + const hasMake = getVersion('make --version'); + if (hasMake) { + console.log(chalk.green(` ✓ make available`)); + } else { + console.log(chalk.gray(` ○ make not found`)); + } + + const hasCmake = getVersion('cmake --version'); + if (hasCmake) { + console.log(chalk.green(` ✓ cmake available`)); + } else { + console.log(chalk.gray(` ○ cmake not found`)); + } + console.log(''); + } + + // Summary + console.log(chalk.cyan('═══════════════════════════════════════════════════════════════')); + if (issues === 0 && warnings === 0) { + console.log(chalk.green('\n ✓ All checks passed! RuVector is ready to use.\n')); + } else if (issues === 0) { + console.log(chalk.yellow(`\n ! ${warnings} warning(s) found. 
RuVector should work but may have limited features.\n`)); + } else { + console.log(chalk.red(`\n ✗ ${issues} issue(s) and ${warnings} warning(s) found.\n`)); + console.log(chalk.white(' Run "npx ruvector setup" for installation instructions.\n')); + } + }); + +// ============================================================================= +// Setup Command - Installation instructions +// ============================================================================= + +program + .command('setup') + .description('Show installation and setup instructions') + .option('--rust', 'Show Rust installation instructions') + .option('--npm', 'Show npm package installation instructions') + .option('--all', 'Show all installation instructions') + .action((options) => { + const showAll = options.all || (!options.rust && !options.npm); + + console.log(chalk.cyan('\n═══════════════════════════════════════════════════════════════')); + console.log(chalk.cyan(' RuVector Setup Guide')); + console.log(chalk.cyan('═══════════════════════════════════════════════════════════════\n')); + + // Quick install + console.log(chalk.cyan('Quick Install (one-liner):')); + console.log(chalk.white(' curl -fsSL https://raw.githubusercontent.com/ruvnet/ruvector/main/install.sh | bash')); + console.log(''); + + if (showAll || options.npm) { + console.log(chalk.cyan('───────────────────────────────────────────────────────────────')); + console.log(chalk.cyan('npm Packages')); + console.log(chalk.cyan('───────────────────────────────────────────────────────────────\n')); + + console.log(chalk.yellow('All-in-one CLI:')); + console.log(chalk.white(' npm install -g ruvector')); + console.log(chalk.white(' npx ruvector')); + console.log(''); + + console.log(chalk.yellow('Core packages:')); + console.log(chalk.white(' npm install @ruvector/core # Vector database')); + console.log(chalk.white(' npm install @ruvector/gnn # Graph Neural Networks')); + console.log(chalk.white(' npm install @ruvector/graph-node # 
Hypergraph database')); + console.log(''); + + console.log(chalk.yellow('Install all optional packages:')); + console.log(chalk.white(' npx ruvector install --all')); + console.log(''); + + console.log(chalk.yellow('List available packages:')); + console.log(chalk.white(' npx ruvector install')); + console.log(''); + } + + if (showAll || options.rust) { + console.log(chalk.cyan('───────────────────────────────────────────────────────────────')); + console.log(chalk.cyan('Rust Installation')); + console.log(chalk.cyan('───────────────────────────────────────────────────────────────\n')); + + console.log(chalk.yellow('1. Install Rust:')); + console.log(chalk.white(' curl --proto \'=https\' --tlsv1.2 -sSf https://sh.rustup.rs | sh')); + console.log(chalk.gray(' # Follow the prompts, then restart your terminal or run:')); + console.log(chalk.white(' source $HOME/.cargo/env')); + console.log(''); + + console.log(chalk.yellow('2. Verify installation:')); + console.log(chalk.white(' rustc --version')); + console.log(chalk.white(' cargo --version')); + console.log(''); + + console.log(chalk.yellow('3. Add RuVector crates to your project:')); + console.log(chalk.white(' cargo add ruvector-core # Vector database')); + console.log(chalk.white(' cargo add ruvector-graph # Hypergraph with Cypher')); + console.log(chalk.white(' cargo add ruvector-gnn # Graph Neural Networks')); + console.log(''); + + console.log(chalk.yellow('4. 
Other available crates:')); + console.log(chalk.white(' cargo add ruvector-cluster # Distributed clustering')); + console.log(chalk.white(' cargo add ruvector-raft # Raft consensus')); + console.log(chalk.white(' cargo add ruvector-replication # Data replication')); + console.log(chalk.white(' cargo add ruvector-tiny-dancer-core # AI routing')); + console.log(chalk.white(' cargo add ruvector-router-core # Semantic routing')); + console.log(''); + + console.log(chalk.yellow('Platform-specific notes:')); + console.log(''); + + if (process.platform === 'darwin') { + console.log(chalk.cyan(' macOS:')); + console.log(chalk.white(' xcode-select --install # Install command line tools')); + console.log(''); + } else if (process.platform === 'linux') { + console.log(chalk.cyan(' Linux (Debian/Ubuntu):')); + console.log(chalk.white(' sudo apt-get update')); + console.log(chalk.white(' sudo apt-get install build-essential pkg-config libssl-dev')); + console.log(''); + console.log(chalk.cyan(' Linux (RHEL/CentOS):')); + console.log(chalk.white(' sudo yum groupinstall "Development Tools"')); + console.log(chalk.white(' sudo yum install openssl-devel')); + console.log(''); + } else if (process.platform === 'win32') { + console.log(chalk.cyan(' Windows:')); + console.log(chalk.white(' # Install Visual Studio Build Tools')); + console.log(chalk.white(' # https://visualstudio.microsoft.com/visual-cpp-build-tools/')); + console.log(chalk.white(' # Or use WSL2 for best experience')); + console.log(''); + } + } + + console.log(chalk.cyan('───────────────────────────────────────────────────────────────')); + console.log(chalk.cyan('Documentation & Resources')); + console.log(chalk.cyan('───────────────────────────────────────────────────────────────\n')); + + console.log(chalk.white(' GitHub: https://github.com/ruvnet/ruvector')); + console.log(chalk.white(' npm: https://www.npmjs.com/package/ruvector')); + console.log(chalk.white(' crates.io: https://crates.io/crates/ruvector-core')); 
+ console.log(chalk.white(' Issues: https://github.com/ruvnet/ruvector/issues')); + console.log(''); + + console.log(chalk.cyan('Quick Commands:')); + console.log(chalk.white(' npx ruvector doctor # Check system health')); + console.log(chalk.white(' npx ruvector info # Show version info')); + console.log(chalk.white(' npx ruvector benchmark # Run performance test')); + console.log(chalk.white(' npx ruvector install # List available packages')); + console.log(''); + }); + +// ============================================================================= +// Graph Commands - Cypher queries and graph operations +// ============================================================================= + +program + .command('graph') + .description('Graph database operations (requires @ruvector/graph-node)') + .option('-q, --query ', 'Execute Cypher query') + .option('-c, --create