9x25dillon
πŸš€ MAJOR UPDATE: Complete emergent technology integration + PR #20 restoration
63678b1
#!/usr/bin/env python3
"""
Setup script for Advanced Embedding Pipeline
Installs dependencies and configures the system
"""
import os
import sys
import subprocess
import logging
from pathlib import Path
logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
logger = logging.getLogger(__name__)
def run_command(command, description):
"""Run a command and handle errors"""
logger.info(f"πŸ”„ {description}...")
try:
result = subprocess.run(command, shell=True, check=True, capture_output=True, text=True)
logger.info(f"βœ… {description} completed successfully")
return True
except subprocess.CalledProcessError as e:
logger.error(f"❌ {description} failed: {e}")
logger.error(f"Error output: {e.stderr}")
return False
def check_python_version():
"""Check if Python version is compatible"""
logger.info("🐍 Checking Python version...")
if sys.version_info < (3, 8):
logger.error("❌ Python 3.8 or higher is required")
return False
logger.info(f"βœ… Python {sys.version_info.major}.{sys.version_info.minor} is compatible")
return True
def install_dependencies():
"""Install required dependencies"""
logger.info("πŸ“¦ Installing dependencies...")
# Upgrade pip first
if not run_command("pip install --upgrade pip", "Upgrading pip"):
return False
# Install core requirements
if not run_command("pip install -r requirements.txt", "Installing core dependencies"):
return False
# Install optional dependencies if available
optional_deps = [
"faiss-gpu", # GPU-accelerated FAISS
"torch-gpu", # GPU-accelerated PyTorch
]
for dep in optional_deps:
run_command(f"pip install {dep}", f"Installing optional dependency: {dep}")
return True
def create_directories():
"""Create necessary directories"""
logger.info("πŸ“ Creating directories...")
directories = [
"cache/embeddings",
"cache/optimized_embeddings",
"logs",
"data",
"models"
]
for directory in directories:
path = Path(directory)
path.mkdir(parents=True, exist_ok=True)
logger.info(f"βœ… Created directory: {directory}")
return True
def setup_configuration():
"""Setup default configuration files"""
logger.info("βš™οΈ Setting up configuration...")
# Create default config file
config_content = """# Advanced Embedding Pipeline Configuration
[services]
eopiez_url = "http://localhost:8001"
limps_url = "http://localhost:8000"
[semantic]
embedding_dim = 768
batch_size = 32
use_cache = true
[mathematical]
max_dimension = 1024
polynomial_degree = 3
use_matrix_optimization = true
[fractal]
max_depth = 6
branching_factor = 3
embedding_dim = 1024
fractal_type = "mandelbrot"
use_entropy = true
[hybrid]
fusion_method = "weighted_average"
semantic_weight = 0.4
mathematical_weight = 0.3
fractal_weight = 0.3
parallel_processing = true
[optimization]
use_disk_cache = true
batch_processing = true
max_batch_size = 64
adaptive_batching = true
use_indexing = true
index_type = "faiss"
"""
config_file = Path("config.ini")
with open(config_file, 'w') as f:
f.write(config_content)
logger.info("βœ… Created configuration file: config.ini")
return True
def test_installation():
"""Test the installation"""
logger.info("πŸ§ͺ Testing installation...")
try:
# Test imports
import numpy as np
import scipy
import sklearn
import torch
logger.info("βœ… Core scientific libraries imported successfully")
# Test our modules
sys.path.insert(0, str(Path.cwd()))
from semantic_embedder import SemanticEmbedder
from mathematical_embedder import MathematicalEmbedder
from fractal_cascade_embedder import FractalCascadeEmbedder
from hybrid_pipeline import HybridEmbeddingPipeline
from optimizer import EmbeddingOptimizer
logger.info("βœ… All embedding pipeline modules imported successfully")
# Test basic functionality
import asyncio
async def test_basic_functionality():
# Test semantic embedder
semantic_embedder = SemanticEmbedder()
test_embedding = await semantic_embedder.embed_text("Test text")
assert len(test_embedding) > 0
logger.info("βœ… Semantic embedder test passed")
await semantic_embedder.close()
# Test fractal embedder
fractal_embedder = FractalCascadeEmbedder()
fractal_embedding = fractal_embedder.embed_text_with_fractal("Test fractal")
assert len(fractal_embedding) > 0
logger.info("βœ… Fractal embedder test passed")
return True
# Run async test
asyncio.run(test_basic_functionality())
logger.info("βœ… All basic functionality tests passed")
return True
except Exception as e:
logger.error(f"❌ Installation test failed: {e}")
return False
def check_external_services():
"""Check if external services are available"""
logger.info("πŸ” Checking external services...")
import httpx
import asyncio
async def check_services():
services = [
("Eopiez", "http://localhost:8001/health"),
("LIMPS", "http://localhost:8000/health")
]
async with httpx.AsyncClient(timeout=5.0) as client:
for service_name, url in services:
try:
response = await client.get(url)
if response.status_code == 200:
logger.info(f"βœ… {service_name} service is available")
else:
logger.warning(f"⚠️ {service_name} service responded with status {response.status_code}")
except Exception as e:
logger.warning(f"⚠️ {service_name} service is not available: {e}")
try:
asyncio.run(check_services())
except Exception as e:
logger.warning(f"⚠️ Service check failed: {e}")
return True
def create_example_scripts():
"""Create example usage scripts"""
logger.info("πŸ“ Creating example scripts...")
# Simple usage example
simple_example = """#!/usr/bin/env python3
'''
Simple usage example for Advanced Embedding Pipeline
'''
import asyncio
from advanced_embedding_pipeline import HybridEmbeddingPipeline, HybridConfig
async def main():
# Configure pipeline
config = HybridConfig(
use_semantic=True,
use_mathematical=True,
use_fractal=True,
fusion_method="weighted_average"
)
# Create pipeline
pipeline = HybridEmbeddingPipeline(config)
# Example texts
texts = [
"The quick brown fox jumps over the lazy dog",
"x^2 + y^2 = z^2",
"Fractal geometry reveals infinite complexity"
]
# Generate embeddings
print("πŸš€ Generating embeddings...")
results = await pipeline.embed_batch(texts)
# Display results
for i, result in enumerate(results):
print(f"\\nText {i+1}: {result['text']}")
print(f"Embedding dimension: {len(result['fused_embedding'])}")
print(f"Processing time: {result['metadata']['processing_time']:.3f}s")
# Get metrics
metrics = pipeline.get_metrics()
print(f"\\nπŸ“Š Metrics:")
print(f"Total embeddings: {metrics['total_embeddings']}")
print(f"Average time: {metrics['average_time']:.3f}s")
# Cleanup
await pipeline.close()
print("\\nβœ… Example completed!")
if __name__ == "__main__":
asyncio.run(main())
"""
with open("example_simple.py", 'w') as f:
f.write(simple_example)
logger.info("βœ… Created example_simple.py")
# Advanced usage example
advanced_example = """#!/usr/bin/env python3
'''
Advanced usage example with optimization and indexing
'''
import asyncio
import numpy as np
from advanced_embedding_pipeline import (
HybridEmbeddingPipeline, HybridConfig,
EmbeddingOptimizer, OptimizationConfig
)
async def main():
# Configure pipeline with optimization
hybrid_config = HybridConfig(
use_semantic=True,
use_mathematical=True,
use_fractal=True,
fusion_method="attention",
parallel_processing=True
)
optimization_config = OptimizationConfig(
use_disk_cache=True,
batch_processing=True,
adaptive_batching=True,
use_indexing=True,
index_type="faiss"
)
# Create components
pipeline = HybridEmbeddingPipeline(hybrid_config)
optimizer = EmbeddingOptimizer(optimization_config)
# Large corpus of texts
texts = [
"Mathematical formula: E = mcΒ²",
"Code: def fibonacci(n): return n if n <= 1 else fibonacci(n-1) + fibonacci(n-2)",
"Natural language: The theory of relativity revolutionized physics",
"Fractal: The Mandelbrot set exhibits self-similarity at all scales",
"Scientific: Quantum mechanics describes atomic behavior",
"Programming: Neural networks learn through backpropagation",
"Physics: SchrΓΆdinger equation: iβ„βˆ‚Οˆ/βˆ‚t = Āψ",
"Mathematics: Fractal dimension D = log(N)/log(r)",
"AI: Machine learning algorithms optimize objective functions",
"Geometry: Sierpinski triangle shows recursive patterns"
]
print("πŸš€ Processing large corpus with optimization...")
# Optimized embedding generation
async def embedder_func(texts_batch):
return await pipeline.embed_batch(texts_batch)
results = await optimizer.optimize_embedding_generation(
embedder_func, texts, "advanced_demo"
)
# Create search index
embeddings = [result['fused_embedding'] for result in results]
index_data = optimizer.create_index(embeddings, texts)
if index_data['index']:
print(f"βœ… Created {index_data['type']} index with {index_data['size']} vectors")
# Test similarity search
query_embedding = embeddings[0]
search_results = optimizer.search_similar(index_data, query_embedding, top_k=5)
print("\\nπŸ” Similarity search results:")
for i, (idx, score) in enumerate(search_results):
print(f"{i+1}. {texts[idx]} (similarity: {score:.4f})")
# Performance report
performance_report = optimizer.get_performance_report()
print(f"\\nπŸ“Š Performance Report:")
print(f"Cache hit rate: {performance_report['cache_stats']['hit_rate']:.2%}")
print(f"Average processing time: {performance_report['performance_metrics']['average_processing_time']:.3f}s")
print(f"Total embeddings: {performance_report['performance_metrics']['total_embeddings']}")
# Cleanup
await pipeline.close()
print("\\nβœ… Advanced example completed!")
if __name__ == "__main__":
asyncio.run(main())
"""
with open("example_advanced.py", 'w') as f:
f.write(advanced_example)
logger.info("βœ… Created example_advanced.py")
return True
def main():
"""Main setup function"""
logger.info("πŸš€ Starting Advanced Embedding Pipeline Setup")
# Check Python version
if not check_python_version():
sys.exit(1)
# Create directories
if not create_directories():
logger.error("❌ Failed to create directories")
sys.exit(1)
# Install dependencies
if not install_dependencies():
logger.error("❌ Failed to install dependencies")
sys.exit(1)
# Setup configuration
if not setup_configuration():
logger.error("❌ Failed to setup configuration")
sys.exit(1)
# Test installation
if not test_installation():
logger.error("❌ Installation test failed")
sys.exit(1)
# Check external services
check_external_services()
# Create example scripts
if not create_example_scripts():
logger.error("❌ Failed to create example scripts")
sys.exit(1)
logger.info("πŸŽ‰ Setup completed successfully!")
logger.info("")
logger.info("πŸ“‹ Next steps:")
logger.info("1. Run the demo: python demo.py")
logger.info("2. Try the simple example: python example_simple.py")
logger.info("3. Try the advanced example: python example_advanced.py")
logger.info("4. Start your Eopiez service: cd ~/aipyapp/Eopiez && python api.py --port 8001")
logger.info("5. Start your LIMPS service: cd ~/aipyapp/9xdSq-LIMPS-FemTO-R1C/limps && julia --project=. -e 'using LIMPS; LIMPS.start_limps_server(8000)'")
logger.info("")
logger.info("πŸ”§ Configuration file: config.ini")
logger.info("πŸ“š Documentation: README.md")
logger.info("πŸ§ͺ Demo script: demo.py")
if __name__ == "__main__":
main()