Spaces:

akshayboora
/

AFML

No application file

App Files Files Community

AFML / afml /__init__.py

akshayboora

Upload 940 files

669d6a1 verified 27 days ago

raw

history blame contribute delete

10.6 kB

	"""
	AFML helps portfolio managers and traders leverage machine learning with
	reproducible, interpretable, and easy to use tools.
	"""

	import importlib
	import sys
	from types import ModuleType
	from typing import Dict, List

	from loguru import logger

	from .cache import (
	CacheAnalyzer,
	cache_maintenance,
	cacheable,
	clear_afml_cache,
	clear_cache_stats,
	get_cache_hit_rate,
	get_cache_stats,
	get_cache_summary,
	get_comprehensive_cache_status,
	initialize_cache_system,
	memory,
	optimize_cache_system,
	robust_cacheable,
	setup_production_cache,
	time_aware_cacheable,
	)

	# =============================================================================
	# OPTIONAL CACHE FEATURES
	# =============================================================================
	# Each feature group below is imported from .cache inside its own try/except,
	# so an ImportError in one group only affects that group's availability flag
	# and is reported at debug level instead of aborting the package import.


	# Cache monitoring helpers, re-exported at the package top level.
	# On ImportError the four names below stay unbound in this module and
	# CACHE_MONITORING_AVAILABLE is set to False.
	try:
	from .cache import (
	analyze_cache_patterns,
	get_cache_efficiency_report,
	get_cache_monitor,
	print_cache_health,
	)

	CACHE_MONITORING_AVAILABLE = True
	except ImportError:
	CACHE_MONITORING_AVAILABLE = False
	logger.debug("Cache monitoring not available")

	# MLflow integration. When the import succeeds, MLFLOW_INTEGRATION_AVAILABLE
	# is whatever value .cache exports under that name; it is forced to False
	# here only when the import itself fails.
	try:
	from .cache import (
	MLFLOW_INTEGRATION_AVAILABLE,
	get_mlflow_cache,
	mlflow_cached,
	setup_mlflow_cache,
	)
	except ImportError:
	MLFLOW_INTEGRATION_AVAILABLE = False
	logger.debug("MLflow integration not available")

	# Backtest caching. On ImportError the three names below stay unbound and
	# BACKTEST_CACHE_AVAILABLE is set to False.
	try:
	from .cache import BacktestCache, cached_backtest, get_backtest_cache

	BACKTEST_CACHE_AVAILABLE = True
	except ImportError:
	BACKTEST_CACHE_AVAILABLE = False
	logger.debug("Backtest cache not available")

	# Numba warmup utilities. This import is not guarded: if it fails, importing
	# the package fails.
	from .numba_warmup import lazy_warmup, prewarm_numba_in_package, register_numba_dummy
	# =============================================================================
	# INITIALIZE CACHE SYSTEM FIRST (before any heavy imports)
	# =============================================================================

	# Runs at import time, before the submodule imports further down this file.
	initialize_cache_system()

	# Disabled example of registering a Numba warmup dummy; the argument shapes
	# must be adjusted to the real function signature before enabling it.
	# register_numba_dummy(
	# "_precompute_active_indices_nopython",
	# args=(np.array([np.int64(0)]), np.array([np.int64(0)]), np.array([np.int64(0)])),
	# )


	# =============================================================================
	# LAZY LOADING SETUP
	# =============================================================================

	# Maps a public attribute name on this package to the submodule that the
	# module-level __getattr__ imports on first access. Only add modules that
	# are actually heavy to import.
	HEAVY_MODULES = {
	# ML modules (typically have sklearn, xgboost, etc.)
	"cross_validation": "cross_validation",
	}

	# Lightweight modules are imported eagerly for better IDE support.
	# All of them share one try block: an ImportError raised anywhere inside it
	# skips the remaining imports in the block and is only logged as a warning.
	try:
	from . import (
	data_structures,
	datasets,
	features,
	labeling,
	mt5,
	production,
	sample_weights,
	sampling,
	util,
	)
	# Nested modules: bind the inner module under a top-level name.
	from .filters import filters
	from .strategies import trading_strategies

	logger.debug("Imported lightweight modules directly")
	except ImportError as e:
	logger.warning("Some lightweight modules failed to import: {}", e)

	# Heavy modules that __getattr__ has already imported, keyed by the same
	# names used in HEAVY_MODULES.
	_module_cache: Dict[str, ModuleType] = {}

	# =============================================================================
	# LAZY LOADING FOR HEAVY MODULES ONLY
	# =============================================================================


	def __getattr__(name: str) -> ModuleType:
	    """Resolve missing package attributes by importing submodules on demand.

	    Python calls this module-level hook (PEP 562) only after normal attribute
	    lookup on the package has failed.

	    Resolution order:
	      1. Names listed in ``HEAVY_MODULES`` are imported on first access,
	         stored in ``_module_cache`` and returned.
	      2. ``filters`` resolves to the nested ``filters.filters`` module.
	      3. ``strategies`` and ``trading_strategies`` both resolve to the
	         nested ``strategies.trading_strategies`` module, so the name
	         advertised in ``__all__`` can still be resolved when the eager
	         import at package load time did not run.

	    Args:
	        name: Attribute name requested on the package.

	    Returns:
	        The imported module bound to ``name``.

	    Raises:
	        AttributeError: If ``name`` is not a known lazy attribute, or if the
	            underlying import fails (chained from the ``ImportError``).
	    """
	    if name in HEAVY_MODULES:
	        cached = _module_cache.get(name)
	        if cached is not None:
	            return cached

	        # Derive the path from __name__ instead of a literal "afml" so lazy
	        # loading keeps working if the package is vendored or renamed.
	        import_path = f"{__name__}.{HEAVY_MODULES[name]}"
	        try:
	            logger.debug("Lazy loading heavy module: {}", name)
	            module = importlib.import_module(import_path)
	        except ImportError as e:
	            logger.error("Failed to import heavy module {}: {}", name, e)
	            raise AttributeError(f"Module 'afml' has no attribute '{name}'") from e

	        _module_cache[name] = module
	        logger.info(
	            "Loaded heavy module: {} ({} MB)", name, _get_module_size(module)
	        )
	        return module

	    # Nested modules exposed under a flat top-level name.
	    try:
	        if name == "filters":
	            from .filters import filters

	            return filters
	        if name in ("strategies", "trading_strategies"):
	            from .strategies import trading_strategies

	            return trading_strategies
	    except ImportError as e:
	        raise AttributeError(f"Module 'afml' has no attribute '{name}'") from e

	    raise AttributeError(f"Module 'afml' has no attribute '{name}'")


	def _get_module_size(module) -> str:
	    """Return a rough size label for ``module`` based on its namespace.

	    This is not a memory measurement. It counts the entries of
	    ``vars(module)`` that are either non-callable or expose a ``__module__``
	    attribute, then rounds that count down to a multiple of ten.

	    Args:
	        module: Object whose ``__dict__`` is inspected.

	    Returns:
	        ``"~<count rounded down to a multiple of ten>"`` (for example
	        ``"~20"``, or ``"~0"`` for fewer than ten entries), or ``"unknown"``
	        if the namespace cannot be inspected.
	    """
	    try:
	        # Count lazily instead of materialising a throwaway list.
	        obj_count = sum(
	            1
	            for obj in vars(module).values()
	            if not callable(obj) or hasattr(obj, "__module__")
	        )
	    except Exception:
	        # Deliberately best-effort: this value only decorates a log line, so
	        # any inspection failure degrades to a placeholder.
	        return "unknown"

	    # Multiply back rather than appending a literal "0", which rendered
	    # counts below ten as "~00".
	    return f"~{obj_count // 10 * 10}"


	# =============================================================================
	# SIMPLE MODULE MANAGEMENT (only what's actually useful)
	# =============================================================================


	def preload_heavy_modules(*module_names: str) -> Dict[str, ModuleType]:
	    """
	    Eagerly import the named heavy modules.

	    Names that are not keys of ``HEAVY_MODULES`` are skipped with a warning;
	    names whose import raises are logged and left out of the result.

	    Args:
	        *module_names: Names of heavy modules to preload

	    Returns:
	        Dict mapping each successfully loaded name to its module
	    """
	    preloaded = {}
	    for requested in module_names:
	        if requested not in HEAVY_MODULES:
	            logger.warning(
	                "'{}' is not a heavy module (already imported or doesn't exist)",
	                requested,
	            )
	            continue

	        try:
	            # Attribute access on the package falls through to __getattr__
	            # when the module has not been loaded yet.
	            preloaded[requested] = getattr(sys.modules[__name__], requested)
	        except Exception as exc:
	            logger.warning("Failed to preload {}: {}", requested, exc)

	    return preloaded


	def get_loaded_heavy_modules() -> List[str]:
	    """Return the names currently stored in the lazy-load module cache."""
	    # Iterating a dict yields its keys in insertion order.
	    return list(_module_cache)


	def preload_ml_modules() -> Dict[str, ModuleType]:
	    """Preload the ML-related heavy modules (currently only ``cross_validation``)."""
	    return preload_heavy_modules("cross_validation")


	def preload_portfolio_modules() -> Dict[str, ModuleType]:
	    """Log that no portfolio modules exist here and return an empty mapping."""
	    nothing_loaded: Dict[str, ModuleType] = {}
	    logger.warning("Portfolio modules are not present in this checkout")
	    return nothing_loaded


	# =============================================================================
	# CACHE MONITORING UTILITIES - Enhanced
	# =============================================================================


	def cache_status() -> str:
	    """Build a one-line, human-readable summary of cache and lazy-load state.

	    Reads the ``'hit_rate'`` and ``'functions_tracked'`` entries of
	    ``get_cache_summary()`` and the names returned by
	    ``get_loaded_heavy_modules()``.

	    Returns:
	        The status parts joined by ``" | "``. With a hit rate of 0.75,
	        12 tracked functions and ``cross_validation`` loaded this is
	        ``"Hit rate: 75.0% | Tracked functions: 12 | Heavy modules loaded: 1
	        | (cross_validation)"`` (a single line).
	    """
	    summary = get_cache_summary()
	    loaded = get_loaded_heavy_modules()

	    status_parts = [
	        f"Hit rate: {summary['hit_rate']:.1%}",
	        f"Tracked functions: {summary['functions_tracked']}",
	        f"Heavy modules loaded: {len(loaded)}",
	    ]

	    if loaded:
	        status_parts.append(f"({', '.join(loaded)})")

	    # Plain pipe separator: "\|" is not a recognised escape sequence, so it
	    # triggered an invalid-escape warning and leaked a backslash into the
	    # output.
	    return " | ".join(status_parts)


	def maintain_cache(
	    auto_clear: bool = True, max_size_mb: int = 500, max_age_days: int = 30
	):
	    """
	    Log a start message and delegate to ``cache_maintenance``.

	    Args:
	        auto_clear: Forwarded as ``auto_clear_changed``
	        max_size_mb: Forwarded as ``max_cache_size_mb``
	        max_age_days: Forwarded as ``max_age_days``

	    Returns:
	        Whatever ``cache_maintenance`` returns for these settings
	    """
	    logger.info("Running cache maintenance...")
	    maintenance_options = {
	        "auto_clear_changed": auto_clear,
	        "max_cache_size_mb": max_size_mb,
	        "max_age_days": max_age_days,
	    }
	    return cache_maintenance(**maintenance_options)


	# =============================================================================
	# __all__ AND METADATA
	# =============================================================================

	__version__ = "1.0.0"
	__author__ = "AFML Team"

	# Names that are bound in this module only when their guarded import from
	# .cache succeeded (cache monitoring, MLflow integration, backtest caching).
	_OPTIONAL_EXPORTS = frozenset(
	    {
	        "print_cache_health",
	        "get_cache_efficiency_report",
	        "analyze_cache_patterns",
	        "get_cache_monitor",
	        "setup_mlflow_cache",
	        "get_mlflow_cache",
	        "mlflow_cached",
	        "cached_backtest",
	        "get_backtest_cache",
	        "BacktestCache",
	    }
	)

	__all__ = [
	    # Core cache system
	    "memory",
	    "cacheable",
	    "get_cache_hit_rate",
	    "get_cache_stats",
	    "clear_cache_stats",
	    "clear_afml_cache",
	    "get_cache_summary",
	    "CacheAnalyzer",
	    "initialize_cache_system",
	    # Robust cache keys
	    "robust_cacheable",
	    "time_aware_cacheable",
	    # Enhanced cache functions
	    "get_comprehensive_cache_status",
	    "optimize_cache_system",
	    "setup_production_cache",
	    # Cache monitoring (if available)
	    "print_cache_health",
	    "get_cache_efficiency_report",
	    "analyze_cache_patterns",
	    "get_cache_monitor",
	    # MLflow integration (if available)
	    "setup_mlflow_cache",
	    "get_mlflow_cache",
	    "mlflow_cached",
	    # Backtest caching (if available)
	    "cached_backtest",
	    "get_backtest_cache",
	    "BacktestCache",
	    # Module management
	    "preload_heavy_modules",
	    "preload_ml_modules",
	    "preload_portfolio_modules",
	    "get_loaded_heavy_modules",
	    # Utilities
	    "cache_status",
	    "maintain_cache",
	    # Selective cache management
	    "cache_maintenance",
	    # Numba utilities
	    "lazy_warmup",
	    "prewarm_numba_in_package",
	    "register_numba_dummy",
	    # Lightweight modules (directly imported)
	    "data_structures",
	    "util",
	    "datasets",
	    "labeling",
	    "features",
	    "sample_weights",
	    "sampling",
	    "trading_strategies",
	    "filters",
	    "mt5",
	    "production",
	    # Heavy modules (lazy loaded)
	    "cross_validation",
	]

	# A star-import raises AttributeError for any __all__ entry it cannot
	# resolve, so drop the optional names whose guarded import failed. All other
	# entries are kept exactly as listed above.
	__all__ = [
	    name
	    for name in __all__
	    if name not in _OPTIONAL_EXPORTS or name in globals()
	]

	# =============================================================================
	# STARTUP
	# =============================================================================
	# Everything below runs once, as a side effect of importing the package.

	logger.info(
	"AFML v{} ready - {} heavy modules available for lazy loading",
	__version__,
	len(HEAVY_MODULES),
	)
	# cache_status() is evaluated here as a call argument, i.e. before the logger
	# decides whether the debug message is emitted; an exception raised inside it
	# would propagate out of the package import.
	logger.debug("Cache status: {}", cache_status())

	# Report which optional features were detected by the guarded imports above.
	if CACHE_MONITORING_AVAILABLE:
	logger.debug("✓ Cache monitoring available")
	if MLFLOW_INTEGRATION_AVAILABLE:
	logger.debug("✓ MLflow integration available")
	if BACKTEST_CACHE_AVAILABLE:
	logger.debug("✓ Backtest caching available")