| """ | |
| Observability utilities for LLM monitoring and evaluation. | |
| Provides centralized logging, tracing, and metrics tracking using LangSmith and W&B. | |
| """ | |
| import os | |
| import time | |
| import functools | |
| from typing import Dict, Any, Optional, Callable | |
| from datetime import datetime | |
| import logging | |
| # Import configuration | |
| from src.utils.config import ( | |
| LANGCHAIN_TRACING_V2, | |
| LANGCHAIN_API_KEY, | |
| LANGCHAIN_PROJECT, | |
| WANDB_API_KEY, | |
| WANDB_PROJECT, | |
| WANDB_ENTITY, | |
| WANDB_MODE | |
| ) | |
| logger = logging.getLogger(__name__) | |
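
# Expected configuration (a sketch: the variable names are the real imports
# from src.utils.config above, but the values shown are illustrative):
#
#     LANGCHAIN_TRACING_V2=true
#     LANGCHAIN_API_KEY=ls__...
#     LANGCHAIN_PROJECT=ai-learning-path-generator
#     WANDB_API_KEY=...
#     WANDB_PROJECT=ai-learning-path-generator
#     WANDB_MODE=online   # or "disabled" to turn W&B tracking off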


class ObservabilityManager:
    """
    Centralized manager for observability tools (LangSmith + W&B).

    Handles initialization, tracing, and metrics logging.
    """

    def __init__(self):
        """Initialize observability tools."""
        self.langsmith_enabled = False
        self.wandb_enabled = False
        self.wandb_run = None

        # Initialize LangSmith
        self._init_langsmith()

        # Initialize Weights & Biases
        self._init_wandb()

    def _init_langsmith(self):
        """Initialize LangSmith tracing."""
        if LANGCHAIN_TRACING_V2 and LANGCHAIN_API_KEY:
            try:
                # Set environment variables for automatic LangChain tracing
                os.environ["LANGCHAIN_TRACING_V2"] = "true"
                os.environ["LANGCHAIN_API_KEY"] = LANGCHAIN_API_KEY
                os.environ["LANGCHAIN_PROJECT"] = LANGCHAIN_PROJECT
                self.langsmith_enabled = True
                logger.info(f"✅ LangSmith tracing enabled for project: {LANGCHAIN_PROJECT}")
            except Exception as e:
                logger.warning(f"⚠️ Failed to initialize LangSmith: {e}")
                self.langsmith_enabled = False
        else:
            logger.info(
                "ℹ️ LangSmith tracing disabled "
                "(set LANGCHAIN_TRACING_V2=true and LANGCHAIN_API_KEY to enable)"
            )

    def _init_wandb(self):
        """Initialize Weights & Biases."""
        if WANDB_API_KEY and WANDB_MODE != "disabled":
            try:
                import wandb

                # Initialize the W&B run
                self.wandb_run = wandb.init(
                    project=WANDB_PROJECT,
                    entity=WANDB_ENTITY,
                    mode=WANDB_MODE,
                    config={
                        "framework": "langchain",
                        "application": "ai-learning-path-generator",
                    },
                    # Don't reinitialize if already running
                    reinit=False,
                    resume="allow",
                )
                self.wandb_enabled = True
                logger.info(f"✅ W&B tracking enabled for project: {WANDB_PROJECT}")
            except ImportError:
                logger.warning("⚠️ wandb package not installed. Run: pip install wandb")
                self.wandb_enabled = False
            except Exception as e:
                logger.warning(f"⚠️ Failed to initialize W&B: {e}")
                self.wandb_enabled = False
        else:
            logger.info("ℹ️ W&B tracking disabled (set WANDB_API_KEY to enable)")

    def log_llm_call(
        self,
        prompt: str,
        response: str,
        model: str,
        metadata: Optional[Dict[str, Any]] = None,
        latency_ms: Optional[float] = None,
        token_count: Optional[int] = None,
        cost: Optional[float] = None,
    ):
        """
        Log an LLM call to W&B as a single-row table plus scalar metrics.

        Args:
            prompt: The input prompt sent to the LLM
            response: The LLM's response
            model: Model name (e.g., 'gpt-4o-mini')
            metadata: Additional metadata (user_id, topic, etc.)
            latency_ms: Response time in milliseconds
            token_count: Total tokens used
            cost: Estimated cost in USD
        """
        if not self.wandb_enabled:
            return

        try:
            import wandb

            # Log the call as a single-row W&B Table
            prompts_table = wandb.Table(
                columns=[
                    "timestamp", "model", "prompt", "response",
                    "latency_ms", "token_count", "cost", "metadata",
                ]
            )
            prompts_table.add_data(
                datetime.utcnow().isoformat(),
                model,
                prompt[:500],  # Truncate for display
                response[:500],
                latency_ms,
                token_count,
                cost,
                str(metadata or {}),
            )
            wandb.log({"llm_calls": prompts_table})

            # Also log metrics separately for easier aggregation
            if latency_ms:
                wandb.log({"llm_latency_ms": latency_ms})
            if token_count:
                wandb.log({"llm_tokens": token_count})
            if cost:
                wandb.log({"llm_cost_usd": cost})
        except Exception as e:
            logger.warning(f"Failed to log LLM call to W&B: {e}")

    def log_metric(self, name: str, value: float, metadata: Optional[Dict] = None):
        """
        Log a custom metric to W&B.

        Args:
            name: Metric name (e.g., 'path_generation_success')
            value: Metric value
            metadata: Additional context
        """
        if not self.wandb_enabled:
            return

        try:
            import wandb

            log_data = {name: value}
            if metadata:
                # Flatten metadata into the log as "<name>_<key>" entries
                for key, val in metadata.items():
                    log_data[f"{name}_{key}"] = val
            wandb.log(log_data)
        except Exception as e:
            logger.warning(f"Failed to log metric to W&B: {e}")

    def log_event(self, event_name: str, properties: Optional[Dict] = None):
        """
        Log a custom event to W&B.

        Args:
            event_name: Name of the event (e.g., 'path_generated', 'validation_failed')
            properties: Event properties
        """
        if not self.wandb_enabled:
            return

        try:
            import wandb

            wandb.log({
                "event": event_name,
                "timestamp": datetime.utcnow().isoformat(),
                "properties": properties or {},
            })
        except Exception as e:
            logger.warning(f"Failed to log event to W&B: {e}")

    def finish(self):
        """Clean up and finish the W&B run."""
        if self.wandb_enabled and self.wandb_run:
            try:
                import wandb

                wandb.finish()
                logger.info("✅ W&B run finished")
            except Exception as e:
                logger.warning(f"Failed to finish W&B run: {e}")


# Global observability manager instance
_observability_manager = None


def get_observability_manager() -> ObservabilityManager:
    """Get or create the global observability manager instance."""
    global _observability_manager
    if _observability_manager is None:
        _observability_manager = ObservabilityManager()
    return _observability_manager
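
# A minimal usage sketch (the metric/event names below are illustrative, not
# identifiers defined elsewhere in this repo):
#
#     obs = get_observability_manager()
#     obs.log_metric("path_generation_success", 1.0, metadata={"topic": "python"})
#     obs.log_event("path_generated", {"steps": 7})
#     obs.finish()  # flush the W&B run on shutdown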


def traceable(
    name: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    log_to_wandb: bool = True,
):
    """
    Decorator to time function execution and log latency/success metrics to W&B.

    LangSmith traces LangChain calls automatically via the environment
    variables set in _init_langsmith; this decorator adds W&B metrics on top.

    Usage:
        @traceable(name="generate_learning_path", metadata={"version": "v2"})
        def generate_path(topic: str) -> dict:
            ...

    Args:
        name: Custom name for the trace (defaults to the function name)
        metadata: Additional metadata to attach to the trace
        log_to_wandb: Whether to also log execution metrics to W&B
    """
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)  # preserve the wrapped function's name/docstring
        def wrapper(*args, **kwargs):
            obs_manager = get_observability_manager()
            trace_name = name or func.__name__

            # Start timing
            start_time = time.time()
            success = False
            error = None
            result = None

            try:
                # Execute the function
                result = func(*args, **kwargs)
                success = True
                return result
            except Exception as e:
                error = str(e)
                raise
            finally:
                # Calculate latency
                latency_ms = (time.time() - start_time) * 1000

                # Log to W&B if enabled
                if log_to_wandb and obs_manager.wandb_enabled:
                    obs_manager.log_metric(
                        f"{trace_name}_latency_ms",
                        latency_ms,
                        metadata={
                            "success": success,
                            "error": error,
                            **(metadata or {}),
                        },
                    )
                    obs_manager.log_metric(
                        f"{trace_name}_success",
                        1.0 if success else 0.0,
                    )
        return wrapper
    return decorator
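
# In practice the decorator emits two W&B metrics per call,
# "<trace_name>_latency_ms" and "<trace_name>_success". Sketch (build_path is
# a hypothetical caller, not defined in this module):
#
#     @traceable(name="generate_learning_path", metadata={"version": "v2"})
#     def build_path(topic: str) -> dict:
#         return {"topic": topic, "steps": []}
#
#     build_path("python")  # logs generate_learning_path_latency_ms / _success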


def estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
    """
    Estimate the cost of an LLM call based on token usage.

    Args:
        model: Model name
        input_tokens: Number of input tokens
        output_tokens: Number of output tokens

    Returns:
        Estimated cost in USD
    """
    # Pricing per 1M tokens in USD (as of 2024)
    pricing = {
        "gpt-4o-mini": {"input": 0.15, "output": 0.60},
        "gpt-4o": {"input": 5.00, "output": 15.00},
        "gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
        "gpt-4": {"input": 30.00, "output": 60.00},
    }

    # Default to gpt-4o-mini pricing if the model is not found
    model_pricing = pricing.get(model, pricing["gpt-4o-mini"])

    input_cost = (input_tokens / 1_000_000) * model_pricing["input"]
    output_cost = (output_tokens / 1_000_000) * model_pricing["output"]
    return input_cost + output_cost
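

if __name__ == "__main__":
    # Smoke-test sketch (an assumption: run as a module from the project root
    # so the src.utils.config import above resolves). Worked example for
    # gpt-4o-mini: 1,200 input + 800 output tokens cost
    # 1200/1e6 * $0.15 + 800/1e6 * $0.60 = $0.00018 + $0.00048 = $0.00066.
    logging.basicConfig(level=logging.INFO)
    cost = estimate_cost("gpt-4o-mini", input_tokens=1_200, output_tokens=800)
    print(f"Estimated cost: ${cost:.6f}")  # -> $0.000660

    obs = get_observability_manager()
    obs.log_llm_call(
        prompt="Explain gradient descent in one paragraph.",
        response="Gradient descent iteratively updates parameters...",
        model="gpt-4o-mini",
        latency_ms=950.0,
        token_count=2_000,
        cost=cost,
    )
    obs.finish()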