Spaces:

zazaman
/

guardrails-final

Sleeping

App Files Files Community

guardrails-final / llm_clients /shared_models.py

zazaman

Add multilingual translation support with Qwen3-0.6B-GGUF and optimize for Hugging Face Spaces deployment

a2e1879 about 1 month ago

raw

history blame contribute delete

7.88 kB

	# llm_clients/shared_models.py
	"""
	Shared model manager to avoid loading the same model multiple times.
	This significantly improves memory usage and startup time.
	"""

	from typing import Optional, Dict, Any, Tuple
	import threading
	import os

	class SharedModelManager:
	"""Singleton class to manage shared model instances"""

	_instance = None
	_lock = threading.Lock()
	_models: Dict[str, Any] = {}
	_model_components: Dict[str, Dict[str, Any]] = {} # Store actual model components

	def __new__(cls):
	if cls._instance is None:
	with cls._lock:
	if cls._instance is None:
	cls._instance = super().__new__(cls)
	return cls._instance

	def get_finetuned_model_components(self, model_name: str = "zazaman/fmb") -> Optional[Dict[str, Any]]:
	"""
	Get or load shared model components (model, tokenizer, classifier).

	Args:
	model_name: Name of the model to load

	Returns:
	Dict with 'model', 'tokenizer', 'classifier' components or None if loading fails
	"""
	model_key = f"finetuned_components_{model_name}"

	if model_key not in self._model_components:
	try:
	print(f"🔄 Loading shared finetuned model components: {model_name}")

	# Import here to avoid circular imports
	from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
	import torch

	# Set up cache directory for HF Spaces compatibility
	if not os.getenv('HF_HOME'):
	cache_dir = os.path.expanduser("~/.cache/huggingface")
	os.environ['HF_HOME'] = cache_dir
	os.environ['TRANSFORMERS_CACHE'] = os.path.join(cache_dir, 'transformers')

	# Create cache directories if they don't exist
	os.makedirs(cache_dir, exist_ok=True)
	os.makedirs(os.path.join(cache_dir, 'transformers'), exist_ok=True)
	print(f" 📁 Using cache directory: {cache_dir}")

	# Apply optimizations
	torch._dynamo.config.suppress_errors = True
	torch._dynamo.config.disable = True
	os.environ["TORCH_COMPILE_DISABLE"] = "1"
	os.environ["TORCHDYNAMO_DISABLE"] = "1"
	os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"

	print(f" 📥 Downloading model from Hugging Face: {model_name}")

	# Load model and tokenizer with explicit cache directory
	model = AutoModelForSequenceClassification.from_pretrained(
	model_name,
	torch_dtype=torch.float32,
	device_map=None,
	cache_dir=os.environ.get('TRANSFORMERS_CACHE'),
	local_files_only=False, # Allow downloading
	trust_remote_code=False # Security best practice
	)
	tokenizer = AutoTokenizer.from_pretrained(
	model_name,
	cache_dir=os.environ.get('TRANSFORMERS_CACHE'),
	local_files_only=False,
	trust_remote_code=False
	)

	# Disable compilation
	if hasattr(model, '_compiler_config'):
	model._compiler_config = None

	# Move to CPU
	device = "cpu"
	model = model.to(device)

	print(f" 🧠 Creating classifier pipeline...")

	# Create classifier pipeline
	classifier = pipeline(
	"text-classification",
	model=model,
	tokenizer=tokenizer,
	device=device,
	framework="pt",
	torch_dtype=torch.float32
	)

	# Store components
	self._model_components[model_key] = {
	"model": model,
	"tokenizer": tokenizer,
	"classifier": classifier,
	"device": device,
	"model_name": model_name
	}

	print(f"✅ Shared finetuned model components loaded successfully: {model_name}")
	print(f" Device: {device}")
	print(f" Cache: {os.environ.get('TRANSFORMERS_CACHE', 'default')}")

	except PermissionError as e:
	print(f"❌ Permission error loading model {model_name}: {e}")
	print(f" This might be a cache directory issue in the deployment environment.")
	print(f" Suggestion: Check HF_HOME and cache directory permissions.")
	self._model_components[model_key] = None
	return None
	except Exception as e:
	print(f"❌ Failed to load shared finetuned model components {model_name}: {e}")
	print(f" Error type: {type(e).__name__}")
	if "connection" in str(e).lower() or "network" in str(e).lower():
	print(f" This appears to be a network issue. Check internet connectivity.")
	elif "disk" in str(e).lower() or "space" in str(e).lower():
	print(f" This appears to be a disk space issue.")
	self._model_components[model_key] = None
	return None

	return self._model_components[model_key]

	def get_finetuned_guard_client(self, model_name: str = "zazaman/fmb") -> Optional[Any]:
	"""
	Get or create a shared FinetunedGuardClient instance that uses shared model components.

	Args:
	model_name: Name of the model to load

	Returns:
	FinetunedGuardClient instance or None if loading fails
	"""
	model_key = f"finetuned_guard_{model_name}"

	if model_key not in self._models:
	try:
	# Get shared model components
	components = self.get_finetuned_model_components(model_name)
	if not components:
	return None

	from .finetuned_guard import FinetunedGuardClient

	print(f" 🔍 Creating FinetunedGuardClient with shared model components: {model_name}")

	model_config = {
	"model_name": model_name
	}

	# Create client that will use shared components
	client = FinetunedGuardClient(model_config, "", shared_components=components)
	self._models[model_key] = client

	print(f"✅ Shared finetuned guard client created successfully: {model_name}")

	except Exception as e:
	print(f"❌ Failed to create shared finetuned guard client {model_name}: {e}")
	self._models[model_key] = None
	return None

	return self._models[model_key]

	def clear_models(self):
	"""Clear all cached models (useful for testing)"""
	self._models.clear()
	self._model_components.clear()

	def get_model_info(self) -> Dict[str, bool]:
	"""Get information about loaded models"""
	return {
	model_key: model is not None
	for model_key, model in self._models.items()
	}

	# Global singleton instance
	shared_model_manager = SharedModelManager()