# phi4-guardrail / configuration_sentinel.py
# Uploaded with huggingface_hub (commit 3a0dc65, verified), author: shri-ads
from transformers import PretrainedConfig
class SentinelConfig(PretrainedConfig):
    """
    Configuration for SentinelGuardedPhi.

    Holds source IDs for both sub-models and guardrail hyperparameters.
    Neither model's weights are stored in this repo — they are pulled from
    HuggingFace Hub at load time using these IDs.
    """

    # Registered under this key so AutoConfig can dispatch to this class.
    model_type = "sentinel_guarded_phi"

    def __init__(
        self,
        phi_model_id: str = "microsoft/Phi-4-mini-instruct",
        guard_model_id: str = "meta-llama/Llama-Prompt-Guard-2-86M",
        guard_threshold: float = 0.5,
        blocked_response: str = "I'm not able to assist with that.",
        **kwargs,
    ):
        """
        Args:
            phi_model_id: HF repo ID for the base Phi-4-mini-instruct model.
            guard_model_id: HF repo ID for the Llama-Prompt-Guard-2-86M classifier.
            guard_threshold: Probability threshold above which a prompt is blocked.
                A float in [0, 1]. Lower = stricter.
            blocked_response: Static string returned when the guardrail fires.
            **kwargs: Forwarded to ``PretrainedConfig.__init__``.

        Raises:
            ValueError: If ``guard_threshold`` is outside [0, 1].
        """
        # Enforce the documented contract: an out-of-range threshold would
        # silently make the guardrail always-block or never-block.
        if not 0.0 <= guard_threshold <= 1.0:
            raise ValueError(
                f"guard_threshold must be a float in [0, 1], got {guard_threshold!r}"
            )
        self.phi_model_id = phi_model_id
        self.guard_model_id = guard_model_id
        self.guard_threshold = guard_threshold
        self.blocked_response = blocked_response
        super().__init__(**kwargs)