Trouter-Library committed
Commit ce03ed3 · verified · 1 Parent(s): 0d052fa

Create safety_classifier.py

Files changed (1): safety_classifier.py (+406, -0)
safety_classifier.py ADDED
@@ -0,0 +1,406 @@
"""
Safety Classifier for Helion-V2
Provides content moderation and safety checks for inputs and outputs.
"""

import re
from typing import Any, Dict, List, Optional, Tuple
from dataclasses import dataclass
from enum import Enum


class SafetyCategory(Enum):
    """Safety violation categories."""
    SAFE = "safe"
    HATE_SPEECH = "hate_speech"
    VIOLENCE = "violence"
    SEXUAL_CONTENT = "sexual_content"
    SELF_HARM = "self_harm"
    ILLEGAL_ACTIVITY = "illegal_activity"
    PERSONAL_INFO = "personal_info"
    MISINFORMATION = "misinformation"
    SPAM = "spam"
    MALICIOUS_CODE = "malicious_code"
    CHILD_SAFETY = "child_safety"


@dataclass
class SafetyResult:
    """Result from safety check."""
    is_safe: bool
    category: SafetyCategory
    confidence: float
    flagged_content: Optional[List[str]] = None
    explanation: Optional[str] = None


class SafetyClassifier:
    """
    Multi-layer safety classifier for content moderation.
    Implements rule-based filtering, pattern matching, and heuristics.
    """

    def __init__(self, strict_mode: bool = False):
        """
        Initialize safety classifier.

        Args:
            strict_mode: Enable stricter filtering (may increase false positives)
        """
        self.strict_mode = strict_mode
        self._load_patterns()
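    # Typical usage (illustrative sketch, same API as defined below):
    #     clf = SafetyClassifier(strict_mode=True)
    #     result = clf.check_prompt("some user text")
    #     if not result.is_safe:
    #         print(result.category.value, result.explanation)
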
    def _load_patterns(self):
        """Load detection patterns for various safety categories."""

        # Hate speech and discriminatory content
        self.hate_patterns = [
            r'\b(hate|despise|loathe)\s+(jews|muslims|christians|blacks|whites|asians|hispanics|lgbt)',
            r'\b(kill|eliminate|remove|exterminate)\s+all\s+\w+',
            r'\b(racial|ethnic)\s+(slur|epithet|cleansing)',
            r'\b(white|black|asian|hispanic)\s+supremacy',
            r'\b(nazi|kkk|fascist|terrorist)\s+(ideology|propaganda)',
        ]

        # Violence and threats
        self.violence_patterns = [
            r'\b(kill|murder|assassinate|harm|hurt|torture)\s+(you|him|her|them)',
            # Optional article so phrases like "make a bomb" also match
            r'\b(make|build|create)\s+(?:a\s+|an\s+)?(bomb|explosive|weapon|poison)',
            r'\bhow\s+to\s+(kill|murder|harm|hurt|torture)',
            r'\b(shoot|stab|strangle|drown|poison)\s+(someone|people)',
            r'\b(instructions|guide|tutorial)\s+(for|to)\s+(violence|murder|harm)',
        ]

        # Sexual and explicit content
        self.sexual_patterns = [
            r'\b(sex|sexual)\s+with\s+(child|minor|kid|underage)',
            r'\b(nude|naked|explicit)\s+(photos|images|pictures)\s+of\s+(child|minor|kid)',
            r'\b(pornographic|erotic|sexual)\s+content\s+(involving|featuring|with)\s+(child|minor)',
            r'\b(child|minor)\s+(pornography|abuse|exploitation)',
            r'\b(groom|seduce|lure)\s+(child|minor|kid)',
        ]

        # Self-harm content
        self.self_harm_patterns = [
            r'\bhow\s+to\s+(commit\s+suicide|kill\s+myself|end\s+my\s+life)',
            r'\b(suicide|self-harm|self-injury)\s+(methods|ways|techniques)',
            r'\b(cut|hurt|harm)\s+myself',
            r'\b(overdose|lethal\s+dose)\s+of',
            r'\bpainless\s+way\s+to\s+(die|suicide)',
        ]

        # Illegal activities
        self.illegal_patterns = [
            r'\bhow\s+to\s+(hack|crack|break\s+into)',
            r'\b(steal|rob|burglarize|shoplift)',
            r'\b(sell|buy|obtain)\s+(drugs|cocaine|heroin|meth|fentanyl)',
            r'\b(make|produce|synthesize)\s+(meth|cocaine|heroin|lsd)',
            r'\b(forge|counterfeit|fake)\s+(money|documents|passport|id)',
            r'\b(evade|avoid)\s+(taxes|law|police)',
            r'\b(launder|hide|conceal)\s+money',
        ]

        # Personal information patterns, labeled so check_pii can report
        # which PII type was found
        self.pii_patterns = [
            ("SSN", r'\b\d{3}-\d{2}-\d{4}\b'),
            ("credit card", r'\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b'),
            ("email", r'\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b'),
            ("phone", r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b'),
        ]
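        # Note: the PII regexes above are heuristics; they can miss formats
        # such as international phone numbers and may match look-alike digit
        # strings, so treat matches as signals rather than proof.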

        # Malicious code patterns
        self.malicious_code_patterns = [
            r'exec\s*\(',
            r'eval\s*\(',
            r'__import__\s*\(["\']os["\']\)',
            r'subprocess\.(call|run|Popen)',
            r'os\.system\s*\(',
            r'rm\s+-rf\s+/',
            r'format\s+c:',
            r'del\s+/[fqsa]',
        ]
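        # Note: these signatures are matched against raw text, so benign
        # discussions that merely quote exec()/eval() will also be flagged;
        # tune the list to your false-positive tolerance.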

        # Child safety keywords
        self.child_safety_keywords = [
            'child abuse', 'child exploitation', 'child pornography',
            'minor abuse', 'underage', 'csam', 'pedophile', 'pedophilia',
            'grooming children', 'sexualizing minors', 'child trafficking'
        ]

        # Misinformation indicators
        self.misinfo_patterns = [
            r'\b(proven|scientific)\s+fact:\s+(vaccines|5g|covid).+(cause|spread|contain)',
            r'\b(the\s+earth\s+is|earth\s+is)\s+(flat|hollow)',
            r'\bholocaust\s+(never\s+happened|hoax|myth)',
            r'\b(climate\s+change|global\s+warming)\s+is\s+(hoax|fake|scam)',
        ]

    def check_prompt(self, text: str) -> SafetyResult:
        """
        Check if the input prompt is safe.

        Args:
            text: Input text to check

        Returns:
            SafetyResult with classification details
        """
        text_lower = text.lower()

        # Check for child safety violations (highest priority)
        for keyword in self.child_safety_keywords:
            if keyword in text_lower:
                return SafetyResult(
                    is_safe=False,
                    category=SafetyCategory.CHILD_SAFETY,
                    confidence=1.0,
                    flagged_content=[keyword],
                    explanation="Content involves child safety violations"
                )

        # Check hate speech
        for pattern in self.hate_patterns:
            if re.search(pattern, text_lower):
                return SafetyResult(
                    is_safe=False,
                    category=SafetyCategory.HATE_SPEECH,
                    confidence=0.95,
                    explanation="Content contains hate speech or discriminatory language"
                )

        # Check violence
        for pattern in self.violence_patterns:
            if re.search(pattern, text_lower):
                return SafetyResult(
                    is_safe=False,
                    category=SafetyCategory.VIOLENCE,
                    confidence=0.90,
                    explanation="Content contains violent or threatening language"
                )

        # Check sexual content
        for pattern in self.sexual_patterns:
            if re.search(pattern, text_lower):
                return SafetyResult(
                    is_safe=False,
                    category=SafetyCategory.SEXUAL_CONTENT,
                    confidence=0.95,
                    explanation="Content contains inappropriate sexual content"
                )

        # Check self-harm
        for pattern in self.self_harm_patterns:
            if re.search(pattern, text_lower):
                return SafetyResult(
                    is_safe=False,
                    category=SafetyCategory.SELF_HARM,
                    confidence=0.85,
                    explanation="Content relates to self-harm or suicide"
                )

        # Check illegal activities
        for pattern in self.illegal_patterns:
            if re.search(pattern, text_lower):
                return SafetyResult(
                    is_safe=False,
                    category=SafetyCategory.ILLEGAL_ACTIVITY,
                    confidence=0.80,
                    explanation="Content requests information about illegal activities"
                )

        # Check for malicious code (against the original text, since code
        # patterns are case-sensitive)
        for pattern in self.malicious_code_patterns:
            if re.search(pattern, text):
                return SafetyResult(
                    is_safe=False,
                    category=SafetyCategory.MALICIOUS_CODE,
                    confidence=0.75,
                    explanation="Content contains potentially malicious code"
                )

        # Check misinformation patterns (strict mode only)
        if self.strict_mode:
            for pattern in self.misinfo_patterns:
                if re.search(pattern, text_lower):
                    return SafetyResult(
                        is_safe=False,
                        category=SafetyCategory.MISINFORMATION,
                        confidence=0.70,
                        explanation="Content may contain misinformation"
                    )

        # Passed all checks
        return SafetyResult(
            is_safe=True,
            category=SafetyCategory.SAFE,
            confidence=1.0,
            explanation="Content passed all safety checks"
        )

    def check_response(self, text: str) -> SafetyResult:
        """
        Check if model output is safe.

        Args:
            text: Generated text to check

        Returns:
            SafetyResult with classification details
        """
        # Use same checks as prompt
        return self.check_prompt(text)

    def check_pii(self, text: str) -> Tuple[bool, List[str]]:
        """
        Check for personally identifiable information.

        Args:
            text: Text to check

        Returns:
            Tuple of (has_pii, list of PII types found)
        """
        found_pii = []

        for pii_type, pattern in self.pii_patterns:
            if re.search(pattern, text, re.IGNORECASE):
                found_pii.append(pii_type)

        return len(found_pii) > 0, found_pii
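
    # Example: check_pii("Reach me at jane@example.com or 555-123-4567")
    # returns (True, ["email", "phone"]).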

    def sanitize_response(self, text: str) -> str:
        """
        Remove or redact unsafe content from a response.

        Args:
            text: Text to sanitize

        Returns:
            Sanitized text
        """
        # Redact PII patterns
        sanitized = text

        # SSN
        sanitized = re.sub(r'\b\d{3}-\d{2}-\d{4}\b', '[REDACTED-SSN]', sanitized)

        # Credit card
        sanitized = re.sub(r'\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b', '[REDACTED-CC]', sanitized)

        # Email
        sanitized = re.sub(r'\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b', '[REDACTED-EMAIL]', sanitized, flags=re.IGNORECASE)

        # Phone
        sanitized = re.sub(r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b', '[REDACTED-PHONE]', sanitized)

        return sanitized
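
    # Example: sanitize_response("My SSN is 123-45-6789") returns
    # "My SSN is [REDACTED-SSN]".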

    def get_safety_score(self, text: str) -> float:
        """
        Get an overall safety score (0.0 = unsafe, 1.0 = completely safe).

        Args:
            text: Text to evaluate

        Returns:
            Safety score between 0.0 and 1.0
        """
        result = self.check_prompt(text)

        if result.is_safe:
            return 1.0
        else:
            # Return the complement of the detection confidence
            return 1.0 - result.confidence
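
    # Worked example: a violence match is reported with confidence 0.90,
    # so get_safety_score returns 1.0 - 0.90, i.e. roughly 0.10.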


class ContentModerationPipeline:
    """Complete content moderation pipeline with multiple checks."""

    def __init__(self):
        """Initialize moderation pipeline."""
        self.safety_classifier = SafetyClassifier()

    def moderate_interaction(
        self,
        user_input: str,
        model_response: str
    ) -> Dict[str, Any]:
        """
        Moderate a complete user-model interaction.

        Args:
            user_input: User's input text
            model_response: Model's generated response

        Returns:
            Dictionary with moderation results
        """
        # Check input
        input_result = self.safety_classifier.check_prompt(user_input)

        # Check output
        output_result = self.safety_classifier.check_response(model_response)

        # Check for PII in response
        has_pii, pii_types = self.safety_classifier.check_pii(model_response)

        # Determine overall safety
        is_safe = input_result.is_safe and output_result.is_safe and not has_pii

        return {
            "is_safe": is_safe,
            "input_check": {
                "is_safe": input_result.is_safe,
                "category": input_result.category.value,
                "confidence": input_result.confidence,
                "explanation": input_result.explanation
            },
            "output_check": {
                "is_safe": output_result.is_safe,
                "category": output_result.category.value,
                "confidence": output_result.confidence,
                "explanation": output_result.explanation
            },
            "pii_check": {
                "has_pii": has_pii,
                "types": pii_types
            },
            "sanitized_response": self.safety_classifier.sanitize_response(model_response) if has_pii else model_response
        }
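
    # Sketch of wiring this pipeline around generation; `generate` and
    # REFUSAL_TEXT are hypothetical stand-ins, not part of this module:
    #     pipeline = ContentModerationPipeline()
    #     response = generate(user_input)
    #     verdict = pipeline.moderate_interaction(user_input, response)
    #     final = verdict["sanitized_response"] if verdict["is_safe"] else REFUSAL_TEXT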


# Example usage
if __name__ == "__main__":
    # Initialize classifier
    classifier = SafetyClassifier()

    # Test cases
    test_prompts = [
        "What is the capital of France?",  # Safe
        "How to make a bomb?",  # Unsafe - violence
        "Tell me about climate change",  # Safe
        "My SSN is 123-45-6789",  # PII (flagged by check_pii, not check_prompt)
    ]

    print("Safety Classifier Tests:\n")
    for prompt in test_prompts:
        result = classifier.check_prompt(prompt)
        print(f"Prompt: {prompt}")
        print(f"Safe: {result.is_safe}")
        print(f"Category: {result.category.value}")
        print(f"Confidence: {result.confidence}")
        print(f"Explanation: {result.explanation}\n")

    # Test moderation pipeline
    print("\nModeration Pipeline Test:")
    pipeline = ContentModerationPipeline()

    user_input = "What's the weather like today?"
    model_response = "I don't have access to real-time data, but you can check weather.com or your local forecast."

    moderation_result = pipeline.moderate_interaction(user_input, model_response)
    print(f"Overall Safe: {moderation_result['is_safe']}")
    print(f"Input Safe: {moderation_result['input_check']['is_safe']}")
    print(f"Output Safe: {moderation_result['output_check']['is_safe']}")
    print(f"Has PII: {moderation_result['pii_check']['has_pii']}")