Create inference.py
inference.py +143 -0
inference.py
ADDED
@@ -0,0 +1,143 @@
"""
Helion-OSC Inference Script
DeepXR/Helion-OSC - Mathematical Coding Language Model
"""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from typing import Optional, Dict, Any


class HelionOSCInference:
    """Inference wrapper for Helion-OSC model"""

    def __init__(
        self,
        model_name: str = "DeepXR/Helion-OSC",
        device: Optional[str] = None,
        load_in_8bit: bool = False
    ):
        """
        Initialize the Helion-OSC model

        Args:
            model_name: HuggingFace model identifier
            device: Device to load model on (cuda/cpu)
            load_in_8bit: Whether to load model in 8-bit precision
        """
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")

        print(f"Loading Helion-OSC on {self.device}...")

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        model_kwargs = {"device_map": "auto"} if self.device == "cuda" else {}
        if load_in_8bit:
            model_kwargs["load_in_8bit"] = True

        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.bfloat16 if self.device == "cuda" else torch.float32,
            **model_kwargs
        )

        if self.device == "cpu":
            self.model = self.model.to(self.device)

        self.model.eval()
        print("Model loaded successfully!")

    def generate(
        self,
        prompt: str,
        max_length: int = 512,
        temperature: float = 0.7,
        top_p: float = 0.95,
        top_k: int = 50,
        num_return_sequences: int = 1,
        do_sample: bool = True,
        **kwargs
    ) -> str:
        """
        Generate code or text based on prompt

        Args:
            prompt: Input prompt
            max_length: Maximum length of generated text
            temperature: Sampling temperature
            top_p: Nucleus sampling parameter
            top_k: Top-k sampling parameter
            num_return_sequences: Number of sequences to generate
            do_sample: Whether to use sampling

        Returns:
            Generated text
        """
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_length=max_length,
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
                num_return_sequences=num_return_sequences,
                do_sample=do_sample,
                pad_token_id=self.tokenizer.eos_token_id,
                **kwargs
            )

        generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return generated_text

    def code_generation(self, prompt: str, max_length: int = 1024) -> str:
        """Optimized for code generation tasks"""
        return self.generate(
            prompt,
            max_length=max_length,
            temperature=0.7,
            top_p=0.95,
            do_sample=True
        )

    def mathematical_reasoning(self, prompt: str, max_length: int = 512) -> str:
        """Optimized for mathematical reasoning tasks"""
        return self.generate(
            prompt,
            max_length=max_length,
            temperature=0.3,
            top_p=0.9,
            do_sample=False
        )


def main():
    """Example usage"""
    # Initialize model
    helion = HelionOSCInference()

    # Example 1: Code generation
    code_prompt = "Write a Python function to calculate the factorial of a number using recursion:"
    print("\n=== Code Generation ===")
    print(f"Prompt: {code_prompt}")
    result = helion.code_generation(code_prompt)
    print(f"Output:\n{result}\n")

    # Example 2: Mathematical reasoning
    math_prompt = "Prove that the sum of first n natural numbers is n(n+1)/2:"
    print("\n=== Mathematical Reasoning ===")
    print(f"Prompt: {math_prompt}")
    result = helion.mathematical_reasoning(math_prompt)
    print(f"Output:\n{result}\n")

    # Example 3: Algorithm design
    algo_prompt = "Design an efficient algorithm to find the longest palindromic substring:"
    print("\n=== Algorithm Design ===")
    print(f"Prompt: {algo_prompt}")
    result = helion.generate(algo_prompt, max_length=1024)
    print(f"Output:\n{result}\n")


if __name__ == "__main__":
    main()
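For reference, a minimal usage sketch of the new class from a separate script (not part of the committed file): it assumes torch and transformers are installed, plus bitsandbytes if the 8-bit path is used, and that the DeepXR/Helion-OSC checkpoint is reachable on the Hugging Face Hub. The repetition_penalty argument is simply forwarded to model.generate() through **kwargs.

# Hypothetical usage sketch, not part of inference.py.
# Assumes: torch + transformers installed, bitsandbytes available for load_in_8bit,
# and a CUDA device present.
from inference import HelionOSCInference

# Load on GPU in 8-bit precision to reduce memory use.
helion = HelionOSCInference(device="cuda", load_in_8bit=True)

# Extra keyword arguments are passed straight through to model.generate(),
# e.g. repetition_penalty.
print(helion.generate(
    "Implement binary search in Python:",
    max_length=768,
    repetition_penalty=1.1,
))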