Specific-Cognito committed on
Commit
fc7a401
·
verified ·
1 Parent(s): bcf2cb0

Update helion_orchestrator.py

Browse files
Files changed (1) hide show
  1. helion_orchestrator.py +211 -7
helion_orchestrator.py CHANGED
@@ -22,6 +22,107 @@ logging.basicConfig(
22
  logger = logging.getLogger(__name__)
23
 
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  @dataclass
26
  class HelionConfig:
27
  """Central configuration for all Helion operations."""
@@ -50,6 +151,11 @@ class HelionConfig:
50
  enable_safeguards: bool = True
51
  enable_tools: bool = False
52
 
 
 
 
 
 
53
  # HuggingFace
54
  hf_token: Optional[str] = None
55
  push_to_hub: bool = False
@@ -74,8 +180,13 @@ class HelionOrchestrator:
74
  self.tokenizer = None
75
  self.safeguards = None
76
  self.tool_system = None
 
77
 
78
  self.session_log = []
 
 
 
 
79
 
80
  # ==================== Model Loading ====================
81
 
@@ -169,6 +280,14 @@ class HelionOrchestrator:
169
  except ImportError:
170
  logger.warning("Tools module not found")
171
 
 
 
 
 
 
 
 
 
172
  def unload_model(self):
173
  """Unload model to free memory."""
174
  if self.model:
@@ -185,7 +304,9 @@ class HelionOrchestrator:
185
  max_tokens: Optional[int] = None,
186
  temperature: Optional[float] = None,
187
  system_prompt: Optional[str] = None,
188
- use_safeguards: bool = True
 
 
189
  ) -> Dict[str, Any]:
190
  """
191
  Generate response from prompt.
@@ -196,6 +317,8 @@ class HelionOrchestrator:
196
  temperature: Sampling temperature
197
  system_prompt: Optional system prompt
198
  use_safeguards: Apply safeguard checks
 
 
199
 
200
  Returns:
201
  Dict with response and metadata
@@ -206,10 +329,20 @@ class HelionOrchestrator:
206
  max_tokens = max_tokens or self.config.max_tokens
207
  temperature = temperature or self.config.temperature
208
 
 
 
 
 
 
209
  # Build messages
210
  messages = []
211
  if system_prompt:
212
  messages.append({"role": "system", "content": system_prompt})
 
 
 
 
 
213
  messages.append({"role": "user", "content": prompt})
214
 
215
  # Check with safeguards
@@ -245,12 +378,17 @@ class HelionOrchestrator:
245
  skip_special_tokens=True
246
  ).strip()
247
 
 
 
 
 
248
  result = {
249
  "response": response_text,
250
  "blocked": False,
251
  "prompt_tokens": input_ids.shape[1],
252
  "completion_tokens": output.shape[1] - input_ids.shape[1],
253
- "total_tokens": output.shape[1]
 
254
  }
255
 
256
  self._log_event("generation", {"prompt": prompt[:100], "tokens": result["total_tokens"]})
@@ -259,6 +397,8 @@ class HelionOrchestrator:
259
  def chat(
260
  self,
261
  messages: List[Dict[str, str]],
 
 
262
  **kwargs
263
  ) -> Dict[str, Any]:
264
  """
@@ -266,6 +406,8 @@ class HelionOrchestrator:
266
 
267
  Args:
268
  messages: List of message dicts
 
 
269
  **kwargs: Generation parameters
270
 
271
  Returns:
@@ -274,6 +416,15 @@ class HelionOrchestrator:
274
  if not self.model:
275
  raise RuntimeError("Model not loaded")
276
 
 
 
 
 
 
 
 
 
 
277
  # Similar to generate but maintains conversation
278
  input_ids = self.tokenizer.apply_chat_template(
279
  messages,
@@ -297,6 +448,11 @@ class HelionOrchestrator:
297
  skip_special_tokens=True
298
  ).strip()
299
 
 
 
 
 
 
300
  return {"response": response, "blocked": False}
301
 
302
  def interactive_chat(self):
@@ -306,11 +462,20 @@ class HelionOrchestrator:
306
  return
307
 
308
  print("\n" + "="*60)
309
- print("Helion Interactive Chat")
310
- print("Commands: /quit, /clear, /save, /load, /help")
311
  print("="*60 + "\n")
312
 
313
  conversation = []
 
 
 
 
 
 
 
 
 
314
 
315
  while True:
316
  try:
@@ -322,12 +487,24 @@ class HelionOrchestrator:
322
  # Handle commands
323
  if user_input.startswith("/"):
324
  if user_input == "/quit":
 
 
325
  print("Goodbye!")
326
  break
327
  elif user_input == "/clear":
328
  conversation = []
329
  print("Conversation cleared.")
330
  continue
 
 
 
 
 
 
 
 
 
 
331
  elif user_input.startswith("/save"):
332
  self._save_conversation(conversation, user_input.split()[1] if len(user_input.split()) > 1 else None)
333
  continue
@@ -340,7 +517,11 @@ class HelionOrchestrator:
340
 
341
  conversation.append({"role": "user", "content": user_input})
342
 
343
- result = self.chat(conversation)
 
 
 
 
344
 
345
  if result.get("blocked"):
346
  print(f"🤖 Helion: {result['response']}")
@@ -675,6 +856,7 @@ CMD ["python3", "server.py", "--host", "0.0.0.0", "--port", "8000"]
675
  "device": str(self.model.device) if self.model else None,
676
  "safeguards_enabled": self.safeguards is not None,
677
  "tools_enabled": self.tool_system is not None,
 
678
  "config": asdict(self.config),
679
  "session_events": len(self.session_log)
680
  }
@@ -682,8 +864,28 @@ CMD ["python3", "server.py", "--host", "0.0.0.0", "--port", "8000"]
682
  if self.model:
683
  info["model_memory"] = torch.cuda.max_memory_allocated() / 1024**3 if torch.cuda.is_available() else 0
684
 
 
 
 
 
685
  return info
686
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
687
  def _log_event(self, event_type: str, data: Dict[str, Any]):
688
  """Log orchestrator event."""
689
  event = {
@@ -712,9 +914,11 @@ CMD ["python3", "server.py", "--host", "0.0.0.0", "--port", "8000"]
712
  """Print chat help."""
713
  print("""
714
  Available Commands:
715
- /quit - Exit chat
716
- /clear - Clear conversation history
717
  /save [name] - Save conversation to file
 
 
718
  /help - Show this help message
719
  """)
720
 
 
22
  logger = logging.getLogger(__name__)
23
 
24
 
25
class MemoryManager:
    """
    Persistent conversation memory for Helion.

    Keeps a rolling window of user/assistant exchanges per conversation id,
    persisted as JSON on disk so context can survive across sessions.
    """

    def __init__(self, memory_file: str = "helion_memory.json", window_size: int = 10):
        # Path of the JSON file backing the store.
        self.memory_file = Path(memory_file)
        # Maximum number of interactions retained per conversation.
        self.window_size = window_size
        # conversation_id -> list of {"timestamp", "user", "assistant"} dicts.
        self.conversations: Dict[str, List[Dict]] = {}
        self.load()

    def add_interaction(self, conversation_id: str, user_input: str, assistant_response: str):
        """
        Record one user/assistant exchange and persist the store.

        Args:
            conversation_id: Unique conversation identifier
            user_input: User's message
            assistant_response: Assistant's response
        """
        history = self.conversations.setdefault(conversation_id, [])
        history.append({
            "timestamp": datetime.now().isoformat(),
            "user": user_input,
            "assistant": assistant_response,
        })

        # Trim to the most recent window_size entries.
        if len(history) > self.window_size:
            self.conversations[conversation_id] = history[-self.window_size:]

        self.save()

    def get_context(self, conversation_id: str, max_length: int = 500) -> str:
        """
        Summarize recent interactions as a single context string.

        Args:
            conversation_id: Conversation ID
            max_length: Maximum context length in characters

        Returns:
            Context string ("" when the conversation is unknown)
        """
        history = self.conversations.get(conversation_id)
        if history is None:
            return ""

        # Walk newest-to-oldest, stopping once adding another snippet
        # would exceed the character budget.
        collected = []
        used = 0
        for entry in reversed(history):
            snippet = f"User: {entry['user'][:100]} | Assistant: {entry['assistant'][:100]}"
            if used + len(snippet) > max_length:
                break
            collected.append(snippet)
            used += len(snippet)

        collected.reverse()  # restore chronological order
        return " | ".join(collected)

    def get_conversation(self, conversation_id: str) -> List[Dict]:
        """Return the full stored history for a conversation (empty list if unknown)."""
        return self.conversations.get(conversation_id, [])

    def clear_conversation(self, conversation_id: str):
        """Drop a single conversation; persist only when something was removed."""
        try:
            del self.conversations[conversation_id]
        except KeyError:
            pass
        else:
            self.save()

    def clear_all(self):
        """Remove every stored conversation and persist the empty store."""
        self.conversations = {}
        self.save()

    def save(self):
        """Write the memory store to disk, logging (not raising) on failure."""
        try:
            self.memory_file.parent.mkdir(parents=True, exist_ok=True)
            with self.memory_file.open('w') as f:
                json.dump(self.conversations, f, indent=2)
        except Exception as e:
            logger.error(f"Failed to save memory: {e}")

    def load(self):
        """Populate the store from disk; fall back to an empty store on any error."""
        try:
            if self.memory_file.exists():
                with self.memory_file.open('r') as f:
                    self.conversations = json.load(f)
                logger.info(f"Loaded {len(self.conversations)} conversations from memory")
        except Exception as e:
            logger.warning(f"Failed to load memory: {e}")
            self.conversations = {}
126
  @dataclass
127
  class HelionConfig:
128
  """Central configuration for all Helion operations."""
 
151
  enable_safeguards: bool = True
152
  enable_tools: bool = False
153
 
154
+ # Memory settings
155
+ enable_memory: bool = True
156
+ memory_window: int = 10 # Remember last N conversations
157
+ memory_file: str = "helion_memory.json"
158
+
159
  # HuggingFace
160
  hf_token: Optional[str] = None
161
  push_to_hub: bool = False
 
180
  self.tokenizer = None
181
  self.safeguards = None
182
  self.tool_system = None
183
+ self.memory = None
184
 
185
  self.session_log = []
186
+
187
+ # Initialize memory if enabled
188
+ if self.config.enable_memory:
189
+ self._init_memory()
190
 
191
  # ==================== Model Loading ====================
192
 
 
280
  except ImportError:
281
  logger.warning("Tools module not found")
282
 
283
def _init_memory(self):
    """Create the MemoryManager from the orchestrator's config settings."""
    # The memory file lives under the configured output directory.
    memory_path = os.path.join(self.config.output_dir, self.config.memory_file)
    self.memory = MemoryManager(
        memory_file=memory_path,
        window_size=self.config.memory_window,
    )
    logger.info("Memory system initialized")
291
  def unload_model(self):
292
  """Unload model to free memory."""
293
  if self.model:
 
304
  max_tokens: Optional[int] = None,
305
  temperature: Optional[float] = None,
306
  system_prompt: Optional[str] = None,
307
+ use_safeguards: bool = True,
308
+ use_memory: bool = True,
309
+ conversation_id: Optional[str] = None
310
  ) -> Dict[str, Any]:
311
  """
312
  Generate response from prompt.
 
317
  temperature: Sampling temperature
318
  system_prompt: Optional system prompt
319
  use_safeguards: Apply safeguard checks
320
+ use_memory: Use conversation memory
321
+ conversation_id: Conversation identifier for memory
322
 
323
  Returns:
324
  Dict with response and metadata
 
329
  max_tokens = max_tokens or self.config.max_tokens
330
  temperature = temperature or self.config.temperature
331
 
332
+ # Retrieve memory context if enabled
333
+ memory_context = ""
334
+ if use_memory and self.memory and conversation_id:
335
+ memory_context = self.memory.get_context(conversation_id)
336
+
337
  # Build messages
338
  messages = []
339
  if system_prompt:
340
  messages.append({"role": "system", "content": system_prompt})
341
+
342
+ # Add memory context if available
343
+ if memory_context:
344
+ messages.append({"role": "system", "content": f"Previous context: {memory_context}"})
345
+
346
  messages.append({"role": "user", "content": prompt})
347
 
348
  # Check with safeguards
 
378
  skip_special_tokens=True
379
  ).strip()
380
 
381
+ # Store in memory if enabled
382
+ if use_memory and self.memory and conversation_id:
383
+ self.memory.add_interaction(conversation_id, prompt, response_text)
384
+
385
  result = {
386
  "response": response_text,
387
  "blocked": False,
388
  "prompt_tokens": input_ids.shape[1],
389
  "completion_tokens": output.shape[1] - input_ids.shape[1],
390
+ "total_tokens": output.shape[1],
391
+ "conversation_id": conversation_id
392
  }
393
 
394
  self._log_event("generation", {"prompt": prompt[:100], "tokens": result["total_tokens"]})
 
397
  def chat(
398
  self,
399
  messages: List[Dict[str, str]],
400
+ use_memory: bool = True,
401
+ conversation_id: Optional[str] = None,
402
  **kwargs
403
  ) -> Dict[str, Any]:
404
  """
 
406
 
407
  Args:
408
  messages: List of message dicts
409
+ use_memory: Use memory for context
410
+ conversation_id: Conversation ID for memory
411
  **kwargs: Generation parameters
412
 
413
  Returns:
 
416
  if not self.model:
417
  raise RuntimeError("Model not loaded")
418
 
419
+ # Add memory context if available
420
+ if use_memory and self.memory and conversation_id:
421
+ memory_context = self.memory.get_context(conversation_id)
422
+ if memory_context:
423
+ # Insert memory context before user messages
424
+ messages = [
425
+ {"role": "system", "content": f"Previous context: {memory_context}"}
426
+ ] + messages
427
+
428
  # Similar to generate but maintains conversation
429
  input_ids = self.tokenizer.apply_chat_template(
430
  messages,
 
448
  skip_special_tokens=True
449
  ).strip()
450
 
451
+ # Store in memory
452
+ if use_memory and self.memory and conversation_id:
453
+ user_message = messages[-1]["content"]
454
+ self.memory.add_interaction(conversation_id, user_message, response)
455
+
456
  return {"response": response, "blocked": False}
457
 
458
  def interactive_chat(self):
 
462
  return
463
 
464
  print("\n" + "="*60)
465
+ print("Helion Interactive Chat with Memory")
466
+ print("Commands: /quit, /clear, /save, /memory, /newconv, /help")
467
  print("="*60 + "\n")
468
 
469
  conversation = []
470
+ conversation_id = f"chat_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
471
+
472
+ # Show memory status
473
+ if self.memory:
474
+ print(f"💾 Memory: Enabled (ID: {conversation_id})")
475
+ # Check if there's previous context
476
+ prev_context = self.memory.get_context(conversation_id)
477
+ if prev_context:
478
+ print(f"📝 Retrieved previous context\n")
479
 
480
  while True:
481
  try:
 
487
  # Handle commands
488
  if user_input.startswith("/"):
489
  if user_input == "/quit":
490
+ if self.memory:
491
+ self.memory.save()
492
  print("Goodbye!")
493
  break
494
  elif user_input == "/clear":
495
  conversation = []
496
  print("Conversation cleared.")
497
  continue
498
+ elif user_input == "/memory":
499
+ self._show_memory(conversation_id)
500
+ continue
501
+ elif user_input == "/newconv":
502
+ if self.memory:
503
+ self.memory.save()
504
+ conversation = []
505
+ conversation_id = f"chat_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
506
+ print(f"New conversation started (ID: {conversation_id})")
507
+ continue
508
  elif user_input.startswith("/save"):
509
  self._save_conversation(conversation, user_input.split()[1] if len(user_input.split()) > 1 else None)
510
  continue
 
517
 
518
  conversation.append({"role": "user", "content": user_input})
519
 
520
+ result = self.chat(
521
+ conversation,
522
+ use_memory=True,
523
+ conversation_id=conversation_id
524
+ )
525
 
526
  if result.get("blocked"):
527
  print(f"🤖 Helion: {result['response']}")
 
856
  "device": str(self.model.device) if self.model else None,
857
  "safeguards_enabled": self.safeguards is not None,
858
  "tools_enabled": self.tool_system is not None,
859
+ "memory_enabled": self.memory is not None,
860
  "config": asdict(self.config),
861
  "session_events": len(self.session_log)
862
  }
 
864
  if self.model:
865
  info["model_memory"] = torch.cuda.max_memory_allocated() / 1024**3 if torch.cuda.is_available() else 0
866
 
867
+ if self.memory:
868
+ info["total_conversations"] = len(self.memory.conversations)
869
+ info["total_interactions"] = sum(len(conv) for conv in self.memory.conversations.values())
870
+
871
  return info
872
 
873
def _show_memory(self, conversation_id: str):
    """Print a summary of the stored memory for *conversation_id*."""
    # Guard: memory may be disabled via config.
    if not self.memory:
        print("Memory not enabled")
        return

    context = self.memory.get_context(conversation_id)
    interactions = self.memory.get_conversation(conversation_id)

    bar = "=" * 60
    print(f"\n{bar}")
    print(f"Memory for Conversation: {conversation_id}")
    print(bar)
    print(f"Total interactions: {len(interactions)}")
    # Long contexts are shown truncated to the first 200 characters.
    if len(context) > 200:
        print(f"\nContext summary:\n{context[:200]}...")
    else:
        print(f"\nContext:\n{context}")
    print(f"{bar}\n")
889
  def _log_event(self, event_type: str, data: Dict[str, Any]):
890
  """Log orchestrator event."""
891
  event = {
 
914
  """Print chat help."""
915
  print("""
916
  Available Commands:
917
+ /quit - Exit chat and save memory
918
+ /clear - Clear current conversation
919
  /save [name] - Save conversation to file
920
+ /memory - Show memory for this conversation
921
+ /newconv - Start a new conversation (saves current)
922
  /help - Show this help message
923
  """)
924