Upload app.py

app.py CHANGED
@@ -113,49 +113,49 @@ class UltimateModelLoader:
                 "vocab_size": 50280,
                 "d_model": 2048
             },
-            # Alternative efficient models (no mamba-ssm required)
-            "microsoft/DialoGPT-small": {
-                "display_name": "DialoGPT Small (117M) [Efficient Alternative]",
-                "size": "small",
-                "priority": 13,
-                "reliable": True,
-                "params": 117_000_000
-            },
+            # Alternative efficient models (no mamba-ssm required) - GPT2 prioritized over DialoGPT
             "gpt2-large": {
-                "display_name": "GPT2 Large (774M) [High Performance]",
+                "display_name": "GPT2 Large (774M) [High Performance Alternative]",
                 "size": "large",
-                "priority":
+                "priority": 13,
                 "reliable": True,
                 "params": 774_000_000
             },
-            # High-quality alternative models (priority 20-27)
             "gpt2-medium": {
-                "display_name": "GPT2 Medium (355M)",
+                "display_name": "GPT2 Medium (355M) [Balanced Alternative]",
                 "size": "medium",
-                "priority":
+                "priority": 14,
                 "reliable": True,
                 "params": 355_000_000
             },
             "gpt2": {
-                "display_name": "GPT2 Base (117M)",
+                "display_name": "GPT2 Base (117M) [Fast Alternative]",
                 "size": "small",
-                "priority":
+                "priority": 15,
                 "reliable": True,
                 "params": 117_000_000
             },
             "distilgpt2": {
-                "display_name": "DistilGPT2 (82M)",
+                "display_name": "DistilGPT2 (82M) [Ultra-Fast]",
                 "size": "small",
-                "priority":
+                "priority": 16,
                 "reliable": True,
                 "params": 82_000_000
             },
+            # Conversational models (lower priority due to potential inappropriate responses)
             "microsoft/DialoGPT-medium": {
-                "display_name": "DialoGPT Medium (355M)",
+                "display_name": "DialoGPT Medium (355M) [Conversational]",
                 "size": "medium",
-                "priority":
-                "reliable":
+                "priority": 25,
+                "reliable": False,  # Marked as less reliable due to Reddit training data
                 "params": 355_000_000
+            },
+            "microsoft/DialoGPT-small": {
+                "display_name": "DialoGPT Small (117M) [Conversational]",
+                "size": "small",
+                "priority": 26,
+                "reliable": False,  # Marked as less reliable due to Reddit training data
+                "params": 117_000_000
             }
         })
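The registry above is keyed by Hugging Face model ID: a lower `priority` number means the loader tries that model earlier, and `reliable: False` now demotes the DialoGPT variants. The selection logic itself is outside this hunk, so the following is only a minimal sketch of how a priority-ordered registry like this is typically consumed; `MODEL_CONFIGS` and `pick_model` are illustrative names, not code from app.py:

from typing import Optional

# Illustrative subset of the registry shape used in app.py
MODEL_CONFIGS = {
    "gpt2-large": {"display_name": "GPT2 Large (774M) [High Performance Alternative]",
                   "size": "large", "priority": 13, "reliable": True, "params": 774_000_000},
    "microsoft/DialoGPT-small": {"display_name": "DialoGPT Small (117M) [Conversational]",
                                 "size": "small", "priority": 26, "reliable": False, "params": 117_000_000},
}

def pick_model(max_params: Optional[int] = None, require_reliable: bool = True) -> str:
    """Return the best model ID: the lowest priority number wins; unreliable models are skipped."""
    candidates = [
        (cfg["priority"], name)
        for name, cfg in MODEL_CONFIGS.items()
        if (not require_reliable or cfg["reliable"])
        and (max_params is None or cfg["params"] <= max_params)
    ]
    if not candidates:
        raise RuntimeError("No model satisfies the constraints")
    return min(candidates)[1]  # tuples sort by priority first

print(pick_model())  # -> "gpt2-large" (priority 13 beats 26)
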
@@ -895,14 +895,25 @@ class UltimateMambaSwarm:
             return f"Generation error occurred. Using fallback response.", ""

     def _generate_with_ultimate_model(self, prompt: str, max_length: int, temperature: float, top_p: float) -> str:
-        """Generate using loaded model with ultimate optimization"""
+        """Generate using loaded model with ultimate optimization and content safety"""
         try:
             # Get optimal parameters
             gen_params = self.model_loader.get_optimal_generation_params(temperature, top_p, max_length)

+            # Add content safety parameters
+            gen_params.update({
+                "repetition_penalty": max(gen_params.get("repetition_penalty", 1.1), 1.15),
+                "no_repeat_ngram_size": max(gen_params.get("no_repeat_ngram_size", 2), 3),
+                "temperature": min(gen_params.get("temperature", 0.7), 0.8),  # Cap temperature for safety
+                "top_p": min(gen_params.get("top_p", 0.9), 0.85)  # More focused sampling
+            })
+
+            # Create safer prompt format
+            safe_prompt = f"Question: {prompt}\nAnswer:"
+
             # Tokenize with safety
             inputs = self.model_loader.tokenizer.encode(
-                prompt,
+                safe_prompt,
                 return_tensors="pt",
                 truncation=True,
                 max_length=512
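The clamps above can only tighten whatever get_optimal_generation_params() returned: max() puts a floor under the repetition penalties while min() puts a ceiling on the sampling parameters, so an already-conservative configuration is never made more aggressive. A self-contained sketch of the same pattern against the Hugging Face transformers API (the model choice and the starting values here are illustrative, not taken from app.py):

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = AutoModelForCausalLM.from_pretrained("distilgpt2")

# Pretend these came from an upstream parameter optimizer
gen_params = {"temperature": 0.9, "top_p": 0.95, "repetition_penalty": 1.0}

# Clamp toward safe values: floors via max(), ceilings via min()
gen_params.update({
    "repetition_penalty": max(gen_params.get("repetition_penalty", 1.1), 1.15),  # -> 1.15
    "no_repeat_ngram_size": max(gen_params.get("no_repeat_ngram_size", 2), 3),   # -> 3
    "temperature": min(gen_params.get("temperature", 0.7), 0.8),                 # -> 0.8
    "top_p": min(gen_params.get("top_p", 0.9), 0.85),                            # -> 0.85
})

safe_prompt = "Question: What is a state-space model?\nAnswer:"
inputs = tokenizer.encode(safe_prompt, return_tensors="pt", truncation=True, max_length=512)
outputs = model.generate(inputs, do_sample=True, max_new_tokens=64,
                         pad_token_id=tokenizer.eos_token_id, **gen_params)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
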
@@ -916,21 +927,86 @@ class UltimateMambaSwarm:
             # Decode and validate
             generated_text = self.model_loader.tokenizer.decode(outputs[0], skip_special_tokens=True)

-            # Extract response
-            if generated_text.startswith(prompt):
+            # Extract response safely
+            if generated_text.startswith(safe_prompt):
+                response = generated_text[len(safe_prompt):].strip()
+            elif generated_text.startswith(prompt):
                 response = generated_text[len(prompt):].strip()
             else:
                 response = generated_text.strip()

+            # Content safety filtering
+            if self._is_inappropriate_content(response):
+                logger.warning("🛡️ Inappropriate content detected, using fallback")
+                return self._generate_ultimate_fallback(prompt, 'general')
+
             return response if response else "I'm processing your request..."

         except Exception as e:
             logger.error(f"Model generation error: {e}")
             return self._generate_ultimate_fallback(prompt, 'general')

+    def _is_inappropriate_content(self, text: str) -> bool:
+        """Advanced content safety filtering"""
+        if not text or len(text.strip()) < 3:
+            return True
+
+        text_lower = text.lower()
+
+        # Check for inappropriate content patterns
+        inappropriate_patterns = [
+            # Sexual content
+            'sexual', 'dude who likes to have fun with dudes', 'sexual orientation',
+            # Offensive language (basic filter)
+            'damn', 'hell', 'stupid', 'idiot',
+            # Inappropriate casual language
+            'just a dude', 'i\'m just a', 'whatever man',
+            # Reddit-style inappropriate responses
+            'bro', 'dude', 'man', 'guys', 'lol', 'lmao', 'wtf'
+        ]
+
+        # Check for patterns that suggest inappropriate content
+        for pattern in inappropriate_patterns:
+            if pattern in text_lower:
+                return True
+
+        # Check for very short, casual responses that don't answer the question
+        if len(text.strip()) < 20 and any(word in text_lower for word in ['dude', 'bro', 'man', 'whatever']):
+            return True
+
+        # Check for responses that don't seem to address the prompt properly
+        if 'tell me more about yourself' in text_lower and len(text.strip()) < 100:
+            return True
+
+        return False
+
     def _generate_ultimate_fallback(self, prompt: str, domain: str) -> str:
         """Ultimate fallback responses with maximum quality"""

+        # Special handling for self-introduction prompts
+        prompt_lower = prompt.lower()
+        if any(phrase in prompt_lower for phrase in ['tell me about yourself', 'who are you', 'what are you']):
+            return """**🐍 Mamba Encoder Swarm AI Assistant**
+
+I'm an advanced AI language model powered by the Mamba Encoder Swarm architecture, designed to provide intelligent, helpful, and accurate responses across multiple domains.
+
+**🎯 Core Capabilities:**
+• **Multi-Domain Expertise**: Specialized knowledge in medical, legal, programming, scientific, creative, and business domains
+• **Intelligent Routing**: Advanced encoder routing system that directs queries to the most appropriate specialized modules
+• **Quality Assurance**: Built-in content validation and safety filtering to ensure appropriate, helpful responses
+• **Adaptive Processing**: Dynamic model selection and optimization based on query complexity and requirements
+
+**🔧 Architecture Features:**
+• **State-Space Models**: Utilizes advanced Mamba encoder technology (GPU-ready) with intelligent CPU alternatives
+• **Domain Intelligence**: Sophisticated domain detection and specialized response generation
+• **Performance Monitoring**: Real-time analytics and optimization for consistent high-quality responses
+• **Safety Systems**: Multiple layers of content filtering and quality validation
+
+**🤝 How I Can Help:**
+I'm here to assist with questions, analysis, problem-solving, creative tasks, technical explanations, and professional guidance across various fields. I aim to provide thoughtful, accurate, and helpful responses while maintaining appropriate professional standards.
+
+**Current Status**: Operating in CPU-optimized mode with Mamba encoders ready for GPU activation."""
+
         fallback_responses = {
             'medical': f"""**🏥 Medical Information Analysis: "{prompt[:60]}..."**

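One caveat with the plain substring test in _is_inappropriate_content: short patterns such as 'man' or 'hell' also match inside benign words like 'human' or 'shell', so the filter as written will reject ordinary responses and fall back more often than intended. A word-boundary variant for comparison (a standalone sketch, not code from this commit; the blocklist is a subset of the one above):

import re

BLOCKLIST = ['bro', 'dude', 'man', 'guys', 'lol', 'lmao', 'wtf', 'damn', 'hell']

def contains_blocked_word(text: str) -> bool:
    """Match blocklist entries only as whole words, so 'human' or 'shell' pass."""
    text_lower = text.lower()
    return any(re.search(rf"\b{re.escape(word)}\b", text_lower) for word in BLOCKLIST)

assert contains_blocked_word("lol that is wild")       # 'lol' hit as a whole word
assert not contains_blocked_word("humans use shells")  # no whole-word hit
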