Upload app.py

app.py CHANGED
@@ -113,49 +113,49 @@ class UltimateModelLoader:
                 "vocab_size": 50280,
                 "d_model": 2048
             },
-            # Alternative efficient models (no mamba-ssm required)
-            "microsoft/DialoGPT-small": {
-                "display_name": "DialoGPT Small (117M) [Efficient Alternative]",
-                "size": "small",
-                "priority": 13,
-                "reliable": True,
-                "params": 117_000_000
-            },
+            # Alternative efficient models (no mamba-ssm required) - GPT2 prioritized over DialoGPT
             "gpt2-large": {
-                "display_name": "GPT2 Large (774M) [High Performance]",
+                "display_name": "GPT2 Large (774M) [High Performance Alternative]",
                 "size": "large",
-                "priority":
+                "priority": 13,
                 "reliable": True,
                 "params": 774_000_000
             },
-            # High-quality alternative models (priority 20-27)
             "gpt2-medium": {
-                "display_name": "GPT2 Medium (355M)",
+                "display_name": "GPT2 Medium (355M) [Balanced Alternative]",
                 "size": "medium",
-                "priority":
+                "priority": 14,
                 "reliable": True,
                 "params": 355_000_000
             },
             "gpt2": {
-                "display_name": "GPT2 Base (117M)",
+                "display_name": "GPT2 Base (117M) [Fast Alternative]",
                 "size": "small",
-                "priority":
+                "priority": 15,
                 "reliable": True,
                 "params": 117_000_000
             },
             "distilgpt2": {
-                "display_name": "DistilGPT2 (82M)",
+                "display_name": "DistilGPT2 (82M) [Ultra-Fast]",
                 "size": "small",
-                "priority":
+                "priority": 16,
                 "reliable": True,
                 "params": 82_000_000
             },
+            # Conversational models (lower priority due to potential inappropriate responses)
             "microsoft/DialoGPT-medium": {
-                "display_name": "DialoGPT Medium (355M)",
+                "display_name": "DialoGPT Medium (355M) [Conversational]",
                 "size": "medium",
-                "priority":
-                "reliable":
+                "priority": 25,
+                "reliable": False,  # Marked as less reliable due to Reddit training data
                 "params": 355_000_000
+            },
+            "microsoft/DialoGPT-small": {
+                "display_name": "DialoGPT Small (117M) [Conversational]",
+                "size": "small",
+                "priority": 26,
+                "reliable": False,  # Marked as less reliable due to Reddit training data
+                "params": 117_000_000
             }
         })
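The registry above is keyed by Hugging Face model ID: a lower `priority` number means the loader tries that model earlier, and `reliable: False` now demotes the DialoGPT variants. The selection logic itself is outside this hunk, so the following is only a minimal sketch of how a priority-ordered registry like this is typically consumed; `MODEL_CONFIGS` and `pick_model` are illustrative names, not code from app.py:

from typing import Optional

# Illustrative subset of the registry shape used in app.py
MODEL_CONFIGS = {
    "gpt2-large": {"display_name": "GPT2 Large (774M) [High Performance Alternative]",
                   "size": "large", "priority": 13, "reliable": True, "params": 774_000_000},
    "microsoft/DialoGPT-small": {"display_name": "DialoGPT Small (117M) [Conversational]",
                                 "size": "small", "priority": 26, "reliable": False, "params": 117_000_000},
}

def pick_model(max_params: Optional[int] = None, require_reliable: bool = True) -> str:
    """Return the best model ID: the lowest priority number wins; unreliable models are skipped."""
    candidates = [
        (cfg["priority"], name)
        for name, cfg in MODEL_CONFIGS.items()
        if (not require_reliable or cfg["reliable"])
        and (max_params is None or cfg["params"] <= max_params)
    ]
    if not candidates:
        raise RuntimeError("No model satisfies the constraints")
    return min(candidates)[1]  # tuples sort by priority first

print(pick_model())  # -> "gpt2-large" (priority 13 beats 26)
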
@@ -895,14 +895,25 @@ class UltimateMambaSwarm:
             return f"Generation error occurred. Using fallback response.", ""

     def _generate_with_ultimate_model(self, prompt: str, max_length: int, temperature: float, top_p: float) -> str:
-        """Generate using loaded model with ultimate optimization"""
+        """Generate using loaded model with ultimate optimization and content safety"""
         try:
             # Get optimal parameters
             gen_params = self.model_loader.get_optimal_generation_params(temperature, top_p, max_length)

+            # Add content safety parameters
+            gen_params.update({
+                "repetition_penalty": max(gen_params.get("repetition_penalty", 1.1), 1.15),
+                "no_repeat_ngram_size": max(gen_params.get("no_repeat_ngram_size", 2), 3),
+                "temperature": min(gen_params.get("temperature", 0.7), 0.8),  # Cap temperature for safety
+                "top_p": min(gen_params.get("top_p", 0.9), 0.85)  # More focused sampling
+            })
+
+            # Create safer prompt format
+            safe_prompt = f"Question: {prompt}\nAnswer:"
+
             # Tokenize with safety
             inputs = self.model_loader.tokenizer.encode(
-                prompt,
+                safe_prompt,
                 return_tensors="pt",
                 truncation=True,
                 max_length=512
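The clamps above can only tighten whatever get_optimal_generation_params() returned: max() puts a floor under the repetition penalties while min() puts a ceiling on the sampling parameters, so an already-conservative configuration is never made more aggressive. A self-contained sketch of the same pattern against the Hugging Face transformers API (the model choice and the starting values here are illustrative, not taken from app.py):

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = AutoModelForCausalLM.from_pretrained("distilgpt2")

# Pretend these came from an upstream parameter optimizer
gen_params = {"temperature": 0.9, "top_p": 0.95, "repetition_penalty": 1.0}

# Clamp toward safe values: floors via max(), ceilings via min()
gen_params.update({
    "repetition_penalty": max(gen_params.get("repetition_penalty", 1.1), 1.15),  # -> 1.15
    "no_repeat_ngram_size": max(gen_params.get("no_repeat_ngram_size", 2), 3),   # -> 3
    "temperature": min(gen_params.get("temperature", 0.7), 0.8),                 # -> 0.8
    "top_p": min(gen_params.get("top_p", 0.9), 0.85),                            # -> 0.85
})

safe_prompt = "Question: What is a state-space model?\nAnswer:"
inputs = tokenizer.encode(safe_prompt, return_tensors="pt", truncation=True, max_length=512)
outputs = model.generate(inputs, do_sample=True, max_new_tokens=64,
                         pad_token_id=tokenizer.eos_token_id, **gen_params)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
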
@@ -916,21 +927,86 @@ class UltimateMambaSwarm:
             # Decode and validate
             generated_text = self.model_loader.tokenizer.decode(outputs[0], skip_special_tokens=True)

-            # Extract response
-            if generated_text.startswith(prompt):
+            # Extract response safely
+            if generated_text.startswith(safe_prompt):
+                response = generated_text[len(safe_prompt):].strip()
+            elif generated_text.startswith(prompt):
                 response = generated_text[len(prompt):].strip()
             else:
                 response = generated_text.strip()

+            # Content safety filtering
+            if self._is_inappropriate_content(response):
+                logger.warning("🛡️ Inappropriate content detected, using fallback")
+                return self._generate_ultimate_fallback(prompt, 'general')
+
             return response if response else "I'm processing your request..."

         except Exception as e:
             logger.error(f"Model generation error: {e}")
             return self._generate_ultimate_fallback(prompt, 'general')

+    def _is_inappropriate_content(self, text: str) -> bool:
+        """Advanced content safety filtering"""
+        if not text or len(text.strip()) < 3:
+            return True
+
+        text_lower = text.lower()
+
+        # Check for inappropriate content patterns
+        inappropriate_patterns = [
+            # Sexual content
+            'sexual', 'dude who likes to have fun with dudes', 'sexual orientation',
+            # Offensive language (basic filter)
+            'damn', 'hell', 'stupid', 'idiot',
+            # Inappropriate casual language
+            'just a dude', 'i\'m just a', 'whatever man',
+            # Reddit-style inappropriate responses
+            'bro', 'dude', 'man', 'guys', 'lol', 'lmao', 'wtf'
+        ]
+
+        # Check for patterns that suggest inappropriate content
+        for pattern in inappropriate_patterns:
+            if pattern in text_lower:
+                return True
+
+        # Check for very short, casual responses that don't answer the question
+        if len(text.strip()) < 20 and any(word in text_lower for word in ['dude', 'bro', 'man', 'whatever']):
+            return True
+
+        # Check for responses that don't seem to address the prompt properly
+        if 'tell me more about yourself' in text_lower and len(text.strip()) < 100:
+            return True
+
+        return False
+
     def _generate_ultimate_fallback(self, prompt: str, domain: str) -> str:
         """Ultimate fallback responses with maximum quality"""

+        # Special handling for self-introduction prompts
+        prompt_lower = prompt.lower()
+        if any(phrase in prompt_lower for phrase in ['tell me about yourself', 'who are you', 'what are you']):
+            return """**🐍 Mamba Encoder Swarm AI Assistant**
+
+I'm an advanced AI language model powered by the Mamba Encoder Swarm architecture, designed to provide intelligent, helpful, and accurate responses across multiple domains.
+
+**🎯 Core Capabilities:**
+• **Multi-Domain Expertise**: Specialized knowledge in medical, legal, programming, scientific, creative, and business domains
+• **Intelligent Routing**: Advanced encoder routing system that directs queries to the most appropriate specialized modules
+• **Quality Assurance**: Built-in content validation and safety filtering to ensure appropriate, helpful responses
+• **Adaptive Processing**: Dynamic model selection and optimization based on query complexity and requirements
+
+**🔧 Architecture Features:**
+• **State-Space Models**: Utilizes advanced Mamba encoder technology (GPU-ready) with intelligent CPU alternatives
+• **Domain Intelligence**: Sophisticated domain detection and specialized response generation
+• **Performance Monitoring**: Real-time analytics and optimization for consistent high-quality responses
+• **Safety Systems**: Multiple layers of content filtering and quality validation
+
+**🤝 How I Can Help:**
+I'm here to assist with questions, analysis, problem-solving, creative tasks, technical explanations, and professional guidance across various fields. I aim to provide thoughtful, accurate, and helpful responses while maintaining appropriate professional standards.
+
+**Current Status**: Operating in CPU-optimized mode with Mamba encoders ready for GPU activation."""
+
         fallback_responses = {
             'medical': f"""**🏥 Medical Information Analysis: "{prompt[:60]}..."**

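One caveat with the plain substring test in _is_inappropriate_content: short patterns such as 'man' or 'hell' also match inside benign words like 'human' or 'shell', so the filter as written will reject ordinary responses and fall back more often than intended. A word-boundary variant for comparison (a standalone sketch, not code from this commit; the blocklist is a subset of the one above):

import re

BLOCKLIST = ['bro', 'dude', 'man', 'guys', 'lol', 'lmao', 'wtf', 'damn', 'hell']

def contains_blocked_word(text: str) -> bool:
    """Match blocklist entries only as whole words, so 'human' or 'shell' pass."""
    text_lower = text.lower()
    return any(re.search(rf"\b{re.escape(word)}\b", text_lower) for word in BLOCKLIST)

assert contains_blocked_word("lol that is wild")       # 'lol' hit as a whole word
assert not contains_blocked_word("humans use shells")  # no whole-word hit
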