tigres2526
/

CAI-20B

+#!/usr/bin/env python3
+"""
+CAI-20B Utils - Production utilities for the CAI-20B Marketing Strategy Expert model
+"""
+import re
+import torch
+from typing import Optional, Dict, Any
+from transformers import AutoModelForCausalLM, AutoTokenizer
class ResponseCleaner:
    """Clean up model responses to remove artifacts and formatting issues.

    Cleaning is a pipeline of regex passes: strip known artifact phrases and
    special tokens, collapse repeated segments, tidy the ending, then apply a
    minimum-quality gate.  The pattern lists are plain instance attributes
    (lists of regex source strings) so they can be inspected or extended
    after construction.
    """

    def __init__(self):
        # Common artifacts to remove.  Every pattern is applied with
        # re.IGNORECASE | re.MULTILINE; the ``.*?(?=\n|$)`` tails delete the
        # rest of the line on which the phrase occurs.
        self.artifact_patterns = [
            r'<\|[^>]+\|>',  # Special tokens like <|assistant|>
            r'assistantfinal',
            r'assistant\s*final',
            r'\bassistant\b(?![\w\s]*:)',
            r'We need to understand:.*?(?=\n|$)',
            r'We need to.*?(?=\n|$)',
            r'I need to.*?(?=\n|$)',
            r'Let me.*?(?=\n|$)',
            r'According to guidelines.*?(?=\n|$)',
            r'The prompt asks.*?(?=\n|$)',
            r'The user asks.*?(?=\n|$)',
            r'Wait question.*?(?=\n|$)',
            r'We must respond.*?(?=\n|$)',
            r"Let's produce.*?(?=\n|$)",
            r'The answer:.*?(?=\n|$)',
            r'The conversation ends.*?(?=\n|$)',
            # Three or more *literal* backslash-n sequences (escaped text, not
            # real newlines).  The group is required so the quantifier covers
            # the whole "\n" pair instead of only the final "n".
            r'(?:\\n){3,}',
            r'\\u[0-9a-fA-F]{4}',  # Literal \uXXXX escape text
        ]
        # A segment of 10+ characters that occurs three or more times in a row.
        self.repetition_pattern = r'(.{10,}?)\1{2,}'
        # Patterns for incomplete endings (applied in this order).
        self.incomplete_patterns = [
            r'\.{3,}$',  # Trailing ellipsis
            r'\s+\.\s*$',  # Trailing period with spaces
            r'\s+$',  # Trailing spaces
            r'^\s+',  # Leading spaces
        ]

    def clean_response(self, text: str) -> str:
        """Run the full cleaning pipeline on ``text``.

        Args:
            text: Raw decoded model output.

        Returns:
            The cleaned text.  An empty/None input yields ``""``.  If the
            quality gate rejects the result, the cleaned text is still
            returned (rather than the raw input) so stripped artifacts never
            reappear; the raw input is used only when cleaning leaves nothing.
        """
        if not text:
            return ""
        # Step 1: Remove artifacts
        cleaned = self.clean_artifacts(text)
        # Step 2: Fix repetitions
        cleaned = self.fix_repetitions(cleaned)
        # Step 3: Fix incomplete endings
        cleaned = self.fix_incomplete_endings(cleaned)
        # Step 4: Minimum-quality gate.  A rejection must not undo steps 1-3,
        # so fall back to the best-effort cleaned text, not the raw input.
        if self.ensure_minimum_quality(cleaned) is not None:
            return cleaned
        return cleaned or text

    def clean_artifacts(self, text: str) -> str:
        """Remove known artifacts and normalise whitespace.

        Runs of spaces/tabs become a single space, spaces hugging a newline
        are dropped, and three or more consecutive newlines are reduced to a
        single blank line, so paragraph and list structure is kept.

        Args:
            text: Text to clean.

        Returns:
            The text with artifact patterns removed, whitespace normalised
            and leading/trailing whitespace stripped.
        """
        cleaned = text
        for pattern in self.artifact_patterns:
            cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE | re.MULTILINE)
        # Collapse horizontal whitespace only; collapsing "\s+" would also eat
        # every newline and make the blank-line rule below unreachable.
        cleaned = re.sub(r'[^\S\n]+', ' ', cleaned)
        cleaned = re.sub(r' ?\n ?', '\n', cleaned)
        cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)
        return cleaned.strip()

    def fix_repetitions(self, text: str) -> str:
        """Collapse looping segments and immediately repeated words.

        Args:
            text: Text to de-duplicate.

        Returns:
            The text with any segment matched by ``repetition_pattern``
            reduced to one copy, and runs of the same word ("the the the")
            reduced to a single occurrence.
        """
        # DOTALL lets a repeated segment span line breaks (e.g. a repeated
        # bullet line), which matters now that newlines are preserved.
        cleaned = re.sub(
            self.repetition_pattern,
            lambda match: match.group(1),
            text,
            flags=re.DOTALL,
        )
        # Remove duplicate words; the repeated group handles runs longer than two.
        cleaned = re.sub(r'\b(\w+)(?:\s+\1\b)+', r'\1', cleaned)
        return cleaned

    def fix_incomplete_endings(self, text: str) -> str:
        """Tidy the end of the text and try to finish on sentence punctuation.

        If the text does not end in ``.``, ``!`` or ``?``:
          * a trailing fragment shorter than 20 characters is treated as a
            cut-off sentence and dropped, provided an earlier ``.`` exists
            (otherwise the text is left unchanged);
          * a longer trailing fragment gets a ``.`` appended.

        Args:
            text: Text to tidy.

        Returns:
            The tidied text.
        """
        # Trim first so the "$"-anchored patterns see the real last character
        # ("text... " would otherwise keep its ellipsis).
        cleaned = text.strip()
        # Remove incomplete patterns
        for pattern in self.incomplete_patterns:
            cleaned = re.sub(pattern, '', cleaned)
        # Ensure proper ending punctuation
        if cleaned and cleaned[-1] not in '.!?':
            last_sentence = cleaned.split('.')[-1].strip()
            if len(last_sentence) < 20:
                parts = cleaned.rsplit('.', 1)
                if len(parts) > 1:
                    cleaned = parts[0] + '.'
            else:
                cleaned += '.'
        return cleaned

    def ensure_minimum_quality(self, text: str, min_length: int = 50) -> Optional[str]:
        """Return ``text`` if it passes basic quality checks, else ``None``.

        Checks, in order: stripped length of at least ``min_length``; at most
        30% of characters outside letters, digits, whitespace and basic
        punctuation; at least one ``.``/``!``/``?``-delimited fragment longer
        than 10 characters.

        Args:
            text: Candidate response.
            min_length: Minimum stripped length to accept.

        Returns:
            ``text`` unchanged when every check passes, otherwise ``None``.
        """
        if len(text.strip()) < min_length:
            return None
        # Check for too many special characters
        special_chars = re.findall(r'[^a-zA-Z0-9\s.,!?;:\'\"-]', text)
        special_char_ratio = len(special_chars) / max(len(text), 1)
        if special_char_ratio > 0.3:
            return None
        # Check for coherent sentences
        sentences = re.split(r'[.!?]+', text)
        if not any(len(sentence.strip()) > 10 for sentence in sentences):
            return None
        return text
class StrictPromptTemplate:
    """Prompt scaffolding that tells the model to emit only its final answer.

    Holds two fixed instruction blocks (system and developer) and a
    classmethod that stacks them above the user's message.
    """

    # System-level instructions placed first in every prompt.
    SYSTEM_PROMPT = """You are a marketing strategy assistant powered by gpt-oss.
Knowledge cutoff: 2024-06
Current date: 2025-08-06
CRITICAL INSTRUCTIONS:
- Provide ONLY the final answer without any internal reasoning
- NEVER include tokens like <|assistant|>, <|user|>, or similar
- NEVER explain your thought process or what you're doing
- NEVER use phrases like "We need to", "Let me", "I need to"
- NEVER repeat words or phrases
- Always end responses properly with punctuation
- Keep responses concise and professional"""

    # Developer-level output requirements placed after the system block.
    DEVELOPER_PROMPT = """# Response Requirements
- Output ONLY the final response to the user
- NO internal dialogue or reasoning exposition
- NO meta-commentary about the task
- NO repetitive text or loops
- Must be complete, coherent sentences
- Professional marketing expertise only
- If uncertain, provide best practice guidance
- Format: Direct, actionable advice"""

    @classmethod
    def format_prompt(cls, user_message: str) -> str:
        """Build the full prompt for one user message.

        Args:
            user_message: The user's question, inserted verbatim.

        Returns:
            Four newline-separated sections: the system prompt, the developer
            prompt, ``User: <message>`` and a trailing ``Assistant:`` cue.
        """
        sections = (
            cls.SYSTEM_PROMPT,
            cls.DEVELOPER_PROMPT,
            f"User: {user_message}",
            "Assistant:",
        )
        return "\n".join(sections)
class CAI20B:
    """Production-ready wrapper for CAI-20B Marketing Strategy Expert.

    Loads the tokenizer and causal language model once, then exposes
    ``generate`` (prompt formatting, sampling, optional cleaning and
    artifact-triggered retries) and an interactive ``chat`` loop.
    """

    # Returned by generate() when no usable text was produced.
    _FALLBACK_RESPONSE = (
        "I can help with marketing strategy questions. "
        "Please try rephrasing your question."
    )

    def __init__(
        self,
        model_name: str = "tigres2526/CAI-20B",
        device: str = "auto",
        torch_dtype = torch.bfloat16,
        trust_remote_code: bool = True
    ):
        """Initialize the model with production settings.

        Args:
            model_name: Model identifier passed to ``from_pretrained``.
            device: Passed as ``device_map`` when loading the model; any value
                other than ``"auto"`` is also used as the target for inputs.
            torch_dtype: Dtype passed to ``from_pretrained``.
            trust_remote_code: Forwarded to both ``from_pretrained`` calls.
        """
        print("Loading CAI-20B Marketing Strategy Expert...")
        self.device = device
        self.cleaner = ResponseCleaner()
        self.prompt_template = StrictPromptTemplate()
        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=trust_remote_code
        )
        # generate() needs a pad token; reuse EOS when the tokenizer has none.
        if not self.tokenizer.pad_token:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        # Load model
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map=device,
            torch_dtype=torch_dtype,
            trust_remote_code=trust_remote_code
        )
        self.model.eval()
        print("✅ Model ready for production use!")

    def generate(
        self,
        user_message: str,
        max_new_tokens: int = 250,
        temperature: float = 0.7,
        top_p: float = 0.9,
        repetition_penalty: float = 1.1,
        no_repeat_ngram_size: int = 3,
        do_sample: bool = True,
        clean_output: bool = True,
        retry_on_artifacts: bool = True,
        max_retries: int = 2
    ) -> str:
        """Generate a clean response to a user message.

        Args:
            user_message: The user's question.
            max_new_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature for the first attempt.
            top_p: Nucleus-sampling threshold.
            repetition_penalty: Repetition penalty for the first attempt.
            no_repeat_ngram_size: N-gram size that may not repeat.
            do_sample: Whether to sample instead of decoding greedily.
            clean_output: Run the response through the ``ResponseCleaner``.
            retry_on_artifacts: Regenerate when ``_has_artifacts`` flags the
                response.
            max_retries: Total number of generation attempts.  Values below 1
                are treated as 1 so a response is always produced.

        Returns:
            The response from the first attempt without detected artifacts,
            or from the last attempt if all were flagged.  If that text is
            empty, a fixed fallback message is returned instead.
        """
        # Format prompt with strict template
        prompt = self.prompt_template.format_prompt(user_message)
        # Always attempt at least once; range(0) would leave `response` unset.
        attempts = max(1, max_retries)
        response = ""
        for attempt in range(attempts):
            # Make retries more conservative.  The outer min/max keep a
            # caller's already-stricter value from being loosened by the clamp.
            if attempt > 0:
                temperature = min(temperature, max(0.5, temperature - 0.1))
                repetition_penalty = max(
                    repetition_penalty, min(1.5, repetition_penalty + 0.1)
                )
            # Generate response
            response = self._generate_raw(
                prompt,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                repetition_penalty=repetition_penalty,
                no_repeat_ngram_size=no_repeat_ngram_size,
                do_sample=do_sample
            )
            # Clean if requested
            if clean_output:
                response = self.cleaner.clean_response(response)
            # Accept the response unless a retry is wanted and warranted.
            if not (retry_on_artifacts and self._has_artifacts(response)):
                break
            if attempt < attempts - 1:
                print(f"⚠️ Artifacts detected, retrying... (attempt {attempt + 2}/{attempts})")
        # Final fallback
        return response if response else self._FALLBACK_RESPONSE

    def _generate_raw(
        self,
        prompt: str,
        max_new_tokens: int,
        temperature: float,
        top_p: float,
        repetition_penalty: float,
        no_repeat_ngram_size: int,
        do_sample: bool
    ) -> str:
        """Tokenize ``prompt``, run ``model.generate`` and decode the new tokens.

        The prompt is truncated to 2048 tokens.  Only tokens generated after
        the prompt are decoded, with special tokens skipped.

        Returns:
            The decoded continuation, uncleaned.
        """
        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=2048
        )
        # With device="auto" the model's placement is decided at load time, so
        # follow the model's own reported device instead of leaving inputs on
        # the CPU.  An explicit device is used as given.
        if self.device != "auto":
            target_device = self.device
        else:
            target_device = getattr(self.model, "device", None)
        # Skip a "meta" device (reported for offloaded weights); tensors
        # cannot usefully be moved there.
        if target_device is not None and getattr(target_device, "type", None) != "meta":
            inputs = inputs.to(target_device)
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                repetition_penalty=repetition_penalty,
                no_repeat_ngram_size=no_repeat_ngram_size,
                do_sample=do_sample,
                pad_token_id=self.tokenizer.pad_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
                # NOTE(review): early_stopping is a beam-search option and is
                # presumably ignored without beams — confirm before removing.
                early_stopping=True
            )
        # Drop the echoed prompt: keep only tokens past the input length.
        prompt_length = inputs['input_ids'].shape[1]
        response = self.tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True
        )
        return response

    def _has_artifacts(self, text: str) -> bool:
        """Return True when ``text`` looks unusable or contains artifact markers.

        A response is flagged when it is empty, shorter than 50 characters
        after stripping, or contains (case-insensitively) any indicator
        substring below.
        """
        if not text or len(text.strip()) < 50:
            return True
        artifact_indicators = [
            "we need to", "let me", "<|", "|>",
            "assistant", "...", "  ", "according to guidelines",
            "the prompt asks", "wait question"
        ]
        text_lower = text.lower()
        return any(indicator in text_lower for indicator in artifact_indicators)

    def chat(self):
        """Run an interactive prompt/response loop on stdin/stdout.

        ``exit`` quits and ``clear`` prints a confirmation (no history is
        kept between turns, so there is nothing else to reset).  End-of-input
        or Ctrl-C also ends the session cleanly.
        """
        print("\n" + "=" * 70)
        print("CAI-20B Marketing Strategy Expert - Interactive Chat")
        print("Type 'exit' to quit, 'clear' to reset conversation")
        print("=" * 70 + "\n")
        while True:
            try:
                user_input = input("You: ").strip()
            except (EOFError, KeyboardInterrupt):
                # Closed stdin or Ctrl-C: leave the loop instead of crashing.
                print("\nGoodbye!")
                break
            command = user_input.lower()
            if command == 'exit':
                print("Goodbye!")
                break
            if command == 'clear':
                print("Conversation cleared.\n")
                continue
            if not user_input:
                continue
            response = self.generate(user_input)
            print(f"\nCAI-20B: {response}\n")
            print("-" * 70 + "\n")
# One-shot helper for callers that do not want to manage a CAI20B instance.
def quick_generate(question: str, model_name: str = "tigres2526/CAI-20B") -> str:
    """Load ``model_name``, answer ``question`` once, and return the response.

    A new ``CAI20B`` wrapper is built on every call and no reference to it is
    kept once the call returns.
    """
    return CAI20B(model_name).generate(question)
if __name__ == "__main__":
    # Smoke test: load the model and answer a few sample questions.
    print("Testing CAI-20B Marketing Strategy Expert...")
    expert = CAI20B()
    sample_questions = (
        "What are the top 3 marketing channels for a B2B SaaS startup?",
        "How should I allocate a $10K monthly marketing budget?",
        "What's the difference between CAC and LTV?",
    )
    divider = "-" * 50 + "\n"
    print("\nRunning test questions:\n")
    for prompt_text in sample_questions:
        print(f"Q: {prompt_text}")
        answer = expert.generate(prompt_text)
        print(f"A: {answer}\n")
        print(divider)