phxdev commited on
Commit
85f63f5
Β·
verified Β·
1 Parent(s): 4c3a213

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +704 -785
app.py CHANGED
@@ -1,44 +1,26 @@
1
  # Create wrapper function for proper chat handling
2
- def respond(message, history):
3
- """Response handler for Gradio messages format"""
4
- if not message.strip():
5
- return "", history
6
-
7
- print(f"πŸ” Input message: {message}")
8
- print(f"πŸ” History length: {len(history)}")
9
-
10
- # Convert messages format to simple tuples for model processing
11
- simple_history = []
12
- for i in range(0, len(history) - 1, 2):
13
- if i + 1 < len(history):
14
- user_msg = history[i].get('content', '') if isinstance(history[i], dict) else str(history[i])
15
- bot_msg = history[i + 1].get('content', '') if isinstance(history[i + 1], dict) else str(history[i + 1])
16
- if user_msg and bot_msg:
17
- simple_history.append([user_msg, bot_msg])
18
-
19
- print(f"πŸ” Simple history: {simple_history}")
20
-
21
- # Generate response with debugging
22
- try:
23
- response_generator = creed_ai.generate_response(message, simple_history)
24
- final_response = ""
25
-
26
- for response_chunk in response_generator:
27
- final_response = response_chunk
28
  # Create new history with the streaming response
29
  new_history = history + [
30
  {"role": "user", "content": message},
31
  {"role": "assistant", "content": response_chunk}
32
  ]
33
- yield "", new_history
34
-
35
- except Exception as e:
36
- print(f"❌ Error in respond: {e}")
37
- error_history = history + [
38
- {"role": "user", "content": message},
39
- {"role": "assistant", "content": f"🎸 *Creed's brain malfunctioned* Error: {str(e)[:100]}"}
40
- ]
41
- yield "", error_history#!/usr/bin/env python3
42
  """
43
  🎸 Creed Bratton AI - Using phxdev/creed-qwen-0.5b-lora
44
  The REAL Creed, trained by Mark, not some knockoff prompt engineering
@@ -59,267 +41,216 @@ os.environ['GRADIO_MCP_ENABLED'] = 'true'
59
 
60
  # Spaces compatibility
61
  try:
62
- import spaces
63
- SPACES_AVAILABLE = True
64
- @spaces.GPU
65
- def gpu_placeholder():
66
- return "GPU satisfied"
67
  except ImportError:
68
- SPACES_AVAILABLE = False
69
 
70
  class CreedBrattonAI:
71
- """Real Creed AI using Mark's trained model - GPU optimized"""
72
-
73
- def __init__(self):
74
- self.model = None
75
- self.tokenizer = None
76
- self.model_loaded = False
77
- self.loading = False
78
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
79
 
80
- print(f"🎸 Initializing Creed AI")
81
- print(f"πŸ–₯️ Device detected: {self.device}")
82
- if torch.cuda.is_available():
83
- print(f"πŸš€ GPU: {torch.cuda.get_device_name()}")
84
- print(f"πŸ’Ύ GPU Memory: {torch.cuda.get_device_properties(0).total_memory // 1024**3} GB")
85
-
86
- # Load model with proper GPU detection
87
- self.load_model()
88
-
89
- def load_model(self):
90
- """Load the model with GPU optimization when available"""
91
- if self.loading or self.model_loaded:
92
- return
93
-
94
- self.loading = True
95
-
96
- try:
97
- print(f"🧠 Loading Creed's consciousness on {self.device}...")
98
-
99
- # Load model and tokenizer
100
- model_name = "phxdev/creed-qwen-0.5b-lora"
101
-
102
- print("πŸ“¦ Loading tokenizer...")
103
- self.tokenizer = AutoTokenizer.from_pretrained(
104
- model_name,
105
- trust_remote_code=True,
106
- padding_side="left"
107
- )
108
-
109
- # Fix pad token issue
110
- if self.tokenizer.pad_token is None:
111
- self.tokenizer.pad_token = self.tokenizer.unk_token or "[PAD]"
112
- print(f"πŸ”§ Set pad_token to: {self.tokenizer.pad_token}")
113
-
114
- # Add Creed's custom tokens
115
- custom_tokens = ["<thinking>", "<conspiracy>", "<tangent>"]
116
- print(f"🎸 Adding Creed's custom tokens: {custom_tokens}")
117
 
118
- num_added_tokens = self.tokenizer.add_tokens(custom_tokens)
119
- print(f"βœ… Added {num_added_tokens} custom tokens")
 
 
 
120
 
121
- print(f"πŸ€– Loading model on {self.device}...")
 
 
 
 
 
 
 
 
122
 
123
- # Load model with proper device handling
124
- if self.device == "cuda":
125
- self.model = AutoModelForCausalLM.from_pretrained(
126
- model_name,
127
- torch_dtype=torch.float16, # Use float16 for GPU efficiency
128
- device_map="auto", # Auto device mapping for GPU
129
- trust_remote_code=True,
130
- low_cpu_mem_usage=True
131
- )
132
- # Explicitly ensure model is on CUDA
133
- if self.model.device.type != "cuda":
134
- print("πŸ”§ Explicitly moving model to CUDA...")
135
- self.model = self.model.to(self.device)
136
- else:
137
- self.model = AutoModelForCausalLM.from_pretrained(
138
  model_name,
139
- torch_dtype=torch.float32, # Use float32 for CPU
140
- device_map=None,
141
  trust_remote_code=True,
142
- low_cpu_mem_usage=True
143
  )
144
- self.model = self.model.to("cpu")
145
-
146
- # Resize embeddings for custom tokens
147
- if num_added_tokens > 0:
148
- print(f"πŸ”§ Resizing model embeddings for {num_added_tokens} custom tokens")
149
- self.model.resize_token_embeddings(len(self.tokenizer))
150
-
151
- self.model.eval()
152
-
153
- # Verify device placement
154
- model_device = next(self.model.parameters()).device
155
- print(f"🎯 Model is actually on: {model_device}")
156
-
157
- self.model_loaded = True
158
- self.loading = False
159
- print(f"βœ… Creed's consciousness loaded on {model_device}!")
160
-
161
- # GPU memory info
162
- if self.device == "cuda" and torch.cuda.is_available():
163
- print(f"πŸ”₯ GPU Memory Used: {torch.cuda.memory_allocated() // 1024**2} MB")
164
- print(f"πŸ“Š GPU Memory Cached: {torch.cuda.memory_reserved() // 1024**2} MB")
165
-
166
- except Exception as e:
167
- print(f"❌ Error loading Creed model: {e}")
168
- print("πŸ”„ Falling back to base model...")
169
- try:
170
- base_model = "Qwen/Qwen2.5-0.5B-Instruct"
171
- self.tokenizer = AutoTokenizer.from_pretrained(base_model)
172
 
173
- # Fix pad token for fallback too
 
 
 
 
 
 
174
  if self.tokenizer.pad_token is None:
175
- self.tokenizer.pad_token = self.tokenizer.unk_token or "[PAD]"
 
 
176
 
 
177
  if self.device == "cuda":
178
  self.model = AutoModelForCausalLM.from_pretrained(
179
- base_model,
180
- torch_dtype=torch.float16,
181
- device_map="auto"
 
 
182
  )
 
183
  if self.model.device.type != "cuda":
 
184
  self.model = self.model.to(self.device)
185
  else:
186
  self.model = AutoModelForCausalLM.from_pretrained(
187
- base_model,
188
- torch_dtype=torch.float32,
189
- device_map=None
 
 
190
  )
191
  self.model = self.model.to("cpu")
192
 
 
 
 
 
 
193
  self.model.eval()
194
- self.model_loaded = True
 
195
  model_device = next(self.model.parameters()).device
196
- print(f"βœ… Fallback model loaded on {model_device}")
197
- except Exception as fallback_error:
198
- print(f"❌ Fallback also failed: {fallback_error}")
199
- self.loading = False
200
-
201
- @spaces.GPU if SPACES_AVAILABLE else lambda func: func
202
- def generate_response_gpu(self, conversation: str) -> str:
203
- """Generate response using the loaded model with proper device handling"""
204
-
205
- if not self.model_loaded:
206
- return "❌ Model not loaded"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
- try:
209
- print(f"πŸ” Input conversation length: {len(conversation)}")
210
- print(f"πŸ” Input sample: {conversation[:200]}...")
211
-
212
- # Ensure model is on the correct device
213
- if self.device == "cuda" and self.model.device.type != "cuda":
214
- print(f"πŸ”„ Moving model from {self.model.device} to {self.device}")
215
- self.model = self.model.to(self.device)
216
-
217
- # Tokenize input with attention mask
218
- inputs = self.tokenizer(
219
- conversation,
220
- return_tensors="pt",
221
- padding=True,
222
- truncation=True,
223
- max_length=1024, # Reduce max length to avoid issues
224
- add_special_tokens=True
225
- )
226
-
227
- print(f"πŸ” Tokenized input_ids shape: {inputs['input_ids'].shape}")
228
- print(f"πŸ” First few tokens: {inputs['input_ids'][0][:10]}")
229
-
230
- # Move inputs to same device as model
231
- input_ids = inputs['input_ids'].to(self.device)
232
- attention_mask = inputs['attention_mask'].to(self.device)
233
-
234
- print(f"πŸ” Model device: {self.model.device}, Input device: {input_ids.device}")
235
-
236
- # Generate response with proper attention mask
237
- with torch.no_grad():
238
- outputs = self.model.generate(
239
- input_ids=input_ids,
240
- attention_mask=attention_mask, # Pass attention mask
241
- max_new_tokens=100, # Reduce to debug
242
- do_sample=True,
243
- temperature=0.7, # Lower temperature
244
- top_p=0.9,
245
- top_k=50,
246
- repetition_penalty=1.1,
247
- pad_token_id=self.tokenizer.pad_token_id,
248
- eos_token_id=self.tokenizer.eos_token_id,
249
- use_cache=True
250
- )
251
-
252
- print(f"πŸ” Generated output shape: {outputs.shape}")
253
- print(f"πŸ” Generated tokens: {outputs[0][-20:]}") # Last 20 tokens
254
 
255
- # Decode response
256
- full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
257
- input_text = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
258
- response = full_response[len(input_text):].strip()
259
 
260
- print(f"πŸ” Raw response: {response[:200]}...")
261
-
262
- cleaned_response = self._clean_response(response)
263
- print(f"πŸ” Cleaned response: {cleaned_response[:200]}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
 
265
- return cleaned_response
 
 
 
 
 
 
266
 
267
- except Exception as e:
268
- print(f"❌ Generation error: {e}")
269
- import traceback
270
- traceback.print_exc()
271
- return f"🎸 *Creed scratches his head* Something weird happened... {str(e)[:100]}"
272
-
273
- def generate_response(self, message: str, history: List[Dict]) -> Iterator[str]:
274
- """Generate response using the trained Creed model - works with messages format"""
275
-
276
- if not self.model_loaded:
277
- if self.loading:
278
- yield "🧠 Creed's consciousness is still loading... give me a moment..."
279
- return
280
- else:
281
- yield "❌ Something went wrong loading Creed's mind. Try refreshing the page."
282
- return
283
-
284
- try:
285
- # Convert messages format to simple history
286
- formatted_history = []
287
- if history:
288
- for msg in history:
289
- if msg.get('role') == 'user':
290
- user_msg = msg.get('content', '')
291
- # Look for the next assistant message
292
- assistant_msg = ""
293
- formatted_history.append([user_msg, assistant_msg])
294
- elif msg.get('role') == 'assistant':
295
- # Update the last entry with assistant response
296
- if formatted_history:
297
- formatted_history[-1][1] = msg.get('content', '')
298
-
299
- # Format the conversation
300
- conversation = self._format_conversation(message, formatted_history)
301
-
302
- # Generate response using GPU function
303
- response = self.generate_response_gpu(conversation)
304
-
305
- # Stream the response word by word for effect
306
- words = response.split()
307
- current_response = ""
308
-
309
- for word in words:
310
- current_response += word + " "
311
- time.sleep(0.05)
312
- yield current_response.strip()
313
 
314
- except Exception as e:
315
- print(f"❌ Error generating response: {e}")
316
- yield f"🎸 *Creed scratches his head* Something weird happened in my brain... {str(e)[:100]}"
317
-
318
- def _format_conversation(self, message: str, history: List[List[str]]) -> str:
319
- """Format the conversation for the model with proper system prompt"""
 
 
 
 
 
 
 
 
 
320
 
321
- # Comprehensive Creed system prompt
322
- system_prompt = """You are Creed Bratton from The Office. You embody his complete personality and speaking patterns.
 
 
 
323
 
324
  CORE IDENTITY:
325
  - Former member of The Grass Roots (1960s rock band)
@@ -359,554 +290,542 @@ RESPONSE GUIDELINES:
359
  Remember: You're not trying to be helpful in a traditional sense - you're being Creed Bratton.
360
 
361
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
362
 
363
- # Add conversation history
364
- conversation = system_prompt
365
- for user_msg, creed_msg in history[-4:]: # Keep recent context
366
- if user_msg and creed_msg: # Only add complete exchanges
367
- conversation += f"Human: {user_msg}\n"
368
- conversation += f"Creed: {creed_msg}\n"
369
-
370
- # Add current message
371
- conversation += f"Human: {message}\n"
372
- conversation += "Creed:"
373
-
374
- return conversation
375
-
376
- def _clean_response(self, response: str) -> str:
377
- """Clean up the model response and format custom tokens"""
378
-
379
- # Remove common artifacts
380
- response = response.replace("Human:", "").replace("Creed:", "")
381
-
382
- # Format Creed's custom tokens for better UI display
383
- response = response.replace("<thinking>", "πŸ€” *thinking* ")
384
- response = response.replace("</thinking>", "")
385
- response = response.replace("<conspiracy>", "πŸ•΅οΈ *conspiracy mode* ")
386
- response = response.replace("</conspiracy>", "")
387
- response = response.replace("<tangent>", "πŸŒ€ *tangent* ")
388
- response = response.replace("</tangent>", "")
389
-
390
- # Remove excessive whitespace
391
- response = " ".join(response.split())
392
-
393
- # Ensure it ends properly
394
- if response and not response.endswith(('.', '!', '?', '...', '*')):
395
- response += "."
396
-
397
- return response
398
-
399
- def creed_wisdom_tool(self, topic: str = "life") -> str:
400
- """MCP tool: Get Creed's wisdom on a topic"""
401
- if not self.model_loaded:
402
- return "🧠 Creed's consciousness is still loading..."
403
-
404
- prompt = f"Give me your wisdom about {topic}."
405
-
406
- # Generate a one-shot response
407
- final_response = ""
408
- for response in self.generate_response(prompt, []):
409
- final_response = response
410
-
411
- return final_response
412
-
413
- def cleanup_gpu_memory(self):
414
- """Clean up GPU memory if using CUDA"""
415
- if self.device == "cuda" and torch.cuda.is_available():
416
- torch.cuda.empty_cache()
417
- print(f"🧹 GPU Memory cleaned. Current: {torch.cuda.memory_allocated() // 1024**2} MB")
418
-
419
- def creed_story_tool(self, situation: str = "mysterious") -> str:
420
- """MCP tool: Get a Creed story"""
421
- if not self.model_loaded:
422
- return "🧠 Creed's consciousness is still loading..."
423
 
424
- prompt = f"Tell me a {situation} story from your past."
 
 
 
 
 
 
 
 
 
 
 
 
425
 
426
- # Generate a one-shot response
427
- final_response = ""
428
- for response in self.generate_response(prompt, []):
429
- final_response = response
 
430
 
431
- return final_response
 
 
 
 
 
 
 
 
 
 
 
 
432
 
433
  def main():
434
- """Initialize and launch the real Creed AI with modern styling"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
 
436
- print("🎸 Initializing REAL Creed Bratton AI...")
437
- print("πŸ“‘ Loading Mark's trained model: phxdev/creed-qwen-0.5b-lora")
 
 
 
 
 
 
 
 
 
438
 
439
- # Initialize Creed AI
440
- creed_ai = CreedBrattonAI()
 
 
 
 
 
 
 
 
 
 
 
 
441
 
442
- # Test the model with a simple input
443
- if creed_ai.model_loaded:
444
- print("πŸ§ͺ Testing model with simple input...")
445
- test_response = creed_ai.generate_response_gpu("Hello, test.")
446
- print(f"πŸ§ͺ Test response: {test_response}")
447
-
448
- if "Woah:" in test_response or len(test_response) > 100 and any(ord(c) > 127 for c in test_response):
449
- print("❌ Model is outputting garbage! Trying fallback...")
450
- # Force fallback to base model
451
- creed_ai.model_loaded = False
452
- creed_ai.load_model()
453
-
454
- if SPACES_AVAILABLE:
455
- gpu_placeholder()
456
- print("βœ… Spaces GPU compatibility enabled")
457
-
458
- # Memory status if GPU available
459
- if torch.cuda.is_available() and creed_ai.model_loaded:
460
- print(f"🎯 Model device verification: {next(creed_ai.model.parameters()).device}")
461
- print(f"πŸ”₯ Final GPU Memory: {torch.cuda.memory_allocated() // 1024**2} MB allocated")
462
- print(f"πŸ“Š GPU Memory Reserved: {torch.cuda.memory_reserved() // 1024**2} MB reserved")
463
-
464
- # Modern glassmorphism CSS
465
- modern_css = """
466
- /* Creed AI - Modern Glassmorphism Design */
467
- :root {
468
- --primary-gradient: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
469
- --secondary-gradient: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
470
- --glass-bg: rgba(255, 255, 255, 0.08);
471
- --glass-border: rgba(255, 255, 255, 0.18);
472
- --text-primary: #ffffff;
473
- --text-secondary: rgba(255, 255, 255, 0.8);
474
- --accent-purple: #8b5cf6;
475
- --accent-blue: #3b82f6;
476
- --shadow-glow: 0 8px 32px rgba(139, 92, 246, 0.3);
477
- }
478
-
479
- /* Main container with animated background */
480
- .gradio-container {
481
- min-height: 100vh !important;
482
- background: var(--primary-gradient) !important;
483
- background-attachment: fixed !important;
484
- font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
485
- color: var(--text-primary) !important;
486
- padding: 20px !important;
487
- position: relative !important;
488
- overflow-x: hidden !important;
489
- }
490
-
491
- .gradio-container::before {
492
- content: '';
493
- position: fixed;
494
- top: 0;
495
- left: 0;
496
- width: 100%;
497
- height: 100%;
498
- background:
499
- radial-gradient(circle at 20% 80%, rgba(139, 92, 246, 0.3) 0%, transparent 50%),
500
- radial-gradient(circle at 80% 20%, rgba(59, 130, 246, 0.3) 0%, transparent 50%),
501
- radial-gradient(circle at 40% 40%, rgba(167, 139, 250, 0.2) 0%, transparent 50%);
502
- pointer-events: none;
503
- z-index: -1;
504
- }
505
-
506
- /* Floating particles animation */
507
- .gradio-container::after {
508
- content: '';
509
- position: fixed;
510
- top: 0;
511
- left: 0;
512
- width: 100%;
513
- height: 100%;
514
- background-image:
515
- radial-gradient(2px 2px at 20px 30px, rgba(255, 255, 255, 0.3), transparent),
516
- radial-gradient(2px 2px at 40px 70px, rgba(139, 92, 246, 0.4), transparent),
517
- radial-gradient(1px 1px at 90px 40px, rgba(59, 130, 246, 0.3), transparent);
518
- background-size: 120px 120px;
519
- animation: float 20s ease-in-out infinite;
520
- pointer-events: none;
521
- z-index: -1;
522
- }
523
-
524
- @keyframes float {
525
- 0%, 100% { transform: translateY(0px) rotate(0deg); }
526
- 50% { transform: translateY(-20px) rotate(180deg); }
527
- }
528
-
529
- /* Header styling */
530
- .header {
531
- background: var(--glass-bg) !important;
532
- backdrop-filter: blur(20px) !important;
533
- border: 1px solid var(--glass-border) !important;
534
- border-radius: 24px !important;
535
- padding: 32px !important;
536
- margin-bottom: 24px !important;
537
- text-align: center !important;
538
- box-shadow: var(--shadow-glow) !important;
539
- position: relative !important;
540
- overflow: hidden !important;
541
- }
542
-
543
- .header::before {
544
- content: '';
545
- position: absolute;
546
- top: 0;
547
- left: 0;
548
- right: 0;
549
- height: 1px;
550
- background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.6), transparent);
551
- }
552
-
553
- .header h1 {
554
- font-size: 36px !important;
555
- font-weight: 700 !important;
556
- background: linear-gradient(135deg, #ffffff 0%, #a855f7 50%, #3b82f6 100%) !important;
557
- -webkit-background-clip: text !important;
558
- -webkit-text-fill-color: transparent !important;
559
- background-clip: text !important;
560
- margin: 0 0 12px 0 !important;
561
- text-shadow: 0 0 30px rgba(168, 85, 247, 0.5) !important;
562
- }
563
-
564
- .header p {
565
- font-size: 16px !important;
566
- color: var(--text-secondary) !important;
567
- margin: 0 !important;
568
- font-weight: 500 !important;
569
- }
570
-
571
- /* Info boxes with glass effect */
572
- .info-box {
573
- background: rgba(255, 255, 255, 0.06) !important;
574
- backdrop-filter: blur(16px) !important;
575
- border: 1px solid rgba(255, 255, 255, 0.12) !important;
576
- border-radius: 16px !important;
577
- padding: 20px !important;
578
- margin: 16px 0 !important;
579
- color: var(--text-secondary) !important;
580
- font-size: 14px !important;
581
- line-height: 1.6 !important;
582
- box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1) !important;
583
- }
584
-
585
- .status-box {
586
- background: rgba(16, 185, 129, 0.1) !important;
587
- backdrop-filter: blur(16px) !important;
588
- border: 1px solid rgba(16, 185, 129, 0.3) !important;
589
- border-radius: 16px !important;
590
- padding: 16px 20px !important;
591
- margin: 16px 0 !important;
592
- color: #10b981 !important;
593
- font-weight: 600 !important;
594
- box-shadow: 0 4px 20px rgba(16, 185, 129, 0.2) !important;
595
- }
596
-
597
- /* Chat area styling */
598
- .chat-area {
599
- background: var(--glass-bg) !important;
600
- backdrop-filter: blur(20px) !important;
601
- border: 1px solid var(--glass-border) !important;
602
- border-radius: 20px !important;
603
- margin: 16px 0 !important;
604
- overflow: hidden !important;
605
- box-shadow: var(--shadow-glow) !important;
606
- }
607
-
608
- /* Tools section */
609
- .tools-area {
610
- background: var(--glass-bg) !important;
611
- backdrop-filter: blur(20px) !important;
612
- border: 1px solid var(--glass-border) !important;
613
- border-radius: 20px !important;
614
- padding: 28px !important;
615
- margin: 24px 0 !important;
616
- box-shadow: var(--shadow-glow) !important;
617
- }
618
-
619
- .tools-title {
620
- font-size: 22px !important;
621
- font-weight: 600 !important;
622
- color: var(--text-primary) !important;
623
- margin: 0 0 20px 0 !important;
624
- padding-bottom: 12px !important;
625
- border-bottom: 1px solid rgba(255, 255, 255, 0.2) !important;
626
- background: linear-gradient(135deg, #ffffff 0%, #a855f7 100%) !important;
627
- -webkit-background-clip: text !important;
628
- -webkit-text-fill-color: transparent !important;
629
- }
630
-
631
- /* Form elements */
632
- .gradio-textbox input,
633
- .gradio-textbox textarea {
634
- background: rgba(255, 255, 255, 0.08) !important;
635
- backdrop-filter: blur(10px) !important;
636
- border: 1px solid rgba(255, 255, 255, 0.16) !important;
637
- color: var(--text-primary) !important;
638
- border-radius: 12px !important;
639
- padding: 12px 16px !important;
640
- transition: all 0.3s ease !important;
641
- font-size: 14px !important;
642
- }
643
-
644
- .gradio-textbox input:focus,
645
- .gradio-textbox textarea:focus {
646
- border-color: var(--accent-purple) !important;
647
- outline: none !important;
648
- box-shadow: 0 0 0 2px rgba(139, 92, 246, 0.3) !important;
649
- background: rgba(255, 255, 255, 0.12) !important;
650
- }
651
-
652
- .gradio-textbox input::placeholder,
653
- .gradio-textbox textarea::placeholder {
654
- color: rgba(255, 255, 255, 0.5) !important;
655
- }
656
-
657
- /* Labels */
658
- .gradio-container label {
659
- color: var(--text-secondary) !important;
660
- font-weight: 500 !important;
661
- font-size: 14px !important;
662
- margin-bottom: 6px !important;
663
- display: block !important;
664
- }
665
-
666
- /* Buttons */
667
- .gradio-container button {
668
- background: linear-gradient(135deg, var(--accent-purple) 0%, var(--accent-blue) 100%) !important;
669
- color: var(--text-primary) !important;
670
- border: none !important;
671
- border-radius: 12px !important;
672
- padding: 12px 24px !important;
673
- font-weight: 600 !important;
674
- cursor: pointer !important;
675
- transition: all 0.3s ease !important;
676
- box-shadow: 0 4px 15px rgba(139, 92, 246, 0.4) !important;
677
- backdrop-filter: blur(10px) !important;
678
- min-height: 44px !important;
679
- display: flex !important;
680
- align-items: center !important;
681
- justify-content: center !important;
682
- }
683
-
684
- .gradio-container button:hover {
685
- transform: translateY(-2px) !important;
686
- box-shadow: 0 8px 25px rgba(139, 92, 246, 0.6) !important;
687
- background: linear-gradient(135deg, #9333ea 0%, #2563eb 100%) !important;
688
- }
689
-
690
- .gradio-container button:active {
691
- transform: translateY(0px) !important;
692
- }
693
-
694
- /* Send button specific styling */
695
- .gradio-container .gr-button {
696
- background: linear-gradient(135deg, var(--accent-purple) 0%, var(--accent-blue) 100%) !important;
697
- border: 1px solid rgba(255, 255, 255, 0.2) !important;
698
- color: white !important;
699
- font-weight: 600 !important;
700
- text-transform: none !important;
701
- letter-spacing: 0.5px !important;
702
- }
703
-
704
- /* Chatbot specific styling */
705
- .gradio-chatbot {
706
- background: transparent !important;
707
- border: none !important;
708
- }
709
-
710
- /* Footer */
711
- .footer {
712
- text-align: center !important;
713
- padding: 28px !important;
714
- color: var(--text-secondary) !important;
715
- background: var(--glass-bg) !important;
716
- backdrop-filter: blur(20px) !important;
717
- border: 1px solid var(--glass-border) !important;
718
- border-radius: 20px !important;
719
- margin-top: 32px !important;
720
- box-shadow: var(--shadow-glow) !important;
721
- }
722
-
723
- /* Scrollbar styling */
724
- ::-webkit-scrollbar {
725
- width: 8px;
726
- }
727
-
728
- ::-webkit-scrollbar-track {
729
- background: rgba(255, 255, 255, 0.05);
730
- border-radius: 4px;
731
- }
732
-
733
- ::-webkit-scrollbar-thumb {
734
- background: linear-gradient(135deg, var(--accent-purple), var(--accent-blue));
735
- border-radius: 4px;
736
- }
737
-
738
- ::-webkit-scrollbar-thumb:hover {
739
- background: linear-gradient(135deg, #9333ea, #2563eb);
740
- }
741
-
742
- /* Responsive design */
743
- @media (max-width: 768px) {
744
- .gradio-container {
745
- padding: 12px !important;
746
  }
747
-
 
 
 
 
 
 
748
  .header {
749
- padding: 20px !important;
750
- border-radius: 16px !important;
 
 
 
 
 
 
 
 
751
  }
752
-
 
 
 
 
 
 
 
 
 
 
753
  .header h1 {
754
- font-size: 28px !important;
 
 
 
 
 
 
 
755
  }
756
-
757
- .tools-area,
758
- .chat-area {
 
 
 
 
 
 
 
 
 
 
759
  border-radius: 16px !important;
760
  padding: 20px !important;
 
 
 
 
 
761
  }
762
- }
763
- """
764
 
765
- # Create wrapper function for proper chat handling
766
- def respond(message, history):
767
- """Response handler for Gradio messages format"""
768
- for response_chunk in creed_ai.generate_response(message, history):
769
- # Update the history with the current response in messages format
770
- updated_history = history + [
771
- {"role": "user", "content": message},
772
- {"role": "assistant", "content": response_chunk}
773
- ]
774
- yield "", updated_history
775
-
776
- # Create the interface with modern theme
777
- with gr.Blocks(
778
- title="🎸 Creed Bratton AI",
779
- css=modern_css,
780
- theme=gr.themes.Base() # Use base theme for better CSS control
781
- ) as demo:
782
-
783
- # Modern header
784
- gr.HTML(f"""
785
- <div class="header">
786
- <h1>🎸 Creed Bratton AI</h1>
787
- <p>Powered by phxdev/creed-qwen-0.5b-lora β€’ Running on {'πŸš€ GPU' if creed_ai.device == 'cuda' else 'πŸ–₯️ CPU'}</p>
788
- </div>
789
- """)
790
-
791
- # Model info with glass styling
792
- gr.HTML("""
793
- <div class="info-box">
794
- <strong>Model:</strong> phxdev/creed-qwen-0.5b-lora<br>
795
- <strong>Base:</strong> Qwen 0.5B + LoRA fine-tuning<br>
796
- <strong>Tokens:</strong> &lt;thinking&gt;, &lt;conspiracy&gt;, &lt;tangent&gt;
797
- </div>
798
- """)
799
-
800
- # MCP status
801
- if os.environ.get('GRADIO_MCP_ENABLED'):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
802
  gr.HTML("""
803
- <div class="status-box">
804
- βœ“ MCP Server Active β€’ Available as tool for Claude Desktop
 
 
805
  </div>
806
  """)
807
-
808
- # Main chat interface with glass styling
809
- with gr.Row(elem_classes="chat-area"):
810
- chatbot = gr.Chatbot(
811
- type='messages', # Use messages format (modern)
812
- height=550,
813
- show_copy_button=True,
814
- show_share_button=False,
815
- avatar_images=["πŸ‘€", "🎸"],
816
- bubble_full_width=False,
817
- show_label=False,
818
- placeholder="🎸 Creed is ready...",
819
- container=False
820
- )
821
-
822
- # Input with explicit send button
823
- with gr.Row():
824
- with gr.Column(scale=7):
825
- msg = gr.Textbox(
826
- placeholder="Ask Creed anything...",
827
- container=False,
828
- submit_btn=False, # Disable built-in submit
829
- stop_btn=False
830
  )
831
- with gr.Column(scale=1, min_width=100):
832
- send_btn = gr.Button("Send", variant="primary", size="lg")
833
-
834
- # Wire up the chat - both Enter key and Send button
835
- msg.submit(
836
- respond,
837
- inputs=[msg, chatbot],
838
- outputs=[msg, chatbot],
839
- show_progress="hidden"
840
- )
841
-
842
- send_btn.click(
843
- respond,
844
- inputs=[msg, chatbot],
845
- outputs=[msg, chatbot],
846
- show_progress="hidden"
847
- )
848
-
849
- # MCP Tools section with glass styling
850
- with gr.Row(elem_classes="tools-area"):
851
- gr.HTML('<div class="tools-title">πŸ› οΈ MCP Tools</div>')
852
 
 
853
  with gr.Row():
854
- with gr.Column():
855
- wisdom_topic = gr.Textbox(
856
- label="Wisdom Topic",
857
- placeholder="life, business, relationships..."
 
 
858
  )
859
- wisdom_output = gr.Textbox(
860
- label="Creed's Response",
861
- interactive=False,
862
- lines=3
863
- )
864
- wisdom_btn = gr.Button("Ask Creed", variant="primary")
865
-
866
- with gr.Column():
867
- story_situation = gr.Textbox(
868
- label="Story Request",
869
- placeholder="Tell me about..."
870
- )
871
- story_output = gr.Textbox(
872
- label="Creed's Story",
873
- interactive=False,
874
- lines=3
875
- )
876
- story_btn = gr.Button("Get Story", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
877
 
878
- # Wire up the tools
879
- wisdom_btn.click(
880
- creed_ai.creed_wisdom_tool,
881
- inputs=[wisdom_topic],
882
- outputs=[wisdom_output]
883
- )
884
 
885
- story_btn.click(
886
- creed_ai.creed_story_tool,
887
- inputs=[story_situation],
888
- outputs=[story_output]
 
 
889
  )
890
-
891
- # Modern footer
892
- gr.HTML("""
893
- <div class="footer">
894
- <strong>Creed Bratton AI</strong><br>
895
- Model: phxdev/creed-qwen-0.5b-lora β€’ Trained by Mark Scott<br>
896
- <em>"Sometimes a guy's gotta ride the bull, am I right?"</em>
897
- </div>
898
- """)
899
-
900
- # Launch with modern styling and public sharing
901
- print("πŸš€ Launching Real Creed AI with modern glassmorphism design...")
902
-
903
- demo.launch(
904
- ssr_mode=False,
905
- server_name="0.0.0.0",
906
- server_port=7860,
907
- share=True, # Create public link
908
- show_error=True
909
- )
910
 
911
  if __name__ == "__main__":
912
- main()
 
1
  # Create wrapper function for proper chat handling
2
+ def respond(message, history):
3
+ """Response handler for Gradio messages format"""
4
+ if not message.strip():
5
+ return "", history
6
+
7
+ # Convert messages format to simple tuples
8
+ simple_history = []
9
+ for i in range(0, len(history), 2):
10
+ if i + 1 < len(history):
11
+ user_msg = history[i].get('content', '') if isinstance(history[i], dict) else str(history[i])
12
+ bot_msg = history[i + 1].get('content', '') if isinstance(history[i + 1], dict) else str(history[i + 1])
13
+ if user_msg and bot_msg:
14
+ simple_history.append([user_msg, bot_msg])
15
+
16
+ # Generate response
17
+ for response_chunk in creed_ai.generate_response(message, simple_history):
 
 
 
 
 
 
 
 
 
 
18
  # Create new history with the streaming response
19
  new_history = history + [
20
  {"role": "user", "content": message},
21
  {"role": "assistant", "content": response_chunk}
22
  ]
23
+ yield "", new_history#!/usr/bin/env python3
 
 
 
 
 
 
 
 
24
  """
25
  🎸 Creed Bratton AI - Using phxdev/creed-qwen-0.5b-lora
26
  The REAL Creed, trained by Mark, not some knockoff prompt engineering
 
41
 
42
  # Spaces compatibility
43
# Spaces compatibility: detect the Hugging Face `spaces` package at import
# time; only define the GPU-decorated placeholder when it is present.
try:
    import spaces
except ImportError:
    SPACES_AVAILABLE = False
else:
    SPACES_AVAILABLE = True

    @spaces.GPU
    def gpu_placeholder():
        """Minimal @spaces.GPU callable so ZeroGPU quota checks are satisfied."""
        return "GPU satisfied"
52
  class CreedBrattonAI:
53
+ """Real Creed AI using Mark's trained model - GPU optimized"""
 
 
 
 
 
 
 
54
 
55
+ def __init__(self):
56
+ self.model = None
57
+ self.tokenizer = None
58
+ self.model_loaded = False
59
+ self.loading = False
60
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ print(f"🎸 Initializing Creed AI")
63
+ print(f"πŸ–₯️ Device detected: {self.device}")
64
+ if torch.cuda.is_available():
65
+ print(f"πŸš€ GPU: {torch.cuda.get_device_name()}")
66
+ print(f"πŸ’Ύ GPU Memory: {torch.cuda.get_device_properties(0).total_memory // 1024**3} GB")
67
 
68
+ # Load model with proper GPU detection
69
+ self.load_model()
70
+
71
+ def load_model(self):
72
+ """Load the model with GPU optimization when available"""
73
+ if self.loading or self.model_loaded:
74
+ return
75
+
76
+ self.loading = True
77
 
78
+ try:
79
+ print(f"🧠 Loading Creed's consciousness on {self.device}...")
80
+
81
+ # Load model and tokenizer
82
+ model_name = "phxdev/creed-qwen-0.5b-lora"
83
+
84
+ print("πŸ“¦ Loading tokenizer...")
85
+ self.tokenizer = AutoTokenizer.from_pretrained(
 
 
 
 
 
 
 
86
  model_name,
 
 
87
  trust_remote_code=True,
88
+ padding_side="left"
89
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
+ # Add Creed's custom tokens
92
+ custom_tokens = ["<thinking>", "<conspiracy>", "<tangent>"]
93
+ print(f"🎸 Adding Creed's custom tokens: {custom_tokens}")
94
+
95
+ num_added_tokens = self.tokenizer.add_tokens(custom_tokens)
96
+ print(f"βœ… Added {num_added_tokens} custom tokens")
97
+
98
  if self.tokenizer.pad_token is None:
99
+ self.tokenizer.pad_token = self.tokenizer.eos_token
100
+
101
+ print(f"πŸ€– Loading model on {self.device}...")
102
 
103
+ # Load model with proper device handling
104
  if self.device == "cuda":
105
  self.model = AutoModelForCausalLM.from_pretrained(
106
+ model_name,
107
+ torch_dtype=torch.float16, # Use float16 for GPU efficiency
108
+ device_map="auto", # Auto device mapping for GPU
109
+ trust_remote_code=True,
110
+ low_cpu_mem_usage=True
111
  )
112
+ # Explicitly ensure model is on CUDA
113
  if self.model.device.type != "cuda":
114
+ print("πŸ”§ Explicitly moving model to CUDA...")
115
  self.model = self.model.to(self.device)
116
  else:
117
  self.model = AutoModelForCausalLM.from_pretrained(
118
+ model_name,
119
+ torch_dtype=torch.float32, # Use float32 for CPU
120
+ device_map=None,
121
+ trust_remote_code=True,
122
+ low_cpu_mem_usage=True
123
  )
124
  self.model = self.model.to("cpu")
125
 
126
+ # Resize embeddings for custom tokens
127
+ if num_added_tokens > 0:
128
+ print(f"πŸ”§ Resizing model embeddings for {num_added_tokens} custom tokens")
129
+ self.model.resize_token_embeddings(len(self.tokenizer))
130
+
131
  self.model.eval()
132
+
133
+ # Verify device placement
134
  model_device = next(self.model.parameters()).device
135
+ print(f"🎯 Model is actually on: {model_device}")
136
+
137
+ self.model_loaded = True
138
+ self.loading = False
139
+ print(f"βœ… Creed's consciousness loaded on {model_device}!")
140
+
141
+ # GPU memory info
142
+ if self.device == "cuda" and torch.cuda.is_available():
143
+ print(f"πŸ”₯ GPU Memory Used: {torch.cuda.memory_allocated() // 1024**2} MB")
144
+ print(f"πŸ“Š GPU Memory Cached: {torch.cuda.memory_reserved() // 1024**2} MB")
145
+
146
+ except Exception as e:
147
+ print(f"❌ Error loading Creed model: {e}")
148
+ print("πŸ”„ Falling back to base model...")
149
+ try:
150
+ base_model = "Qwen/Qwen2.5-0.5B-Instruct"
151
+ self.tokenizer = AutoTokenizer.from_pretrained(base_model)
152
+
153
+ # Use same pad token setup that was working
154
+ if self.tokenizer.pad_token is None:
155
+ self.tokenizer.pad_token = self.tokenizer.eos_token
156
+
157
+ if self.device == "cuda":
158
+ self.model = AutoModelForCausalLM.from_pretrained(
159
+ base_model,
160
+ torch_dtype=torch.float16,
161
+ device_map="auto"
162
+ )
163
+ if self.model.device.type != "cuda":
164
+ self.model = self.model.to(self.device)
165
+ else:
166
+ self.model = AutoModelForCausalLM.from_pretrained(
167
+ base_model,
168
+ torch_dtype=torch.float32,
169
+ device_map=None
170
+ )
171
+ self.model = self.model.to("cpu")
172
+
173
+ self.model.eval()
174
+ self.model_loaded = True
175
+ model_device = next(self.model.parameters()).device
176
+ print(f"βœ… Fallback model loaded on {model_device}")
177
+ except Exception as fallback_error:
178
+ print(f"❌ Fallback also failed: {fallback_error}")
179
+ self.loading = False
180
 
181
    # PEP 614 conditional decorator: apply `spaces.GPU` only when the Spaces
    # runtime is present; otherwise use an identity decorator. Note the
    # condition is evaluated once, at class-definition time.
    @spaces.GPU if SPACES_AVAILABLE else lambda func: func
    def generate_response_gpu(self, conversation: str) -> str:
        """Run one blocking generation pass on the already-formatted prompt.

        Args:
            conversation: full prompt text (system prompt + turns) produced
                by ``_format_conversation``.

        Returns:
            The cleaned completion text, or an error string on failure
            (this method never raises).
        """

        if not self.model_loaded:
            return "❌ Model not loaded"

        try:
            # Simple tokenization that was working before
            inputs = self.tokenizer.encode(conversation, return_tensors="pt")
            if self.device == "cuda":
                inputs = inputs.to(self.device)

            # Generate response with original settings that worked
            with torch.no_grad():
                outputs = self.model.generate(
                    inputs,
                    max_new_tokens=200,
                    do_sample=True,         # sampled, so output is nondeterministic
                    temperature=0.9,
                    top_p=0.95,
                    top_k=40,
                    repetition_penalty=1.15,
                    pad_token_id=self.tokenizer.eos_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                    use_cache=True
                )

            # Decode the full sequence, then strip the echoed prompt by
            # slicing off the decoded length of the input ids.
            # NOTE(review): this assumes decode(inputs) is a strict prefix of
            # decode(outputs); tokenizers that normalize text could break
            # that — confirm before changing tokenization settings.
            full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            response = full_response[len(self.tokenizer.decode(inputs[0], skip_special_tokens=True)):].strip()

            return self._clean_response(response)

        except Exception as e:
            return f"🎸 *Creed scratches his head* Something weird happened... {str(e)[:100]}"
217
+
218
+ def generate_response(self, message: str, history: List[List[str]]) -> Iterator[str]:
219
+ """Generate response using the trained Creed model - back to working version"""
220
 
221
+ if not self.model_loaded:
222
+ if self.loading:
223
+ yield "🧠 Creed's consciousness is still loading... give me a moment..."
224
+ return
225
+ else:
226
+ yield "❌ Something went wrong loading Creed's mind. Try refreshing the page."
227
+ return
228
 
229
+ try:
230
+ # Format the conversation
231
+ conversation = self._format_conversation(message, history)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
 
233
+ # Generate response using GPU function
234
+ response = self.generate_response_gpu(conversation)
235
+
236
+ # Stream the response word by word for effect
237
+ words = response.split()
238
+ current_response = ""
239
+
240
+ for word in words:
241
+ current_response += word + " "
242
+ time.sleep(0.05)
243
+ yield current_response.strip()
244
+
245
+ except Exception as e:
246
+ print(f"❌ Error generating response: {e}")
247
+ yield f"🎸 *Creed scratches his head* Something weird happened in my brain... {str(e)[:100]}"
248
 
249
+ def _format_conversation(self, message: str, history: List[List[str]]) -> str:
250
+ """Format the conversation for the model with proper system prompt"""
251
+
252
+ # Comprehensive Creed system prompt
253
+ system_prompt = """You are Creed Bratton from The Office. You embody his complete personality and speaking patterns.
254
 
255
  CORE IDENTITY:
256
  - Former member of The Grass Roots (1960s rock band)
 
290
  Remember: You're not trying to be helpful in a traditional sense - you're being Creed Bratton.
291
 
292
  """
293
+
294
+ # Add conversation history
295
+ conversation = system_prompt
296
+ for user_msg, creed_msg in history[-4:]: # Keep recent context
297
+ if user_msg and creed_msg: # Only add complete exchanges
298
+ conversation += f"Human: {user_msg}\n"
299
+ conversation += f"Creed: {creed_msg}\n"
300
+
301
+ # Add current message
302
+ conversation += f"Human: {message}\n"
303
+ conversation += "Creed:"
304
+
305
+ return conversation
306
 
307
+ def _clean_response(self, response: str) -> str:
308
+ """Clean up the model response and format custom tokens"""
309
+
310
+ # Remove common artifacts
311
+ response = response.replace("Human:", "").replace("Creed:", "")
312
+
313
+ # Format Creed's custom tokens for better UI display
314
+ response = response.replace("<thinking>", "πŸ€” *thinking* ")
315
+ response = response.replace("</thinking>", "")
316
+ response = response.replace("<conspiracy>", "πŸ•΅οΈ *conspiracy mode* ")
317
+ response = response.replace("</conspiracy>", "")
318
+ response = response.replace("<tangent>", "πŸŒ€ *tangent* ")
319
+ response = response.replace("</tangent>", "")
320
+
321
+ # Remove excessive whitespace
322
+ response = " ".join(response.split())
323
+
324
+ # Ensure it ends properly
325
+ if response and not response.endswith(('.', '!', '?', '...', '*')):
326
+ response += "."
327
+
328
+ return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
 
330
+ def creed_wisdom_tool(self, topic: str = "life") -> str:
331
+ """MCP tool: Get Creed's wisdom on a topic"""
332
+ if not self.model_loaded:
333
+ return "🧠 Creed's consciousness is still loading..."
334
+
335
+ prompt = f"Give me your wisdom about {topic}."
336
+
337
+ # Generate a one-shot response
338
+ final_response = ""
339
+ for response in self.generate_response(prompt, []):
340
+ final_response = response
341
+
342
+ return final_response
343
 
344
+ def cleanup_gpu_memory(self):
345
+ """Clean up GPU memory if using CUDA"""
346
+ if self.device == "cuda" and torch.cuda.is_available():
347
+ torch.cuda.empty_cache()
348
+ print(f"🧹 GPU Memory cleaned. Current: {torch.cuda.memory_allocated() // 1024**2} MB")
349
 
350
+ def creed_story_tool(self, situation: str = "mysterious") -> str:
351
+ """MCP tool: Get a Creed story"""
352
+ if not self.model_loaded:
353
+ return "🧠 Creed's consciousness is still loading..."
354
+
355
+ prompt = f"Tell me a {situation} story from your past."
356
+
357
+ # Generate a one-shot response
358
+ final_response = ""
359
+ for response in self.generate_response(prompt, []):
360
+ final_response = response
361
+
362
+ return final_response
363
 
364
def main():
    """Build and launch the Creed AI Gradio app.

    Creates the CreedBrattonAI instance, defines the glassmorphism CSS,
    wires the chat UI plus two MCP tool panels, and launches the server
    on 0.0.0.0:7860 with a public share link.
    """

    print("🎸 Initializing REAL Creed Bratton AI...")
    print("πŸ“‘ Loading Mark's trained model: phxdev/creed-qwen-0.5b-lora")

    # Initialize Creed AI (this blocks while the model downloads/loads).
    creed_ai = CreedBrattonAI()

    if SPACES_AVAILABLE:
        gpu_placeholder()
        print("βœ… Spaces GPU compatibility enabled")

    # Memory status if GPU available
    if torch.cuda.is_available() and creed_ai.model_loaded:
        print(f"🎯 Model device verification: {next(creed_ai.model.parameters()).device}")
        print(f"πŸ”₯ Final GPU Memory: {torch.cuda.memory_allocated() // 1024**2} MB allocated")
        print(f"πŸ“Š GPU Memory Reserved: {torch.cuda.memory_reserved() // 1024**2} MB reserved")

    # Modern glassmorphism CSS (injected whole into gr.Blocks).
    modern_css = """
    /* Creed AI - Modern Glassmorphism Design */
    :root {
        --primary-gradient: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        --secondary-gradient: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
        --glass-bg: rgba(255, 255, 255, 0.08);
        --glass-border: rgba(255, 255, 255, 0.18);
        --text-primary: #ffffff;
        --text-secondary: rgba(255, 255, 255, 0.8);
        --accent-purple: #8b5cf6;
        --accent-blue: #3b82f6;
        --shadow-glow: 0 8px 32px rgba(139, 92, 246, 0.3);
    }

    /* Main container with animated background */
    .gradio-container {
        min-height: 100vh !important;
        background: var(--primary-gradient) !important;
        background-attachment: fixed !important;
        font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
        color: var(--text-primary) !important;
        padding: 20px !important;
        position: relative !important;
        overflow-x: hidden !important;
    }

    .gradio-container::before {
        content: '';
        position: fixed;
        top: 0;
        left: 0;
        width: 100%;
        height: 100%;
        background:
            radial-gradient(circle at 20% 80%, rgba(139, 92, 246, 0.3) 0%, transparent 50%),
            radial-gradient(circle at 80% 20%, rgba(59, 130, 246, 0.3) 0%, transparent 50%),
            radial-gradient(circle at 40% 40%, rgba(167, 139, 250, 0.2) 0%, transparent 50%);
        pointer-events: none;
        z-index: -1;
    }

    /* Floating particles animation */
    .gradio-container::after {
        content: '';
        position: fixed;
        top: 0;
        left: 0;
        width: 100%;
        height: 100%;
        background-image:
            radial-gradient(2px 2px at 20px 30px, rgba(255, 255, 255, 0.3), transparent),
            radial-gradient(2px 2px at 40px 70px, rgba(139, 92, 246, 0.4), transparent),
            radial-gradient(1px 1px at 90px 40px, rgba(59, 130, 246, 0.3), transparent);
        background-size: 120px 120px;
        animation: float 20s ease-in-out infinite;
        pointer-events: none;
        z-index: -1;
    }

    @keyframes float {
        0%, 100% { transform: translateY(0px) rotate(0deg); }
        50% { transform: translateY(-20px) rotate(180deg); }
    }

    /* Header styling */
    .header {
        background: var(--glass-bg) !important;
        backdrop-filter: blur(20px) !important;
        border: 1px solid var(--glass-border) !important;
        border-radius: 24px !important;
        padding: 32px !important;
        margin-bottom: 24px !important;
        text-align: center !important;
        box-shadow: var(--shadow-glow) !important;
        position: relative !important;
        overflow: hidden !important;
    }

    .header::before {
        content: '';
        position: absolute;
        top: 0;
        left: 0;
        right: 0;
        height: 1px;
        background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.6), transparent);
    }

    .header h1 {
        font-size: 36px !important;
        font-weight: 700 !important;
        background: linear-gradient(135deg, #ffffff 0%, #a855f7 50%, #3b82f6 100%) !important;
        -webkit-background-clip: text !important;
        -webkit-text-fill-color: transparent !important;
        background-clip: text !important;
        margin: 0 0 12px 0 !important;
        text-shadow: 0 0 30px rgba(168, 85, 247, 0.5) !important;
    }

    .header p {
        font-size: 16px !important;
        color: var(--text-secondary) !important;
        margin: 0 !important;
        font-weight: 500 !important;
    }

    /* Info boxes with glass effect */
    .info-box {
        background: rgba(255, 255, 255, 0.06) !important;
        backdrop-filter: blur(16px) !important;
        border: 1px solid rgba(255, 255, 255, 0.12) !important;
        border-radius: 16px !important;
        padding: 20px !important;
        margin: 16px 0 !important;
        color: var(--text-secondary) !important;
        font-size: 14px !important;
        line-height: 1.6 !important;
        box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1) !important;
    }

    .status-box {
        background: rgba(16, 185, 129, 0.1) !important;
        backdrop-filter: blur(16px) !important;
        border: 1px solid rgba(16, 185, 129, 0.3) !important;
        border-radius: 16px !important;
        padding: 16px 20px !important;
        margin: 16px 0 !important;
        color: #10b981 !important;
        font-weight: 600 !important;
        box-shadow: 0 4px 20px rgba(16, 185, 129, 0.2) !important;
    }

    /* Chat area styling */
    .chat-area {
        background: var(--glass-bg) !important;
        backdrop-filter: blur(20px) !important;
        border: 1px solid var(--glass-border) !important;
        border-radius: 20px !important;
        margin: 16px 0 !important;
        overflow: hidden !important;
        box-shadow: var(--shadow-glow) !important;
    }

    /* Tools section */
    .tools-area {
        background: var(--glass-bg) !important;
        backdrop-filter: blur(20px) !important;
        border: 1px solid var(--glass-border) !important;
        border-radius: 20px !important;
        padding: 28px !important;
        margin: 24px 0 !important;
        box-shadow: var(--shadow-glow) !important;
    }

    .tools-title {
        font-size: 22px !important;
        font-weight: 600 !important;
        color: var(--text-primary) !important;
        margin: 0 0 20px 0 !important;
        padding-bottom: 12px !important;
        border-bottom: 1px solid rgba(255, 255, 255, 0.2) !important;
        background: linear-gradient(135deg, #ffffff 0%, #a855f7 100%) !important;
        -webkit-background-clip: text !important;
        -webkit-text-fill-color: transparent !important;
    }

    /* Form elements */
    .gradio-textbox input,
    .gradio-textbox textarea {
        background: rgba(255, 255, 255, 0.08) !important;
        backdrop-filter: blur(10px) !important;
        border: 1px solid rgba(255, 255, 255, 0.16) !important;
        color: var(--text-primary) !important;
        border-radius: 12px !important;
        padding: 12px 16px !important;
        transition: all 0.3s ease !important;
        font-size: 14px !important;
    }

    .gradio-textbox input:focus,
    .gradio-textbox textarea:focus {
        border-color: var(--accent-purple) !important;
        outline: none !important;
        box-shadow: 0 0 0 2px rgba(139, 92, 246, 0.3) !important;
        background: rgba(255, 255, 255, 0.12) !important;
    }

    .gradio-textbox input::placeholder,
    .gradio-textbox textarea::placeholder {
        color: rgba(255, 255, 255, 0.5) !important;
    }

    /* Labels */
    .gradio-container label {
        color: var(--text-secondary) !important;
        font-weight: 500 !important;
        font-size: 14px !important;
        margin-bottom: 6px !important;
        display: block !important;
    }

    /* Buttons */
    .gradio-container button {
        background: linear-gradient(135deg, var(--accent-purple) 0%, var(--accent-blue) 100%) !important;
        color: var(--text-primary) !important;
        border: none !important;
        border-radius: 12px !important;
        padding: 12px 24px !important;
        font-weight: 600 !important;
        cursor: pointer !important;
        transition: all 0.3s ease !important;
        box-shadow: 0 4px 15px rgba(139, 92, 246, 0.4) !important;
        backdrop-filter: blur(10px) !important;
        min-height: 44px !important;
        display: flex !important;
        align-items: center !important;
        justify-content: center !important;
    }

    .gradio-container button:hover {
        transform: translateY(-2px) !important;
        box-shadow: 0 8px 25px rgba(139, 92, 246, 0.6) !important;
        background: linear-gradient(135deg, #9333ea 0%, #2563eb 100%) !important;
    }

    .gradio-container button:active {
        transform: translateY(0px) !important;
    }

    /* Send button specific styling */
    .gradio-container .gr-button {
        background: linear-gradient(135deg, var(--accent-purple) 0%, var(--accent-blue) 100%) !important;
        border: 1px solid rgba(255, 255, 255, 0.2) !important;
        color: white !important;
        font-weight: 600 !important;
        text-transform: none !important;
        letter-spacing: 0.5px !important;
    }

    /* Chatbot specific styling */
    .gradio-chatbot {
        background: transparent !important;
        border: none !important;
    }

    /* Footer */
    .footer {
        text-align: center !important;
        padding: 28px !important;
        color: var(--text-secondary) !important;
        background: var(--glass-bg) !important;
        backdrop-filter: blur(20px) !important;
        border: 1px solid var(--glass-border) !important;
        border-radius: 20px !important;
        margin-top: 32px !important;
        box-shadow: var(--shadow-glow) !important;
    }

    /* Scrollbar styling */
    ::-webkit-scrollbar {
        width: 8px;
    }

    ::-webkit-scrollbar-track {
        background: rgba(255, 255, 255, 0.05);
        border-radius: 4px;
    }

    ::-webkit-scrollbar-thumb {
        background: linear-gradient(135deg, var(--accent-purple), var(--accent-blue));
        border-radius: 4px;
    }

    ::-webkit-scrollbar-thumb:hover {
        background: linear-gradient(135deg, #9333ea, #2563eb);
    }

    /* Responsive design */
    @media (max-width: 768px) {
        .gradio-container {
            padding: 12px !important;
        }

        .header {
            padding: 20px !important;
            border-radius: 16px !important;
        }

        .header h1 {
            font-size: 28px !important;
        }

        .tools-area,
        .chat-area {
            border-radius: 16px !important;
            padding: 20px !important;
        }
    }
    """

    # Create wrapper function for proper chat handling
    def respond(message, history):
        """Response handler for Gradio messages format"""
        # NOTE(review): `history` here is in Gradio 'messages' format (dicts),
        # but generate_response -> _format_conversation unpacks each entry as
        # a [user, assistant] pair — verify this conversion; the module-level
        # respond() wrapper does it correctly.
        for response_chunk in creed_ai.generate_response(message, history):
            # Update the history with the current response in messages format
            updated_history = history + [
                {"role": "user", "content": message},
                {"role": "assistant", "content": response_chunk}
            ]
            yield "", updated_history

    # Create the interface with modern theme
    with gr.Blocks(
        title="🎸 Creed Bratton AI",
        css=modern_css,
        theme=gr.themes.Base()  # Use base theme for better CSS control
    ) as demo:

        # Modern header (f-string: reports actual device in use)
        gr.HTML(f"""
        <div class="header">
            <h1>🎸 Creed Bratton AI</h1>
            <p>Powered by phxdev/creed-qwen-0.5b-lora β€’ Running on {'πŸš€ GPU' if creed_ai.device == 'cuda' else 'πŸ–₯️ CPU'}</p>
        </div>
        """)

        # Model info with glass styling
        gr.HTML("""
        <div class="info-box">
            <strong>Model:</strong> phxdev/creed-qwen-0.5b-lora<br>
            <strong>Base:</strong> Qwen 0.5B + LoRA fine-tuning<br>
            <strong>Tokens:</strong> &lt;thinking&gt;, &lt;conspiracy&gt;, &lt;tangent&gt;
        </div>
        """)

        # MCP status banner (env var is set at module import time)
        if os.environ.get('GRADIO_MCP_ENABLED'):
            gr.HTML("""
            <div class="status-box">
                βœ“ MCP Server Active β€’ Available as tool for Claude Desktop
            </div>
            """)

        # Main chat interface with glass styling
        with gr.Row(elem_classes="chat-area"):
            chatbot = gr.Chatbot(
                type='messages',  # Use messages format (modern)
                height=550,
                show_copy_button=True,
                show_share_button=False,
                avatar_images=["πŸ‘€", "🎸"],
                bubble_full_width=False,
                show_label=False,
                placeholder="🎸 Creed is ready...",
                container=False
            )

        # Input with explicit send button
        with gr.Row():
            with gr.Column(scale=7):
                msg = gr.Textbox(
                    placeholder="Ask Creed anything...",
                    container=False,
                    submit_btn=False,  # Disable built-in submit
                    stop_btn=False
                )
            with gr.Column(scale=1, min_width=100):
                send_btn = gr.Button("Send", variant="primary", size="lg")

        # Wire up the chat - both Enter key and Send button call respond()
        msg.submit(
            respond,
            inputs=[msg, chatbot],
            outputs=[msg, chatbot],
            show_progress="hidden"
        )

        send_btn.click(
            respond,
            inputs=[msg, chatbot],
            outputs=[msg, chatbot],
            show_progress="hidden"
        )

        # MCP Tools section with glass styling
        with gr.Row(elem_classes="tools-area"):
            gr.HTML('<div class="tools-title">πŸ› οΈ MCP Tools</div>')

            with gr.Row():
                with gr.Column():
                    wisdom_topic = gr.Textbox(
                        label="Wisdom Topic",
                        placeholder="life, business, relationships..."
                    )
                    wisdom_output = gr.Textbox(
                        label="Creed's Response",
                        interactive=False,
                        lines=3
                    )
                    wisdom_btn = gr.Button("Ask Creed", variant="primary")

                with gr.Column():
                    story_situation = gr.Textbox(
                        label="Story Request",
                        placeholder="Tell me about..."
                    )
                    story_output = gr.Textbox(
                        label="Creed's Story",
                        interactive=False,
                        lines=3
                    )
                    story_btn = gr.Button("Get Story", variant="primary")

            # Wire up the tools (bound methods on the shared creed_ai instance)
            wisdom_btn.click(
                creed_ai.creed_wisdom_tool,
                inputs=[wisdom_topic],
                outputs=[wisdom_output]
            )

            story_btn.click(
                creed_ai.creed_story_tool,
                inputs=[story_situation],
                outputs=[story_output]
            )

        # Modern footer
        gr.HTML("""
        <div class="footer">
            <strong>Creed Bratton AI</strong><br>
            Model: phxdev/creed-qwen-0.5b-lora β€’ Trained by Mark Scott<br>
            <em>"Sometimes a guy's gotta ride the bull, am I right?"</em>
        </div>
        """)

    # Launch with modern styling and public sharing
    print("πŸš€ Launching Real Creed AI with modern glassmorphism design...")

    # NOTE(review): share=True is redundant (and ignored) when running on
    # HF Spaces; it matters only for local runs.
    demo.launch(
        ssr_mode=False,
        server_name="0.0.0.0",
        server_port=7860,
        share=True,  # Create public link
        show_error=True
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
829
 
830
  if __name__ == "__main__":
831
+ main()