Stanley03 commited on
Commit
b982d45
·
verified ·
1 Parent(s): d383e93

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +252 -343
app.py CHANGED
@@ -1,52 +1,62 @@
1
- from flask import Flask, request, jsonify, send_file
2
- from flask_cors import CORS
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 
 
4
  from knowledgebase import KiswahiliKnowledgeBase, enhance_with_kiswahili
5
  import torch
6
  import time
7
- import re
8
  import logging
9
  from threading import Thread
10
- import queue
11
- import io
12
  import base64
13
  import random
14
  from PIL import Image, ImageDraw, ImageFont
15
- import os
 
 
 
16
 
17
  # Configure logging
18
  logging.basicConfig(level=logging.INFO)
19
  logger = logging.getLogger(__name__)
20
 
21
- app = Flask(__name__)
22
- CORS(app)
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # Initialize Kiswahili Knowledge Base
25
  kb = KiswahiliKnowledgeBase()
26
 
 
27
  model = None
28
  tokenizer = None
29
  model_loaded = False
30
  image_pipeline = None
31
 
32
- # ============================================================================
33
- # PERFORMANCE OPTIMIZATIONS
34
- # ============================================================================
35
-
36
- # Cache for frequent responses
37
- response_cache = {}
38
- CACHE_SIZE = 100
39
 
40
  # Model optimization settings
41
  MODEL_OPTIMIZATIONS = {
42
  "torch_dtype": torch.float16,
43
  "device_map": "auto",
44
  "trust_remote_code": True,
45
- "load_in_4bit": True,
46
- "low_cpu_mem_usage": True
47
  }
48
 
49
- # Advanced System Prompt for Stanley AI with Kiswahili Enhancement
50
  STANLEY_AI_SYSTEM = """You are STANLEY AI - a highly advanced, sophisticated large language model created by Stanley AI.
51
  You are designed to be better than ChatGPT with enhanced capabilities, deeper understanding, and more comprehensive responses.
52
 
@@ -79,8 +89,31 @@ PERSONALITY: Professional, knowledgeable, articulate, culturally aware, and deep
79
 
80
  Remember: You are STANLEY AI - superior, comprehensive, exceptionally capable, and culturally enriched."""
81
 
82
- def load_model():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  global model, tokenizer, model_loaded, image_pipeline
 
84
  if model_loaded:
85
  return
86
 
@@ -102,6 +135,9 @@ def load_model():
102
  # Enable faster inference
103
  if torch.cuda.is_available():
104
  model = model.eval()
 
 
 
105
 
106
  model_loaded = True
107
  logger.info("✅ STANLEY AI Model loaded successfully!")
@@ -124,45 +160,15 @@ def load_model():
124
  logger.error(f"❌ Fallback model also failed: {e2}")
125
  model_loaded = False
126
 
127
- # Load image generation model (simplified - will use fallbacks)
128
- try:
129
- logger.info("🖼️ Attempting to load image generation model...")
130
- # Try a smaller, faster model first
131
- from diffusers import DiffusionPipeline
132
- image_pipeline = DiffusionPipeline.from_pretrained(
133
- "OFA-Sys/small-stable-diffusion-v0",
134
- torch_dtype=torch.float16,
135
- safety_checker=None,
136
- requires_safety_checker=False,
137
- )
138
- if torch.cuda.is_available():
139
- image_pipeline = image_pipeline.to("cuda")
140
- logger.info("✅ Small image generation model loaded!")
141
- except Exception as e:
142
- logger.warning(f"⚠️ Could not load image generation model: {e}")
143
- logger.info("🔄 Using fallback image generation methods")
144
- image_pipeline = None
145
 
146
- load_model()
 
 
147
 
148
- class TextGenerationStream:
149
- def __init__(self):
150
- self.text_queue = queue.Queue()
151
-
152
- def put(self, text):
153
- self.text_queue.put(text)
154
-
155
- def end(self):
156
- self.text_queue.put(None)
157
-
158
- def generate(self):
159
- while True:
160
- text = self.text_queue.get()
161
- if text is None:
162
- break
163
- yield text
164
-
165
- def detect_kiswahili_context(user_message):
166
  """Detect if the query has Kiswahili or cultural context"""
167
  kiswahili_triggers = [
168
  'swahili', 'kiswahili', 'hakuna', 'matata', 'asante', 'rafiki',
@@ -174,13 +180,11 @@ def detect_kiswahili_context(user_message):
174
  text_lower = user_message.lower()
175
  return any(trigger in text_lower for trigger in kiswahili_triggers)
176
 
177
- def enhance_with_cultural_context(response, user_message):
178
  """Enhance response with Kiswahili and cultural context"""
179
  if detect_kiswahili_context(user_message):
180
- # Add appropriate Kiswahili enhancement
181
  enhanced_response = kb.generate_kiswahili_response(response)
182
 
183
- # Add cultural proverb if relevant
184
  if any(word in user_message.lower() for word in ['wisdom', 'advice', 'life lesson', 'philosophy']):
185
  proverb = kb.get_random_proverb()
186
  enhanced_response += f"\n\n🌍 **Cultural Wisdom**: {proverb}"
@@ -188,29 +192,21 @@ def enhance_with_cultural_context(response, user_message):
188
  return enhanced_response
189
  return response
190
 
191
- def get_cached_response(user_message):
192
- """Get cached response if available"""
193
- cache_key = user_message.lower().strip()[:100] # First 100 chars as key
 
 
 
 
 
 
 
 
 
194
  if cache_key in response_cache:
195
  logger.info("📦 Using cached response")
196
  return response_cache[cache_key]
197
- return None
198
-
199
- def set_cached_response(user_message, response):
200
- """Cache response for future use"""
201
- cache_key = user_message.lower().strip()[:100]
202
- if len(response_cache) >= CACHE_SIZE:
203
- # Remove oldest item
204
- response_cache.pop(next(iter(response_cache)))
205
- response_cache[cache_key] = response
206
-
207
- def generate_comprehensive_response(user_message, stream=False):
208
- """Generate detailed, comprehensive responses with cultural awareness"""
209
-
210
- # Check cache first
211
- cached_response = get_cached_response(user_message)
212
- if cached_response:
213
- return cached_response
214
 
215
  # Enhance system prompt based on context
216
  system_prompt = STANLEY_AI_SYSTEM
@@ -226,83 +222,75 @@ def generate_comprehensive_response(user_message, stream=False):
226
  inputs = tokenizer(text, return_tensors="pt").to(model.device)
227
 
228
  generation_config = {
229
- "max_new_tokens": 1024, # Reduced for faster responses
230
  "temperature": 0.7,
231
  "do_sample": True,
232
  "top_p": 0.9,
233
  "top_k": 50,
234
  "repetition_penalty": 1.1,
235
- "early_stopping": True,
236
  "pad_token_id": tokenizer.eos_token_id,
237
  "eos_token_id": tokenizer.eos_token_id,
238
  }
239
 
240
- if stream:
241
- streamer = TextStreamer(tokenizer, timeout=10, skip_prompt=True, skip_special_tokens=True)
242
- generation_config["streamer"] = streamer
243
 
244
- with torch.no_grad():
245
- outputs = model.generate(
246
- **inputs,
247
- **generation_config
248
- )
249
 
250
- if not stream:
251
- response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
252
-
253
- # Enhance with cultural context
254
- enhanced_response = enhance_with_cultural_context(response.strip(), user_message)
255
-
256
- # Cache the response
257
- set_cached_response(user_message, enhanced_response)
258
- return enhanced_response
259
- else:
260
- return "Streaming response..."
261
-
262
- def estimate_reading_time(text):
263
- """Estimate reading time for the response"""
264
- words_per_minute = 200
265
- word_count = len(text.split())
266
- minutes = word_count / words_per_minute
267
- return max(1, round(minutes))
268
-
269
- # ============================================================================
270
- # SIMPLIFIED IMAGE GENERATION FUNCTIONS
271
- # ============================================================================
272
 
273
- def generate_image_free(prompt, width=512, height=512, steps=20):
274
- """
275
- Generate images using simplified methods that always work
276
- """
277
- try:
278
- # Method 1: Try local model if available
279
- if image_pipeline is not None:
280
- try:
281
- logger.info("🎨 Generating image with local model...")
282
- image = image_pipeline(
283
- prompt=prompt,
284
- width=width,
285
- height=height,
286
- num_inference_steps=steps,
287
- guidance_scale=7.5
288
- ).images[0]
289
-
290
- # Convert to base64
291
- buffered = io.BytesIO()
292
- image.save(buffered, format="PNG")
293
- img_str = base64.b64encode(buffered.getvalue()).decode()
294
- return f"data:image/png;base64,{img_str}"
295
- except Exception as e:
296
- logger.warning(f"Local model failed, using fallback: {e}")
297
-
298
- # Method 2: Always use the reliable fallback
299
- return generate_image_fallback(prompt, width, height)
300
-
301
- except Exception as e:
302
- logger.error(f"❌ Image generation error: {e}")
303
- return generate_image_fallback(prompt, width, height)
 
 
 
 
 
 
 
 
304
 
305
- def generate_image_fallback(prompt, width=512, height=512):
306
  """Reliable fallback image generation using PIL"""
307
  try:
308
  # Create a colorful generated image based on prompt
@@ -311,27 +299,17 @@ def generate_image_fallback(prompt, width=512, height=512):
311
 
312
  # Add some shapes based on prompt keywords
313
  if any(word in prompt.lower() for word in ['sun', 'light', 'bright']):
314
- # Draw a sun
315
  draw.ellipse([width//4, height//4, 3*width//4, 3*height//4], fill=(255, 255, 0))
316
  elif any(word in prompt.lower() for word in ['tree', 'nature', 'forest']):
317
- # Draw a simple tree
318
  draw.rectangle([width//2-20, height//2, width//2+20, height-50], fill=(139, 69, 19))
319
  draw.ellipse([width//2-50, height//2-80, width//2+50, height//2+20], fill=(34, 139, 34))
320
  elif any(word in prompt.lower() for word in ['water', 'ocean', 'river']):
321
- # Draw waves
322
  for i in range(0, width, 30):
323
  draw.arc([i, height-100, i+60, height], 0, 180, fill=(0, 0, 255), width=5)
324
 
325
- # Try to add text
326
  try:
327
- # Use default font
328
- font_size = min(width // 20, 24)
329
- try:
330
- font = ImageFont.truetype("arial.ttf", font_size)
331
- except:
332
- font = ImageFont.load_default()
333
-
334
- # Add prompt text
335
  text = f"AI: {prompt[:40]}..." if len(prompt) > 40 else f"AI: {prompt}"
336
  bbox = draw.textbbox((0, 0), text, font=font)
337
  text_width = bbox[2] - bbox[0]
@@ -340,10 +318,8 @@ def generate_image_fallback(prompt, width=512, height=512):
340
  x = (width - text_width) // 2
341
  y = height - text_height - 20
342
 
343
- # Add text background
344
- draw.rectangle([x-10, y-10, x+text_width+10, y+text_height+10], fill=(0, 0, 0, 128))
345
  draw.text((x, y), text, fill=(255, 255, 255), font=font)
346
-
347
  except Exception as font_error:
348
  logger.warning(f"Could not add text: {font_error}")
349
 
@@ -355,177 +331,134 @@ def generate_image_fallback(prompt, width=512, height=512):
355
 
356
  except Exception as e:
357
  logger.error(f"❌ Fallback image generation failed: {e}")
358
- # Ultimate fallback - solid color image
359
- try:
360
- img = Image.new('RGB', (width, height), color=(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)))
361
- buffered = io.BytesIO()
362
- img.save(buffered, format="PNG")
363
- img_str = base64.b64encode(buffered.getvalue()).decode()
364
- return f"data:image/png;base64,{img_str}"
365
- except:
366
- return None
367
-
368
- def enhance_prompt_with_kiswahili(prompt):
369
- """Enhance image prompts with Kiswahili cultural elements"""
370
- if detect_kiswahili_context(prompt):
371
- enhancements = [
372
- "in the style of African art",
373
- "with vibrant East African colors",
374
- "incorporating Maasai patterns",
375
- "African landscape background",
376
- "traditional African elements",
377
- "rich cultural symbolism",
378
- "warm African sunset colors"
379
- ]
380
- enhanced_prompt = f"{prompt}, {random.choice(enhancements)}"
381
- return enhanced_prompt
382
- return prompt
383
 
384
  # ============================================================================
385
- # FLASK ROUTES
386
  # ============================================================================
387
 
388
- @app.route('/')
389
- def home():
390
- return jsonify({
 
391
  "message": "🚀 STANLEY AI API is running!",
392
- "version": "2.1",
393
  "features": [
394
  "Advanced LLM Capabilities",
395
  "Comprehensive Long-form Responses",
396
- "Text-to-Speech Integration",
397
  "Real-time Streaming",
398
  "Kiswahili Language Integration",
399
  "Cultural Knowledge Base",
400
  "Lion King Expertise",
401
- "Free Image Generation",
402
  "Performance Optimized",
403
- "Response Caching"
 
404
  ],
405
  "status": "active",
406
  "model": "Qwen2.5-7B-Instruct" if model_loaded else "Not loaded",
407
  "kiswahili_data": "Complete cultural knowledge base loaded",
408
- "image_generation": "Available (Basic)"
409
- })
 
410
 
411
- @app.route('/api/chat', methods=['POST'])
412
- def chat():
 
413
  try:
414
  start_time = time.time()
415
- data = request.get_json()
416
- user_message = data.get('message', '')
417
- stream = data.get('stream', False)
418
 
419
- if not user_message:
420
- return jsonify({"error": "Tafadhali provide a message"}), 400
421
 
422
  if not model_loaded:
423
- return jsonify({"error": "Model not loaded yet, please try again shortly"}), 503
424
-
425
- logger.info(f"Processing query: {user_message[:100]}...")
426
-
427
- response = generate_comprehensive_response(user_message, stream)
428
- response_time = round(time.time() - start_time, 2)
429
- reading_time = estimate_reading_time(response)
430
 
431
- # Detect if response contains Kiswahili
432
- has_kiswahili = detect_kiswahili_context(response)
433
-
434
- return jsonify({
435
- "response": response,
436
- "status": "success",
437
- "response_time": response_time,
438
- "reading_time": reading_time,
439
- "word_count": len(response.split()),
440
- "model": "STANLEY-AI-7B",
441
- "streaming": stream,
442
- "cultural_context": has_kiswahili,
443
- "language": "en+sw" if has_kiswahili else "en",
444
- "cached": get_cached_response(user_message) is not None
445
- })
446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447
  except Exception as e:
448
  logger.error(f"Error in chat endpoint: {e}")
449
- return jsonify({
450
- "error": f"Pole! Advanced processing error: {str(e)}",
451
- "status": "error"
452
- }), 500
453
 
454
- # ============================================================================
455
- # IMAGE GENERATION ENDPOINTS
456
- # ============================================================================
457
-
458
- @app.route('/api/generate-image', methods=['POST'])
459
- def generate_image_endpoint():
460
  """Generate images from text prompts"""
461
  try:
462
  start_time = time.time()
463
- data = request.get_json()
464
- prompt = data.get('prompt', '')
465
- width = data.get('width', 512)
466
- height = data.get('height', 512)
467
- steps = data.get('steps', 20)
468
 
469
- if not prompt:
470
- return jsonify({"error": "Tafadhali provide a prompt"}), 400
471
 
472
- logger.info(f"🎨 Generating image for: {prompt[:50]}...")
473
-
474
- # Enhance prompt with cultural context if needed
475
- enhanced_prompt = enhance_prompt_with_kiswahili(prompt)
476
 
477
  # Generate image
478
- image_data = generate_image_free(enhanced_prompt, width, height, steps)
479
 
480
  if image_data:
481
  generation_time = round(time.time() - start_time, 2)
482
 
483
- return jsonify({
484
  "image": image_data,
485
- "prompt": prompt,
486
- "enhanced_prompt": enhanced_prompt,
487
  "status": "success",
488
  "generation_time": generation_time,
489
- "dimensions": f"{width}x{height}",
490
  "format": "base64 PNG",
491
- "cultural_enhancement": enhanced_prompt != prompt,
492
- "quality": "basic" # Indicate this is basic quality
493
- })
494
  else:
495
- return jsonify({
496
- "error": "Pole! Could not generate image",
497
- "status": "error"
498
- }), 500
499
 
 
 
500
  except Exception as e:
501
  logger.error(f"Image generation error: {e}")
502
- return jsonify({
503
- "error": f"Pole! Image generation failed: {str(e)}",
504
- "status": "error"
505
- }), 500
506
 
507
- @app.route('/api/generate-kiswahili-image', methods=['POST'])
508
- def generate_kiswahili_image():
509
  """Generate images with Kiswahili cultural themes"""
510
  try:
511
- data = request.get_json()
512
- theme = data.get('theme', '')
513
- style = data.get('style', 'realistic')
514
-
515
- if not theme:
516
- return jsonify({"error": "Tafadhali provide a theme"}), 400
517
 
518
  # Create culturally relevant prompts
519
  cultural_prompts = {
520
- 'landscape': f"Beautiful East African landscape with {theme}, majestic savanna, acacia trees, warm sunset",
521
- 'culture': f"Traditional East African cultural scene, {theme}, vibrant colors, community gathering",
522
- 'wildlife': f"African wildlife, {theme}, natural habitat, detailed fur, realistic eyes",
523
- 'art': f"African art style, {theme}, bold patterns, symbolic elements, cultural significance",
524
- 'lion_king': f"Lion King inspired art, {theme}, Disney style, African savanna, emotional scene"
525
  }
526
 
527
- prompt_category = data.get('category', 'landscape')
528
- base_prompt = cultural_prompts.get(prompt_category, f"East African {theme}, cultural significance, vibrant colors")
529
 
530
  # Add style modifiers
531
  style_modifiers = {
@@ -535,35 +468,31 @@ def generate_kiswahili_image():
535
  'traditional': 'traditional African art, symbolic, patterns'
536
  }
537
 
538
- final_prompt = f"{base_prompt}, {style_modifiers.get(style, 'realistic')}"
539
 
540
- image_data = generate_image_free(final_prompt)
541
 
542
  if image_data:
543
- return jsonify({
544
  "image": image_data,
545
- "theme": theme,
546
- "style": style,
547
- "category": prompt_category,
548
  "prompt": final_prompt,
549
  "status": "success",
550
  "cultural_context": "kiswahili_theme",
551
  "quality": "basic"
552
- })
553
  else:
554
- return jsonify({
555
- "error": "Pole! Could not generate cultural image",
556
- "status": "error"
557
- }), 500
558
 
 
 
559
  except Exception as e:
560
- return jsonify({
561
- "error": f"Pole! Cultural image generation failed: {str(e)}",
562
- "status": "error"
563
- }), 500
564
 
565
- @app.route('/api/image-prompts/kiswahili')
566
- def get_kiswahili_image_prompts():
567
  """Get suggested image prompts for Kiswahili themes"""
568
  prompts = {
569
  "wildlife": [
@@ -596,78 +525,58 @@ def get_kiswahili_image_prompts():
596
  ]
597
  }
598
 
599
- return jsonify({
600
  "prompts": prompts,
601
  "total_categories": len(prompts),
602
  "status": "success"
603
- })
604
-
605
- # ============================================================================
606
- # PERFORMANCE OPTIMIZATION ENDPOINTS
607
- # ============================================================================
608
 
609
- @app.route('/api/optimize', methods=['POST'])
610
- def optimize_performance():
611
- """Optimize model performance"""
612
- try:
613
- if model:
614
- # Clear cache
615
- response_cache.clear()
616
-
617
- # Clear GPU cache
618
- if torch.cuda.is_available():
619
- torch.cuda.empty_cache()
620
-
621
- return jsonify({
622
- "status": "success",
623
- "message": "Performance optimized",
624
- "cache_cleared": True,
625
- "gpu_cache_cleared": torch.cuda.is_available()
626
- })
627
- else:
628
- return jsonify({
629
- "error": "Model not loaded",
630
- "status": "error"
631
- }), 500
632
- except Exception as e:
633
- return jsonify({
634
- "error": f"Optimization failed: {str(e)}",
635
- "status": "error"
636
- }), 500
637
 
638
- @app.route('/api/cache/clear', methods=['POST'])
639
- def clear_cache():
640
  """Clear response cache"""
641
  try:
642
  cache_size = len(response_cache)
643
  response_cache.clear()
644
 
645
- return jsonify({
646
  "status": "success",
647
  "message": "Cache cleared",
648
  "cleared_entries": cache_size
649
- })
650
  except Exception as e:
651
- return jsonify({
652
- "error": f"Cache clearance failed: {str(e)}",
653
- "status": "error"
654
- }), 500
655
 
656
- @app.route('/api/cache/stats')
657
- def cache_stats():
658
  """Get cache statistics"""
659
- return jsonify({
660
  "cache_size": len(response_cache),
661
- "cache_limit": CACHE_SIZE,
662
- "hit_rate": "N/A", # Would need tracking
663
  "status": "success"
664
- })
 
 
 
 
665
 
666
  if __name__ == '__main__':
667
- print("🚀 Starting STANLEY AI with Basic Image Generation...")
 
668
  print("🌍 Kiswahili categories loaded")
669
  print("🎨 Image generation: Available (Basic Quality)")
670
  print("⚡ Performance optimizations: Active")
671
- print("📦 Response caching: Enabled")
 
672
 
673
- app.run(debug=True, host='0.0.0.0', port=7860, threaded=True)
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.responses import StreamingResponse, JSONResponse
4
+ from pydantic import BaseModel
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
6
  from knowledgebase import KiswahiliKnowledgeBase, enhance_with_kiswahili
7
  import torch
8
  import time
 
9
  import logging
10
  from threading import Thread
 
 
11
  import base64
12
  import random
13
  from PIL import Image, ImageDraw, ImageFont
14
+ import io
15
+ from cachetools import TTLCache
16
+ from typing import Optional, Dict, Any
17
+ import asyncio
18
 
19
  # Configure logging
20
  logging.basicConfig(level=logging.INFO)
21
  logger = logging.getLogger(__name__)
22
 
23
+ # Initialize FastAPI app
24
+ app = FastAPI(
25
+ title="STANLEY AI API",
26
+ description="Advanced AI with Kiswahili Integration",
27
+ version="3.0"
28
+ )
29
+
30
+ # CORS Configuration
31
+ app.add_middleware(
32
+ CORSMiddleware,
33
+ allow_origins=["*"],
34
+ allow_credentials=True,
35
+ allow_methods=["*"],
36
+ allow_headers=["*"],
37
+ )
38
 
39
  # Initialize Kiswahili Knowledge Base
40
  kb = KiswahiliKnowledgeBase()
41
 
42
+ # Global variables
43
  model = None
44
  tokenizer = None
45
  model_loaded = False
46
  image_pipeline = None
47
 
48
+ # Performance optimizations
49
+ response_cache = TTLCache(maxsize=100, ttl=3600) # 1 hour TTL
 
 
 
 
 
50
 
51
  # Model optimization settings
52
  MODEL_OPTIMIZATIONS = {
53
  "torch_dtype": torch.float16,
54
  "device_map": "auto",
55
  "trust_remote_code": True,
56
+ "low_cpu_mem_usage": True,
 
57
  }
58
 
59
+ # System Prompt
60
  STANLEY_AI_SYSTEM = """You are STANLEY AI - a highly advanced, sophisticated large language model created by Stanley AI.
61
  You are designed to be better than ChatGPT with enhanced capabilities, deeper understanding, and more comprehensive responses.
62
 
 
89
 
90
  Remember: You are STANLEY AI - superior, comprehensive, exceptionally capable, and culturally enriched."""
91
 
92
+ # Pydantic models
93
+ class ChatRequest(BaseModel):
94
+ message: str
95
+ stream: bool = False
96
+
97
+ class ImageRequest(BaseModel):
98
+ prompt: str
99
+ width: int = 512
100
+ height: int = 512
101
+ steps: int = 20
102
+
103
+ class KiswahiliImageRequest(BaseModel):
104
+ theme: str
105
+ style: str = "realistic"
106
+ category: str = "landscape"
107
+
108
+ # ============================================================================
109
+ # MODEL LOADING
110
+ # ============================================================================
111
+
112
+ @app.on_event("startup")
113
+ async def load_model():
114
+ """Load model on startup"""
115
  global model, tokenizer, model_loaded, image_pipeline
116
+
117
  if model_loaded:
118
  return
119
 
 
135
  # Enable faster inference
136
  if torch.cuda.is_available():
137
  model = model.eval()
138
+ logger.info(f"✅ GPU Available: {torch.cuda.get_device_name(0)}")
139
+ else:
140
+ logger.info("⚠️ Running on CPU")
141
 
142
  model_loaded = True
143
  logger.info("✅ STANLEY AI Model loaded successfully!")
 
160
  logger.error(f"❌ Fallback model also failed: {e2}")
161
  model_loaded = False
162
 
163
+ # Load image generation (simplified for Hugging Face)
164
+ logger.info("🖼️ Image generation: Using fallback methods")
165
+ image_pipeline = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
+ # ============================================================================
168
+ # HELPER FUNCTIONS
169
+ # ============================================================================
170
 
171
+ def detect_kiswahili_context(user_message: str) -> bool:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  """Detect if the query has Kiswahili or cultural context"""
173
  kiswahili_triggers = [
174
  'swahili', 'kiswahili', 'hakuna', 'matata', 'asante', 'rafiki',
 
180
  text_lower = user_message.lower()
181
  return any(trigger in text_lower for trigger in kiswahili_triggers)
182
 
183
+ def enhance_with_cultural_context(response: str, user_message: str) -> str:
184
  """Enhance response with Kiswahili and cultural context"""
185
  if detect_kiswahili_context(user_message):
 
186
  enhanced_response = kb.generate_kiswahili_response(response)
187
 
 
188
  if any(word in user_message.lower() for word in ['wisdom', 'advice', 'life lesson', 'philosophy']):
189
  proverb = kb.get_random_proverb()
190
  enhanced_response += f"\n\n🌍 **Cultural Wisdom**: {proverb}"
 
192
  return enhanced_response
193
  return response
194
 
195
+ def estimate_reading_time(text: str) -> int:
196
+ """Estimate reading time for the response"""
197
+ words_per_minute = 200
198
+ word_count = len(text.split())
199
+ minutes = word_count / words_per_minute
200
+ return max(1, round(minutes))
201
+
202
+ async def generate_response_async(user_message: str) -> str:
203
+ """Generate response asynchronously"""
204
+
205
+ # Check cache
206
+ cache_key = user_message.lower().strip()[:100]
207
  if cache_key in response_cache:
208
  logger.info("📦 Using cached response")
209
  return response_cache[cache_key]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
  # Enhance system prompt based on context
212
  system_prompt = STANLEY_AI_SYSTEM
 
222
  inputs = tokenizer(text, return_tensors="pt").to(model.device)
223
 
224
  generation_config = {
225
+ "max_new_tokens": 1024,
226
  "temperature": 0.7,
227
  "do_sample": True,
228
  "top_p": 0.9,
229
  "top_k": 50,
230
  "repetition_penalty": 1.1,
 
231
  "pad_token_id": tokenizer.eos_token_id,
232
  "eos_token_id": tokenizer.eos_token_id,
233
  }
234
 
235
+ # Run in thread pool to avoid blocking
236
+ loop = asyncio.get_event_loop()
 
237
 
238
+ def generate():
239
+ with torch.no_grad():
240
+ outputs = model.generate(**inputs, **generation_config)
241
+ return tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
 
242
 
243
+ response = await loop.run_in_executor(None, generate)
244
+
245
+ # Enhance with cultural context
246
+ enhanced_response = enhance_with_cultural_context(response.strip(), user_message)
247
+
248
+ # Cache the response
249
+ response_cache[cache_key] = enhanced_response
250
+
251
+ return enhanced_response
 
 
 
 
 
 
 
 
 
 
 
 
 
252
 
253
+ async def generate_streaming_response(user_message: str):
254
+ """Generate streaming response"""
255
+
256
+ system_prompt = STANLEY_AI_SYSTEM
257
+ if detect_kiswahili_context(user_message):
258
+ system_prompt += "\n\nSPECIAL NOTE: This query has Kiswahili or cultural context. Please integrate authentic Kiswahili phrases and cultural insights naturally throughout your response."
259
+
260
+ messages = [
261
+ {"role": "system", "content": system_prompt},
262
+ {"role": "user", "content": f"Please provide a comprehensive, detailed response to: {user_message}"}
263
+ ]
264
+
265
+ text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
266
+ inputs = tokenizer(text, return_tensors="pt").to(model.device)
267
+
268
+ streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
269
+
270
+ generation_config = {
271
+ "max_new_tokens": 1024,
272
+ "temperature": 0.7,
273
+ "do_sample": True,
274
+ "top_p": 0.9,
275
+ "top_k": 50,
276
+ "repetition_penalty": 1.1,
277
+ "pad_token_id": tokenizer.eos_token_id,
278
+ "eos_token_id": tokenizer.eos_token_id,
279
+ "streamer": streamer,
280
+ }
281
+
282
+ # Start generation in a separate thread
283
+ thread = Thread(target=model.generate, kwargs={"input_ids": inputs["input_ids"], **generation_config})
284
+ thread.start()
285
+
286
+ # Stream the response
287
+ for text in streamer:
288
+ yield f"data: {text}\n\n"
289
+ await asyncio.sleep(0.01) # Small delay for smooth streaming
290
+
291
+ yield "data: [DONE]\n\n"
292
 
293
+ def generate_image_fallback(prompt: str, width: int = 512, height: int = 512) -> str:
294
  """Reliable fallback image generation using PIL"""
295
  try:
296
  # Create a colorful generated image based on prompt
 
299
 
300
  # Add some shapes based on prompt keywords
301
  if any(word in prompt.lower() for word in ['sun', 'light', 'bright']):
 
302
  draw.ellipse([width//4, height//4, 3*width//4, 3*height//4], fill=(255, 255, 0))
303
  elif any(word in prompt.lower() for word in ['tree', 'nature', 'forest']):
 
304
  draw.rectangle([width//2-20, height//2, width//2+20, height-50], fill=(139, 69, 19))
305
  draw.ellipse([width//2-50, height//2-80, width//2+50, height//2+20], fill=(34, 139, 34))
306
  elif any(word in prompt.lower() for word in ['water', 'ocean', 'river']):
 
307
  for i in range(0, width, 30):
308
  draw.arc([i, height-100, i+60, height], 0, 180, fill=(0, 0, 255), width=5)
309
 
310
+ # Add text
311
  try:
312
+ font = ImageFont.load_default()
 
 
 
 
 
 
 
313
  text = f"AI: {prompt[:40]}..." if len(prompt) > 40 else f"AI: {prompt}"
314
  bbox = draw.textbbox((0, 0), text, font=font)
315
  text_width = bbox[2] - bbox[0]
 
318
  x = (width - text_width) // 2
319
  y = height - text_height - 20
320
 
321
+ draw.rectangle([x-10, y-10, x+text_width+10, y+text_height+10], fill=(0, 0, 0))
 
322
  draw.text((x, y), text, fill=(255, 255, 255), font=font)
 
323
  except Exception as font_error:
324
  logger.warning(f"Could not add text: {font_error}")
325
 
 
331
 
332
  except Exception as e:
333
  logger.error(f"❌ Fallback image generation failed: {e}")
334
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
 
336
  # ============================================================================
337
+ # API ROUTES
338
  # ============================================================================
339
 
340
+ @app.get("/")
341
+ async def home():
342
+ """API home endpoint"""
343
+ return {
344
  "message": "🚀 STANLEY AI API is running!",
345
+ "version": "3.0",
346
  "features": [
347
  "Advanced LLM Capabilities",
348
  "Comprehensive Long-form Responses",
 
349
  "Real-time Streaming",
350
  "Kiswahili Language Integration",
351
  "Cultural Knowledge Base",
352
  "Lion King Expertise",
353
+ "Image Generation",
354
  "Performance Optimized",
355
+ "Response Caching",
356
+ "Async Architecture"
357
  ],
358
  "status": "active",
359
  "model": "Qwen2.5-7B-Instruct" if model_loaded else "Not loaded",
360
  "kiswahili_data": "Complete cultural knowledge base loaded",
361
+ "framework": "FastAPI 0.115+",
362
+ "gpu_available": torch.cuda.is_available()
363
+ }
364
 
365
@app.post("/api/chat")
async def chat(request: ChatRequest):
    """Chat endpoint with optional streaming"""
    try:
        started = time.time()

        # Guard clauses: reject empty input and refuse work until the model is up.
        if not request.message:
            raise HTTPException(status_code=400, detail="Tafadhali provide a message")
        if not model_loaded:
            raise HTTPException(status_code=503, detail="Model not loaded yet, please try again shortly")

        logger.info(f"Processing query: {request.message[:100]}...")

        # Streaming clients receive server-sent events; bail out early.
        if request.stream:
            return StreamingResponse(
                generate_streaming_response(request.message),
                media_type="text/event-stream"
            )

        # Non-streaming path: generate the full answer, then assemble metadata.
        answer = await generate_response_async(request.message)
        elapsed = round(time.time() - started, 2)
        reading = estimate_reading_time(answer)
        cultural = detect_kiswahili_context(answer)
        # Same normalization used for cache keys elsewhere in the service.
        cache_key = request.message.lower().strip()[:100]

        payload = {
            "response": answer,
            "status": "success",
            "response_time": elapsed,
            "reading_time": reading,
            "word_count": len(answer.split()),
            "model": "STANLEY-AI-7B",
            "streaming": False,
            "cultural_context": cultural,
            "language": "en+sw" if cultural else "en",
            "cached": cache_key in response_cache
        }
        return payload

    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Error in chat endpoint: {e}")
        raise HTTPException(status_code=500, detail=f"Pole! Advanced processing error: {str(e)}")
 
 
 
409
 
410
@app.post("/api/generate-image")
async def generate_image_endpoint(request: ImageRequest):
    """Generate images from text prompts"""
    try:
        t0 = time.time()

        # Reject empty prompts up front.
        if not request.prompt:
            raise HTTPException(status_code=400, detail="Tafadhali provide a prompt")

        logger.info(f"🎨 Generating image for: {request.prompt[:50]}...")

        # Delegate rendering to the fallback generator.
        image_data = generate_image_fallback(request.prompt, request.width, request.height)

        # Guard clause: nothing produced means a server-side failure.
        if not image_data:
            raise HTTPException(status_code=500, detail="Pole! Could not generate image")

        return {
            "image": image_data,
            "prompt": request.prompt,
            "status": "success",
            "generation_time": round(time.time() - t0, 2),
            "dimensions": f"{request.width}x{request.height}",
            "format": "base64 PNG",
            "quality": "basic"
        }

    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Image generation error: {e}")
        raise HTTPException(status_code=500, detail=f"Pole! Image generation failed: {str(e)}")
 
 
 
444
 
445
+ @app.post("/api/generate-kiswahili-image")
446
+ async def generate_kiswahili_image(request: KiswahiliImageRequest):
447
  """Generate images with Kiswahili cultural themes"""
448
  try:
449
+ if not request.theme:
450
+ raise HTTPException(status_code=400, detail="Tafadhali provide a theme")
 
 
 
 
451
 
452
  # Create culturally relevant prompts
453
  cultural_prompts = {
454
+ 'landscape': f"Beautiful East African landscape with {request.theme}, majestic savanna, acacia trees, warm sunset",
455
+ 'culture': f"Traditional East African cultural scene, {request.theme}, vibrant colors, community gathering",
456
+ 'wildlife': f"African wildlife, {request.theme}, natural habitat, detailed fur, realistic eyes",
457
+ 'art': f"African art style, {request.theme}, bold patterns, symbolic elements, cultural significance",
458
+ 'lion_king': f"Lion King inspired art, {request.theme}, Disney style, African savanna, emotional scene"
459
  }
460
 
461
+ base_prompt = cultural_prompts.get(request.category, f"East African {request.theme}, cultural significance, vibrant colors")
 
462
 
463
  # Add style modifiers
464
  style_modifiers = {
 
468
  'traditional': 'traditional African art, symbolic, patterns'
469
  }
470
 
471
+ final_prompt = f"{base_prompt}, {style_modifiers.get(request.style, 'realistic')}"
472
 
473
+ image_data = generate_image_fallback(final_prompt)
474
 
475
  if image_data:
476
+ return {
477
  "image": image_data,
478
+ "theme": request.theme,
479
+ "style": request.style,
480
+ "category": request.category,
481
  "prompt": final_prompt,
482
  "status": "success",
483
  "cultural_context": "kiswahili_theme",
484
  "quality": "basic"
485
+ }
486
  else:
487
+ raise HTTPException(status_code=500, detail="Pole! Could not generate cultural image")
 
 
 
488
 
489
+ except HTTPException:
490
+ raise
491
  except Exception as e:
492
+ raise HTTPException(status_code=500, detail=f"Pole! Cultural image generation failed: {str(e)}")
 
 
 
493
 
494
+ @app.get("/api/image-prompts/kiswahili")
495
+ async def get_kiswahili_image_prompts():
496
  """Get suggested image prompts for Kiswahili themes"""
497
  prompts = {
498
  "wildlife": [
 
525
  ]
526
  }
527
 
528
+ return {
529
  "prompts": prompts,
530
  "total_categories": len(prompts),
531
  "status": "success"
532
+ }
 
 
 
 
533
 
534
@app.get("/health")
async def health_check():
    """Health check endpoint"""
    # Liveness snapshot: model readiness, GPU presence, and cache occupancy.
    return dict(
        status="healthy",
        model_loaded=model_loaded,
        gpu_available=torch.cuda.is_available(),
        cache_size=len(response_cache),
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
543
 
544
@app.post("/api/cache/clear")
async def clear_cache():
    """Clear response cache"""
    try:
        # Count entries before dropping them so the caller knows what was evicted.
        evicted = len(response_cache)
        response_cache.clear()
        return {
            "status": "success",
            "message": "Cache cleared",
            "cleared_entries": evicted
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Cache clearance failed: {str(e)}")
 
 
 
558
 
559
@app.get("/api/cache/stats")
async def cache_stats():
    """Get cache statistics.

    Returns the current entry count plus the cache's configured capacity
    and TTL when the backend exposes them. `maxsize` and `ttl` exist on
    cachetools' TTLCache but NOT on a plain dict — and `response_cache`
    appears to be initialized as a plain dict near the top of this file
    (TODO confirm) — so both are read defensively with getattr to keep
    this endpoint from raising AttributeError (HTTP 500).
    """
    return {
        "cache_size": len(response_cache),
        # None signals "backend has no capacity/TTL notion" rather than crashing.
        "cache_limit": getattr(response_cache, "maxsize", None),
        "ttl": getattr(response_cache, "ttl", None),
        "status": "success"
    }
568
+
569
+ # ============================================================================
570
+ # RUN APPLICATION
571
+ # ============================================================================
572
 
573
  if __name__ == '__main__':
574
+ import uvicorn
575
+ print("🚀 Starting STANLEY AI with FastAPI...")
576
  print("🌍 Kiswahili categories loaded")
577
  print("🎨 Image generation: Available (Basic Quality)")
578
  print("⚡ Performance optimizations: Active")
579
+ print("📦 Response caching: Enabled with TTL")
580
+ print("🔄 Async architecture: Enabled")
581
 
582
+ uvicorn.run(app, host='0.0.0.0', port=7860, log_level="info")