# -*- coding: utf-8 -*-
"""
STANLEY AI - Optimized Flask Backend
Deploy on Hugging Face Spaces with fast, smaller models
"""

from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch
import time
import re
import logging
from threading import Thread
import queue
import io
import base64
import random
from PIL import Image, ImageDraw, ImageFont
import os
import gc

# Configure logging
# Root logging is set to INFO so the model-load and request progress messages
# emitted through `logger` below are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Flask application object; CORS(app) is applied with no extra options,
# i.e. with the flask-cors defaults.
app = Flask(__name__)
CORS(app)

# ============================================================================
# MODEL CONFIGURATION - OPTIMIZED FOR SPEED
# ============================================================================
# Hugging Face model ids, keyed by the names accepted by /api/switch-model.
# NOTE: Qwen2.5 instruct checkpoints are published as 0.5B / 1.5B / 3B / ...;
# the former "Qwen/Qwen2.5-1.8B-Instruct" id does not exist, so the primary
# load always failed and the service silently ran on the fallback model.
MODEL_CONFIG = {
    "primary": "Qwen/Qwen2.5-1.5B-Instruct",  # Fast, multilingual, good balance
    "fallback": "microsoft/Phi-3-mini-4k-instruct",  # Used when the primary cannot be loaded
    "tiny": "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # For minimal memory usage
}

# Model state shared by the loader and the request handlers.
# load_model_optimized() rebinds all four names through its `global` statement.
model = None
tokenizer = None
model_loaded = False
current_model_name = None

# Performance cache
# In-memory map from a normalized user message to the response generated for
# it; set_cached_response() evicts an entry once CACHE_SIZE entries exist.
response_cache = {}
CACHE_SIZE = 200

# System Prompt (optimized for speed)
# Sent as the "system" message on every generate_response() call.
STANLEY_AI_SYSTEM = """You are STANLEY AI - an advanced assistant with Kiswahili cultural knowledge.
Provide helpful, concise responses. Integrate Kiswahili phrases naturally when relevant.

Key capabilities:
- Answer questions knowledgeably
- Use Kiswahili for greetings, proverbs, and cultural references
- Explain concepts clearly
- Be efficient and to the point

Format: Use **bold** for emphasis. Keep responses under 300 words unless detailed explanation is needed."""

# Simple Kiswahili knowledge base (replaces external file)
# Read by enhance_with_kiswahili(): a random value from "greetings" is
# prepended to a response and a random entry of "proverbs" may be appended.
KISWAHILI_KNOWLEDGE = {
    # English concept -> Kiswahili phrase(s); alternatives are separated by " / ".
    "greetings": {
        "hello": "Jambo / Habari",
        "how_are_you": "Habari yako?",
        "goodbye": "Kwaheri / Tuonane tena",
        "thank_you": "Asante sana",
        "welcome": "Karibu / Karibuni"
    },
    # Each entry is "<Kiswahili proverb> - <English rendering>".
    "proverbs": [
        "Mwenye pupa hadiriki kula tamu - The impatient one misses sweet things.",
        "Asiyefunzwa na mamae hufunzwa na ulimwengu - He who is not taught by his mother is taught by the world.",
        "Haraka haraka haina baraka - Hurry hurry has no blessing.",
        "Ukitaka kwenda haraka, nenda peke yako. Ukitaka kwenda mbali, nenda na wenzako - If you want to go fast, go alone. If you want to go far, go together."
    ],
    # Glossary of Lion King names; not read by any code visible in this file.
    "lion_king": {
        "simba": "Lion (the main character)",
        "rafiki": "Friend (the wise baboon)",
        "hakuna_matata": "No worries / No problems",
        "mufasa": "Simba's father, the king",
        "nala": "Simba's childhood friend and queen"
    }
}

import threading

# Serializes model loads: the startup background thread and request threads
# (generate_response, /api/switch-model) can all call load_model_optimized().
# Re-entrant because the fallback path calls the function recursively.
_MODEL_LOAD_LOCK = threading.RLock()


def load_model_optimized(model_name=None):
    """Load a causal LM and its tokenizer into the module-level globals.

    Args:
        model_name: Hugging Face model id to load. When omitted, any model
            that is already loaded is kept; otherwise MODEL_CONFIG["primary"]
            is loaded.

    Returns:
        True when a usable model is loaded on return (the requested one, or
        MODEL_CONFIG["fallback"] after the requested one failed); False when
        the fallback failed as well.
    """
    global model, tokenizer, model_loaded, current_model_name

    with _MODEL_LOAD_LOCK:
        if not model_name:
            # No specific model requested: whatever is loaded is good enough.
            # This is what a caller sees after waiting for a concurrent load.
            if model_loaded:
                return True
            model_name = MODEL_CONFIG["primary"]
        elif model_loaded and model_name == current_model_name:
            # Already active. Must be True (not None) so callers that test
            # the result do not report a failure.
            return True

        logger.info(f"πŸš€ Loading model: {model_name}")

        try:
            # Nothing is usable while the swap is in progress.
            model_loaded = False

            # Clear previous model from memory. The globals are rebound to
            # None rather than `del`-eted: deleting a global removes the name
            # itself, so a failed reload would leave `model` undefined.
            if model is not None:
                model = None
                tokenizer = None
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

            # Load tokenizer
            tokenizer = AutoTokenizer.from_pretrained(
                model_name,
                trust_remote_code=True,
                use_fast=True  # Fast tokenizer for speed
            )

            # generate() needs a pad token; reuse EOS when none is defined.
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            # Load model with 4-bit quantization for speed and memory efficiency
            # NOTE(review): load_in_4bit relies on the bitsandbytes package,
            # which typically needs a CUDA GPU - confirm the deployment target.
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float16,
                device_map="auto",
                load_in_4bit=True,  # 4-bit quantization for speed
                low_cpu_mem_usage=True,
                trust_remote_code=True
            )

            model.eval()  # Set to evaluation mode
            current_model_name = model_name

            # Pre-warm model with a simple prompt
            prewarm_model()

            # Flip the flag last so readers never see a half-initialized model.
            model_loaded = True

            logger.info(f"βœ… Model loaded successfully: {model_name}")
            logger.info(f"πŸ“Š Model device: {model.device}")

            return True

        except Exception as e:
            logger.error(f"❌ Error loading model: {e}")

            # Leave the globals in a consistent "nothing loaded" state.
            model = None
            tokenizer = None
            model_loaded = False
            current_model_name = None
            gc.collect()

            # Try fallback
            if model_name != MODEL_CONFIG["fallback"]:
                logger.info("πŸ”„ Trying fallback model...")
                return load_model_optimized(MODEL_CONFIG["fallback"])

            logger.error("❌ All models failed to load")
            return False

def prewarm_model():
    """Warm up the loaded model by generating a few tokens for a fixed prompt.

    Uses the module-level ``tokenizer`` and ``model``. Any exception is logged
    as a warning and swallowed, so a failed warm-up never aborts loading.
    """
    try:
        warmup_chat = [
            {"role": "system", "content": "Say hello briefly."},
            {"role": "user", "content": "Hello, STANLEY AI!"},
        ]

        prompt = tokenizer.apply_chat_template(
            warmup_chat,
            tokenize=False,
            add_generation_prompt=True,
        )
        encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

        # Greedy, 10 new tokens at most; the output itself is discarded.
        with torch.no_grad():
            model.generate(**encoded, max_new_tokens=10, do_sample=False)

        logger.info("βœ… Model pre-warmed successfully!")
    except Exception as warmup_error:
        logger.warning(f"Pre-warm failed: {warmup_error}")

# Lower-case keywords that mark a text as Kiswahili / East-Africa themed.
_KISWAHILI_KEYWORDS = (
    'swahili', 'kiswahili', 'hakuna', 'matata', 'asante', 'rafiki',
    'jambo', 'mambo', 'pole', 'sawa', 'karibu', 'kwaheri', 'simba',
    'lion king', 'mufasa', 'nala', 'kenya', 'tanzania', 'africa',
    'habari', 'nze', 'pumbaa', 'timon', 'safari', 'ujamaa'
)

# Keywords must begin at a word boundary. Plain substring search flagged
# unrelated words ("bronze" contains "nze", "tadpole" contains "pole");
# anchoring only the start still accepts inflections such as "African".
_KISWAHILI_PATTERN = re.compile(
    r"\b(?:" + "|".join(re.escape(keyword) for keyword in _KISWAHILI_KEYWORDS) + r")"
)


def detect_kiswahili_context(text):
    """Report whether *text* mentions a Kiswahili or cultural keyword.

    Args:
        text: The string to inspect; ``None`` and ``""`` are accepted.

    Returns:
        True when any keyword occurs at the start of a word in *text*
        (case-insensitive), otherwise False.
    """
    if not text:
        return False

    return _KISWAHILI_PATTERN.search(text.lower()) is not None

def enhance_with_kiswahili(response, user_message):
    """Decorate *response* with Kiswahili flavour when the user's message calls for it.

    When detect_kiswahili_context(user_message) is false the response is
    returned untouched. Otherwise a random greeting is prepended, a random
    proverb is appended if the message asks for advice/wisdom, and a Lion King
    fact is appended if the message mentions it.
    """
    if not detect_kiswahili_context(user_message):
        return response

    lowered = user_message.lower()

    # Greeting is always drawn first, before any proverb.
    opener = random.choice(list(KISWAHILI_KNOWLEDGE["greetings"].values()))
    result = f"{opener}! {response}"

    wants_wisdom = any(
        term in lowered for term in ['advice', 'wisdom', 'lesson', 'teach']
    )
    if wants_wisdom:
        saying = random.choice(KISWAHILI_KNOWLEDGE["proverbs"])
        result += f"\n\n**πŸ”₯ Kiswahili Proverb:** {saying}"

    mentions_lion_king = any(
        term in lowered for term in ['lion', 'simba', 'mufasa', 'disney']
    )
    if mentions_lion_king:
        result += "\n\nDid you know? 'Simba' means lion in Kiswahili, and 'Rafiki' means friend!"

    return result

def _cache_key(user_message):
    """Normalize a message into its cache key (lower-cased, trimmed).

    The whole message is used. Truncating the key made two long prompts that
    share a prefix collide, so one could be answered with the other's reply.
    """
    return user_message.lower().strip()


def get_cached_response(user_message):
    """Return the cached response for *user_message*, or None when absent."""
    return response_cache.get(_cache_key(user_message))


def set_cached_response(user_message, response):
    """Store *response* for *user_message*, keeping at most CACHE_SIZE entries.

    When the cache is full and the key is new, the oldest entry is evicted.
    Overwriting an existing key never evicts anything.
    """
    cache_key = _cache_key(user_message)
    is_new_key = cache_key not in response_cache
    if is_new_key and response_cache and len(response_cache) >= CACHE_SIZE:
        # dicts preserve insertion order, so the first key is the oldest.
        oldest_key = next(iter(response_cache))
        del response_cache[oldest_key]
    response_cache[cache_key] = response

def generate_response(user_message, max_tokens=512):
    """Produce the assistant's reply to *user_message*.

    A cached reply is returned when available. Otherwise the model is loaded
    on demand, the message is sent with the STANLEY_AI_SYSTEM prompt, and the
    sampled completion is passed through enhance_with_kiswahili() and cached.

    Args:
        user_message: The user's text.
        max_tokens: Upper bound on newly generated tokens.

    Returns:
        The reply text. On failure a user-facing apology string is returned
        instead of raising.
    """
    # Check cache
    cached = get_cached_response(user_message)
    if cached:
        logger.info("πŸ“¦ Using cached response")
        return cached

    # Ensure model is loaded
    if not model_loaded:
        success = load_model_optimized()
        if not success:
            return "I'm still initializing. Please try again in a moment."

    # Prepare messages
    messages = [
        {"role": "system", "content": STANLEY_AI_SYSTEM},
        {"role": "user", "content": user_message}
    ]

    try:
        # Apply chat template
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        # Tokenize
        inputs = tokenizer(text, return_tensors="pt").to(model.device)

        # Generate with optimized settings. `early_stopping` is intentionally
        # not passed: it only applies to beam search, and with plain sampling
        # it merely triggers a configuration warning.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=0.7,
                top_p=0.9,
                top_k=40,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1,
                no_repeat_ngram_size=3
            )

        # Decode only the newly generated tokens (skip the echoed prompt).
        prompt_length = inputs['input_ids'].shape[1]
        response = tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True
        ).strip()

        # Enhance with Kiswahili
        enhanced_response = enhance_with_kiswahili(response, user_message)

        # Cache
        set_cached_response(user_message, enhanced_response)

        return enhanced_response

    except Exception as e:
        # exc_info keeps the traceback in the logs; the client only sees a
        # truncated message.
        logger.error(f"Generation error: {e}", exc_info=True)
        return f"Pole! I encountered an error: {str(e)[:100]}"

def _png_data_uri(img, **save_options):
    """Serialize a PIL image into an inline ``data:image/png;base64,...`` URI."""
    buffered = io.BytesIO()
    img.save(buffered, format="PNG", **save_options)
    encoded = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/png;base64,{encoded}"


def generate_image_simple(prompt, width=512, height=512):
    """Simple image generation using PIL (no external dependencies).

    Draws a vertical colour gradient, adds a sun / tree / waves when the
    prompt contains matching keywords, and stamps the (truncated) prompt at
    the top of the image.

    Args:
        prompt: Text describing the image; scanned for keywords.
        width: Image width in pixels; coerced to int and raised to at least 1.
        height: Image height in pixels; coerced to int and raised to at least 1.

    Returns:
        A ``data:image/png;base64,...`` string. If drawing fails, a solid
        random-colour image of the same size is returned instead.

    Raises:
        TypeError, ValueError: If width or height cannot be converted to int.
    """
    # Non-positive sizes made both the main path and the fallback fail.
    width = max(1, int(width))
    height = max(1, int(height))

    try:
        # Create base image with gradient
        img = Image.new('RGB', (width, height), color='white')
        draw = ImageDraw.Draw(img)

        # One horizontal line per row, colour interpolated top to bottom.
        for row in range(height):
            r = int(100 + 155 * row / height)
            g = int(150 + 105 * row / height)
            b = int(200 + 55 * row / height)
            draw.line([(0, row), (width, row)], fill=(r, g, b))

        # Add shapes based on prompt keywords
        prompt_lower = prompt.lower()

        if any(word in prompt_lower for word in ['sun', 'bright', 'light']):
            draw.ellipse([width//3, height//3, 2*width//3, 2*height//3],
                         fill=(255, 255, 0), outline=(255, 200, 0))

        if any(word in prompt_lower for word in ['tree', 'nature']):
            # Trunk, then five overlapping foliage ellipses stacked downward.
            draw.rectangle([width//2-15, height//2, width//2+15, height-50],
                           fill=(101, 67, 33))
            for layer in range(5):
                y_offset = layer * 30
                draw.ellipse([width//2-60, height//2-100+y_offset,
                              width//2+60, height//2-40+y_offset],
                             fill=(34, 139, 34))

        if any(word in prompt_lower for word in ['water', 'ocean', 'river']):
            for x_start in range(0, width, 40):
                draw.arc([x_start, height-80, x_start+80, height], 0, 180,
                         fill=(64, 164, 223), width=3)

        # Try to add text; a failure here only costs the caption.
        try:
            font_size = min(width // 25, 20)
            try:
                font = ImageFont.truetype("arial.ttf", font_size)
            except (OSError, ValueError):
                # Font file unavailable (or size rejected): use PIL's built-in font.
                font = ImageFont.load_default()

            # Truncate prompt for display
            display_text = prompt[:50] + "..." if len(prompt) > 50 else prompt
            text = f"STANLEY AI: {display_text}"

            # Calculate text position (horizontally centred, 20px from top)
            bbox = draw.textbbox((0, 0), text, font=font)
            text_width = bbox[2] - bbox[0]
            text_height = bbox[3] - bbox[1]

            x = (width - text_width) // 2
            y = 20

            # Add text background
            draw.rectangle([x-10, y-5, x+text_width+10, y+text_height+5],
                           fill=(0, 0, 0, 180))

            # Add text
            draw.text((x, y), text, fill=(255, 255, 255), font=font)

        except Exception as font_error:
            logger.warning(f"Could not add text: {font_error}")

        # Convert to base64
        return _png_data_uri(img, optimize=True)

    except Exception as e:
        logger.error(f"Image generation error: {e}")
        # Ultimate fallback - solid color
        img = Image.new('RGB', (width, height),
                        color=(random.randint(50, 200),
                               random.randint(50, 200),
                               random.randint(50, 200)))
        return _png_data_uri(img)

# ============================================================================
# FLASK ROUTES
# ============================================================================

@app.route('/')
def home():
    """Return a JSON overview of the service: status, active model, cache size and routes."""
    return jsonify({
        "message": "πŸš€ STANLEY AI API is running!",
        "version": "3.0",
        "status": "active",
        "model": current_model_name or "Loading...",
        "optimized": "true",
        "cache_size": len(response_cache),
        # Keep in sync with the @app.route declarations in this module.
        "endpoints": [
            "/api/chat - Main chat endpoint",
            "/api/chat-fast - Faster responses",
            "/api/generate-image - Simple image generation",
            "/api/health - System health check",
            "/api/cache/clear - Clear response cache",
            "/api/switch-model - Switch between configured models"
        ]
    })

@app.route('/api/health')
def health_check():
    """Report whether the model is loaded, which one, the cache size and the current time."""
    status = "healthy" if model_loaded else "loading"
    payload = {
        "status": status,
        "model_loaded": model_loaded,
        "model": current_model_name,
        "cache_entries": len(response_cache),
        "timestamp": time.time(),
    }
    return jsonify(payload)

@app.route('/api/chat', methods=['POST'])
def chat():
    """Main chat endpoint.

    Expects a JSON object with a non-empty string ``message``. Responds with
    the generated reply plus timing and language metadata, 400 when the
    message is missing or invalid, and 500 on unexpected errors.
    """
    try:
        start_time = time.time()

        # silent=True yields None for a missing/invalid JSON body instead of
        # raising, so bad input becomes a 400 below rather than a 500.
        data = request.get_json(silent=True)
        user_message = data.get('message', '') if isinstance(data, dict) else ''

        if not isinstance(user_message, str) or not user_message.strip():
            return jsonify({"error": "Tafadhali provide a message"}), 400

        logger.info(f"πŸ’¬ Processing: {user_message[:60]}...")

        # Generate response
        response = generate_response(user_message)
        response_time = round(time.time() - start_time, 2)

        # Check if response contains Kiswahili
        has_kiswahili = detect_kiswahili_context(response)

        return jsonify({
            "response": response,
            "status": "success",
            "response_time": f"{response_time}s",
            "model": current_model_name,
            "cultural_context": has_kiswahili,
            "language": "en+sw" if has_kiswahili else "en",
            "word_count": len(response.split())
        })

    except Exception as e:
        logger.error(f"Chat error: {e}")
        return jsonify({
            "error": f"Pole! Error: {str(e)[:100]}",
            "status": "error"
        }), 500

@app.route('/api/chat-fast', methods=['POST'])
def chat_fast():
    """Faster endpoint with shorter responses.

    Same input contract as /api/chat (JSON object with a non-empty string
    ``message``) but generation is capped at 256 new tokens.
    """
    try:
        # silent=True yields None for a missing/invalid JSON body instead of
        # raising, so bad input becomes a 400 below rather than a 500.
        data = request.get_json(silent=True)
        user_message = data.get('message', '') if isinstance(data, dict) else ''

        if not isinstance(user_message, str) or not user_message.strip():
            return jsonify({"error": "Please provide a message"}), 400

        # Quick response with fewer tokens
        response = generate_response(user_message, max_tokens=256)

        return jsonify({
            "response": response,
            "status": "success",
            "model": f"{current_model_name} (fast mode)",
            "response_type": "concise"
        })

    except Exception as e:
        # Log the cause; the client only receives a generic message.
        logger.error(f"Chat-fast error: {e}")
        return jsonify({"error": "Quick response failed"}), 500

@app.route('/api/generate-image', methods=['POST'])
def generate_image_endpoint():
    """Simple image generation endpoint.

    Accepts an optional JSON object with ``prompt`` (string), ``width`` and
    ``height`` (integers, each clamped to 1..1024, default 512). Returns the
    image as a base64 data URI, 400 when a dimension is not an integer, and
    500 on unexpected errors.
    """
    try:
        # A missing or malformed body falls back to all defaults.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        prompt = data.get('prompt', 'A beautiful landscape')
        if prompt is None:
            prompt = 'A beautiful landscape'
        elif not isinstance(prompt, str):
            # Slicing and keyword matching below require text.
            prompt = str(prompt)

        try:
            width = int(data.get('width', 512))
            height = int(data.get('height', 512))
        except (TypeError, ValueError):
            return jsonify({"error": "width and height must be integers"}), 400

        # Bound both sides: the upper limit caps work per request, the lower
        # limit rejects zero/negative sizes that cannot be rendered.
        width = max(1, min(width, 1024))
        height = max(1, min(height, 1024))

        logger.info(f"🎨 Generating image: {prompt[:40]}...")

        image_data = generate_image_simple(prompt, width, height)

        if image_data:
            return jsonify({
                "image": image_data,
                "prompt": prompt,
                "status": "success",
                "method": "PIL generated",
                "dimensions": f"{width}x{height}"
            })
        else:
            return jsonify({"error": "Could not generate image"}), 500

    except Exception as e:
        return jsonify({"error": f"Image error: {str(e)[:80]}"}), 500

@app.route('/api/cache/clear', methods=['POST'])
def clear_cache():
    """Empty the response cache and report how many entries were dropped."""
    removed = len(response_cache)
    response_cache.clear()

    # Also release cached GPU memory when a CUDA device is present.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    summary = {
        "status": "success",
        "cleared_entries": removed,
        "message": "Cache cleared",
    }
    return jsonify(summary)

@app.route('/api/switch-model', methods=['POST'])
def switch_model():
    """Switch between available models.

    Accepts an optional JSON object whose ``model`` is a MODEL_CONFIG key
    ("primary", "fallback", "tiny"); anything else selects "primary".
    Responds with the model that is active afterwards, or 500 when no model
    could be loaded.
    """
    try:
        # A missing or malformed body selects the default model.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        model_choice = data.get('model', 'primary')
        if not isinstance(model_choice, str):
            # Non-string (possibly unhashable) values cannot be config keys.
            model_choice = 'primary'

        model_name = MODEL_CONFIG.get(model_choice, MODEL_CONFIG["primary"])

        # Selecting the model that is already active is a success, not a
        # failure, and needs no reload.
        already_active = model_loaded and model_name == current_model_name
        success = already_active or load_model_optimized(model_name)

        if success:
            if current_model_name == model_name:
                message = f"Switched to {model_name}"
            else:
                # The loader fell back to another model; say so explicitly.
                message = (
                    f"Could not load {model_name}; "
                    f"using {current_model_name} instead"
                )
            return jsonify({
                "status": "success",
                "message": message,
                "current_model": current_model_name
            })
        else:
            return jsonify({"error": "Failed to switch model"}), 500

    except Exception as e:
        return jsonify({"error": str(e)}), 500

# ============================================================================
# INITIALIZATION & STARTUP
# ============================================================================

def initialize_app():
    """Kick off model loading on a daemon thread and return immediately.

    The function does not wait for the load to finish; it only starts the
    thread and logs before and after doing so.
    """
    logger.info("πŸš€ Initializing STANLEY AI...")

    # Daemon thread: it will not keep the process alive on shutdown.
    loader = Thread(target=lambda: load_model_optimized(), daemon=True)
    loader.start()

    logger.info("βœ… STANLEY AI initialized and ready!")

# Initialize on import
# Called at module import time (not only under __main__), so the background
# model load starts whenever this module is imported.
initialize_app()

if __name__ == '__main__':
    # Listen on the port given by the PORT environment variable, defaulting
    # to 7860, on all interfaces, with Flask's threaded request handling.
    port = int(os.environ.get('PORT', 7860))
    app.run(debug=False, host='0.0.0.0', port=port, threaded=True)