File size: 18,991 Bytes
4f06181 59983ef daab671 4f06181 baaa104 5209672 4f06181 5209672 4f06181 a0695a6 daab671 4f06181 daab671 4f06181 5209672 4f06181 5209672 7df1070 4f06181 b982d45 4f06181 eb9dd30 4f06181 a0695a6 e3b1de7 4f06181 5209672 4f06181 7df1070 a0695a6 4f06181 7df1070 5209672 4f06181 7df1070 4f06181 a0695a6 4f06181 7df1070 5209672 4f06181 5209672 4f06181 31d7eee 4f06181 a0695a6 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 2892625 4f06181 2892625 4f06181 5209672 4f06181 b982d45 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 b982d45 4f06181 7df1070 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 7df1070 4f06181 a0695a6 4f06181 5209672 b982d45 5209672 4f06181 2892625 4f06181 a0695a6 5209672 4f06181 a0695a6 4f06181 a0695a6 4f06181 7df1070 4f06181 a0695a6 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 5209672 4f06181 b982d45 4f06181 b982d45 a0695a6 4f06181 7df1070 4f06181 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 |
"""
STANLEY AI - Optimized Flask Backend
Deploy on Hugging Face Spaces with fast, smaller models
"""
import base64
import gc
import io
import logging
import os
import queue
import random
import re
import time
from threading import RLock, Thread

import torch
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
from PIL import Image, ImageDraw, ImageFont
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
# Configure logging
# The root logger is set to INFO; `logger` is this module's named logger and
# is used by every function below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Flask application object on which the @app.route handlers register.
app = Flask(__name__)
# Enable CORS on the app with flask_cors's default settings (no options are
# passed here).
CORS(app)
# ============================================================================
# MODEL CONFIGURATION - OPTIMIZED FOR SPEED
# ============================================================================
# Hugging Face Hub repository IDs, keyed by the role names that the
# /api/switch-model endpoint accepts in its "model" field.
MODEL_CONFIG = {
    # Qwen2.5 instruct checkpoints are published as 0.5B / 1.5B / 3B / ...;
    # there is no 1.8B variant, so the 1.5B repository is the one to request.
    "primary": "Qwen/Qwen2.5-1.5B-Instruct",  # Fast, multilingual, good balance
    "fallback": "microsoft/Phi-3-mini-4k-instruct",  # Tried when the requested model fails to load
    "tiny": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # For minimal memory usage
}

# Mutable model state; assigned by load_model_optimized().
model = None
tokenizer = None
model_loaded = False
current_model_name = None

# Performance cache: normalized user message -> generated response text.
response_cache = {}
CACHE_SIZE = 200
# System Prompt (optimized for speed)
# Sent as the "system" message of every chat generation (see
# generate_response). The text is runtime data: editing it changes model
# behaviour.
STANLEY_AI_SYSTEM = """You are STANLEY AI - an advanced assistant with Kiswahili cultural knowledge.
Provide helpful, concise responses. Integrate Kiswahili phrases naturally when relevant.
Key capabilities:
- Answer questions knowledgeably
- Use Kiswahili for greetings, proverbs, and cultural references
- Explain concepts clearly
- Be efficient and to the point
Format: Use **bold** for emphasis. Keep responses under 300 words unless detailed explanation is needed."""
# Simple Kiswahili knowledge base (replaces external file)
# Static lookup data read by enhance_with_kiswahili():
#   "greetings" - phrase name -> Kiswahili phrase
#   "proverbs"  - list of "Kiswahili - English gloss" strings
#   "lion_king" - Lion King term -> short explanation
KISWAHILI_KNOWLEDGE = {
# Phrase name -> Kiswahili phrase (alternatives separated by " / ").
"greetings": {
"hello": "Jambo / Habari",
"how_are_you": "Habari yako?",
"goodbye": "Kwaheri / Tuonane tena",
"thank_you": "Asante sana",
"welcome": "Karibu / Karibuni"
},
# Each entry is the Kiswahili proverb followed by its English rendering.
"proverbs": [
"Mwenye pupa hadiriki kula tamu - The impatient one misses sweet things.",
"Asiyefunzwa na mamae hufunzwa na ulimwengu - He who is not taught by his mother is taught by the world.",
"Haraka haraka haina baraka - Hurry hurry has no blessing.",
"Ukitaka kwenda haraka, nenda peke yako. Ukitaka kwenda mbali, nenda na wenzako - If you want to go fast, go alone. If you want to go far, go together."
],
# Lion King vocabulary -> explanation.
"lion_king": {
"simba": "Lion (the main character)",
"rafiki": "Friend (the wise baboon)",
"hakuna_matata": "No worries / No problems",
"mufasa": "Simba's father, the king",
"nala": "Simba's childhood friend and queen"
}
}
# Serializes model loads: the startup thread, /api/switch-model and the lazy
# load in generate_response() can all call load_model_optimized() at the same
# time. Re-entrant because the fallback retry recurses while the lock is held.
_MODEL_LOAD_LOCK = RLock()


def load_model_optimized(model_name=None):
    """Load (or switch to) a causal-LM checkpoint and publish it via module globals.

    Args:
        model_name: Hugging Face Hub repository ID to load. When omitted, the
            call means "make sure some model is available": an already loaded
            model is kept, otherwise ``MODEL_CONFIG["primary"]`` is loaded.

    Returns:
        True when a model is ready afterwards (the requested one, one that was
        already loaded, or the fallback that replaced a failed load); False
        when loading failed and no fallback was left to try.
    """
    global model, tokenizer, model_loaded, current_model_name

    with _MODEL_LOAD_LOCK:
        # Report success (not None) when there is nothing to do, so callers
        # that test the return value do not mistake a no-op for a failure.
        if model_loaded and (not model_name or model_name == current_model_name):
            return True

        # Choose model
        if not model_name:
            model_name = MODEL_CONFIG["primary"]

        logger.info(f"Loading model: {model_name}")
        try:
            # Flag the model as unavailable before touching it so no request
            # generates against a half-replaced model/tokenizer pair.
            model_loaded = False
            current_model_name = None

            # Clear previous model from memory. Rebinding to None (instead of
            # `del`) keeps the global names defined if the load below fails.
            if model is not None:
                model = None
                tokenizer = None
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

            # Load tokenizer
            tokenizer = AutoTokenizer.from_pretrained(
                model_name,
                trust_remote_code=True,
                use_fast=True,  # Fast tokenizer for speed
            )
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            load_kwargs = {"low_cpu_mem_usage": True, "trust_remote_code": True}
            if torch.cuda.is_available():
                # 4-bit quantization for speed and memory efficiency
                load_kwargs.update(
                    torch_dtype=torch.float16,
                    device_map="auto",
                    load_in_4bit=True,
                )
            else:
                # NOTE(review): bitsandbytes 4-bit loading targets CUDA GPUs,
                # so CPU-only hardware loads plain float32 weights instead of
                # failing every model - confirm against the deployed versions.
                load_kwargs["torch_dtype"] = torch.float32

            model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)
            model.eval()  # Set to evaluation mode
            model_loaded = True
            current_model_name = model_name

            # Pre-warm model with a simple prompt
            prewarm_model()
            logger.info(f"Model loaded successfully: {model_name}")
            logger.info(f"Model device: {model.device}")
            return True
        except Exception as e:
            logger.error(f"Error loading model: {e}")
            # Drop whatever was partially loaded so state stays consistent.
            model = None
            tokenizer = None
            model_loaded = False
            current_model_name = None

            # Try fallback
            if model_name != MODEL_CONFIG["fallback"]:
                logger.info("Trying fallback model...")
                return load_model_optimized(MODEL_CONFIG["fallback"])

            logger.error("All models failed to load")
            return False
def prewarm_model():
    """Run one short greedy generation to warm up the freshly loaded model.

    Reads the module-level ``tokenizer`` and ``model``. This is best effort:
    any failure is logged as a warning and never propagated to the caller.
    """
    try:
        messages = [
            {"role": "system", "content": "Say hello briefly."},
            {"role": "user", "content": "Hello, STANLEY AI!"},
        ]
        text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        inputs = tokenizer(text, return_tensors="pt").to(model.device)
        with torch.no_grad():
            # The output is discarded; only the side effect of running matters.
            model.generate(
                **inputs,
                max_new_tokens=10,
                do_sample=False,
            )
        logger.info("Model pre-warmed successfully!")
    except Exception as e:
        logger.warning(f"Pre-warm failed: {e}")
_KISWAHILI_KEYWORDS = (
    'swahili', 'kiswahili', 'hakuna', 'matata', 'asante', 'rafiki',
    'jambo', 'mambo', 'pole', 'sawa', 'karibu', 'kwaheri', 'simba',
    'lion king', 'mufasa', 'nala', 'kenya', 'tanzania', 'africa',
    'habari', 'nze', 'pumbaa', 'timon', 'safari', 'ujamaa',
)

# A keyword must begin at a word boundary so that fragments buried inside
# unrelated words ("nze" in "bronze") do not count. Trailing characters are
# still allowed, so inflected forms ("karibuni", "African") keep matching.
_KISWAHILI_PATTERN = re.compile(
    r"\b(?:" + "|".join(re.escape(keyword) for keyword in _KISWAHILI_KEYWORDS) + r")"
)


def detect_kiswahili_context(text):
    """Return True when *text* mentions Kiswahili or related cultural keywords.

    Args:
        text: The text to inspect. Empty values and non-string values are
            treated as "no context" instead of raising.

    Returns:
        bool: True if any keyword starts a word in the lower-cased text.
    """
    if not text or not isinstance(text, str):
        return False
    return _KISWAHILI_PATTERN.search(text.lower()) is not None
def enhance_with_kiswahili(response, user_message):
    """Decorate *response* with Kiswahili phrases when the user's message calls for it.

    Args:
        response: The generated answer text.
        user_message: The user's original message; it decides whether and how
            the answer is decorated.

    Returns:
        *response* unchanged when detect_kiswahili_context(user_message) is
        false. Otherwise the response prefixed with a random Kiswahili phrase,
        followed by a random proverb when the message asks for advice/wisdom,
        and by a Lion King fact when the message mentions it.
    """
    if not detect_kiswahili_context(user_message):
        return response

    message_lower = user_message.lower()

    # The phrase opens the reply, so the "goodbye" entry is not a candidate.
    openers = [
        phrase
        for name, phrase in KISWAHILI_KNOWLEDGE["greetings"].items()
        if name != "goodbye"
    ]
    greeting = random.choice(openers)
    # Avoid doubled punctuation such as "Habari yako?!".
    if not greeting.endswith(("?", "!", ".")):
        greeting += "!"
    enhanced = f"{greeting} {response}"

    # Add a proverb if appropriate
    if any(word in message_lower for word in ('advice', 'wisdom', 'lesson', 'teach')):
        proverb = random.choice(KISWAHILI_KNOWLEDGE["proverbs"])
        enhanced += f"\n\n**Kiswahili Proverb:** {proverb}"

    # Add Lion King reference if relevant. Anchored at a word start so that
    # "million" or "billion" do not trigger the "lion" keyword.
    if re.search(r"\b(?:lion|simba|mufasa|disney)", message_lower):
        lion_fact = "Did you know? 'Simba' means lion in Kiswahili, and 'Rafiki' means friend!"
        enhanced += f"\n\n{lion_fact}"

    return enhanced
def _cache_key(user_message):
    """Normalize a message into its cache key (lower-cased, surrounding whitespace removed).

    The whole message is used: truncating the key would make different
    prompts that share a long prefix return each other's cached answers.
    """
    return user_message.lower().strip()


def get_cached_response(user_message):
    """Return the cached response for *user_message*, or None when absent."""
    return response_cache.get(_cache_key(user_message))


def set_cached_response(user_message, response):
    """Store *response* for *user_message*, keeping at most CACHE_SIZE entries.

    When a new key would exceed the limit, the oldest entry is evicted first.
    Overwriting an existing key never evicts anything.
    """
    cache_key = _cache_key(user_message)
    if (
        cache_key not in response_cache
        and response_cache
        and len(response_cache) >= CACHE_SIZE
    ):
        # Dicts preserve insertion order, so the first key is the oldest one.
        del response_cache[next(iter(response_cache))]
    response_cache[cache_key] = response
def generate_response(user_message, max_tokens=512):
    """Generate a chat reply for *user_message*, using the response cache when possible.

    Args:
        user_message: The user's prompt text.
        max_tokens: Upper bound on newly generated tokens.

    Returns:
        str: The reply after enhance_with_kiswahili(). If no model can be
        loaded, a "still initializing" notice; if generation raises, a short
        error message. Neither of those two is cached.
    """
    # The token budget is part of the cache identity so that a reply cut short
    # for a small budget is never served to a caller that asked for more.
    cache_id = f"[{max_tokens}] {user_message}"

    # Check cache
    cached = get_cached_response(cache_id)
    if cached:
        logger.info("Using cached response")
        return cached

    # Ensure model is loaded
    if not model_loaded and not load_model_optimized():
        return "I'm still initializing. Please try again in a moment."

    # Prepare messages
    messages = [
        {"role": "system", "content": STANLEY_AI_SYSTEM},
        {"role": "user", "content": user_message},
    ]

    try:
        # Apply chat template
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # Tokenize
        inputs = tokenizer(text, return_tensors="pt").to(model.device)
        prompt_length = inputs['input_ids'].shape[1]

        # Generate with sampling. `early_stopping` is not passed: it only
        # affects beam search and is not used by single-beam sampling.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=0.7,
                top_p=0.9,
                top_k=40,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1,
                no_repeat_ngram_size=3,
            )

        # Decode only the tokens produced after the prompt
        response = tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True,
        ).strip()

        # Enhance with Kiswahili
        enhanced_response = enhance_with_kiswahili(response, user_message)

        # Cache
        set_cached_response(cache_id, enhanced_response)
        return enhanced_response
    except Exception as e:
        # logger.exception keeps the traceback for debugging.
        logger.exception(f"Generation error: {e}")
        return f"Pole! I encountered an error: {str(e)[:100]}"
def _png_data_uri(img, **save_options):
    """Encode a PIL image as a ``data:image/png;base64,...`` URI string."""
    buffered = io.BytesIO()
    img.save(buffered, format="PNG", **save_options)
    encoded = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/png;base64,{encoded}"


def generate_image_simple(prompt, width=512, height=512):
    """Draw a simple placeholder image for *prompt* using PIL only.

    The image is a vertical colour gradient with optional shapes selected by
    keywords in the prompt (sun/bright/light, tree/nature, water/ocean/river)
    and a caption showing the (truncated) prompt.

    Args:
        prompt: Text used for keyword detection and for the caption.
        width: Image width in pixels; coerced to an int of at least 1.
        height: Image height in pixels; coerced to an int of at least 1.

    Returns:
        str: A base64 PNG data URI. If drawing fails, a solid randomly
        coloured image of the same size is returned instead.

    Raises:
        TypeError, ValueError: If width or height cannot be converted to int.
    """
    width = max(1, int(width))
    height = max(1, int(height))

    try:
        # Create base image with gradient
        img = Image.new('RGB', (width, height), color='white')
        draw = ImageDraw.Draw(img)

        # One horizontal line per row, brightening towards the bottom
        for row in range(height):
            r = int(100 + 155 * row / height)
            g = int(150 + 105 * row / height)
            b = int(200 + 55 * row / height)
            draw.line([(0, row), (width, row)], fill=(r, g, b))

        # Add shapes based on prompt keywords
        prompt_lower = prompt.lower()

        if any(word in prompt_lower for word in ('sun', 'bright', 'light')):
            draw.ellipse([width//3, height//3, 2*width//3, 2*height//3],
                         fill=(255, 255, 0), outline=(255, 200, 0))

        if any(word in prompt_lower for word in ('tree', 'nature')):
            # Trunk, then five overlapping ellipses as foliage
            draw.rectangle([width//2-15, height//2, width//2+15, height-50],
                           fill=(101, 67, 33))
            for layer in range(5):
                y_offset = layer * 30
                draw.ellipse([width//2-60, height//2-100+y_offset,
                              width//2+60, height//2-40+y_offset],
                             fill=(34, 139, 34))

        if any(word in prompt_lower for word in ('water', 'ocean', 'river')):
            for left in range(0, width, 40):
                draw.arc([left, height-80, left+80, height], 0, 180,
                         fill=(64, 164, 223), width=3)

        # Try to add text; a caption failure must not discard the image
        try:
            font_size = min(width // 25, 20)
            try:
                font = ImageFont.truetype("arial.ttf", font_size)
            except (OSError, ValueError):
                # Font file unavailable or size rejected: use PIL's built-in font
                font = ImageFont.load_default()

            # Truncate prompt for display
            display_text = prompt[:50] + "..." if len(prompt) > 50 else prompt
            text = f"STANLEY AI: {display_text}"

            # Calculate text position (horizontally centred, near the top)
            bbox = draw.textbbox((0, 0), text, font=font)
            text_width = bbox[2] - bbox[0]
            text_height = bbox[3] - bbox[1]
            x = (width - text_width) // 2
            y = 20

            # Add text background
            draw.rectangle([x-10, y-5, x+text_width+10, y+text_height+5],
                           fill=(0, 0, 0, 180))
            # Add text
            draw.text((x, y), text, fill=(255, 255, 255), font=font)
        except Exception as font_error:
            logger.warning(f"Could not add text: {font_error}")

        # Convert to base64
        return _png_data_uri(img, optimize=True)
    except Exception as e:
        logger.error(f"Image generation error: {e}")
        # Ultimate fallback - solid color
        fallback = Image.new('RGB', (width, height),
                             color=(random.randint(50, 200),
                                    random.randint(50, 200),
                                    random.randint(50, 200)))
        return _png_data_uri(fallback)
# ============================================================================
# FLASK ROUTES
# ============================================================================
@app.route('/')
def home():
    """Return service metadata and the list of available endpoints as JSON."""
    return jsonify({
        "message": "STANLEY AI API is running!",
        "version": "3.0",
        "status": "active",
        # current_model_name is None until a model has finished loading
        "model": current_model_name or "Loading...",
        "optimized": "true",
        "cache_size": len(response_cache),
        "endpoints": [
            "/api/chat - Main chat endpoint",
            "/api/chat-fast - Faster responses",
            "/api/generate-image - Simple image generation",
            "/api/health - System health check",
            "/api/cache/clear - Clear response cache",
            "/api/switch-model - Switch between available models",
        ]
    })
@app.route('/api/health')
def health_check():
    """Report model readiness, the active model name and cache usage as JSON."""
    if model_loaded:
        state = "healthy"
    else:
        state = "loading"

    payload = {
        "status": state,
        "model_loaded": model_loaded,
        "model": current_model_name,
        "cache_entries": len(response_cache),
        "timestamp": time.time(),
    }
    return jsonify(payload)
@app.route('/api/chat', methods=['POST'])
def chat():
    """Main chat endpoint.

    Expects a JSON object body with a non-empty string ``message``. Responds
    with the generated reply plus timing, model and language metadata.
    A missing, non-JSON or invalid body yields HTTP 400; unexpected failures
    yield HTTP 500.
    """
    try:
        start_time = time.time()

        # silent=True returns None instead of raising on a missing or
        # malformed JSON body, so bad requests are answered with 400, not 500.
        data = request.get_json(silent=True)
        user_message = data.get('message', '') if isinstance(data, dict) else ''
        if not isinstance(user_message, str) or not user_message.strip():
            return jsonify({"error": "Tafadhali provide a message"}), 400

        logger.info(f"Processing: {user_message[:60]}...")

        # Generate response
        response = generate_response(user_message)
        response_time = round(time.time() - start_time, 2)

        # Check if response contains Kiswahili
        has_kiswahili = detect_kiswahili_context(response)

        return jsonify({
            "response": response,
            "status": "success",
            "response_time": f"{response_time}s",
            "model": current_model_name,
            "cultural_context": has_kiswahili,
            "language": "en+sw" if has_kiswahili else "en",
            "word_count": len(response.split())
        })
    except Exception as e:
        logger.exception(f"Chat error: {e}")
        return jsonify({
            "error": f"Pole! Error: {str(e)[:100]}",
            "status": "error"
        }), 500
@app.route('/api/chat-fast', methods=['POST'])
def chat_fast():
    """Faster endpoint with shorter responses.

    Same request format as /api/chat (JSON object with a non-empty string
    ``message``) but generation is limited to 256 new tokens. A missing,
    non-JSON or invalid body yields HTTP 400; unexpected failures yield 500.
    """
    try:
        # silent=True returns None instead of raising on a bad JSON body.
        data = request.get_json(silent=True)
        user_message = data.get('message', '') if isinstance(data, dict) else ''
        if not isinstance(user_message, str) or not user_message.strip():
            return jsonify({"error": "Please provide a message"}), 400

        # Quick response with fewer tokens
        response = generate_response(user_message, max_tokens=256)

        return jsonify({
            "response": response,
            "status": "success",
            "model": f"{current_model_name} (fast mode)",
            "response_type": "concise"
        })
    except Exception as e:
        # Log the cause; the client only receives a generic message.
        logger.exception(f"Fast chat error: {e}")
        return jsonify({"error": "Quick response failed"}), 500
def _parse_dimension(value, default=512, maximum=1024):
    """Coerce a requested side length to an int within [1, maximum].

    ``None`` (field absent) selects *default*. Raises TypeError, ValueError or
    OverflowError when *value* cannot be converted to an int.
    """
    if value is None:
        return default
    return max(1, min(int(value), maximum))


@app.route('/api/generate-image', methods=['POST'])
def generate_image_endpoint():
    """Simple image generation endpoint.

    Accepts an optional JSON object with ``prompt`` (string), ``width`` and
    ``height`` (numbers, limited to 1..1024, default 512). Responds with a
    base64 PNG data URI. Invalid field types yield HTTP 400.
    """
    try:
        # silent=True returns None instead of raising on a bad JSON body;
        # every field has a default, so an empty request is still valid.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        prompt = data.get('prompt', 'A beautiful landscape')
        if not isinstance(prompt, str):
            return jsonify({"error": "prompt must be a string"}), 400

        try:
            width = _parse_dimension(data.get('width'))
            height = _parse_dimension(data.get('height'))
        except (TypeError, ValueError, OverflowError):
            return jsonify({"error": "width and height must be integers"}), 400

        logger.info(f"Generating image: {prompt[:40]}...")
        image_data = generate_image_simple(prompt, width, height)

        if image_data:
            return jsonify({
                "image": image_data,
                "prompt": prompt,
                "status": "success",
                "method": "PIL generated",
                "dimensions": f"{width}x{height}"
            })
        return jsonify({"error": "Could not generate image"}), 500
    except Exception as e:
        logger.exception(f"Image endpoint error: {e}")
        return jsonify({"error": f"Image error: {str(e)[:80]}"}), 500
@app.route('/api/cache/clear', methods=['POST'])
def clear_cache():
    """Empty the response cache and report how many entries were dropped.

    Also calls torch.cuda.empty_cache() when CUDA is available.
    """
    removed_count = len(response_cache)
    response_cache.clear()

    cuda_present = torch.cuda.is_available()
    if cuda_present:
        torch.cuda.empty_cache()

    result = dict(
        status="success",
        cleared_entries=removed_count,
        message="Cache cleared",
    )
    return jsonify(result)
@app.route('/api/switch-model', methods=['POST'])
def switch_model():
    """Switch between available models.

    Expects an optional JSON object with ``model`` set to a MODEL_CONFIG key
    ("primary" when absent or unknown). Responds with the model that is
    actually active afterwards, which differs from the requested one when the
    loader had to fall back.
    """
    try:
        # silent=True returns None instead of raising on a bad JSON body.
        data = request.get_json(silent=True)
        model_choice = data.get('model', 'primary') if isinstance(data, dict) else 'primary'
        if not isinstance(model_choice, str):
            model_choice = 'primary'
        model_name = MODEL_CONFIG.get(model_choice, MODEL_CONFIG["primary"])

        # Requesting the model that is already active is a success, not a
        # failure, and needs no reload.
        already_active = model_loaded and model_name == current_model_name
        success = already_active or load_model_optimized(model_name)

        if success:
            if current_model_name == model_name:
                message = f"Switched to {model_name}"
            else:
                # The loader replaced the requested model with its fallback.
                message = f"Could not load {model_name}; using {current_model_name}"
            return jsonify({
                "status": "success",
                "message": message,
                "current_model": current_model_name
            })
        return jsonify({"error": "Failed to switch model"}), 500
    except Exception as e:
        logger.exception(f"Switch model error: {e}")
        return jsonify({"error": str(e)}), 500
# ============================================================================
# INITIALIZATION & STARTUP
# ============================================================================
def initialize_app():
    """Start loading the default model on a daemon thread and return immediately.

    The model is not necessarily ready when this function returns; readiness
    is tracked by the module-level ``model_loaded`` flag.
    """
    logger.info("Initializing STANLEY AI...")

    # Load model in background thread so startup is not blocked by the load.
    background_thread = Thread(target=load_model_optimized, daemon=True)
    background_thread.start()

    logger.info("STANLEY AI started; model is loading in the background")
# Initialize on import, so the background model load also starts when the
# module is imported rather than executed as a script.
initialize_app()

if __name__ == '__main__':
    # Port comes from the PORT environment variable, defaulting to 7860.
    port = int(os.environ.get('PORT', 7860))
    app.run(debug=False, host='0.0.0.0', port=port, threaded=True)