Stanley03 commited on
Commit
7df1070
·
verified ·
1 Parent(s): 9f01d67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +291 -208
app.py CHANGED
@@ -1,10 +1,11 @@
1
- # app.py - OPTIMIZED FOR HUGGING FACE SPACES
2
  from flask import Flask, request, jsonify
3
  from flask_cors import CORS
4
  import torch
5
  import time
6
  import logging
7
  import os
 
8
 
9
  # Configure logging
10
  logging.basicConfig(level=logging.INFO)
@@ -15,282 +16,364 @@ CORS(app)
15
 
16
  # Detect if running on Hugging Face Spaces
17
  ON_SPACES = os.environ.get('SPACE_ID') is not None
 
18
 
19
  # ============================================================================
20
- # TINY MODEL FOR SPACES - NO GPU NEEDED
21
  # ============================================================================
22
 
23
- try:
24
- # Import only what we need
25
- from transformers import pipeline, AutoTokenizer
26
-
27
- # USE A TINY MODEL THAT WORKS ON CPU
28
- model_name = "microsoft/phi-2" # 2.7B but very fast
29
- # OR use an even smaller model:
30
- # model_name = "HuggingFaceTB/SmolLM2-135M-Instruct"
31
-
32
- logger.info(f"🚀 Loading {model_name} for Spaces...")
33
-
34
- # Use pipeline for simplicity and speed
35
- text_generator = pipeline(
36
- "text-generation",
37
- model=model_name,
38
- tokenizer=model_name,
39
- device=-1, # CPU
40
- torch_dtype=torch.float32,
41
- model_kwargs={"low_cpu_mem_usage": True}
42
- )
43
-
44
- model_loaded = True
45
- logger.info("✅ Model loaded successfully for Spaces!")
46
 
47
- except Exception as e:
48
- logger.error(f"❌ Model loading failed: {e}")
49
- # Fallback to even simpler model
50
  try:
51
- from transformers import GPT2Tokenizer, GPT2LMHeadModel
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
- tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
54
- model = GPT2LMHeadModel.from_pretrained("gpt2")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  model_loaded = True
56
- text_generator = None # We'll use custom generation
57
- logger.info("✅ Loaded GPT-2 as fallback!")
58
- except:
 
 
 
 
 
59
  model_loaded = False
60
- text_generator = None
61
- logger.warning("⚠️ No model loaded - running in simulation mode")
62
-
63
- # Cache for responses
64
- response_cache = {}
65
- CACHE_SIZE = 50
66
 
67
- # Simplified system prompt for Spaces
68
- STANLEY_AI_SYSTEM = """You are STANLEY AI - an advanced AI assistant created by Stanley AI.
69
- You provide helpful, accurate, and concise responses.
70
- When appropriate, use Kiswahili phrases naturally in your responses."""
71
 
72
- def generate_response_fast(user_message):
73
- """Ultra-fast response generation for Spaces"""
74
-
75
- # Check cache
76
- cache_key = user_message.lower()[:50]
77
- if cache_key in response_cache:
78
- return response_cache[cache_key]
79
-
80
- # Truncate if too long
81
- if len(user_message) > 500:
82
- user_message = user_message[:500]
83
 
84
  try:
85
- if text_generator:
86
- # Use pipeline for fast generation
87
- response = text_generator(
88
- f"{STANLEY_AI_SYSTEM}\n\nUser: {user_message}\nStanley AI:",
89
- max_new_tokens=256,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  temperature=0.7,
91
  do_sample=True,
92
  top_p=0.9,
 
93
  repetition_penalty=1.1,
94
- num_return_sequences=1
95
- )[0]['generated_text']
96
-
97
- # Extract just the response part
98
- if "Stanley AI:" in response:
99
- response = response.split("Stanley AI:")[-1].strip()
100
 
101
- elif model_loaded and 'model' in locals() and 'tokenizer' in locals():
102
- # Fallback GPT-2 generation
103
- inputs = tokenizer(user_message, return_tensors="pt", truncation=True, max_length=128)
104
- with torch.no_grad():
105
- outputs = model.generate(
106
- **inputs,
107
- max_new_tokens=128,
108
- temperature=0.7,
109
- do_sample=True,
110
- pad_token_id=tokenizer.eos_token_id
111
- )
112
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
113
- else:
114
- # Simulation mode for testing
115
- response = f"I'm Stanley AI! You said: {user_message[:100]}...\n\nI'm running on Hugging Face Spaces with limited resources. For full capabilities, consider running locally with GPU."
116
-
117
- # Add some Kiswahili if relevant
118
- if any(word in user_message.lower() for word in ['swahili', 'kiswahili', 'hakuna matata', 'jambo']):
119
- response += "\n\nAsante sana for your question! Hakuna matata."
120
-
121
- # Cache it
122
- if len(response_cache) < CACHE_SIZE:
123
- response_cache[cache_key] = response
124
 
125
  return response.strip()
126
 
127
  except Exception as e:
128
  logger.error(f"Generation error: {e}")
129
- return f"Pole! I encountered an error: {str(e)[:100]}"
130
 
131
- # ============================================================================
132
- # SIMPLIFIED IMAGE GENERATION - NO HEAVY MODELS
133
- # ============================================================================
134
-
135
- def generate_image_simple(prompt):
136
- """Simple image generation using only PIL - no external models"""
137
  try:
138
- from PIL import Image, ImageDraw, ImageFont
139
- import random
140
- import io
141
- import base64
 
142
 
143
- # Create a simple image
144
- width, height = 256, 256
145
- img = Image.new('RGB', (width, height), color=(
146
- random.randint(50, 200),
147
- random.randint(50, 200),
148
- random.randint(50, 200)
149
- ))
150
 
151
- draw = ImageDraw.Draw(img)
152
-
153
- # Add some simple shapes based on prompt
154
- if 'sun' in prompt.lower():
155
- draw.ellipse([50, 50, 200, 200], fill=(255, 255, 0))
156
- elif 'tree' in prompt.lower():
157
- # Brown trunk
158
- draw.rectangle([width//2-10, height//2, width//2+10, height-50], fill=(139, 69, 19))
159
- # Green leaves
160
- draw.ellipse([width//2-40, height//2-60, width//2+40, height//2+20], fill=(34, 139, 34))
161
-
162
- # Add text
163
- try:
164
- font = ImageFont.load_default()
165
- text = prompt[:30] + "..." if len(prompt) > 30 else prompt
166
- draw.text((10, 10), f"Stanley AI:", fill=(255, 255, 255), font=font)
167
- draw.text((10, 30), text, fill=(255, 255, 255), font=font)
168
- except:
169
- pass
170
-
171
- # Convert to base64
172
- buffered = io.BytesIO()
173
- img.save(buffered, format="PNG", optimize=True)
174
- img_str = base64.b64encode(buffered.getvalue()).decode()
175
- return f"data:image/png;base64,{img_str}"
176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  except Exception as e:
178
- logger.error(f"Image error: {e}")
179
- return None
180
 
181
  # ============================================================================
182
- # FLASK ROUTES
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  # ============================================================================
184
 
185
  @app.route('/')
186
  def home():
187
  return jsonify({
188
- "message": "🚀 STANLEY AI - Hugging Face Spaces Edition",
189
- "version": "3.0",
190
- "status": "active" if model_loaded else "simulation",
191
- "platform": "Hugging Face Spaces",
192
- "model": "phi-2" if model_loaded else "simulation",
193
- "image_generation": "simple",
194
- "optimized_for": "spaces-free-tier",
195
- "instructions": "Chat API at /api/chat, Images at /api/generate-image"
 
 
 
196
  })
197
 
198
- @app.route('/api/chat', methods=['POST'])
199
  def chat():
 
 
 
200
  try:
201
- start_time = time.time()
202
- data = request.get_json()
203
- user_message = data.get('message', '')
 
 
 
 
 
204
 
205
  if not user_message:
206
- return jsonify({"error": "Please provide a message"}), 400
207
 
208
- if not model_loaded and ON_SPACES:
209
- # Provide a helpful message
 
 
210
  return jsonify({
211
- "response": "⚠️ Model not fully loaded. This is expected on Hugging Face Spaces free tier. I'm running in simulation mode.\n\nTry: 'Tell me about Kiswahili' or 'Generate an image of a lion'",
212
- "status": "simulation",
213
- "response_time": 0.1
 
214
  })
215
 
216
- response = generate_response_fast(user_message)
217
- response_time = round(time.time() - start_time, 2)
 
 
 
 
 
218
 
219
  return jsonify({
220
  "response": response,
221
- "status": "success",
222
  "response_time": response_time,
223
- "word_count": len(response.split()),
224
- "platform": "huggingface-spaces"
 
225
  })
226
 
227
  except Exception as e:
228
  logger.error(f"Chat error: {e}")
229
  return jsonify({
230
- "error": f"System error: {str(e)[:100]}",
231
  "status": "error"
232
  }), 500
233
 
234
- @app.route('/api/generate-image', methods=['POST'])
235
- def generate_image():
236
- """Simple image generation endpoint"""
237
- try:
238
- data = request.get_json()
239
- prompt = data.get('prompt', 'A beautiful landscape')
240
-
241
- image_data = generate_image_simple(prompt)
242
-
243
- if image_data:
244
- return jsonify({
245
- "image": image_data,
246
- "prompt": prompt,
247
- "status": "success",
248
- "quality": "simple",
249
- "note": "Simple image generation for Spaces free tier"
250
- })
251
- else:
252
- return jsonify({
253
- "image": None,
254
- "prompt": prompt,
255
- "status": "success",
256
- "message": "Image generation failed, but chat is working!"
257
- })
258
-
259
- except Exception as e:
260
- return jsonify({
261
- "error": f"Image error: {str(e)[:100]}",
262
- "status": "error"
263
- }), 500
264
 
265
  @app.route('/api/status')
266
  def status():
267
- """Health check endpoint"""
268
  return jsonify({
269
- "status": "healthy" if model_loaded else "degraded",
270
  "model_loaded": model_loaded,
271
- "on_spaces": ON_SPACES,
 
272
  "cache_size": len(response_cache),
273
- "timestamp": time.time()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  })
275
 
276
  # ============================================================================
277
- # REQUIREMENTS.TXT (update this file too!)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  # ============================================================================
279
- """
280
- flask>=2.3.0
281
- flask-cors>=4.0.0
282
- torch>=2.0.0
283
- transformers>=4.35.0
284
- pillow>=10.0.0
285
- accelerate>=0.24.0
286
- """
287
 
288
  if __name__ == '__main__':
289
- print("🚀 STANLEY AI - Hugging Face Spaces Edition")
290
- print(" Optimized for CPU-only environments")
291
- print("🌍 Running on:", "Hugging Face Spaces" if ON_SPACES else "Local")
292
- print("📦 Model status:", "Loaded" if model_loaded else "Simulation mode")
 
 
 
 
 
 
 
 
 
293
 
294
- # Run on port 7860 (default for Spaces)
295
  port = int(os.environ.get('PORT', 7860))
296
- app.run(debug=False, host='0.0.0.0', port=port)
 
 
 
 
 
 
1
+ # app.py - OPTIMIZED TEXT-ONLY VERSION FOR HUGGING FACE SPACES
2
  from flask import Flask, request, jsonify
3
  from flask_cors import CORS
4
  import torch
5
  import time
6
  import logging
7
  import os
8
+ import json
9
 
10
  # Configure logging
11
  logging.basicConfig(level=logging.INFO)
 
16
 
17
# Detect if running on Hugging Face Spaces (Spaces sets SPACE_ID in the env)
ON_SPACES = os.environ.get('SPACE_ID') is not None
logger.info(f"🚀 Running on Hugging Face Spaces: {ON_SPACES}")

# ============================================================================
# ULTRA-FAST QWEN MODEL LOADING
# ============================================================================

# Use the smallest Qwen model available
MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
# Or even smaller alternative: "Qwen/Qwen2.5-Coder-0.5B-Instruct"

# Module-level state, populated lazily by load_model_fast().
model = None
tokenizer = None
model_loaded = False
32
+
33
def load_model_fast():
    """Fast model loading optimized for Spaces.

    Fills in the module-level ``model``, ``tokenizer`` and ``model_loaded``
    globals. Any failure is logged and leaves ``model_loaded`` False rather
    than raising, so the web app keeps serving in "loading" mode.
    """
    global model, tokenizer, model_loaded

    try:
        logger.info(f"🔄 Loading {MODEL_NAME}...")

        # Import lazily so the Flask app can start before the heavy
        # transformers import completes.
        from transformers import AutoTokenizer, AutoModelForCausalLM

        # Tokenizer first; left padding suits decoder-only generation.
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            trust_remote_code=True,
            padding_side="left",
        )

        # Some checkpoints ship without a pad token; reuse EOS in that case.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Minimal-footprint model load: fp16 + device_map only when a GPU
        # exists, fp32 on CPU.
        has_cuda = torch.cuda.is_available()
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16 if has_cuda else torch.float32,
            device_map="auto" if has_cuda else None,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )

        # Be explicit about placement when there is no GPU.
        if has_cuda:
            logger.info("🎮 GPU available!")
        else:
            model = model.to("cpu")
            logger.info("📱 Model moved to CPU")

        model.eval()
        model_loaded = True
        logger.info("✅ Model loaded successfully!")

        # Smoke-test one generation so failures surface at load time.
        test_response = generate_quick("Hello")
        logger.info(f"🧪 Test generation: {test_response[:50]}...")

    except Exception as e:
        logger.error(f"❌ Model loading failed: {str(e)[:200]}")
        model_loaded = False
 
 
 
 
 
 
81
 
82
+ # ============================================================================
83
+ # OPTIMIZED GENERATION FUNCTIONS
84
+ # ============================================================================
 
85
 
86
def generate_quick(user_message, max_tokens=256):
    """Ultra-fast single-shot generation with minimal overhead.

    Args:
        user_message: The user's chat message (plain text).
        max_tokens: Upper bound on newly generated tokens.

    Returns:
        The assistant's reply as a stripped string, or a short status/error
        message if the model is not loaded or generation fails.
    """
    if not model_loaded:
        return "Model is still loading, please wait..."

    try:
        # Format the prompt for Qwen chat template
        messages = [
            {"role": "system", "content": "You are Stanley AI, a helpful assistant."},
            {"role": "user", "content": user_message}
        ]

        # Apply chat template
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        # Tokenize; truncate long prompts to keep CPU latency bounded.
        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

        # Move tensors to the model's device
        device = model.device
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # BUGFIX: ``inputs`` already contains ``attention_mask`` from the
        # tokenizer, so passing it again as an explicit keyword (as the
        # original did) raises TypeError: got multiple values for keyword
        # argument 'attention_mask' on every call. It is supplied once,
        # via **inputs.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=0.7,
                do_sample=True,
                top_p=0.9,
                top_k=50,
                repetition_penalty=1.1,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                use_cache=True,  # Important for speed
            )

        # Decode only the newly generated tokens (drop the echoed prompt).
        response = tokenizer.decode(
            outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True
        )

        return response.strip()

    except Exception as e:
        logger.error(f"Generation error: {e}")
        return f"I encountered an error: {str(e)[:100]}"
136
 
137
def generate_streaming(user_message, max_tokens=256):
    """Greedy token-by-token generation, yielded as SSE ``data:`` lines.

    Args:
        user_message: The user's chat message.
        max_tokens: Maximum number of tokens to stream.

    Yields:
        Server-sent-event strings: ``data: {"token": ...}\\n\\n`` per token,
        or a single ``data: {"error": ...}\\n\\n`` on failure.
    """
    if not model_loaded:
        yield "data: Model is still loading, please wait...\n\n"
        return

    try:
        # Format prompt
        messages = [
            {"role": "system", "content": "You are Stanley AI, a helpful assistant."},
            {"role": "user", "content": user_message}
        ]

        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        device = model.device
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Generate token by token (greedy argmax decoding).
        with torch.no_grad():
            generated = inputs['input_ids'].clone()
            for _ in range(max_tokens):
                # BUGFIX: the original condition was inverted — it passed
                # attention_mask=None exactly when the tokenizer HAD produced
                # a mask, and the mask never grew with the sequence. Always
                # provide a mask covering everything generated so far (no
                # padding exists here, so an all-ones mask is correct).
                outputs = model(
                    input_ids=generated,
                    attention_mask=torch.ones_like(generated),
                    use_cache=True
                )

                # Get next token
                next_token_logits = outputs.logits[:, -1, :]
                next_token = torch.argmax(next_token_logits, dim=-1, keepdim=True)

                # Check for eos
                if next_token.item() == tokenizer.eos_token_id:
                    break

                # Decode and yield
                generated = torch.cat([generated, next_token], dim=-1)
                token_text = tokenizer.decode(next_token[0], skip_special_tokens=True)

                yield f"data: {json.dumps({'token': token_text})}\n\n"

    except Exception as e:
        logger.error(f"Streaming error: {e}")
        yield f"data: {json.dumps({'error': str(e)[:100]})}\n\n"
187
 
188
# ============================================================================
# CACHE SYSTEM FOR REPEATED QUERIES
# ============================================================================

# Bounded FIFO cache keyed on a normalized query prefix.
response_cache = {}
CACHE_SIZE = 100

def _cache_key(query):
    """Normalize *query* into its cache key: lowercased, trimmed, 100 chars."""
    return query.lower().strip()[:100]

def get_cached_response(query):
    """Return the cached response for *query*, or None on a miss."""
    return response_cache.get(_cache_key(query))

def cache_response(query, response):
    """Store *response* under *query*, evicting the oldest entry when full."""
    if len(response_cache) >= CACHE_SIZE:
        # dicts preserve insertion order, so the first key is the oldest entry
        response_cache.pop(next(iter(response_cache)))
    response_cache[_cache_key(query)] = response
207
+
208
+ # ============================================================================
209
+ # FLASK ROUTES - TEXT ONLY
210
  # ============================================================================
211
 
212
@app.route('/')
def home():
    """Landing endpoint: service metadata and a map of available routes."""
    payload = {
        "name": "Stanley AI - Text Only",
        "version": "4.0",
        "model": MODEL_NAME,
        "status": "ready" if model_loaded else "loading",
        "optimized_for": "huggingface-spaces",
        "endpoints": {
            "chat": "/api/chat",
            "stream": "/api/chat/stream",
            "status": "/api/status",
        },
        "note": "Ultra-fast text-only version using Qwen 0.5B",
    }
    return jsonify(payload)
227
 
228
@app.route('/api/chat', methods=['POST', 'GET'])
def chat():
    """Main chat endpoint - supports both POST and GET for testing."""
    start_time = time.time()

    try:
        # POST carries the message in a JSON body; GET in the query string.
        if request.method == 'POST':
            data = request.get_json()
            if not data:
                return jsonify({"error": "No JSON data provided"}), 400
            user_message = data.get('message', '')
        else:
            user_message = request.args.get('message', 'Hello')

        if not user_message:
            return jsonify({"error": "No message provided"}), 400

        # Serve repeated queries straight from the cache.
        cached = get_cached_response(user_message)
        if cached:
            logger.info("📦 Using cached response")
            return jsonify({
                "response": cached,
                "cached": True,
                "response_time": round(time.time() - start_time, 3),
                "model": MODEL_NAME,
            })

        # Cache miss: generate, then remember the answer.
        response = generate_quick(user_message)
        cache_response(user_message, response)
        response_time = round(time.time() - start_time, 3)

        return jsonify({
            "response": response,
            "cached": False,
            "response_time": response_time,
            "tokens": len(response.split()),
            "model": MODEL_NAME,
            "status": "success",
        })

    except Exception as e:
        logger.error(f"Chat error: {e}")
        return jsonify({
            "error": f"Error: {str(e)[:200]}",
            "status": "error",
        }), 500
280
 
281
@app.route('/api/chat/stream')
def chat_stream():
    """Streaming chat endpoint (server-sent events)."""
    user_message = request.args.get('message', 'Hello')

    def generate():
        # Relay every SSE chunk from the generator, then signal completion.
        yield from generate_streaming(user_message)
        yield "data: [DONE]\n\n"

    # Disable proxy buffering so tokens reach the client as they are made.
    return app.response_class(
        generate(),
        mimetype='text/event-stream',
        headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'},
    )
 
 
 
 
 
 
 
 
 
 
 
 
299
 
300
@app.route('/api/status')
def status():
    """Health check: model, device, cache and GPU-memory snapshot."""
    if torch.cuda.is_available():
        memory = f"{torch.cuda.memory_allocated() / 1024**2:.1f} MB"
    else:
        memory = "CPU mode"

    return jsonify({
        "model_loaded": model_loaded,
        "model_name": MODEL_NAME,
        "device": str(model.device) if model_loaded else "none",
        "cache_size": len(response_cache),
        "timestamp": time.time(),
        "memory_allocated": memory,
    })
311
+
312
@app.route('/api/test')
def test():
    """Quick test endpoint: run canned prompts and report per-query timings."""
    test_queries = [
        "Hello, how are you?",
        "What is AI?",
        "Tell me a joke",
        "Explain quantum computing simply",
    ]

    results = []
    # Only exercise the first two prompts to keep the endpoint fast.
    for query in test_queries[:2]:
        start = time.time()
        response = generate_quick(query, max_tokens=100)
        time_taken = round(time.time() - start, 3)
        snippet = response[:100] + "..." if len(response) > 100 else response
        results.append({
            "query": query,
            "response": snippet,
            "time": time_taken,
        })

    average = round(sum(r['time'] for r in results) / len(results), 3) if results else 0
    return jsonify({
        "tests": results,
        "average_time": average,
    })
337
 
338
  # ============================================================================
339
+ # STARTUP OPTIMIZATION
340
+ # ============================================================================
341
+
342
# BUGFIX: Flask deprecated before_first_request in 2.2 and removed it in
# 2.3/3.x, so the original decorator crashes at import time on current Flask.
# Emulate it with a flag-guarded before_request hook instead.
_startup_done = False

@app.before_request
def startup():
    """Load model on first request to avoid startup timeout."""
    global _startup_done
    if _startup_done:
        return
    _startup_done = True
    if not model_loaded:
        load_model_fast()

# Preload model immediately if not on Spaces (for local testing)
if not ON_SPACES:
    logger.info("🌍 Local mode - loading model immediately")
    load_model_fast()
352
+
353
+ # ============================================================================
354
+ # MAIN
355
  # ============================================================================
 
 
 
 
 
 
 
 
356
 
357
if __name__ == '__main__':
    banner = "=" * 50
    print(banner)
    print("🚀 STANLEY AI - Ultra Fast Text Edition")
    print(f"📦 Model: {MODEL_NAME}")
    print(f"🌍 Platform: {'Hugging Face Spaces' if ON_SPACES else 'Local'}")
    print(f"⚡ Optimized for: {'CPU' if not torch.cuda.is_available() else 'GPU'}")
    print(banner)

    # Load model in a background thread so Spaces' health check does not
    # time out waiting for the download.
    import threading
    if ON_SPACES and not model_loaded:
        print("🔄 Loading model in background thread...")
        thread = threading.Thread(target=load_model_fast, daemon=True)
        thread.start()

    # Run app on port 7860 (the Spaces default) unless PORT overrides it.
    port = int(os.environ.get('PORT', 7860))
    app.run(
        debug=False,
        host='0.0.0.0',
        port=port,
        threaded=True,
    )
+ )