turtle170 committed on
Commit
d82c853
·
verified ·
1 Parent(s): 72e0339

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -68
app.py CHANGED
@@ -245,7 +245,7 @@ class TokenManager:
245
  self.user_tokens[username] = {
246
  "balance": float('inf'),
247
  "start_time": time.time(),
248
- "purchases": {"batch_multiplier": 1, "token_limit": 2048},
249
  "total_spent": 0.0,
250
  "is_owner": True,
251
  "username": username
@@ -255,7 +255,7 @@ class TokenManager:
255
  self.user_tokens[username] = {
256
  "balance": MONTHLY_TOKEN_CREDITS,
257
  "start_time": time.time(),
258
- "purchases": {"batch_multiplier": 1, "token_limit": 2048},
259
  "total_spent": 0.0,
260
  "is_owner": False,
261
  "username": username,
@@ -328,62 +328,93 @@ class TokenManager:
328
  last_reset = self.user_tokens[username].get("last_reset", time.time())
329
  time_until_reset = 30 * 24 * 60 * 60 - (time.time() - last_reset)
330
  days_left = int(time_until_reset / (24 * 60 * 60))
331
- return False, f"❌ Out of tokens! Resets in {days_left} days. Current balance: 0.00"
332
 
333
- return True, f"✅ Access granted. Balance: {balance:.2f} tokens"
334
 
335
- def purchase_batch_upgrade(self, username: str) -> tuple:
336
- """Purchase batch size upgrade (exponential cost). Free for owner."""
337
  if not username:
338
- return False, "❌ Please login first"
339
 
340
  self.initialize_user(username)
341
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  # Owner gets free upgrades
343
  if self.user_tokens[username].get("is_owner", False):
344
- current_mult = self.user_tokens[username]["purchases"]["batch_multiplier"]
345
- self.user_tokens[username]["purchases"]["batch_multiplier"] = current_mult * 2
346
- new_mult = current_mult * 2
347
- logger.info(f"[TOKEN] 👑 OWNER free batch upgrade: {current_mult}x → {new_mult}x")
348
- return True, f"👑 FREE UPGRADE! Batch now {new_mult}x!"
349
 
350
- current_mult = self.user_tokens[username]["purchases"]["batch_multiplier"]
351
- upgrade_level = int(math.log2(current_mult)) if current_mult > 1 else 0
352
- cost = BATCH_UPGRADE_BASE_COST * (2 ** upgrade_level)
353
 
354
  if self.user_tokens[username]["balance"] >= cost:
355
  self.user_tokens[username]["balance"] -= cost
356
- self.user_tokens[username]["purchases"]["batch_multiplier"] = current_mult * 2
357
- new_mult = current_mult * 2
358
- logger.info(f"[TOKEN] Batch upgrade: {current_mult}x → {new_mult}x | Cost: {cost:.5f}")
359
- return True, f"✅ Batch upgraded to {new_mult}x! (-{cost:.5f} tokens)"
 
360
  else:
361
- return False, f"❌ Insufficient tokens! Need {cost:.5f}, have {self.user_tokens[username]['balance']:.2f}"
362
 
363
- def purchase_token_upgrade(self, username: str, extra_tokens: int = 1000) -> tuple:
364
- """Purchase extra response token length. Free for owner."""
365
  if not username:
366
- return False, "❌ Please login first"
367
 
368
  self.initialize_user(username)
369
 
 
 
 
 
 
 
 
 
370
  # Owner gets free upgrades
371
  if self.user_tokens[username].get("is_owner", False):
372
- self.user_tokens[username]["purchases"]["token_limit"] += extra_tokens
373
- new_limit = self.user_tokens[username]["purchases"]["token_limit"]
374
- logger.info(f"[TOKEN] 👑 OWNER free token upgrade: +{extra_tokens} tokens")
375
- return True, f"👑 FREE UPGRADE! Token limit now {new_limit}!"
 
376
 
377
- cost = (extra_tokens / 1000) * TOKEN_UPGRADE_COST_PER_1K
 
378
 
379
  if self.user_tokens[username]["balance"] >= cost:
380
  self.user_tokens[username]["balance"] -= cost
381
- self.user_tokens[username]["purchases"]["token_limit"] += extra_tokens
382
- new_limit = self.user_tokens[username]["purchases"]["token_limit"]
383
- logger.info(f"[TOKEN] Token limit upgrade: +{extra_tokens} tokens | Cost: {cost:.5f}")
384
- return True, f"✅ Token limit now {new_limit}! (-{cost:.5f} tokens)"
 
385
  else:
386
- return False, f"❌ Insufficient tokens! Need {cost:.5f}, have {self.user_tokens[username]['balance']:.2f}"
387
 
388
  def get_balance(self, username: str) -> float:
389
  """Get user's current token balance"""
@@ -418,7 +449,7 @@ class TokenManager:
418
  stats = self.user_tokens[username]
419
 
420
  if stats.get("is_owner", False):
421
- return f"👑 Owner session ended. Welcome back anytime, {stats['username']}!"
422
 
423
  logger.info(f"[TOKEN] Session ended: Spent {stats['total_spent']:.2f}, Remaining {stats['balance']:.2f}")
424
  return f"Session ended. You spent {stats['total_spent']:.2f} tokens this session. Balance: {stats['balance']:.2f}"
@@ -560,7 +591,7 @@ class ZeroEngine:
560
  logger.info(f"[PREPROCESS] Tokenizing {len(text)} chars in background...")
561
  tokens = self.llm.tokenize(text.encode("utf-8"))
562
  self.preprocessed_tokens = tokens
563
- logger.info(f"[PREPROCESS] ✅ Ready: {len(tokens)} tokens cached")
564
  except Exception as e:
565
  logger.error(f"[PREPROCESS] Failed: {e}")
566
  self.preprocessed_tokens = None
@@ -637,7 +668,7 @@ class ZeroEngine:
637
  """HYPER-OPTIMIZED Boot kernel with format auto-detection and Gemma fixes"""
638
  try:
639
  if not repo or not filename:
640
- return "🔴 ERROR: Repository or filename missing"
641
 
642
  logger.info(f"[BOOT] Starting download: {filename} from {repo}")
643
 
@@ -658,17 +689,17 @@ class ZeroEngine:
658
  logger.info(f"[BOOT] Download complete: {path}")
659
  except Exception as e:
660
  logger.error(f"[BOOT] Download failed: {e}")
661
- return f"🔴 DOWNLOAD FAILED: {str(e)}"
662
 
663
  # Check if model is cached
664
  is_cached = model_cache.is_cached(path)
665
- cache_status = "🎯 CACHED" if is_cached else "🆕 NEW"
666
 
667
  # Validate before loading
668
  valid, msg = ResourceMonitor.validate_deployment(path)
669
  if not valid:
670
  logger.warning(f"[BOOT] Validation failed: {msg}")
671
- return f"🔴 VALIDATION FAILED: {msg}"
672
 
673
  logger.info(f"[BOOT] Validation passed ({cache_status}), applying {quant_config['type']} optimizations for {model_format.upper()}...")
674
 
@@ -676,13 +707,13 @@ class ZeroEngine:
676
  with self.kernel_lock:
677
  # WRECK OLD MODEL
678
  if self.llm:
679
- logger.info("[BOOT] 💣 WRECKING old model...")
680
  try:
681
  model_cache.wreck_old_model_cache()
682
  del self.llm
683
  self.llm = None
684
  nuclear_ram_clear()
685
- logger.info("[BOOT] ✅ Old model DESTROYED")
686
  except Exception as e:
687
  logger.warning(f"[BOOT] Cleanup warning: {e}")
688
 
@@ -693,13 +724,13 @@ class ZeroEngine:
693
  # CPU-OPTIMIZED BATCH CALCULATION - Very aggressive for 16GB RAM
694
  # Base calculation: use more RAM for batching on CPU
695
  base_batch = int(512 * available_ram_gb / 8) # More aggressive base
696
- optimal_batch = int(base_batch * quant_config["batch_multiplier"])
697
 
698
- # Apply user's batch multiplier from token purchases
699
  if session_id:
700
- user_batch_mult = token_manager.get_purchases(session_id)["batch_multiplier"]
701
- optimal_batch = int(optimal_batch * user_batch_mult)
702
- logger.info(f"[TOKEN] User batch multiplier: {user_batch_mult}x")
703
 
704
  # CPU can handle larger batches with quantized models
705
  optimal_batch = max(1024, min(8192, optimal_batch)) # 1024-8192 range for CPU
@@ -772,19 +803,19 @@ class ZeroEngine:
772
  except:
773
  pass
774
 
775
- logger.info("[BOOT] 🚀 CPU-OPTIMIZED MODEL READY!")
776
- return f"🟢 {model_format.upper()} {quant_config['type']} {cache_status} | CPU:{optimal_threads}T | B:{optimal_batch} | Ctx:{optimal_ctx}"
777
 
778
  except Exception as e:
779
  logger.error(f"[BOOT] Model loading failed: {e}")
780
  self.llm = None
781
  nuclear_ram_clear()
782
- return f"🔴 LOAD FAILED: {str(e)}"
783
 
784
  except Exception as e:
785
  logger.error(f"[BOOT] Unexpected error: {e}")
786
  nuclear_ram_clear()
787
- return f"🔴 BOOT FAILURE: {str(e)}"
788
 
789
  def stitch_cache(self, ghost_text: str) -> str:
790
  """Prime KV cache with ghost context"""
@@ -794,6 +825,7 @@ class ZeroEngine:
794
  def _bg_eval():
795
  self.is_prefilling = True
796
  try:
 
797
  tokens = self.llm.tokenize(ghost_text.encode("utf-8"))
798
  self.llm.eval(tokens)
799
  logger.info(f"Ghost cache primed: {len(tokens)} tokens")
@@ -804,7 +836,7 @@ class ZeroEngine:
804
  self.is_prefilling = False
805
 
806
  threading.Thread(target=_bg_eval, daemon=True).start()
807
- return "⚡ Primed"
808
 
809
  def inference_generator(self, prompt: str, history: List[Dict], ghost_context: str, repo: str, quant: str, profile: gr.OAuthProfile | None) -> Generator:
810
  username = profile.username if profile else "anonymous"
@@ -817,7 +849,7 @@ class ZeroEngine:
817
  # AUTO-BOOT: If model not loaded, auto-boot default model
818
  if not self.llm:
819
  logger.info("[AUTO-BOOT] No model loaded, initiating auto-boot...")
820
- history.append({"role": "assistant", "content": "🔄 Auto-booting model, please wait..."})
821
  yield history
822
 
823
  # Use provided repo/quant or fallback to defaults
@@ -826,12 +858,12 @@ class ZeroEngine:
826
 
827
  boot_result = self.boot_kernel(boot_repo, boot_quant)
828
 
829
- if "🔴" in boot_result or "FAILED" in boot_result:
830
- history[-1]["content"] = f"❌ Auto-boot failed: {boot_result}\n\nPlease manually SCAN and BOOT a model."
831
  yield history
832
  return
833
 
834
- history[-1]["content"] = f"✅ {boot_result}\n\nProcessing your request..."
835
  yield history
836
  time.sleep(0.5) # Brief pause for user to see the message
837
 
@@ -839,7 +871,7 @@ class ZeroEngine:
839
  cache_key = f"{ghost_context}:{prompt}"
840
  if cache_key in self.prompt_cache:
841
  self.perf_stats["cache_hits"] += 1
842
- logger.info("⚡ CACHE HIT - Instant response!")
843
  history.append({"role": "user", "content": prompt})
844
  history.append({"role": "assistant", "content": self.prompt_cache[cache_key]})
845
  yield history
@@ -863,7 +895,7 @@ class ZeroEngine:
863
  # Get max tokens from user purchases
864
  max_tokens = 2048
865
  if username:
866
- max_tokens = token_manager.get_purchases(username)["token_limit"]
867
 
868
  # HYPER-OPTIMIZED CPU INFERENCE SETTINGS
869
  stream = self.llm(
@@ -1125,12 +1157,14 @@ with gr.Blocks(title="ZeroEngine V0.2", css=CUSTOM_CSS) as demo:
1125
 
1126
  gr.Markdown("---")
1127
 
1128
- # Token Purchases
1129
- gr.Markdown("### 💎 Token Upgrades")
 
 
1130
  with gr.Row():
1131
- batch_upgrade_btn = gr.Button("🚀 Batch x2", size="sm", variant="secondary")
1132
- token_upgrade_btn = gr.Button("📈 +1K Tokens", size="sm", variant="secondary")
1133
- purchase_status = gr.Markdown("Ready to upgrade!")
1134
 
1135
  gr.Markdown("---")
1136
 
@@ -1185,13 +1219,13 @@ with gr.Blocks(title="ZeroEngine V0.2", css=CUSTOM_CSS) as demo:
1185
  result = kernel.boot_kernel(repo, file, username)
1186
  return result
1187
 
1188
- def on_batch_upgrade():
1189
- success, msg = token_manager.purchase_batch_upgrade(session_id)
1190
  balance = token_manager.get_balance(session_id)
1191
  return msg, f"{balance}"
1192
 
1193
- def on_token_upgrade():
1194
- success, msg = token_manager.purchase_token_upgrade(session_id, 1000)
1195
  balance = token_manager.get_balance(session_id)
1196
  return msg, f"{balance}"
1197
 
@@ -1215,8 +1249,8 @@ with gr.Blocks(title="ZeroEngine V0.2", css=CUSTOM_CSS) as demo:
1215
  boot_btn.click(on_boot, [repo_input, quant_dropdown], [boot_status])
1216
 
1217
  # Token purchases
1218
- batch_upgrade_btn.click(on_batch_upgrade, None, [purchase_status, token_balance])
1219
- token_upgrade_btn.click(on_token_upgrade, None, [purchase_status, token_balance])
1220
  end_session_btn.click(on_end_session, None, [session_status])
1221
 
1222
  # Custom parameter updates
 
245
  self.user_tokens[username] = {
246
  "balance": float('inf'),
247
  "start_time": time.time(),
248
+ "purchases": {"batch_size": 512, "max_tokens": 2048},
249
  "total_spent": 0.0,
250
  "is_owner": True,
251
  "username": username
 
255
  self.user_tokens[username] = {
256
  "balance": MONTHLY_TOKEN_CREDITS,
257
  "start_time": time.time(),
258
+ "purchases": {"batch_size": 512, "max_tokens": 2048},
259
  "total_spent": 0.0,
260
  "is_owner": False,
261
  "username": username,
 
328
  last_reset = self.user_tokens[username].get("last_reset", time.time())
329
  time_until_reset = 30 * 24 * 60 * 60 - (time.time() - last_reset)
330
  days_left = int(time_until_reset / (24 * 60 * 60))
331
+ return False, f" Out of tokens! Resets in {days_left} days. Current balance: 0.00"
332
 
333
+ return True, f" Access granted. Balance: {balance:.2f} tokens"
334
 
335
+ def purchase_batch_upgrade(self, username: str, batch_size: int = 512) -> tuple:
336
+ """Purchase specific batch size upgrade. Free for owner. Auto-rounds to nearest power of 2."""
337
  if not username:
338
+ return False, " Please login first"
339
 
340
  self.initialize_user(username)
341
 
342
+ # SMART ROUNDING: Round to nearest power of 2 for optimal performance
343
+ if batch_size <= 0:
344
+ batch_size = 512
345
+
346
+ # Find nearest power of 2
347
+ def round_to_power_of_2(n):
348
+ if n <= 128:
349
+ return 128
350
+ # Find the next power of 2
351
+ power = 1
352
+ while power < n:
353
+ power *= 2
354
+ # Check if previous power of 2 is closer
355
+ prev_power = power // 2
356
+ if abs(n - prev_power) <= abs(n - power):
357
+ return prev_power
358
+ return power
359
+
360
+ rounded_batch = round_to_power_of_2(batch_size)
361
+
362
  # Owner gets free upgrades
363
  if self.user_tokens[username].get("is_owner", False):
364
+ self.user_tokens[username]["purchases"]["batch_size"] = rounded_batch
365
+ logger.info(f"[TOKEN] OWNER set batch size to: {rounded_batch} (rounded from {batch_size})")
366
+ if rounded_batch != batch_size:
367
+ return True, f" Batch size set to {rounded_batch} (rounded from {batch_size})!"
368
+ return True, f" Batch size set to {rounded_batch}!"
369
 
370
+ # Cost based on rounded batch size (larger batches cost more)
371
+ cost = (rounded_batch / 1000) * 0.01 # 0.01 tokens per 1000 batch size
 
372
 
373
  if self.user_tokens[username]["balance"] >= cost:
374
  self.user_tokens[username]["balance"] -= cost
375
+ self.user_tokens[username]["purchases"]["batch_size"] = rounded_batch
376
+ logger.info(f"[TOKEN] Batch size set to {rounded_batch} (rounded from {batch_size}) | Cost: {cost:.5f}")
377
+ if rounded_batch != batch_size:
378
+ return True, f" Batch size set to {rounded_batch} (rounded from {batch_size})! (-{cost:.5f} tokens)"
379
+ return True, f" Batch size set to {rounded_batch}! (-{cost:.5f} tokens)"
380
  else:
381
+ return False, f" Insufficient tokens! Need {cost:.5f}, have {self.user_tokens[username]['balance']:.2f}"
382
 
383
+ def purchase_token_upgrade(self, username: str, max_tokens: int = 2048) -> tuple:
384
+ """Purchase specific max tokens setting. Free for owner. Auto-rounds to nearest 256."""
385
  if not username:
386
+ return False, " Please login first"
387
 
388
  self.initialize_user(username)
389
 
390
+ # SMART ROUNDING: Round to nearest 256 for optimal memory alignment
391
+ if max_tokens <= 0:
392
+ max_tokens = 2048
393
+
394
+ # Find nearest multiple of 256
395
+ rounded_tokens = ((max_tokens + 128) // 256) * 256
396
+ rounded_tokens = max(256, min(8192, rounded_tokens)) # Clamp between 256-8192
397
+
398
  # Owner gets free upgrades
399
  if self.user_tokens[username].get("is_owner", False):
400
+ self.user_tokens[username]["purchases"]["max_tokens"] = rounded_tokens
401
+ logger.info(f"[TOKEN] OWNER set max tokens to: {rounded_tokens} (rounded from {max_tokens})")
402
+ if rounded_tokens != max_tokens:
403
+ return True, f" Max tokens set to {rounded_tokens} (rounded from {max_tokens})!"
404
+ return True, f" Max tokens set to {rounded_tokens}!"
405
 
406
+ # Cost based on rounded max tokens (larger context costs more)
407
+ cost = (rounded_tokens / 1000) * TOKEN_UPGRADE_COST_PER_1K
408
 
409
  if self.user_tokens[username]["balance"] >= cost:
410
  self.user_tokens[username]["balance"] -= cost
411
+ self.user_tokens[username]["purchases"]["max_tokens"] = rounded_tokens
412
+ logger.info(f"[TOKEN] Max tokens set to {rounded_tokens} (rounded from {max_tokens}) | Cost: {cost:.5f}")
413
+ if rounded_tokens != max_tokens:
414
+ return True, f" Max tokens set to {rounded_tokens} (rounded from {max_tokens})! (-{cost:.5f} tokens)"
415
+ return True, f" Max tokens set to {rounded_tokens}! (-{cost:.5f} tokens)"
416
  else:
417
+ return False, f" Insufficient tokens! Need {cost:.5f}, have {self.user_tokens[username]['balance']:.2f}"
418
 
419
  def get_balance(self, username: str) -> float:
420
  """Get user's current token balance"""
 
449
  stats = self.user_tokens[username]
450
 
451
  if stats.get("is_owner", False):
452
+ return f" OWNER session ended. Welcome back anytime, {stats['username']}!"
453
 
454
  logger.info(f"[TOKEN] Session ended: Spent {stats['total_spent']:.2f}, Remaining {stats['balance']:.2f}")
455
  return f"Session ended. You spent {stats['total_spent']:.2f} tokens this session. Balance: {stats['balance']:.2f}"
 
591
  logger.info(f"[PREPROCESS] Tokenizing {len(text)} chars in background...")
592
  tokens = self.llm.tokenize(text.encode("utf-8"))
593
  self.preprocessed_tokens = tokens
594
+ logger.info(f"[PREPROCESS] Ready: {len(tokens)} tokens cached")
595
  except Exception as e:
596
  logger.error(f"[PREPROCESS] Failed: {e}")
597
  self.preprocessed_tokens = None
 
668
  """HYPER-OPTIMIZED Boot kernel with format auto-detection and Gemma fixes"""
669
  try:
670
  if not repo or not filename:
671
+ return " ERROR: Repository or filename missing"
672
 
673
  logger.info(f"[BOOT] Starting download: {filename} from {repo}")
674
 
 
689
  logger.info(f"[BOOT] Download complete: {path}")
690
  except Exception as e:
691
  logger.error(f"[BOOT] Download failed: {e}")
692
+ return f" DOWNLOAD FAILED: {str(e)}"
693
 
694
  # Check if model is cached
695
  is_cached = model_cache.is_cached(path)
696
+ cache_status = " CACHED" if is_cached else " NEW"
697
 
698
  # Validate before loading
699
  valid, msg = ResourceMonitor.validate_deployment(path)
700
  if not valid:
701
  logger.warning(f"[BOOT] Validation failed: {msg}")
702
+ return f" VALIDATION FAILED: {msg}"
703
 
704
  logger.info(f"[BOOT] Validation passed ({cache_status}), applying {quant_config['type']} optimizations for {model_format.upper()}...")
705
 
 
707
  with self.kernel_lock:
708
  # WRECK OLD MODEL
709
  if self.llm:
710
+ logger.info("[BOOT] WRECKING old model...")
711
  try:
712
  model_cache.wreck_old_model_cache()
713
  del self.llm
714
  self.llm = None
715
  nuclear_ram_clear()
716
+ logger.info("[BOOT] Old model DESTROYED")
717
  except Exception as e:
718
  logger.warning(f"[BOOT] Cleanup warning: {e}")
719
 
 
724
  # CPU-OPTIMIZED BATCH CALCULATION - Very aggressive for 16GB RAM
725
  # Base calculation: use more RAM for batching on CPU
726
  base_batch = int(512 * available_ram_gb / 8) # More aggressive base
727
+ optimal_batch = base_batch
728
 
729
+ # Apply user's batch size from token purchases
730
  if session_id:
731
+ user_batch_size = token_manager.get_purchases(session_id)["batch_size"]
732
+ optimal_batch = user_batch_size
733
+ logger.info(f"[TOKEN] User batch size: {user_batch_size}")
734
 
735
  # CPU can handle larger batches with quantized models
736
  optimal_batch = max(1024, min(8192, optimal_batch)) # 1024-8192 range for CPU
 
803
  except:
804
  pass
805
 
806
+ logger.info("[BOOT] CPU-OPTIMIZED MODEL READY!")
807
+ return f" {model_format.upper()} {quant_config['type']} {cache_status} | CPU:{optimal_threads}T | B:{optimal_batch} | Ctx:{optimal_ctx}"
808
 
809
  except Exception as e:
810
  logger.error(f"[BOOT] Model loading failed: {e}")
811
  self.llm = None
812
  nuclear_ram_clear()
813
+ return f" LOAD FAILED: {str(e)}"
814
 
815
  except Exception as e:
816
  logger.error(f"[BOOT] Unexpected error: {e}")
817
  nuclear_ram_clear()
818
+ return f" BOOT FAILURE: {str(e)}"
819
 
820
  def stitch_cache(self, ghost_text: str) -> str:
821
  """Prime KV cache with ghost context"""
 
825
  def _bg_eval():
826
  self.is_prefilling = True
827
  try:
828
+ logger.info(f"[PREPROCESS] Tokenizing {len(ghost_text)} chars in background...")
829
  tokens = self.llm.tokenize(ghost_text.encode("utf-8"))
830
  self.llm.eval(tokens)
831
  logger.info(f"Ghost cache primed: {len(tokens)} tokens")
 
836
  self.is_prefilling = False
837
 
838
  threading.Thread(target=_bg_eval, daemon=True).start()
839
+ return " Primed"
840
 
841
  def inference_generator(self, prompt: str, history: List[Dict], ghost_context: str, repo: str, quant: str, profile: gr.OAuthProfile | None) -> Generator:
842
  username = profile.username if profile else "anonymous"
 
849
  # AUTO-BOOT: If model not loaded, auto-boot default model
850
  if not self.llm:
851
  logger.info("[AUTO-BOOT] No model loaded, initiating auto-boot...")
852
+ history.append({"role": "assistant", "content": " Auto-booting model, please wait..."})
853
  yield history
854
 
855
  # Use provided repo/quant or fallback to defaults
 
858
 
859
  boot_result = self.boot_kernel(boot_repo, boot_quant)
860
 
861
+ if " " in boot_result or "FAILED" in boot_result:
862
+ history[-1]["content"] = f" Auto-boot failed: {boot_result}\n\nPlease manually SCAN and BOOT a model."
863
  yield history
864
  return
865
 
866
+ history[-1]["content"] = f" {boot_result}\n\nProcessing your request..."
867
  yield history
868
  time.sleep(0.5) # Brief pause for user to see the message
869
 
 
871
  cache_key = f"{ghost_context}:{prompt}"
872
  if cache_key in self.prompt_cache:
873
  self.perf_stats["cache_hits"] += 1
874
+ logger.info(" CACHE HIT - Instant response!")
875
  history.append({"role": "user", "content": prompt})
876
  history.append({"role": "assistant", "content": self.prompt_cache[cache_key]})
877
  yield history
 
895
  # Get max tokens from user purchases
896
  max_tokens = 2048
897
  if username:
898
+ max_tokens = token_manager.get_purchases(username)["max_tokens"]
899
 
900
  # HYPER-OPTIMIZED CPU INFERENCE SETTINGS
901
  stream = self.llm(
 
1157
 
1158
  gr.Markdown("---")
1159
 
1160
+ # Performance Settings
1161
+ gr.Markdown("### 💎 Performance Settings")
1162
+ batch_size_input = gr.Number(label="Batch Size", value=512, minimum=128, maximum=8192, step=128)
1163
+ max_tokens_input = gr.Number(label="Max Tokens", value=2048, minimum=512, maximum=8192, step=256)
1164
  with gr.Row():
1165
+ batch_upgrade_btn = gr.Button("🚀 Set Batch Size", size="sm", variant="secondary")
1166
+ token_upgrade_btn = gr.Button("📈 Set Max Tokens", size="sm", variant="secondary")
1167
+ purchase_status = gr.Markdown("Ready to configure!")
1168
 
1169
  gr.Markdown("---")
1170
 
 
1219
  result = kernel.boot_kernel(repo, file, username)
1220
  return result
1221
 
1222
+ def on_batch_upgrade(batch_size):
1223
+ success, msg = token_manager.purchase_batch_upgrade(session_id, int(batch_size))
1224
  balance = token_manager.get_balance(session_id)
1225
  return msg, f"{balance}"
1226
 
1227
+ def on_token_upgrade(max_tokens):
1228
+ success, msg = token_manager.purchase_token_upgrade(session_id, int(max_tokens))
1229
  balance = token_manager.get_balance(session_id)
1230
  return msg, f"{balance}"
1231
 
 
1249
  boot_btn.click(on_boot, [repo_input, quant_dropdown], [boot_status])
1250
 
1251
  # Token purchases
1252
+ batch_upgrade_btn.click(on_batch_upgrade, [batch_size_input], [purchase_status, token_balance])
1253
+ token_upgrade_btn.click(on_token_upgrade, [max_tokens_input], [purchase_status, token_balance])
1254
  end_session_btn.click(on_end_session, None, [session_status])
1255
 
1256
  # Custom parameter updates