Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -225,91 +225,203 @@ class ModelCacheManager:
|
|
| 225 |
# --- TOKEN MANAGER ---
|
| 226 |
class TokenManager:
|
| 227 |
def __init__(self):
|
| 228 |
-
self.user_tokens = {} # {
|
| 229 |
-
self.
|
| 230 |
|
| 231 |
-
def
|
| 232 |
-
"""
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
return hashlib.md5(str(time.time()).encode()).hexdigest()[:8]
|
| 237 |
|
| 238 |
-
def initialize_user(self,
|
| 239 |
-
"""Initialize new user with monthly credits"""
|
| 240 |
-
if
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
|
| 249 |
-
def charge_usage(self,
|
| 250 |
-
"""Charge user for inference time. Returns True if successful"""
|
| 251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
|
| 253 |
cost = (duration_ms / 100.0) * TOKEN_COST_PER_100MS
|
| 254 |
|
| 255 |
-
if
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
return True
|
| 260 |
else:
|
| 261 |
-
|
|
|
|
|
|
|
| 262 |
return False
|
| 263 |
|
| 264 |
-
def
|
| 265 |
-
"""
|
| 266 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
|
| 268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
upgrade_level = int(math.log2(current_mult)) if current_mult > 1 else 0
|
| 270 |
cost = BATCH_UPGRADE_BASE_COST * (2 ** upgrade_level)
|
| 271 |
|
| 272 |
-
if self.user_tokens[
|
| 273 |
-
self.user_tokens[
|
| 274 |
-
self.user_tokens[
|
| 275 |
new_mult = current_mult * 2
|
| 276 |
logger.info(f"[TOKEN] Batch upgrade: {current_mult}x β {new_mult}x | Cost: {cost:.5f}")
|
| 277 |
return True, f"β
Batch upgraded to {new_mult}x! (-{cost:.5f} tokens)"
|
| 278 |
else:
|
| 279 |
-
return False, f"β Insufficient tokens! Need {cost:.5f}, have {self.user_tokens[
|
| 280 |
|
| 281 |
-
def purchase_token_upgrade(self,
|
| 282 |
-
"""Purchase extra response token length"""
|
| 283 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
|
| 285 |
cost = (extra_tokens / 1000) * TOKEN_UPGRADE_COST_PER_1K
|
| 286 |
|
| 287 |
-
if self.user_tokens[
|
| 288 |
-
self.user_tokens[
|
| 289 |
-
self.user_tokens[
|
| 290 |
-
new_limit = self.user_tokens[
|
| 291 |
logger.info(f"[TOKEN] Token limit upgrade: +{extra_tokens} tokens | Cost: {cost:.5f}")
|
| 292 |
return True, f"β
Token limit now {new_limit}! (-{cost:.5f} tokens)"
|
| 293 |
else:
|
| 294 |
-
return False, f"β Insufficient tokens! Need {cost:.5f}, have {self.user_tokens[
|
| 295 |
|
| 296 |
-
def get_balance(self,
|
| 297 |
"""Get user's current token balance"""
|
| 298 |
-
|
| 299 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
|
| 301 |
-
def get_purchases(self,
|
| 302 |
"""Get user's current purchases"""
|
| 303 |
-
|
| 304 |
-
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
-
def end_session(self,
|
| 307 |
"""End user session and log stats"""
|
| 308 |
-
if
|
| 309 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
logger.info(f"[TOKEN] Session ended: Spent {stats['total_spent']:.2f}, Remaining {stats['balance']:.2f}")
|
| 311 |
-
|
| 312 |
-
return f"Session ended. You spent {stats['total_spent']:.2f} tokens this session."
|
| 313 |
return "No active session found."
|
| 314 |
|
| 315 |
# Global token manager
|
|
@@ -830,7 +942,7 @@ class ZeroEngine:
|
|
| 830 |
threading.Thread(target=_bg_eval, daemon=True).start()
|
| 831 |
return "β‘ Primed"
|
| 832 |
|
| 833 |
-
def inference_generator(self, prompt: str, history: List[Dict], ghost_context: str, repo: str, quant: str) -> Generator:
|
| 834 |
# Update activity timestamp
|
| 835 |
self.update_activity()
|
| 836 |
|
|
@@ -885,8 +997,8 @@ class ZeroEngine:
|
|
| 885 |
try:
|
| 886 |
# Get max tokens from user purchases
|
| 887 |
max_tokens = 2048
|
| 888 |
-
if
|
| 889 |
-
max_tokens = token_manager.get_purchases(
|
| 890 |
|
| 891 |
# HYPER-OPTIMIZED CPU INFERENCE SETTINGS
|
| 892 |
stream = self.llm(
|
|
@@ -925,17 +1037,17 @@ class ZeroEngine:
|
|
| 925 |
self.perf_stats["peak_tps"] = tps
|
| 926 |
|
| 927 |
# Charge tokens every second
|
| 928 |
-
if int(elapsed * 1000) % 1000 < 100 and
|
| 929 |
-
token_manager.charge_usage(
|
| 930 |
|
| 931 |
# Update history with streaming content + performance metrics
|
| 932 |
-
balance = token_manager.get_balance(
|
| 933 |
history[-1]["content"] = f"{response_text}\n\n`β‘ {tps} t/s | π― Peak: {self.perf_stats['peak_tps']:.1f} t/s | π° {balance:.2f} tokens`"
|
| 934 |
yield history
|
| 935 |
|
| 936 |
# Final token charge for remaining time
|
| 937 |
-
if
|
| 938 |
-
token_manager.charge_usage(
|
| 939 |
|
| 940 |
# Update global performance stats
|
| 941 |
self.perf_stats["total_tokens"] += tokens_count
|
|
@@ -1061,7 +1173,7 @@ h1, h2, h3, h4, h5, h6 {
|
|
| 1061 |
kernel = ZeroEngine()
|
| 1062 |
|
| 1063 |
# Session ID for token tracking
|
| 1064 |
-
|
| 1065 |
|
| 1066 |
with gr.Blocks(title="ZeroEngine V0.2", css=CUSTOM_CSS) as demo:
|
| 1067 |
# Header with Token Display
|
|
|
|
| 225 |
# --- TOKEN MANAGER ---
|
| 226 |
class TokenManager:
    """In-memory ledger of per-user token balances and purchased upgrades.

    Every account lives in ``self.user_tokens`` keyed by username. Regular
    users start with ``MONTHLY_TOKEN_CREDITS`` and are topped back up once
    ``RESET_PERIOD_SECONDS`` have elapsed since their last reset; the owner
    account has an infinite balance and is never charged.

    Nothing is persisted: all balances are lost when the process restarts.
    """

    # Identity used whenever a caller passes an empty/None username.
    ANONYMOUS_USER = "anonymous"
    # Length of one billing period (30 days).
    RESET_PERIOD_SECONDS = 30 * 24 * 60 * 60
    # Response-length limit every account starts with.
    DEFAULT_TOKEN_LIMIT = 2048

    def __init__(self):
        self.user_tokens = {}  # {username: {"balance": float, "start_time": float, "purchases": {}}}
        self.owner_username = "turtle170"  # Owner gets infinite tokens

    def get_username(self) -> str:
        """Return the username to bill when no login information is available.

        Module-level code calls ``token_manager.get_username()`` but the class
        previously had no such method, which raises ``AttributeError`` at
        start-up. This returns the same fallback identity every other method
        uses for an empty username.

        NOTE(review): this does not identify the real visitor; wire it to the
        actual login mechanism if per-user accounting is required.
        """
        return self.ANONYMOUS_USER

    def is_owner(self, username: str) -> bool:
        """Check if user is the owner (case-insensitive comparison)."""
        if not username:
            return False
        return username.lower() == self.owner_username.lower()

    def initialize_user(self, username: str):
        """Create the account for ``username`` if it does not exist yet.

        Regular users receive ``MONTHLY_TOKEN_CREDITS``; the owner receives an
        infinite balance. Existing accounts are left untouched.
        """
        if not username:
            username = self.ANONYMOUS_USER
        if username in self.user_tokens:
            return

        now = time.time()
        owner = self.is_owner(username)
        account = {
            "balance": float("inf") if owner else MONTHLY_TOKEN_CREDITS,
            "start_time": now,
            "purchases": {
                "batch_multiplier": 1,
                "token_limit": self.DEFAULT_TOKEN_LIMIT,
            },
            "total_spent": 0.0,
            "is_owner": owner,
            "username": username,
        }
        if owner:
            logger.info("[TOKEN] 👑 OWNER %s initialized with INFINITE tokens!", username)
        else:
            # Only regular accounts take part in the monthly reset cycle.
            account["last_reset"] = now
            logger.info("[TOKEN] New user %s: %s tokens", username, MONTHLY_TOKEN_CREDITS)
        self.user_tokens[username] = account

    def check_monthly_reset(self, username: str):
        """Reset tokens if a month has passed since the user's last reset."""
        if not username or username not in self.user_tokens:
            return

        account = self.user_tokens[username]
        if account.get("is_owner", False):
            return  # Owner never needs reset

        now = time.time()
        if now - account.get("last_reset", now) > self.RESET_PERIOD_SECONDS:
            account["balance"] = MONTHLY_TOKEN_CREDITS
            account["last_reset"] = now
            account["total_spent"] = 0.0
            logger.info("[TOKEN] Monthly reset for %s: %s tokens", username, MONTHLY_TOKEN_CREDITS)

    def charge_usage(self, username: str, duration_ms: float) -> bool:
        """Charge user for inference time. Owner is never charged.

        Args:
            username: Account to bill; empty values bill the anonymous account.
            duration_ms: Amount of inference time to bill, in milliseconds.

        Returns:
            True if the full cost was deducted (or the user is the owner),
            False if the balance was empty or too small. In the latter case
            whatever was left is consumed and the balance becomes 0.
        """
        if not username:
            username = self.ANONYMOUS_USER

        self.initialize_user(username)
        self.check_monthly_reset(username)
        account = self.user_tokens[username]

        # Owner never gets charged
        if account.get("is_owner", False):
            return True

        # Clamp so a negative duration can never credit tokens to the user.
        cost = max(0.0, duration_ms / 100.0) * TOKEN_COST_PER_100MS

        if account["balance"] <= 0:
            logger.warning("[TOKEN] ❌ %s has 0 tokens! Access denied.", username)
            return False

        if account["balance"] >= cost:
            account["balance"] -= cost
            account["total_spent"] += cost
            logger.info(
                "[TOKEN] Charged %.4f tokens (%.0fms) | Remaining: %.2f",
                cost, duration_ms, account["balance"],
            )
            return True

        # Insufficient balance: the remainder is consumed, so record it as
        # spent before zeroing the balance to keep the session stats honest.
        account["total_spent"] += account["balance"]
        account["balance"] = 0
        logger.warning("[TOKEN] ❌ Insufficient balance! %s now at 0 tokens.", username)
        return False

    def can_use_engine(self, username: str) -> tuple:
        """Check if user can use the engine. Returns (bool, message)."""
        if not username:
            username = self.ANONYMOUS_USER

        self.initialize_user(username)
        self.check_monthly_reset(username)
        account = self.user_tokens[username]

        if account.get("is_owner", False):
            return True, "👑 Owner access granted"

        balance = account["balance"]
        if balance <= 0:
            now = time.time()
            elapsed = now - account.get("last_reset", now)
            days_left = int((self.RESET_PERIOD_SECONDS - elapsed) / (24 * 60 * 60))
            return False, f"❌ Out of tokens! Resets in {days_left} days. Current balance: 0.00"

        return True, f"✅ Access granted. Balance: {balance:.2f} tokens"

    def purchase_batch_upgrade(self, username: str) -> tuple:
        """Double the user's batch multiplier. Free for owner.

        The price doubles with every upgrade already bought:
        ``BATCH_UPGRADE_BASE_COST * 2 ** upgrade_level``.

        Returns:
            ``(success, message)``.
        """
        if not username:
            return False, "❌ Please login first"

        self.initialize_user(username)
        self.check_monthly_reset(username)
        account = self.user_tokens[username]
        purchases = account["purchases"]
        current_mult = purchases["batch_multiplier"]
        new_mult = current_mult * 2

        # Owner gets free upgrades
        if account.get("is_owner", False):
            purchases["batch_multiplier"] = new_mult
            logger.info("[TOKEN] 👑 OWNER free batch upgrade: %sx → %sx", current_mult, new_mult)
            return True, f"👑 FREE UPGRADE! Batch now {new_mult}x!"

        upgrade_level = int(math.log2(current_mult)) if current_mult > 1 else 0
        cost = BATCH_UPGRADE_BASE_COST * (2 ** upgrade_level)

        if account["balance"] < cost:
            return False, f"❌ Insufficient tokens! Need {cost:.5f}, have {account['balance']:.2f}"

        account["balance"] -= cost
        account["total_spent"] += cost  # purchases count towards spending too
        purchases["batch_multiplier"] = new_mult
        logger.info("[TOKEN] Batch upgrade: %sx → %sx | Cost: %.5f", current_mult, new_mult, cost)
        return True, f"✅ Batch upgraded to {new_mult}x! (-{cost:.5f} tokens)"

    def purchase_token_upgrade(self, username: str, extra_tokens: int = 1000) -> tuple:
        """Purchase extra response token length. Free for owner.

        Args:
            username: Account buying the upgrade; must be non-empty.
            extra_tokens: Positive number of tokens to add to the limit.

        Returns:
            ``(success, message)``.
        """
        if not username:
            return False, "❌ Please login first"
        # A zero/negative amount would yield a negative cost, i.e. credit the
        # balance while shrinking the limit, so refuse it outright.
        if extra_tokens <= 0:
            return False, "❌ Upgrade amount must be positive"

        self.initialize_user(username)
        self.check_monthly_reset(username)
        account = self.user_tokens[username]
        purchases = account["purchases"]

        # Owner gets free upgrades
        if account.get("is_owner", False):
            purchases["token_limit"] += extra_tokens
            logger.info("[TOKEN] 👑 OWNER free token upgrade: +%s tokens", extra_tokens)
            return True, f"👑 FREE UPGRADE! Token limit now {purchases['token_limit']}!"

        cost = (extra_tokens / 1000) * TOKEN_UPGRADE_COST_PER_1K

        if account["balance"] < cost:
            return False, f"❌ Insufficient tokens! Need {cost:.5f}, have {account['balance']:.2f}"

        account["balance"] -= cost
        account["total_spent"] += cost  # purchases count towards spending too
        purchases["token_limit"] += extra_tokens
        new_limit = purchases["token_limit"]
        logger.info("[TOKEN] Token limit upgrade: +%s tokens | Cost: %.5f", extra_tokens, cost)
        return True, f"✅ Token limit now {new_limit}! (-{cost:.5f} tokens)"

    def get_balance(self, username: str) -> float:
        """Get user's current token balance (``inf`` for the owner)."""
        if not username:
            username = self.ANONYMOUS_USER

        self.initialize_user(username)
        self.check_monthly_reset(username)
        balance = self.user_tokens[username]["balance"]

        # Show ∞ for owner
        if balance == float("inf"):
            return balance

        return round(max(0, balance), 2)  # Never show negative

    def get_purchases(self, username: str) -> dict:
        """Get user's current purchases (the live dict, not a copy)."""
        if not username:
            username = self.ANONYMOUS_USER

        self.initialize_user(username)
        return self.user_tokens[username]["purchases"]

    def end_session(self, username: str):
        """End user session and log stats. Returns a user-facing summary."""
        if not username or username not in self.user_tokens:
            return "No active session found."

        stats = self.user_tokens[username]
        if stats.get("is_owner", False):
            return f"👑 Owner session ended. Welcome back anytime, {stats['username']}!"

        logger.info(
            "[TOKEN] Session ended: Spent %.2f, Remaining %.2f",
            stats["total_spent"], stats["balance"],
        )
        return (
            f"Session ended. You spent {stats['total_spent']:.2f} tokens this session. "
            f"Balance: {stats['balance']:.2f}"
        )
|
| 426 |
|
| 427 |
# Global token manager
|
|
|
|
| 942 |
threading.Thread(target=_bg_eval, daemon=True).start()
|
| 943 |
return "β‘ Primed"
|
| 944 |
|
| 945 |
+
def inference_generator(self, prompt: str, history: List[Dict], ghost_context: str, repo: str, quant: str, username: str) -> Generator:
|
| 946 |
# Update activity timestamp
|
| 947 |
self.update_activity()
|
| 948 |
|
|
|
|
| 997 |
try:
|
| 998 |
# Get max tokens from user purchases
|
| 999 |
max_tokens = 2048
|
| 1000 |
+
if username:
|
| 1001 |
+
max_tokens = token_manager.get_purchases(username)["token_limit"]
|
| 1002 |
|
| 1003 |
# HYPER-OPTIMIZED CPU INFERENCE SETTINGS
|
| 1004 |
stream = self.llm(
|
|
|
|
| 1037 |
self.perf_stats["peak_tps"] = tps
|
| 1038 |
|
| 1039 |
# Charge tokens every second
|
| 1040 |
+
if int(elapsed * 1000) % 1000 < 100 and username: # Every ~1 second
|
| 1041 |
+
token_manager.charge_usage(username, elapsed * 1000)
|
| 1042 |
|
| 1043 |
# Update history with streaming content + performance metrics
|
| 1044 |
+
balance = token_manager.get_balance(username) if username else 0
|
| 1045 |
history[-1]["content"] = f"{response_text}\n\n`β‘ {tps} t/s | π― Peak: {self.perf_stats['peak_tps']:.1f} t/s | π° {balance:.2f} tokens`"
|
| 1046 |
yield history
|
| 1047 |
|
| 1048 |
# Final token charge for remaining time
|
| 1049 |
+
if username:
|
| 1050 |
+
token_manager.charge_usage(username, elapsed * 1000)
|
| 1051 |
|
| 1052 |
# Update global performance stats
|
| 1053 |
self.perf_stats["total_tokens"] += tokens_count
|
|
|
|
| 1173 |
kernel = ZeroEngine()
|
| 1174 |
|
| 1175 |
# Session ID for token tracking
|
| 1176 |
+
username = token_manager.get_username()
|
| 1177 |
|
| 1178 |
with gr.Blocks(title="ZeroEngine V0.2", css=CUSTOM_CSS) as demo:
|
| 1179 |
# Header with Token Display
|