Ciroc0 commited on
Commit
8c04aa9
·
verified ·
1 Parent(s): 75bdddb

Update arca-processor

Browse files
Files changed (6) hide show
  1. .gitignore +15 -0
  2. Dockerfile +16 -0
  3. README.md +66 -12
  4. app.py +560 -0
  5. hf_client.py +38 -0
  6. requirements.txt +8 -0
.gitignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .Python
6
+ *.so
7
+ *.egg
8
+ *.egg-info/
9
+ dist/
10
+ build/
11
+ .pytest_cache/
12
+ .mypy_cache/
13
+ .coverage
14
+ *.log
15
+ /tmp/
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

WORKDIR /app

# Install dependencies first so this layer is cached while app code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application
COPY . .

# Expose port (matches the uvicorn bind in app.py)
EXPOSE 7860

# Run application
CMD ["python", "app.py"]
README.md CHANGED
@@ -1,12 +1,66 @@
1
- ---
2
- title: Arca Processor
3
- emoji: 📈
4
- colorFrom: green
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 6.6.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ArcaThread Processor
2
+
3
+ Pre-computes champion statistics from matchup-matrix data for fast tier list generation.
4
+
5
+ ## What it does
6
+
7
+ 1. **Scans matchup-matrix** for latest patches every hour
8
+ 2. **Aggregates data** per champion (win rates, matchups, by role/rank)
9
+ 3. **Generates JSON files** at `champ-stats/{patch}/{champion_id}.json`
10
+ 4. **Creates tier list** at `champ-stats/{patch}/tier-list.json`
11
+
12
+ ## Output Structure
13
+
14
+ ```
15
+ champ-stats/
16
+ ├── 16.4/
17
+ │ ├── meta.json # Patch metadata
18
+ │ ├── tier-list.json # Sorted tier list
19
+ │ ├── 266.json # Aatrox stats
20
+ │ ├── 103.json # Ahri stats
21
+ │ └── ...
22
+ ├── 16.3/
23
+ │ └── ...
24
+ ```
25
+
26
+ ## Champion Stats Format
27
+
28
+ ```json
29
+ {
30
+ "champion_id": 266,
31
+ "total_games": 15420,
32
+ "win_rate": 0.5234,
33
+ "by_role": {
34
+ "TOP": {"games": 12000, "win_rate": 0.5240},
35
+ "JUNGLE": {"games": 3420, "win_rate": 0.5210}
36
+ },
37
+ "by_rank": {
38
+ "DIAMOND": {"games": 3000, "win_rate": 0.5100},
39
+ "MASTER": {"games": 1500, "win_rate": 0.5050}
40
+ },
41
+ "matchups": [
42
+ {"enemy_champion_id": 54, "games": 500, "win_rate": 0.4800},
43
+ ...
44
+ ]
45
+ }
46
+ ```
47
+
48
+ ## Environment Variables
49
+
50
+ | Variable | Default | Description |
51
+ |----------|---------|-------------|
52
+ | `HF_TOKEN` | (required) | HuggingFace API token |
53
+ | `DATASET_REPO` | `ArcaThread/arca-thread-priors` | Target dataset |
54
+ | `PROCESS_INTERVAL_SECONDS` | `3600` | Run interval (1 hour) |
55
+ | `MIN_SAMPLE_SIZE` | `100` | Minimum games for inclusion |
56
+
57
+ ## API Endpoints
58
+
59
+ - `GET /` - Basic info
60
+ - `GET /health` - Status and stats
61
+ - `GET /trigger` - Manually trigger processing
62
+ - `GET /patch/{patch}` - Patch processing status
63
+
64
+ ## Deployment
65
+
66
+ Deploy to HuggingFace Spaces as a Docker space. Set `HF_TOKEN` as a secret.
app.py ADDED
@@ -0,0 +1,560 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
ArcaThread Processor v1.0
- Generates pre-computed champion stats from matchup-matrix data
- Runs hourly to update stats for new patches
- Creates champ-stats/{patch}/{champion}.json files
"""

import os
import sys
import json
import time
import re
import threading
import traceback
from datetime import datetime
from typing import Dict, List, Optional, Any
from collections import defaultdict

from fastapi import FastAPI
import uvicorn
import pandas as pd
import numpy as np
from huggingface_hub import hf_hub_download, CommitOperationAdd, list_repo_files
from hf_client import get_hf_api, get_hf_config

# Hugging Face connection settings are resolved once at import time
# (hf_client.get_hf_config raises if HF_TOKEN is missing outside dev mode).
HF_CFG = get_hf_config()
HF_TOKEN = HF_CFG.token
DATASET_REPO = HF_CFG.dataset_repo
# Seconds between processing cycles; clamped to at least one minute.
PROCESS_INTERVAL_SECONDS = max(60, int(os.environ.get("PROCESS_INTERVAL_SECONDS", "3600")))
# Minimum total games a champion needs before it appears in the output.
MIN_SAMPLE_SIZE = int(os.environ.get("MIN_SAMPLE_SIZE", "100"))

# NOTE(review): RANKS and ROLE_MAPPING are not referenced anywhere in this
# module's visible code — confirm they are still needed before removing.
RANKS = [
    "IRON", "BRONZE", "SILVER", "GOLD", "PLATINUM",
    "EMERALD", "DIAMOND", "MASTER", "GRANDMASTER", "CHALLENGER"
]

ROLE_MAPPING = {'TOP': 0, 'JUNGLE': 1, 'MIDDLE': 2, 'BOTTOM': 3, 'SUPPORT': 4, 'UNKNOWN': 5}

# Global state
is_running = True            # cleared to stop the background processing loop
last_processing = None       # ISO timestamp of the last completed cycle
commit_cooldown_until = 0.0  # epoch seconds; uploads are skipped until then after a 429
stats = {
    "processings": 0,
    "champions_processed": 0,
    "patches_processed": [],
    "last_processing_per_patch": {},
    "processing_history": []
}
state_lock = threading.Lock()  # guards `stats` and `last_processing`

app = FastAPI(title="ArcaThread Processor v1.0")

# Max number of cycle summaries retained in stats["processing_history"].
MAX_HISTORY = 20
+
57
+
58
def log(msg: str):
    """Print a timestamped message, flushing immediately so logs stream live."""
    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"[{now}] {msg}", flush=True)
62
+
63
+
64
def _normalize_patch_token(value: str) -> Optional[str]:
    """Return the ``major.minor`` prefix of a patch string, or None if absent."""
    candidate = str(value or "").strip()
    parsed = re.match(r"^(\d+)\.(\d+)", candidate)
    return f"{parsed.group(1)}.{parsed.group(2)}" if parsed else None
71
+
72
+
73
def _extract_champion_name(champion_id: int) -> str:
    """Map a numeric champion ID to a name.

    Placeholder implementation: the stringified ID is used as the key until a
    real ID-to-name lookup is wired in (per the original author's comment).
    """
    # NOTE(review): output files are keyed by this value, so swapping in real
    # names later will change the on-disk layout — coordinate with consumers.
    return str(champion_id)
76
+
77
+
78
def load_matchup_data_for_patch(patch: str) -> pd.DataFrame:
    """Download and concatenate every matchup parquet file for one patch.

    Scans the dataset repo for parquet files under ``matchup-matrix/`` whose
    path contains ``/{patch}/``, downloads each into /tmp, and returns them
    concatenated. Returns an empty DataFrame when nothing is found or on any
    unrecoverable error — callers treat "empty" as "skip this patch".

    Fixes: removed an unused ``api`` local and a placeholder-less f-string.
    """
    log(f"Loading matchup data for patch {patch}...")

    try:
        all_files = list_repo_files(DATASET_REPO, repo_type="dataset", token=HF_TOKEN)

        # Keep only this patch's parquet files under matchup-matrix/.
        patch_files = [
            f for f in all_files
            if f.startswith("matchup-matrix/")
            and f"/{patch}/" in f
            and f.endswith('.parquet')
        ]

        log(f"Found {len(patch_files)} matchup files for patch {patch}")

        if not patch_files:
            return pd.DataFrame()

        # Download and combine; a single bad file is logged and skipped rather
        # than aborting the whole patch.
        all_data = []
        for file_path in patch_files:
            try:
                local_path = hf_hub_download(
                    repo_id=DATASET_REPO,
                    filename=file_path,
                    repo_type="dataset",
                    token=HF_TOKEN,
                    local_dir="/tmp",
                )
                df = pd.read_parquet(local_path)
                all_data.append(df)
                log(f"  Loaded {file_path}: {len(df)} rows")
            except Exception as e:
                log(f"  Failed to load {file_path}: {e}")
                continue

        if not all_data:
            return pd.DataFrame()

        combined = pd.concat(all_data, ignore_index=True)
        log(f"Combined patch {patch} data: {len(combined)} total rows")
        return combined

    except Exception as e:
        log(f"Error loading data for patch {patch}: {e}")
        log(traceback.format_exc())
        return pd.DataFrame()
128
+
129
+
130
def get_latest_patches(n: int = 3) -> List[str]:
    """Return the ``n`` newest patch versions present under matchup-matrix/.

    Patch tokens are normalized to ``major.minor`` and sorted numerically, so
    e.g. "16.10" correctly sorts after "16.9". Returns [] on any error.

    Fix: removed an unused ``api = get_hf_api()`` local.
    """
    try:
        all_files = list_repo_files(DATASET_REPO, repo_type="dataset", token=HF_TOKEN)

        patches = set()
        for f in all_files:
            if not f.startswith("matchup-matrix/"):
                continue
            # parts[2] is the patch segment: matchup-matrix/<...>/<patch>/<file>
            parts = f.split("/")
            if len(parts) >= 3:
                patch = _normalize_patch_token(parts[2])
                if patch:
                    patches.add(patch)

        # Numeric (not lexicographic) sort, newest first.
        sorted_patches = sorted(patches, key=lambda p: [int(x) for x in p.split(".")], reverse=True)
        return sorted_patches[:n]

    except Exception as e:
        log(f"Error getting latest patches: {e}")
        return []
153
+
154
+
155
def compute_champion_stats(df: pd.DataFrame) -> Dict[str, Dict[str, Any]]:
    """Aggregate raw matchup rows into per-champion statistics.

    Sums games/wins overall, per role, per rank, and per enemy champion, then
    converts the accumulators into rounded win rates. Inclusion thresholds:
    champions need MIN_SAMPLE_SIZE total games; role/rank splits need half of
    that; individual matchups a fifth. Returns a dict keyed by the champion id
    as a string (matching the JSON file naming).

    Fix: the original comment claimed "top 10" matchups; the code keeps 20.
    """
    if df.empty:
        return {}

    # Raw accumulators: games/wins per champion, split by role, rank and enemy.
    champion_stats = defaultdict(lambda: {
        "champion_id": 0,
        "total_games": 0,
        "wins": 0,
        "by_role": defaultdict(lambda: {"games": 0, "wins": 0}),
        "by_rank": defaultdict(lambda: {"games": 0, "wins": 0}),
        "matchups": defaultdict(lambda: {"games": 0, "wins": 0}),
    })

    for _, row in df.iterrows():
        champ_id = int(row.get('champion_id', 0))
        enemy_id = int(row.get('enemy_champion_id', 0))
        wins = float(row.get('wins', 0))
        sample_size = int(row.get('sample_size', 0))
        role = str(row.get('role', 'UNKNOWN')).upper()
        rank = str(row.get('rank', 'UNKNOWN')).upper()

        # Skip malformed rows (missing champion or no games recorded).
        if champ_id <= 0 or sample_size <= 0:
            continue

        stats_entry = champion_stats[champ_id]
        stats_entry["champion_id"] = champ_id
        stats_entry["total_games"] += sample_size
        stats_entry["wins"] += wins

        # By role
        stats_entry["by_role"][role]["games"] += sample_size
        stats_entry["by_role"][role]["wins"] += wins

        # By rank
        stats_entry["by_rank"][rank]["games"] += sample_size
        stats_entry["by_rank"][rank]["wins"] += wins

        # Matchups (keyed by stringified enemy id; 0/negative ids are ignored)
        if enemy_id > 0:
            matchup_key = str(enemy_id)
            stats_entry["matchups"][matchup_key]["games"] += sample_size
            stats_entry["matchups"][matchup_key]["wins"] += wins

    # Convert accumulators into the final win-rate format.
    result = {}
    for champ_id, data in champion_stats.items():
        total_games = data["total_games"]
        if total_games < MIN_SAMPLE_SIZE:
            continue

        win_rate = data["wins"] / total_games if total_games > 0 else 0.5

        # Role splits that clear half the global sample threshold.
        by_role = {}
        for role, role_data in data["by_role"].items():
            if role_data["games"] >= MIN_SAMPLE_SIZE // 2:
                by_role[role] = {
                    "games": role_data["games"],
                    "win_rate": round(role_data["wins"] / role_data["games"], 4)
                }

        # Rank splits, same threshold as roles.
        by_rank = {}
        for rank, rank_data in data["by_rank"].items():
            if rank_data["games"] >= MIN_SAMPLE_SIZE // 2:
                by_rank[rank] = {
                    "games": rank_data["games"],
                    "win_rate": round(rank_data["wins"] / rank_data["games"], 4)
                }

        # Matchups: keep the 20 most-played (original comment said 10; the
        # slice below has always been 20 — the comment was wrong, not the code).
        matchups = []
        for enemy_id, matchup_data in data["matchups"].items():
            if matchup_data["games"] >= MIN_SAMPLE_SIZE // 5:
                matchups.append({
                    "enemy_champion_id": int(enemy_id),
                    "games": matchup_data["games"],
                    "win_rate": round(matchup_data["wins"] / matchup_data["games"], 4)
                })
        matchups.sort(key=lambda x: x["games"], reverse=True)
        matchups = matchups[:20]

        result[str(champ_id)] = {
            "champion_id": champ_id,
            "total_games": total_games,
            "win_rate": round(win_rate, 4),
            "by_role": by_role,
            "by_rank": by_rank,
            "matchups": matchups,
        }

    return result
250
+
251
+
252
def generate_tier_list(stats_by_champion: Dict[str, Dict], min_games: int = 500) -> List[Dict]:
    """Build a tier list, sorted by win rate descending, from champion stats.

    Champions with fewer than ``min_games`` total games are excluded.
    Tier cutoffs: S >= 54%, A >= 52%, B >= 50%, C >= 48%, everything else D.
    """
    # (threshold, label) pairs checked in order; the fallthrough tier is "D".
    cutoffs = ((0.54, "S"), (0.52, "A"), (0.50, "B"), (0.48, "C"))

    entries = []
    for data in stats_by_champion.values():
        if data["total_games"] < min_games:
            continue

        rate = data["win_rate"]
        tier = next((label for threshold, label in cutoffs if rate >= threshold), "D")

        entries.append({
            "champion_id": data["champion_id"],
            "tier": tier,
            "win_rate": rate,
            "games": data["total_games"],
        })

    return sorted(entries, key=lambda entry: entry["win_rate"], reverse=True)
284
+
285
+
286
def build_upload_operation(local_path: str, repo_path: str) -> Optional[CommitOperationAdd]:
    """Return a CommitOperationAdd mapping ``local_path`` to ``repo_path``.

    Returns None (after logging why) when the local file is missing or empty,
    so callers can simply skip bad artifacts.
    """
    if not os.path.exists(local_path):
        log(f"File not found: {local_path}")
        return None
    if os.path.getsize(local_path) == 0:
        log(f"File is empty: {local_path}")
        return None
    return CommitOperationAdd(path_in_repo=repo_path, path_or_fileobj=local_path)
296
+
297
+
298
def upload_operations(operations: List[CommitOperationAdd], commit_message: str) -> bool:
    """Push the given operations to the HF dataset as a single commit.

    Returns True on success (or when there is nothing to upload). On an HTTP
    429 the module-wide cooldown is armed so later calls back off for an hour.
    """
    global commit_cooldown_until

    if not operations:
        return True

    remaining = commit_cooldown_until - time.time()
    if remaining > 0:
        log(f"Skipping upload (commit cooldown active for {int(remaining)}s)")
        return False

    try:
        get_hf_api().create_commit(
            repo_id=DATASET_REPO,
            repo_type="dataset",
            operations=operations,
            commit_message=commit_message,
        )
    except Exception as e:
        message = str(e)
        # Rate limiting: arm the cooldown so subsequent cycles stop retrying.
        if "429" in message or "Too Many Requests" in message:
            commit_cooldown_until = time.time() + 3600
            log("Upload rate-limited. Pausing for 1 hour")
        log(f"Upload failed: {e}")
        return False

    log(f"Uploaded {len(operations)} files")
    return True
327
+
328
+
329
def process_patch(patch: str) -> int:
    """Generate and upload the champ-stats artifacts for one patch.

    Pipeline: load matchup data -> aggregate per-champion stats -> build tier
    list -> write JSON files to /tmp -> commit everything to the dataset under
    ``champ-stats/{patch}/``. Returns the number of champions processed, or 0
    when there is no data, nothing meets the sample threshold, or the upload
    does not succeed.

    Fixes: replaced placeholder-less f-strings (``f"=" * 60``) with plain
    strings and made file writes explicitly UTF-8.
    """
    log("=" * 60)
    log(f"Processing patch: {patch}")
    log("=" * 60)

    # Load matchup data; an empty frame means nothing to do for this patch.
    df = load_matchup_data_for_patch(patch)
    if df.empty:
        log(f"No data found for patch {patch}")
        return 0

    log(f"Computing champion stats from {len(df)} rows...")
    champion_stats = compute_champion_stats(df)
    log(f"Generated stats for {len(champion_stats)} champions")

    if not champion_stats:
        log("No champions met the minimum sample size requirement")
        return 0

    # Generate tier list
    tier_list = generate_tier_list(champion_stats)
    log(f"Generated tier list with {len(tier_list)} champions")

    # Stage all files locally before building the commit.
    temp_dir = f"/tmp/champ-stats/{patch}"
    os.makedirs(temp_dir, exist_ok=True)

    operations = []

    # One JSON file per champion.
    for champ_id, data in champion_stats.items():
        file_path = f"{temp_dir}/{champ_id}.json"
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)

        op = build_upload_operation(file_path, f"champ-stats/{patch}/{champ_id}.json")
        if op:
            operations.append(op)

    # Tier list for the whole patch.
    tier_list_path = f"{temp_dir}/tier-list.json"
    with open(tier_list_path, 'w', encoding='utf-8') as f:
        json.dump({
            "patch": patch,
            "generated_at": datetime.now().isoformat(),
            "total_champions": len(tier_list),
            "tiers": tier_list,
        }, f, indent=2)

    tier_op = build_upload_operation(tier_list_path, f"champ-stats/{patch}/tier-list.json")
    if tier_op:
        operations.append(tier_op)

    # Patch metadata.
    meta_path = f"{temp_dir}/meta.json"
    with open(meta_path, 'w', encoding='utf-8') as f:
        json.dump({
            "patch": patch,
            "generated_at": datetime.now().isoformat(),
            "champions_count": len(champion_stats),
            "total_games": int(df['sample_size'].sum()) if 'sample_size' in df.columns else 0,
            "min_sample_size": MIN_SAMPLE_SIZE,
        }, f, indent=2)

    meta_op = build_upload_operation(meta_path, f"champ-stats/{patch}/meta.json")
    if meta_op:
        operations.append(meta_op)

    # Single commit for the whole patch keeps the repo history compact.
    if operations:
        commit_msg = f"Update champ-stats for patch {patch} - {datetime.now().isoformat()}"
        if upload_operations(operations, commit_msg):
            log(f"Successfully uploaded {len(operations)} files for patch {patch}")
            return len(champion_stats)

    return 0
407
+
408
+
409
def run_processing_cycle():
    """Run a complete processing cycle over the latest patches.

    Processes up to 3 recent patches via process_patch(), then records the
    cycle's totals and a capped history entry into the shared ``stats`` dict.
    Safe to call from the background loop or the /trigger endpoint.
    """
    global stats, last_processing

    log("=" * 60)
    log("STARTING PROCESSING CYCLE")
    log("=" * 60)

    # Get latest patches
    patches = get_latest_patches(n=3)
    log(f"Found patches to process: {patches}")

    total_champions = 0
    processed_patches = []

    for patch in patches:
        # Honor shutdown between patches.
        if not is_running:
            break

        try:
            count = process_patch(patch)
            if count > 0:
                total_champions += count
                processed_patches.append(patch)

                with state_lock:
                    stats["last_processing_per_patch"][patch] = datetime.now().isoformat()

            # Small delay between patches
            time.sleep(2)

        except Exception as e:
            # One failing patch must not abort the rest of the cycle.
            log(f"Error processing patch {patch}: {e}")
            log(traceback.format_exc())
            continue

    cycle_finished_at = datetime.now().isoformat()

    with state_lock:
        stats["processings"] += 1
        # These reflect the most recent cycle only (assigned, not accumulated).
        stats["champions_processed"] = total_champions
        stats["patches_processed"] = processed_patches

        cycle_history = {
            "timestamp": cycle_finished_at,
            "patches": processed_patches,
            "champions": total_champions,
        }
        stats["processing_history"].append(cycle_history)
        # Cap history so the /health payload stays bounded.
        if len(stats["processing_history"]) > MAX_HISTORY:
            stats["processing_history"] = stats["processing_history"][-MAX_HISTORY:]

        last_processing = cycle_finished_at

    log("=" * 60)
    log(f"PROCESSING CYCLE COMPLETE - {total_champions} champions across {len(processed_patches)} patches")
    log("=" * 60)
466
+
467
+
468
def processing_loop():
    """Background worker: run one cycle at startup, then one per interval."""
    log("Processing loop starting...")

    if not HF_TOKEN:
        log("ERROR: HF_TOKEN not set!")
        return

    # First cycle runs immediately on startup.
    try:
        log("Running initial processing...")
        run_processing_cycle()
    except Exception as e:
        log(f"Initial processing failed: {e}")
        log(traceback.format_exc())

    while is_running:
        log(f"Sleeping {PROCESS_INTERVAL_SECONDS} seconds until next cycle...")

        # Sleep in 1-second slices so a cleared is_running flag is seen quickly.
        slept = 0
        while slept < PROCESS_INTERVAL_SECONDS and is_running:
            time.sleep(1)
            slept += 1

        if not is_running:
            break

        try:
            run_processing_cycle()
        except Exception as e:
            log(f"Processing cycle failed: {e}")
            log(traceback.format_exc())
501
+
502
+
503
@app.get("/")
def root():
    """Service banner plus the five most recent processing cycles."""
    with state_lock:
        history = list(stats.get("processing_history", []))
    return {
        "message": "ArcaThread Processor v1.0 - use /health for status",
        "recent_history": history[-5:],
    }
511
+
512
+
513
@app.get("/health")
def health():
    """Liveness endpoint: current stats snapshot plus effective configuration."""
    with state_lock:
        snapshot = {
            "processings": stats["processings"],
            "champions_processed": stats["champions_processed"],
            "patches_processed": stats["patches_processed"],
        }
        last = last_processing
    return {
        "status": "healthy",
        "last_processing": last,
        "stats": snapshot,
        "config": {
            "process_interval_seconds": PROCESS_INTERVAL_SECONDS,
            "min_sample_size": MIN_SAMPLE_SIZE,
        }
    }
529
+
530
+
531
@app.get("/trigger")
def trigger_processing():
    """Kick off one processing cycle in the background and return immediately."""
    log("Manual processing trigger received")
    worker = threading.Thread(target=run_processing_cycle, daemon=True)
    worker.start()
    return {"status": "processing_triggered"}
538
+
539
+
540
@app.get("/patch/{patch}")
def get_patch_status(patch: str):
    """Report when ``patch`` was last processed and where its output lives."""
    with state_lock:
        last_proc = stats["last_processing_per_patch"].get(patch)
    return {
        "patch": patch,
        "last_processing": last_proc,
        "dataset_url": f"https://huggingface.co/datasets/{DATASET_REPO}/tree/main/champ-stats/{patch}"
    }
550
+
551
+
552
@app.on_event("startup")
def startup():
    """Launch the background processing thread when the API comes up.

    NOTE(review): ``@app.on_event`` is deprecated in recent FastAPI releases
    in favor of lifespan handlers — worth migrating when the pin is bumped.
    """
    log("ArcaThread Processor v1.0 starting...")
    # Daemon thread: never blocks interpreter shutdown.
    thread = threading.Thread(target=processing_loop, daemon=True, name="Processor")
    thread.start()
557
+
558
+
559
if __name__ == "__main__":
    # Bind on all interfaces, port 7860 (matches the Dockerfile's EXPOSE).
    uvicorn.run(app, host="0.0.0.0", port=7860)
hf_client.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """HF API client configuration for arca-processor"""
3
+ import os
4
+ from dataclasses import dataclass
5
+ from typing import Dict, Optional
6
+
7
+ from huggingface_hub import HfApi
8
+
9
+
10
@dataclass(frozen=True)
class HFConfig:
    """Immutable Hugging Face connection settings resolved from the environment."""
    # API token; None only when running in dev mode (see get_hf_config)
    token: Optional[str]
    # Target dataset repo id, e.g. "ArcaThread/arca-thread-priors"
    dataset_repo: str
14
+
15
+
16
def _is_dev_mode() -> bool:
    """True when NODE_ENV (or ENV) marks this as a dev/local environment."""
    raw = os.environ.get("NODE_ENV") or os.environ.get("ENV") or ""
    return str(raw).strip().lower() in {"dev", "development", "local"}
19
+
20
+
21
def get_hf_config() -> HFConfig:
    """Resolve Hugging Face settings from the environment.

    Raises RuntimeError when HF_TOKEN is missing outside dev mode; in dev mode
    a tokenless (anonymous) configuration is returned instead.
    """
    env = os.environ
    token = str(env.get("HF_TOKEN") or "").strip()
    repo = str(env.get("DATASET_REPO") or "ArcaThread/arca-thread-priors").strip()
    if token:
        return HFConfig(token=token, dataset_repo=repo)
    if _is_dev_mode():
        return HFConfig(token=None, dataset_repo=repo)
    raise RuntimeError("HF_TOKEN is required in non-dev environments")
27
+
28
+
29
def get_hf_api() -> HfApi:
    """Return an HfApi client authenticated with the configured token (if any)."""
    return HfApi(token=get_hf_config().token)
32
+
33
+
34
def get_hf_headers() -> Dict[str, str]:
    """Return Authorization headers for raw HTTP calls; empty when anonymous."""
    token = get_hf_config().token
    return {"Authorization": f"Bearer {token}"} if token else {}
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.104.0
2
+ uvicorn>=0.24.0
3
+ pandas>=2.0.0
4
+ numpy>=1.24.0
5
+ huggingface-hub>=0.20.0
6
+ pyarrow>=14.0.0
7
+ requests>=2.31.0
8
+ python-dateutil>=2.8.0