abidlabs HF Staff commited on
Commit
dc86f09
·
verified ·
1 Parent(s): 0874ba3

Upload folder using huggingface_hub

Browse files
Files changed (46) hide show
  1. .gitattributes +2 -0
  2. trackio/CHANGELOG.md +232 -0
  3. trackio/__init__.py +749 -0
  4. trackio/alerts.py +184 -0
  5. trackio/api.py +87 -0
  6. trackio/apple_gpu.py +253 -0
  7. trackio/assets/badge.png +3 -0
  8. trackio/assets/trackio_logo_dark.png +0 -0
  9. trackio/assets/trackio_logo_light.png +0 -0
  10. trackio/assets/trackio_logo_old.png +3 -0
  11. trackio/assets/trackio_logo_type_dark.png +0 -0
  12. trackio/assets/trackio_logo_type_dark_transparent.png +0 -0
  13. trackio/assets/trackio_logo_type_light.png +0 -0
  14. trackio/assets/trackio_logo_type_light_transparent.png +0 -0
  15. trackio/bucket_storage.py +152 -0
  16. trackio/cli.py +1272 -0
  17. trackio/cli_helpers.py +158 -0
  18. trackio/commit_scheduler.py +310 -0
  19. trackio/context_vars.py +18 -0
  20. trackio/deploy.py +991 -0
  21. trackio/dummy_commit_scheduler.py +19 -0
  22. trackio/frontend/dist/assets/index-6kGqI2Bm.js +0 -0
  23. trackio/frontend/dist/assets/index-BjAwVTtr.css +1 -0
  24. trackio/frontend/dist/index.html +14 -0
  25. trackio/frontend/eslint.config.js +42 -0
  26. trackio/frontend/index.html +13 -0
  27. trackio/frontend_server.py +64 -0
  28. trackio/gpu.py +381 -0
  29. trackio/histogram.py +71 -0
  30. trackio/imports.py +290 -0
  31. trackio/markdown.py +21 -0
  32. trackio/media/__init__.py +27 -0
  33. trackio/media/audio.py +167 -0
  34. trackio/media/image.py +84 -0
  35. trackio/media/media.py +79 -0
  36. trackio/media/utils.py +60 -0
  37. trackio/media/video.py +246 -0
  38. trackio/package.json +6 -0
  39. trackio/py.typed +0 -0
  40. trackio/remote_client.py +28 -0
  41. trackio/run.py +739 -0
  42. trackio/server.py +743 -0
  43. trackio/sqlite_storage.py +1920 -0
  44. trackio/table.py +173 -0
  45. trackio/typehints.py +39 -0
  46. trackio/utils.py +927 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ trackio/assets/badge.png filter=lfs diff=lfs merge=lfs -text
37
+ trackio/assets/trackio_logo_old.png filter=lfs diff=lfs merge=lfs -text
trackio/CHANGELOG.md ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # trackio
2
+
3
+ ## 0.22.0
4
+
5
+ ### Features
6
+
7
+ - [#484](https://github.com/gradio-app/trackio/pull/484) [`cc05ada`](https://github.com/gradio-app/trackio/commit/cc05ada8e89773f3a894af99b801ef680f64418f) - Fix duplicate columns in parquet export. Thanks @abidlabs!
8
+ - [#487](https://github.com/gradio-app/trackio/pull/487) [`853f764`](https://github.com/gradio-app/trackio/commit/853f7646a70d12633afaa4f69db86425aa665413) - Relax `PIL` dependency and remove `plotly` as it's no longer used. Thanks @abidlabs!
9
+
10
+ ## 0.21.2
11
+
12
+ ### Features
13
+
14
+ - [#482](https://github.com/gradio-app/trackio/pull/482) [`f62180a`](https://github.com/gradio-app/trackio/commit/f62180a0218bc99a259d5ca110a0384a6cae11c8) - Use server-side bucket copy when freezing Spaces. Thanks @abidlabs!
15
+
16
+ ## 0.21.1
17
+
18
+ ### Features
19
+
20
+ - [#475](https://github.com/gradio-app/trackio/pull/475) [`fcb476c`](https://github.com/gradio-app/trackio/commit/fcb476cd37a40923e9679aaf966f41d582a878a8) - Tweaks. Thanks @abidlabs!
21
+ - [#477](https://github.com/gradio-app/trackio/pull/477) [`7d52dfd`](https://github.com/gradio-app/trackio/commit/7d52dfdce5b6eff6a34501a6d5a620220663cf09) - Fix `.sync()` and add `.freeze()` as a separate method. Thanks @abidlabs!
22
+
23
+ ## 0.21.0
24
+
25
+ ### Features
26
+
27
+ - [#467](https://github.com/gradio-app/trackio/pull/467) [`f357deb`](https://github.com/gradio-app/trackio/commit/f357debf78957e4c1f2b901bee4f77cf397298b4) - Allow logged metrics as x-axis choices. Thanks @abidlabs!
28
+ - [#474](https://github.com/gradio-app/trackio/pull/474) [`655673d`](https://github.com/gradio-app/trackio/commit/655673d4c6b7c8b7ee8f87f2589f2dbbc3d2ef91) - Fix file descriptor leak from `sqlite3.connect`. Thanks @abidlabs!
29
+ - [#470](https://github.com/gradio-app/trackio/pull/470) [`bea8c9d`](https://github.com/gradio-app/trackio/commit/bea8c9dcae0b59d071b6c779c97ee525c9bbf6e7) - Restores tooltips to line plots and fixes the call to use TTL instead of OAuth. Thanks @abidlabs!
30
+ - [#471](https://github.com/gradio-app/trackio/pull/471) [`246fce0`](https://github.com/gradio-app/trackio/commit/246fce0a01619e1c2c538c67b3e460883334d500) - Deprecate dataset backend in favor of buckets. Thanks @abidlabs!
31
+ - [#465](https://github.com/gradio-app/trackio/pull/465) [`3e11174`](https://github.com/gradio-app/trackio/commit/3e1117438bb8168b802245a33059affa558ae519) - Use HF buckets as backend. Thanks @abidlabs!
32
+ - [#469](https://github.com/gradio-app/trackio/pull/469) [`915d170`](https://github.com/gradio-app/trackio/commit/915d17045133172b59195acfdcc70709229668aa) - Make static Spaces work with Buckets and also allow conversion from Gradio SDK to Static Spaces. Thanks @abidlabs!
33
+
34
+ ## 0.20.2
35
+
36
+ ### Features
37
+
38
+ - [#464](https://github.com/gradio-app/trackio/pull/464) [`c89ebb3`](https://github.com/gradio-app/trackio/commit/c89ebb3b50f695bc7f16cbc6f46dce86f79a01e9) - Improve rendering of curves. Thanks @abidlabs!
39
+ - [#462](https://github.com/gradio-app/trackio/pull/462) [`9160b78`](https://github.com/gradio-app/trackio/commit/9160b78ff6f258f0b87a4f34a24e7d0b5dfbf2fb) - Refactor plot title to display only the metric name without the path. Thanks @qgallouedec!
40
+
41
+ ## 0.20.1
42
+
43
+ ### Features
44
+
45
+ - [#454](https://github.com/gradio-app/trackio/pull/454) [`22881db`](https://github.com/gradio-app/trackio/commit/22881dbbbb6b81197a00a19853771007093d61e4) - Bar chart single point. Thanks @abidlabs!
46
+ - [#455](https://github.com/gradio-app/trackio/pull/455) [`f8db51a`](https://github.com/gradio-app/trackio/commit/f8db51a20ca61ef703f3f2c2ee1ebd9c4f239cf2) - Adds a static Trackio mode via `trackio.sync(sdk="static")` and support for the `TRACKIO_SPACE_ID` environment variable. Thanks @abidlabs!
47
+
48
+ ## 0.20.0
49
+
50
+ ### Features
51
+
52
+ - [#450](https://github.com/gradio-app/trackio/pull/450) [`b0571ef`](https://github.com/gradio-app/trackio/commit/b0571ef6207a1ce346696f858ad2b7b584dd194f) - Use Svelte source for Gradio components directly in Trackio dashboard. Thanks @abidlabs!
53
+
54
+ ## 0.19.0
55
+
56
+ ### Features
57
+
58
+ - [#445](https://github.com/gradio-app/trackio/pull/445) [`cef4a58`](https://github.com/gradio-app/trackio/commit/cef4a583cb76f4091fc6c0e5783124ee84f8e243) - Add remote HF Space support to CLI. Thanks @abidlabs!
59
+ - [#444](https://github.com/gradio-app/trackio/pull/444) [`358f2a9`](https://github.com/gradio-app/trackio/commit/358f2a9ca238ee8b90b5a8c96220da287e0698fb) - Fix alerts placeholder flashing on reports page. Thanks @abidlabs!
60
+
61
+ ## 0.18.0
62
+
63
+ ### Features
64
+
65
+ - [#435](https://github.com/gradio-app/trackio/pull/435) [`4a47112`](https://github.com/gradio-app/trackio/commit/4a471128e18a39e45fad48a67fd711c5ae9e4aed) - feat: allow hiding section header accordions. Thanks @Saba9!
66
+ - [#439](https://github.com/gradio-app/trackio/pull/439) [`18e9650`](https://github.com/gradio-app/trackio/commit/18e96503d5a3a7cf926e92782d457e23c19942bd) - Add alerts with webhooks, CLI, and documentation. Thanks @abidlabs!
67
+ - [#438](https://github.com/gradio-app/trackio/pull/438) [`0875ccd`](https://github.com/gradio-app/trackio/commit/0875ccd3d8a41b1376f64030f21cfe8cdcc73b05) - Add "share this view" functionality. Thanks @qgallouedec!
68
+ - [#409](https://github.com/gradio-app/trackio/pull/409) [`9282403`](https://github.com/gradio-app/trackio/commit/9282403d8896d48679b0f888208a7ba5bdd4271a) - Add Apple Silicon GPU and system monitoring support. Thanks @znation!
69
+ - [#434](https://github.com/gradio-app/trackio/pull/434) [`4193223`](https://github.com/gradio-app/trackio/commit/41932230a3a2e1c16405dba08ecba5a42f11d1a8) - fix: table slider crash. Thanks @Saba9!
70
+
71
+ ### Fixes
72
+
73
+ - [#441](https://github.com/gradio-app/trackio/pull/441) [`3a2d11d`](https://github.com/gradio-app/trackio/commit/3a2d11dab0b4b37c925abc30ef84b0e2910321ee) - preserve x-axis step when toggling run checkboxes. Thanks @Saba9!
74
+
75
+ ## 0.17.0
76
+
77
+ ### Features
78
+
79
+ - [#428](https://github.com/gradio-app/trackio/pull/428) [`f7dd1ce`](https://github.com/gradio-app/trackio/commit/f7dd1ce2dc8a1936f9983467fcbcf93bfef01e09) - feat: add ability to rename runs. Thanks @Saba9!
80
+ - [#437](https://github.com/gradio-app/trackio/pull/437) [`2727c0b`](https://github.com/gradio-app/trackio/commit/2727c0b0755f48f7f186162ea45185c98f6b5516) - Add markdown reports across Trackio. Thanks @abidlabs!
81
+ - [#427](https://github.com/gradio-app/trackio/pull/427) [`5aeb9ed`](https://github.com/gradio-app/trackio/commit/5aeb9edcfd2068d309d9d64f172dcbcc327be1ab) - Make Trackio logging much more robust. Thanks @abidlabs!
82
+
83
+ ## 0.16.1
84
+
85
+ ### Features
86
+
87
+ - [#431](https://github.com/gradio-app/trackio/pull/431) [`c7ce55b`](https://github.com/gradio-app/trackio/commit/c7ce55b14dd5eb0c2165fb15df17dd60721c9325) - Lazy load the UI when trackio is imported. Thanks @abidlabs!
88
+
89
+ ## 0.16.0
90
+
91
+ ### Features
92
+
93
+ - [#426](https://github.com/gradio-app/trackio/pull/426) [`ead4dc8`](https://github.com/gradio-app/trackio/commit/ead4dc8e74ee2d8e47d61bca0a7668456acf49be) - Fix redundant double rendering of group checkboxes. Thanks @abidlabs!
94
+ - [#413](https://github.com/gradio-app/trackio/pull/413) [`39c4750`](https://github.com/gradio-app/trackio/commit/39c4750951d554ba6eb4d58847c6bb444b2891a8) - Check `dist-packages` when checking for source installation. Thanks @sergiopaniego!
95
+ - [#423](https://github.com/gradio-app/trackio/pull/423) [`2e52ab3`](https://github.com/gradio-app/trackio/commit/2e52ab303e3041718a6a56fbf84d0848aca9ad67) - Fix legend outline visibility issue. Thanks @Raghunath-Balaji!
96
+ - [#407](https://github.com/gradio-app/trackio/pull/407) [`c8a384d`](https://github.com/gradio-app/trackio/commit/c8a384ddfe5a295cecf862a26178d40e48acb424) - Fix pytests that were failing locally on MacOS. Thanks @abidlabs!
97
+ - [#405](https://github.com/gradio-app/trackio/pull/405) [`35aae4e`](https://github.com/gradio-app/trackio/commit/35aae4e3aa3e2b2888887528478b9dc6a9808bda) - Add conditional padding for HF Space dashboard when not in iframe. Thanks @znation!
98
+
99
+ ## 0.15.0
100
+
101
+ ### Features
102
+
103
+ - [#397](https://github.com/gradio-app/trackio/pull/397) [`6b38ad0`](https://github.com/gradio-app/trackio/commit/6b38ad02e5d73a0df49c4eede7e91331282ece04) - Adds `--host` cli option support. Thanks @abidlabs!
104
+ - [#396](https://github.com/gradio-app/trackio/pull/396) [`4a4d1ab`](https://github.com/gradio-app/trackio/commit/4a4d1ab85e63d923132a3fa7afa5d90e16431bec) - Fix run selection issue. Thanks @abidlabs!
105
+ - [#394](https://github.com/gradio-app/trackio/pull/394) [`c47a3a3`](https://github.com/gradio-app/trackio/commit/c47a3a31f8c4b83bce1aa7fc22eeba3d9021ad3d) - Add wandb-compatible API for trackio. Thanks @abidlabs!
106
+ - [#378](https://github.com/gradio-app/trackio/pull/378) [`b02046a`](https://github.com/gradio-app/trackio/commit/b02046a5b0dad7c9854e099a87f884afba4aecb2) - Add JSON export button for line plots and upgrade gradio dependency. Thanks @JamshedAli18!
107
+
108
+ ## 0.14.2
109
+
110
+ ### Features
111
+
112
+ - [#386](https://github.com/gradio-app/trackio/pull/386) [`f9452cd`](https://github.com/gradio-app/trackio/commit/f9452cdb8f0819368f3610f7ac0ed08957305275) - Fixing some issues related to deployed Trackio Spaces. Thanks @abidlabs!
113
+
114
+ ## 0.14.1
115
+
116
+ ### Features
117
+
118
+ - [#382](https://github.com/gradio-app/trackio/pull/382) [`44fe9bb`](https://github.com/gradio-app/trackio/commit/44fe9bb264fb2aafb0ec302ff15227c045819a2c) - Fix app file path when Trackio is not installed from source. Thanks @abidlabs!
119
+ - [#380](https://github.com/gradio-app/trackio/pull/380) [`c3f4cff`](https://github.com/gradio-app/trackio/commit/c3f4cff74bc5676e812773d8571454894fcdc7cc) - Add CLI commands for querying projects, runs, and metrics. Thanks @abidlabs!
120
+
121
+ ## 0.14.0
122
+
123
+ ### Features
124
+
125
+ - [#377](https://github.com/gradio-app/trackio/pull/377) [`5c5015b`](https://github.com/gradio-app/trackio/commit/5c5015b68c85c5de51111dad983f735c27b9a05f) - fixed wrapping issue in Runs table. Thanks @gaganchapa!
126
+ - [#374](https://github.com/gradio-app/trackio/pull/374) [`388e26b`](https://github.com/gradio-app/trackio/commit/388e26b9e9f24cd7ad203affe9b709be885b3d24) - Save Optimized Parquet files. Thanks @lhoestq!
127
+ - [#371](https://github.com/gradio-app/trackio/pull/371) [`fbace9c`](https://github.com/gradio-app/trackio/commit/fbace9cd7732c166f34d268f54b05bb06846cc5d) - Add GPU metrics logging. Thanks @kashif!
128
+ - [#367](https://github.com/gradio-app/trackio/pull/367) [`862840c`](https://github.com/gradio-app/trackio/commit/862840c13e30fc960cbee5b9eac4d3c25beba9de) - Add option to only show latest run, and fix the double logo issue. Thanks @abidlabs!
129
+
130
+ ## 0.13.1
131
+
132
+ ### Features
133
+
134
+ - [#369](https://github.com/gradio-app/trackio/pull/369) [`767e9fe`](https://github.com/gradio-app/trackio/commit/767e9fe095d7c6ed102016caf927c1517fb8618c) - tiny pr removing unnecessary code. Thanks @abidlabs!
135
+
136
+ ## 0.13.0
137
+
138
+ ### Features
139
+
140
+ - [#358](https://github.com/gradio-app/trackio/pull/358) [`073715d`](https://github.com/gradio-app/trackio/commit/073715d1caf8282f68890117f09c3ac301205312) - Improvements to `trackio.sync()`. Thanks @abidlabs!
141
+
142
+ ## 0.12.0
143
+
144
+ ### Features
145
+
146
+ - [#357](https://github.com/gradio-app/trackio/pull/357) [`02ba815`](https://github.com/gradio-app/trackio/commit/02ba815358060f1966052de051a5bdb09702920e) - Redesign media and tables to show up on separate page. Thanks @abidlabs!
147
+ - [#359](https://github.com/gradio-app/trackio/pull/359) [`08fe9c9`](https://github.com/gradio-app/trackio/commit/08fe9c9ddd7fe99ee811555fdfb62df9ab88e939) - docs: Improve docstrings. Thanks @qgallouedec!
148
+
149
+ ## 0.11.0
150
+
151
+ ### Features
152
+
153
+ - [#355](https://github.com/gradio-app/trackio/pull/355) [`ea51f49`](https://github.com/gradio-app/trackio/commit/ea51f4954922f21be76ef828700420fe9a912c4b) - Color code run checkboxes and match with plot lines. Thanks @abidlabs!
154
+ - [#353](https://github.com/gradio-app/trackio/pull/353) [`8abe691`](https://github.com/gradio-app/trackio/commit/8abe6919aeefe21fc7a23af814883efbb037c21f) - Remove show_api from demo.launch. Thanks @sergiopaniego!
155
+ - [#351](https://github.com/gradio-app/trackio/pull/351) [`8a8957e`](https://github.com/gradio-app/trackio/commit/8a8957e530dd7908d1fef7f2df030303f808101f) - Add `trackio.save()`. Thanks @abidlabs!
156
+
157
+ ## 0.10.0
158
+
159
+ ### Features
160
+
161
+ - [#305](https://github.com/gradio-app/trackio/pull/305) [`e64883a`](https://github.com/gradio-app/trackio/commit/e64883a51f7b8b93f7d48b8afe55acdb62238b71) - bump to gradio 6.0, make `trackio` compatible, and fix related issues. Thanks @abidlabs!
162
+
163
+ ## 0.9.1
164
+
165
+ ### Features
166
+
167
+ - [#344](https://github.com/gradio-app/trackio/pull/344) [`7e01024`](https://github.com/gradio-app/trackio/commit/7e010241d9a34794e0ce0dc19c1a6f0cf94ba856) - Avoid redundant calls to /whoami-v2. Thanks @Wauplin!
168
+
169
+ ## 0.9.0
170
+
171
+ ### Features
172
+
173
+ - [#343](https://github.com/gradio-app/trackio/pull/343) [`51bea30`](https://github.com/gradio-app/trackio/commit/51bea30f2877adff8e6497466d3a799400a0a049) - Sync offline projects to Hugging Face spaces. Thanks @candemircan!
174
+ - [#341](https://github.com/gradio-app/trackio/pull/341) [`4fd841f`](https://github.com/gradio-app/trackio/commit/4fd841fa190e15071b02f6fba7683ef4f393a654) - Adds a basic UI test to `trackio`. Thanks @abidlabs!
175
+ - [#339](https://github.com/gradio-app/trackio/pull/339) [`011d91b`](https://github.com/gradio-app/trackio/commit/011d91bb6ae266516fd250a349285670a8049d05) - Allow customizing the trackio color palette. Thanks @abidlabs!
176
+
177
+ ## 0.8.1
178
+
179
+ ### Features
180
+
181
+ - [#336](https://github.com/gradio-app/trackio/pull/336) [`5f9f51d`](https://github.com/gradio-app/trackio/commit/5f9f51dac8677f240d7c42c3e3b2660a22aee138) - Support a list of `Trackio.Image` in a `trackio.Table` cell. Thanks @abidlabs!
182
+
183
+ ## 0.8.0
184
+
185
+ ### Features
186
+
187
+ - [#331](https://github.com/gradio-app/trackio/pull/331) [`2c02d0f`](https://github.com/gradio-app/trackio/commit/2c02d0fd0a5824160528782402bb0dd4083396d5) - Truncate table string values that are greater than 250 characters (configurable via env variable). Thanks @abidlabs!
188
+ - [#324](https://github.com/gradio-app/trackio/pull/324) [`50b2122`](https://github.com/gradio-app/trackio/commit/50b2122e7965ac82a72e6cb3b7d048bc10a2a6b1) - Add log y-axis functionality to UI. Thanks @abidlabs!
189
+ - [#326](https://github.com/gradio-app/trackio/pull/326) [`61dc1f4`](https://github.com/gradio-app/trackio/commit/61dc1f40af2f545f8e70395ddf0dbb8aee6b60d5) - Fix: improve table rendering for metrics in Trackio Dashboard. Thanks @vigneshwaran!
190
+ - [#328](https://github.com/gradio-app/trackio/pull/328) [`6857cbb`](https://github.com/gradio-app/trackio/commit/6857cbbe557a59a4642f210ec42566d108294e63) - Support trackio.Table with trackio.Image columns. Thanks @abidlabs!
191
+ - [#323](https://github.com/gradio-app/trackio/pull/323) [`6857cbb`](https://github.com/gradio-app/trackio/commit/6857cbbe557a59a4642f210ec42566d108294e63) - add Trackio client implementations in Go, Rust, and JS. Thanks @vaibhav-research!
192
+
193
+ ## 0.7.0
194
+
195
+ ### Features
196
+
197
+ - [#277](https://github.com/gradio-app/trackio/pull/277) [`db35601`](https://github.com/gradio-app/trackio/commit/db35601b9c023423c4654c9909b8ab73e58737de) - fix: make grouped runs view reflect live updates. Thanks @Saba9!
198
+ - [#320](https://github.com/gradio-app/trackio/pull/320) [`24ae739`](https://github.com/gradio-app/trackio/commit/24ae73969b09fb3126acd2f91647cdfbf8cf72a1) - Add additional query params for xmin, xmax, and smoothing. Thanks @abidlabs!
199
+ - [#270](https://github.com/gradio-app/trackio/pull/270) [`cd1dfc3`](https://github.com/gradio-app/trackio/commit/cd1dfc3dc641b4499ac6d4a1b066fa8e2b52c57b) - feature: add support for logging audio. Thanks @Saba9!
200
+
201
+ ## 0.6.0
202
+
203
+ ### Features
204
+
205
+ - [#309](https://github.com/gradio-app/trackio/pull/309) [`1df2353`](https://github.com/gradio-app/trackio/commit/1df23534d6c01938c8db9c0f584ffa23e8d6021d) - Add histogram support with wandb-compatible API. Thanks @abidlabs!
206
+ - [#315](https://github.com/gradio-app/trackio/pull/315) [`76ba060`](https://github.com/gradio-app/trackio/commit/76ba06055dc43ca8f03b79f3e72d761949bd19a8) - Add guards to avoid silent fails. Thanks @Xmaster6y!
207
+ - [#313](https://github.com/gradio-app/trackio/pull/313) [`a606b3e`](https://github.com/gradio-app/trackio/commit/a606b3e1c5edf3d4cf9f31bd50605226a5a1c5d0) - No longer prevent certain keys from being used. Instead, dunderify them to prevent collisions with internal usage. Thanks @abidlabs!
208
+ - [#317](https://github.com/gradio-app/trackio/pull/317) [`27370a5`](https://github.com/gradio-app/trackio/commit/27370a595d0dbdf7eebbe7159d2ba778f039da44) - quick fixes for trackio.histogram. Thanks @abidlabs!
209
+ - [#312](https://github.com/gradio-app/trackio/pull/312) [`aa0f3bf`](https://github.com/gradio-app/trackio/commit/aa0f3bf372e7a0dd592a38af699c998363830eeb) - Fix video logging by adding TRACKIO_DIR to allowed_paths. Thanks @abidlabs!
210
+
211
+ ## 0.5.3
212
+
213
+ ### Features
214
+
215
+ - [#300](https://github.com/gradio-app/trackio/pull/300) [`5e4cacf`](https://github.com/gradio-app/trackio/commit/5e4cacf2e7ce527b4ce60de3a5bc05d2c02c77fb) - Adds more environment variables to allow customization of Trackio dashboard. Thanks @abidlabs!
216
+
217
+ ## 0.5.2
218
+
219
+ ### Features
220
+
221
+ - [#293](https://github.com/gradio-app/trackio/pull/293) [`64afc28`](https://github.com/gradio-app/trackio/commit/64afc28d3ea1dfd821472dc6bf0b8ed35a9b74be) - Ensures that the TRACKIO_DIR environment variable is respected. Thanks @abidlabs!
222
+ - [#287](https://github.com/gradio-app/trackio/pull/287) [`cd3e929`](https://github.com/gradio-app/trackio/commit/cd3e9294320949e6b8b829239069a43d5d7ff4c1) - fix(sqlite): unify .sqlite extension, allow export when DBs exist, clean WAL sidecars on import. Thanks @vaibhav-research!
223
+
224
+ ### Fixes
225
+
226
+ - [#291](https://github.com/gradio-app/trackio/pull/291) [`3b5adc3`](https://github.com/gradio-app/trackio/commit/3b5adc3d1f452dbab7a714d235f4974782f93730) - Fix the wheel build. Thanks @pngwn!
227
+
228
+ ## 0.5.1
229
+
230
+ ### Fixes
231
+
232
+ - [#278](https://github.com/gradio-app/trackio/pull/278) [`314c054`](https://github.com/gradio-app/trackio/commit/314c05438007ddfea3383e06fd19143e27468e2d) - Fix row orientation of metrics plots. Thanks @abidlabs!
trackio/__init__.py ADDED
@@ -0,0 +1,749 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import atexit
2
+ import glob
3
+ import json
4
+ import logging
5
+ import os
6
+ import shutil
7
+ import warnings
8
+ import webbrowser
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ import huggingface_hub
13
+ from gradio.themes import ThemeClass
14
+ from gradio.utils import TupleNoPrint
15
+ from gradio_client import Client, handle_file
16
+ from huggingface_hub import SpaceStorage
17
+ from huggingface_hub.errors import LocalTokenNotFoundError
18
+
19
+ from trackio import context_vars, deploy, utils
20
+ from trackio.alerts import AlertLevel
21
+ from trackio.api import Api
22
+ from trackio.apple_gpu import apple_gpu_available
23
+ from trackio.apple_gpu import log_apple_gpu as _log_apple_gpu
24
+ from trackio.deploy import freeze, sync
25
+ from trackio.frontend_server import mount_frontend
26
+ from trackio.gpu import gpu_available
27
+ from trackio.gpu import log_gpu as _log_nvidia_gpu
28
+ from trackio.histogram import Histogram
29
+ from trackio.imports import import_csv, import_tf_events
30
+ from trackio.markdown import Markdown
31
+ from trackio.media import (
32
+ TrackioAudio,
33
+ TrackioImage,
34
+ TrackioVideo,
35
+ get_project_media_path,
36
+ )
37
+ from trackio.run import Run
38
+ from trackio.server import make_trackio_server
39
+ from trackio.sqlite_storage import SQLiteStorage
40
+ from trackio.table import Table
41
+ from trackio.typehints import UploadEntry
42
+ from trackio.utils import TRACKIO_DIR, TRACKIO_LOGO_DIR
43
+
44
+ logging.getLogger("httpx").setLevel(logging.WARNING)
45
+
46
+ warnings.filterwarnings(
47
+ "ignore",
48
+ message="Empty session being created. Install gradio\\[oauth\\]",
49
+ category=UserWarning,
50
+ module="gradio.helpers",
51
+ )
52
+
53
+ __version__ = json.loads(Path(__file__).parent.joinpath("package.json").read_text())[
54
+ "version"
55
+ ]
56
+
57
+ __all__ = [
58
+ "init",
59
+ "log",
60
+ "log_system",
61
+ "log_gpu",
62
+ "finish",
63
+ "alert",
64
+ "AlertLevel",
65
+ "show",
66
+ "sync",
67
+ "freeze",
68
+ "delete_project",
69
+ "import_csv",
70
+ "import_tf_events",
71
+ "save",
72
+ "Image",
73
+ "Video",
74
+ "Audio",
75
+ "Table",
76
+ "Histogram",
77
+ "Markdown",
78
+ "Api",
79
+ ]
80
+
81
+ Audio = TrackioAudio
82
+ Image = TrackioImage
83
+ Video = TrackioVideo
84
+
85
+
86
+ config = {}
87
+
88
+ _atexit_registered = False
89
+ _projects_notified_auto_log_hw: set[str] = set()
90
+
91
+
92
+ def _cleanup_current_run():
93
+ run = context_vars.current_run.get()
94
+ if run is not None:
95
+ try:
96
+ run.finish()
97
+ except Exception:
98
+ pass
99
+
100
+
101
+ def init(
102
+ project: str,
103
+ name: str | None = None,
104
+ group: str | None = None,
105
+ space_id: str | None = None,
106
+ space_storage: SpaceStorage | None = None,
107
+ dataset_id: str | None = None,
108
+ bucket_id: str | None = None,
109
+ config: dict | None = None,
110
+ resume: str = "never",
111
+ settings: Any = None,
112
+ private: bool | None = None,
113
+ embed: bool = True,
114
+ auto_log_gpu: bool | None = None,
115
+ gpu_log_interval: float = 10.0,
116
+ webhook_url: str | None = None,
117
+ webhook_min_level: AlertLevel | str | None = None,
118
+ ) -> Run:
119
+ """
120
+ Creates a new Trackio project and returns a [`Run`] object.
121
+
122
+ Args:
123
+ project (`str`):
124
+ The name of the project (can be an existing project to continue tracking or
125
+ a new project to start tracking from scratch).
126
+ name (`str`, *optional*):
127
+ The name of the run (if not provided, a default name will be generated).
128
+ group (`str`, *optional*):
129
+ The name of the group which this run belongs to in order to help organize
130
+ related runs together. You can toggle the entire group's visibility in the
131
+ dashboard.
132
+ space_id (`str`, *optional*):
133
+ If provided, the project will be logged to a Hugging Face Space instead of
134
+ a local directory. Should be a complete Space name like
135
+ `"username/reponame"` or `"orgname/reponame"`, or just `"reponame"` in which
136
+ case the Space will be created in the currently-logged-in Hugging Face
137
+ user's namespace. If the Space does not exist, it will be created. If the
138
+ Space already exists, the project will be logged to it. Can also be set
139
+ via the `TRACKIO_SPACE_ID` environment variable. You cannot log to a
140
+ Space that has been **frozen** (converted to the static SDK); use
141
+ ``trackio.sync(..., sdk="static")`` only after you are done logging.
142
+ space_storage ([`~huggingface_hub.SpaceStorage`], *optional*):
143
+ Choice of persistent storage tier.
144
+ dataset_id (`str`, *optional*):
145
+ Deprecated. Use `bucket_id` instead.
146
+ bucket_id (`str`, *optional*):
147
+ The ID of the Hugging Face Bucket to use for metric persistence. By default,
148
+ when a `space_id` is provided and `bucket_id` is not explicitly set, a
149
+ bucket is auto-generated from the space_id. Buckets provide
150
+ S3-like storage without git overhead - the SQLite database is stored directly
151
+ via `hf-mount` in the Space. Specify a Bucket with name like
152
+ `"username/bucketname"` or just `"bucketname"`.
153
+ config (`dict`, *optional*):
154
+ A dictionary of configuration options. Provided for compatibility with
155
+ `wandb.init()`.
156
+ resume (`str`, *optional*, defaults to `"never"`):
157
+ Controls how to handle resuming a run. Can be one of:
158
+
159
+ - `"must"`: Must resume the run with the given name, raises error if run
160
+ doesn't exist
161
+ - `"allow"`: Resume the run if it exists, otherwise create a new run
162
+ - `"never"`: Never resume a run, always create a new one
163
+ private (`bool`, *optional*):
164
+ Whether to make the Space private. If None (default), the repo will be
165
+ public unless the organization's default is private. This value is ignored
166
+ if the repo already exists.
167
+ settings (`Any`, *optional*):
168
+ Not used. Provided for compatibility with `wandb.init()`.
169
+ embed (`bool`, *optional*, defaults to `True`):
170
+ If running inside a Jupyter/Colab notebook, whether the dashboard should
171
+ automatically be embedded in the cell when trackio.init() is called. For
172
+ local runs, this launches a local Gradio app and embeds it. For Space runs,
173
+ this embeds the Space URL. In Colab, the local dashboard will be accessible
174
+ via a public share URL (default Gradio behavior).
175
+ auto_log_gpu (`bool` or `None`, *optional*, defaults to `None`):
176
+ Controls automatic GPU metrics logging. If `None` (default), GPU logging
177
+ is automatically enabled when `nvidia-ml-py` is installed and an NVIDIA
178
+ GPU or Apple M series is detected. Set to `True` to force enable or
179
+ `False` to disable.
180
+ gpu_log_interval (`float`, *optional*, defaults to `10.0`):
181
+ The interval in seconds between automatic GPU metric logs.
182
+ Only used when `auto_log_gpu=True`.
183
+ webhook_url (`str`, *optional*):
184
+ A webhook URL to POST alert payloads to when `trackio.alert()` is
185
+ called. Supports Slack and Discord webhook URLs natively (payloads
186
+ are formatted automatically). Can also be set via the
187
+ `TRACKIO_WEBHOOK_URL` environment variable. Individual alerts can
188
+ override this URL by passing `webhook_url` to `trackio.alert()`.
189
+ webhook_min_level (`AlertLevel` or `str`, *optional*):
190
+ Minimum alert level that should trigger webhook delivery.
191
+ For example, `AlertLevel.WARN` sends only `WARN` and `ERROR`
192
+ alerts to the webhook destination. Can also be set via
193
+ `TRACKIO_WEBHOOK_MIN_LEVEL`.
194
+ Returns:
195
+ `Run`: A [`Run`] object that can be used to log metrics and finish the run.
196
+ """
197
+ if settings is not None:
198
+ warnings.warn(
199
+ "* Warning: settings is not used. Provided for compatibility with wandb.init(). Please create an issue at: https://github.com/gradio-app/trackio/issues if you need a specific feature implemented."
200
+ )
201
+
202
+ space_id = space_id or os.environ.get("TRACKIO_SPACE_ID")
203
+ bucket_id = bucket_id or os.environ.get("TRACKIO_BUCKET_ID")
204
+ if space_id is None and dataset_id is not None:
205
+ raise ValueError("Must provide a `space_id` when `dataset_id` is provided.")
206
+ if dataset_id is not None and bucket_id is not None:
207
+ raise ValueError("Cannot provide both `dataset_id` and `bucket_id`.")
208
+ try:
209
+ space_id, dataset_id, bucket_id = utils.preprocess_space_and_dataset_ids(
210
+ space_id, dataset_id, bucket_id
211
+ )
212
+ except LocalTokenNotFoundError as e:
213
+ raise LocalTokenNotFoundError(
214
+ f"You must be logged in to Hugging Face locally when `space_id` is provided to deploy to a Space. {e}"
215
+ ) from e
216
+
217
+ if space_id is None and bucket_id is not None:
218
+ warnings.warn(
219
+ "trackio.init() has `bucket_id` set but `space_id` is None: metrics will be logged "
220
+ "locally only. Pass `space_id` to create or use a Hugging Face Space, which will be "
221
+ "attached to the Hugging Face Bucket.",
222
+ UserWarning,
223
+ stacklevel=2,
224
+ )
225
+
226
+ if space_id is not None:
227
+ deploy.raise_if_space_is_frozen_for_logging(space_id)
228
+
229
+ url = context_vars.current_server.get()
230
+
231
+ if space_id is not None:
232
+ if url is None:
233
+ url = space_id
234
+ context_vars.current_server.set(url)
235
+ context_vars.current_space_id.set(space_id)
236
+
237
+ _should_embed_local = False
238
+
239
+ if (
240
+ context_vars.current_project.get() is None
241
+ or context_vars.current_project.get() != project
242
+ ):
243
+ print(f"* Trackio project initialized: {project}")
244
+
245
+ if bucket_id is not None:
246
+ os.environ["TRACKIO_BUCKET_ID"] = bucket_id
247
+ bucket_url = f"https://huggingface.co/buckets/{bucket_id}"
248
+ print(
249
+ f"* Trackio metrics will be synced to Hugging Face Bucket: {bucket_url}"
250
+ )
251
+ elif dataset_id is not None:
252
+ os.environ["TRACKIO_DATASET_ID"] = dataset_id
253
+ print(
254
+ f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}"
255
+ )
256
+ if space_id is None:
257
+ print(f"* Trackio metrics logged to: {TRACKIO_DIR}")
258
+ _should_embed_local = embed and utils.is_in_notebook()
259
+ if not _should_embed_local:
260
+ utils.print_dashboard_instructions(project)
261
+ else:
262
+ deploy.create_space_if_not_exists(
263
+ space_id,
264
+ space_storage,
265
+ dataset_id,
266
+ bucket_id,
267
+ private,
268
+ )
269
+ user_name, space_name = space_id.split("/")
270
+ space_url = deploy.SPACE_HOST_URL.format(
271
+ user_name=user_name, space_name=space_name
272
+ )
273
+ if utils.is_in_notebook() and embed:
274
+ utils.embed_url_in_notebook(space_url)
275
+ context_vars.current_project.set(project)
276
+
277
+ if resume == "must":
278
+ if name is None:
279
+ raise ValueError("Must provide a run name when resume='must'")
280
+ if name not in SQLiteStorage.get_runs(project):
281
+ raise ValueError(f"Run '{name}' does not exist in project '{project}'")
282
+ resumed = True
283
+ elif resume == "allow":
284
+ resumed = name is not None and name in SQLiteStorage.get_runs(project)
285
+ elif resume == "never":
286
+ if name is not None and name in SQLiteStorage.get_runs(project):
287
+ warnings.warn(
288
+ f"* Warning: resume='never' but a run '{name}' already exists in "
289
+ f"project '{project}'. Generating a new name and instead. If you want "
290
+ "to resume this run, call init() with resume='must' or resume='allow'."
291
+ )
292
+ name = None
293
+ resumed = False
294
+ else:
295
+ raise ValueError("resume must be one of: 'must', 'allow', or 'never'")
296
+
297
+ if auto_log_gpu is None:
298
+ nvidia_available = gpu_available()
299
+ apple_available = apple_gpu_available()
300
+ auto_log_gpu = nvidia_available or apple_available
301
+ if project not in _projects_notified_auto_log_hw:
302
+ if nvidia_available:
303
+ print("* NVIDIA GPU detected, enabling automatic GPU metrics logging")
304
+ elif apple_available:
305
+ print(
306
+ "* Apple Silicon detected, enabling automatic system metrics logging"
307
+ )
308
+ if nvidia_available or apple_available:
309
+ _projects_notified_auto_log_hw.add(project)
310
+
311
+ run = Run(
312
+ url=url,
313
+ project=project,
314
+ client=None,
315
+ name=name,
316
+ group=group,
317
+ config=config,
318
+ space_id=space_id,
319
+ auto_log_gpu=auto_log_gpu,
320
+ gpu_log_interval=gpu_log_interval,
321
+ webhook_url=webhook_url,
322
+ webhook_min_level=webhook_min_level,
323
+ )
324
+
325
+ if space_id is not None:
326
+ SQLiteStorage.set_project_metadata(project, "space_id", space_id)
327
+ if SQLiteStorage.has_pending_data(project):
328
+ run._has_local_buffer = True
329
+
330
+ global _atexit_registered
331
+ if not _atexit_registered:
332
+ atexit.register(_cleanup_current_run)
333
+ _atexit_registered = True
334
+
335
+ if resumed:
336
+ print(f"* Resumed existing run: {run.name}")
337
+ else:
338
+ print(f"* Created new run: {run.name}")
339
+
340
+ context_vars.current_run.set(run)
341
+ globals()["config"] = run.config
342
+
343
+ if _should_embed_local:
344
+ show(project=project, open_browser=False, block_thread=False)
345
+
346
+ return run
347
+
348
+
349
def log(metrics: dict, step: int | None = None) -> None:
    """
    Records a dictionary of metrics on the currently active run.

    Args:
        metrics (`dict`):
            The metrics to record.
        step (`int`, *optional*):
            The step number to attach to the metrics. Forwarded unchanged to
            `Run.log()`; if not provided, the step is incremented automatically.

    Raises:
        `RuntimeError`: If there is no active run (`trackio.init()` was not called).
    """
    active_run = context_vars.current_run.get()
    if active_run is not None:
        active_run.log(metrics=metrics, step=step)
        return
    raise RuntimeError("Call trackio.init() before trackio.log().")
367
+
368
+
369
def log_system(metrics: dict) -> None:
    """
    Records system metrics (GPU, etc.) on the currently active run. System
    metrics are keyed by timestamp rather than by step.

    Args:
        metrics (`dict`):
            The system metrics to record.

    Raises:
        `RuntimeError`: If there is no active run (`trackio.init()` was not called).
    """
    active_run = context_vars.current_run.get()
    if active_run is not None:
        active_run.log_system(metrics=metrics)
        return
    raise RuntimeError("Call trackio.init() before trackio.log_system().")
381
+
382
+
383
def log_gpu(run: Run | None = None, device: int | None = None) -> dict:
    """
    Logs GPU metrics as system metrics on the given run, or on the current run
    when none is given. The NVIDIA backend is tried first, then the Apple one.

    Args:
        run: Optional Run instance. If None, the current run from context is used.
        device: CUDA device index to collect metrics from (NVIDIA GPUs only).
            If None, collects from all GPUs visible to this process.
            Not forwarded to the Apple backend.

    Returns:
        dict: The GPU metrics that were logged, or an empty dict (after a
        warning) when neither backend is available.

    Raises:
        RuntimeError: If no run was passed and there is no current run.

    Example:
        ```python
        import trackio

        run = trackio.init(project="my-project")
        trackio.log({"loss": 0.5})
        trackio.log_gpu()
        trackio.log_gpu(device=0)
        ```
    """
    target_run = run if run is not None else context_vars.current_run.get()
    if target_run is None:
        raise RuntimeError("Call trackio.init() before trackio.log_gpu().")

    if gpu_available():
        return _log_nvidia_gpu(run=target_run, device=device)
    if apple_gpu_available():
        return _log_apple_gpu(run=target_run)

    warnings.warn(
        "No GPU detected. Install nvidia-ml-py for NVIDIA GPU support "
        "or psutil for Apple Silicon support."
    )
    return {}
423
+
424
+
425
def finish():
    """
    Finishes the currently active run.

    Raises:
        `RuntimeError`: If there is no active run (`trackio.init()` was not called).
    """
    active_run = context_vars.current_run.get()
    if active_run is not None:
        active_run.finish()
        return
    raise RuntimeError("Call trackio.init() before trackio.finish().")
433
+
434
+
435
def alert(
    title: str,
    text: str | None = None,
    level: AlertLevel = AlertLevel.WARN,
    webhook_url: str | None = None,
) -> None:
    """
    Fires an alert on the current run right away by delegating to `Run.alert()`.
    The alert is printed to the terminal, stored in the database, and shown in
    the dashboard. When a webhook URL is configured (through `trackio.init()`,
    the `TRACKIO_WEBHOOK_URL` environment variable, or the `webhook_url`
    argument of this function), the alert is additionally POSTed to that URL.

    Args:
        title (`str`):
            A short title for the alert.
        text (`str`, *optional*):
            A longer description with details about the alert.
        level (`AlertLevel`, *optional*, defaults to `AlertLevel.WARN`):
            The severity level. One of `AlertLevel.INFO`, `AlertLevel.WARN`,
            or `AlertLevel.ERROR`.
        webhook_url (`str`, *optional*):
            A webhook URL for this specific alert. Takes precedence over any
            URL set in `trackio.init()` or the `TRACKIO_WEBHOOK_URL`
            environment variable. Slack and Discord webhook URLs are
            supported natively.

    Raises:
        `RuntimeError`: If there is no active run (`trackio.init()` was not called).
    """
    active_run = context_vars.current_run.get()
    if active_run is not None:
        active_run.alert(title=title, text=text, level=level, webhook_url=webhook_url)
        return
    raise RuntimeError("Call trackio.init() before trackio.alert().")
466
+
467
+
468
def delete_project(project: str, force: bool = False) -> bool:
    """
    Deletes a project by removing its local SQLite database file together
    with the `-wal` and `-shm` sidecar files SQLite may have left next to it.

    Args:
        project (`str`):
            The name of the project to delete.
        force (`bool`, *optional*, defaults to `False`):
            If `True`, deletes the project without prompting for confirmation.
            If `False`, prompts the user to confirm before deleting.

    Returns:
        `bool`: `True` if the project was deleted, `False` if it does not
        exist, the user declined, or a filesystem error occurred.
    """
    db_path = SQLiteStorage.get_project_db_path(project)

    if not db_path.exists():
        print(f"* Project '{project}' does not exist.")
        return False

    if not force:
        response = input(
            f"Are you sure you want to delete project '{project}'? "
            f"This will permanently delete all runs and metrics. (y/N): "
        )
        # Tolerate stray whitespace around the answer (e.g. "y " or " yes").
        if response.strip().lower() not in ("y", "yes"):
            print("* Deletion cancelled.")
            return False

    try:
        db_path.unlink()

        # Sidecars only exist in some journal modes; `missing_ok` avoids the
        # exists()/unlink() race of checking first.
        for suffix in ("-wal", "-shm"):
            Path(str(db_path) + suffix).unlink(missing_ok=True)
    except OSError as e:
        print(f"* Error deleting project '{project}': {e}")
        return False

    print(f"* Project '{project}' has been deleted.")
    return True
510
+
511
+
512
def save(
    glob_str: str | Path,
    project: str | None = None,
) -> str:
    """
    Saves files to a project (not linked to a specific run). When a run is
    active, each file is handed to that run's upload queue. Otherwise the
    files are copied into the project's local media directory (local mode) or
    sent to the server in a single bulk upload (Space mode).

    Args:
        glob_str (`str` or `Path`):
            The file path or glob pattern to save. Can be a single file or a pattern
            matching multiple files (e.g., `"*.py"`, `"models/**/*.pth"`).
            Relative patterns are resolved against the current working directory.
        project (`str`, *optional*):
            The name of the project to save files to. If not provided, the
            current project from `trackio.init()` is used.

    Returns:
        `str`: The project's files directory (`MEDIA_DIR / project / "files"`).

    Raises:
        `RuntimeError`: If no project is given and none is initialized, or if
            files must be uploaded but no server URL is known.
        `ValueError`: If nothing matches `glob_str`.

    Example:
        ```python
        import trackio

        trackio.init(project="my-project")
        trackio.save("config.yaml")
        trackio.save("models/*.pth")
        ```
    """
    if project is None:
        project = context_vars.current_project.get()
        if project is None:
            raise RuntimeError(
                "No project specified. Either call trackio.init() first or provide a "
                "project parameter to trackio.save()."
            )

    glob_str = Path(glob_str)
    base_path = Path.cwd().resolve()

    if glob_str.is_file():
        matched_files = [glob_str.resolve()]
    else:
        if glob_str.is_absolute():
            pattern = str(glob_str)
        else:
            pattern = str((Path.cwd() / glob_str).resolve())
        matched_files = []
        for candidate in glob.glob(pattern, recursive=True):
            candidate_path = Path(candidate)
            if candidate_path.is_file():
                matched_files.append(candidate_path.resolve())

    if not matched_files:
        raise ValueError(f"No files found matching pattern: {glob_str}")

    current_run = context_vars.current_run.get()
    if current_run is not None:
        is_local = current_run._is_local
    else:
        is_local = context_vars.current_space_id.get() is None

    def _relative_to_cwd(path: Path) -> Path:
        # Files outside the working directory keep only their file name.
        try:
            return path.relative_to(base_path)
        except ValueError:
            return Path(path.name)

    if is_local:
        for file_path in matched_files:
            relative_to_base = _relative_to_cwd(file_path)
            if current_run is not None:
                current_run._queue_upload(
                    file_path,
                    step=None,
                    relative_path=str(relative_to_base.parent),
                    use_run_name=False,
                )
                continue
            media_path = get_project_media_path(
                project=project,
                run=None,
                step=None,
                relative_path=str(relative_to_base),
            )
            shutil.copy(str(file_path), str(media_path))
    else:
        url = context_vars.current_server.get()

        upload_entries = []
        for file_path in matched_files:
            relative_to_base = _relative_to_cwd(file_path)
            if current_run is not None:
                current_run._queue_upload(
                    file_path,
                    step=None,
                    relative_path=str(relative_to_base.parent),
                    use_run_name=False,
                )
                continue
            upload_entries.append(
                {
                    "project": project,
                    "run": None,
                    "step": None,
                    "relative_path": str(relative_to_base),
                    "uploaded_file": handle_file(file_path),
                }
            )

        if upload_entries:
            if url is None:
                raise RuntimeError(
                    "No server available. Call trackio.init() before trackio.save() to start the server."
                )

            # Upload failures are reported as a warning, not raised.
            try:
                client = Client(url, verbose=False, httpx_kwargs={"timeout": 90})
                client.predict(
                    api_name="/bulk_upload_media",
                    uploads=upload_entries,
                    hf_token=huggingface_hub.utils.get_token(),
                )
            except Exception as e:
                warnings.warn(
                    f"Failed to upload files: {e}. "
                    "Files may not be available in the dashboard."
                )

    return str(utils.MEDIA_DIR / project / "files")
645
+
646
+
647
def show(
    project: str | None = None,
    *,
    theme: str | ThemeClass | None = None,
    mcp_server: bool | None = None,
    footer: bool = True,
    color_palette: list[str] | None = None,
    open_browser: bool = True,
    block_thread: bool | None = None,
    host: str | None = None,
):
    """
    Launches the Trackio dashboard.

    Args:
        project (`str`, *optional*):
            The name of the project whose runs to show. If not provided, all projects
            will be shown and the user can select one.
        theme (`str` or `ThemeClass`, *optional*):
            A Gradio Theme to use for the dashboard instead of the default Gradio theme,
            can be a built-in theme (e.g. `'soft'`, `'citrus'`), a theme from the Hub
            (e.g. `"gstaff/xkcd"`), or a custom Theme class. If not provided, the
            `TRACKIO_THEME` environment variable will be used, or if that is not set,
            the default Gradio theme will be used.
        mcp_server (`bool`, *optional*):
            If `True`, the Trackio dashboard will be set up as an MCP server and certain
            functions will be added as MCP tools. If `None` (default behavior), the MCP
            server is enabled only when the `GRADIO_MCP_SERVER` environment variable is
            exactly `"True"` (which is the case on Hugging Face Spaces).
        footer (`bool`, *optional*, defaults to `True`):
            Whether to show the Gradio footer. When `False`, the footer will be hidden.
            This can also be controlled via the `footer` query parameter in the URL.
        color_palette (`list[str]`, *optional*):
            A list of hex color codes to use for plot lines. When given, it is written
            to the `TRACKIO_COLOR_PALETTE` environment variable as a comma-separated
            string. If not provided, an existing value of that variable is used, or
            else the default color palette.
            Example: `['#FF0000', '#00FF00', '#0000FF']`
        open_browser (`bool`, *optional*, defaults to `True`):
            If `True` and not in a notebook, a new browser tab will be opened with the
            dashboard. If `False`, the browser will not be opened.
        block_thread (`bool`, *optional*):
            If `True`, the main thread will be blocked until the dashboard is closed.
            If `None` (default behavior), the main thread is blocked unless the
            dashboard is launched in a notebook.
        host (`str`, *optional*):
            The host to bind the server to. If not provided, defaults to `'127.0.0.1'`
            (localhost only). Set to `'0.0.0.0'` to allow remote access.

    Returns:
        `app`: The Gradio app object corresponding to the dashboard launched by Trackio.
        `url`: The local URL of the dashboard.
        `share_url`: The public share URL of the dashboard.
        `full_url`: The full URL of the dashboard including the write token (will use the public share URL if launched publicly, otherwise the local URL).
    """
    if color_palette is not None:
        os.environ["TRACKIO_COLOR_PALETTE"] = ",".join(color_palette)

    theme = theme or os.environ.get("TRACKIO_THEME")

    if mcp_server is None:
        enable_mcp = os.environ.get("GRADIO_MCP_SERVER", "False") == "True"
    else:
        enable_mcp = mcp_server

    server = make_trackio_server()
    mount_frontend(server)

    # prevent_thread_lock=True: blocking, if wanted, is done explicitly below.
    _, url, share_url = server.launch(
        quiet=True,
        inline=False,
        prevent_thread_lock=True,
        favicon_path=TRACKIO_LOGO_DIR / "trackio_logo_light.png",
        allowed_paths=[TRACKIO_LOGO_DIR, TRACKIO_DIR],
        mcp_server=enable_mcp,
        theme=theme,
        server_name=host,
    )

    base_url = share_url + "/" if share_url else url
    root_url = base_url.rstrip("/")
    dashboard_url = root_url + "/"
    if project:
        dashboard_url += f"?project={project}"
    full_url = utils.get_full_url(
        root_url,
        project=project,
        write_token=server.write_token,
        footer=footer,
    )

    in_notebook = utils.is_in_notebook()
    if in_notebook:
        utils.embed_url_in_notebook(dashboard_url)
    else:
        print(f"* Trackio UI launched at: {dashboard_url}")
        print(f"* Gradio API available at: {base_url}")
        if open_browser:
            webbrowser.open(dashboard_url)

    # Unless told otherwise, block in scripts and do not block in notebooks.
    if block_thread is None:
        block_thread = not in_notebook

    if block_thread:
        utils.block_main_thread_until_keyboard_interrupt()
    return TupleNoPrint((server, url, share_url, full_url))
trackio/alerts.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ import ssl
4
+ import urllib.request
5
+ from enum import Enum
6
+
7
+ try:
8
+ import certifi
9
+
10
+ _SSL_CONTEXT = ssl.create_default_context(cafile=certifi.where())
11
+ except ImportError:
12
+ _SSL_CONTEXT = None
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class AlertLevel(str, Enum):
    """
    Severity levels an alert can carry.

    Members are also `str` instances, so each one compares equal to its
    lowercase value (for example `AlertLevel.WARN == "warn"`).
    """

    INFO = "info"
    WARN = "warn"
    ERROR = "error"
21
+
22
+
23
# Numeric rank of each level, from least (0) to most severe (2); used to
# compare an alert's level against a minimum level.
ALERT_LEVEL_ORDER = {
    level: rank
    for rank, level in enumerate(
        (AlertLevel.INFO, AlertLevel.WARN, AlertLevel.ERROR)
    )
}

# ANSI escape sequences that colour the level tag in terminal output.
RESET_COLOR = "\033[0m"
ALERT_COLORS = {
    AlertLevel.INFO: "\033[94m",
    AlertLevel.WARN: "\033[93m",
    AlertLevel.ERROR: "\033[91m",
}

# Emoji prefixed to the title in Slack and Discord payloads.
LEVEL_EMOJI = {
    AlertLevel.INFO: "ℹ️",
    AlertLevel.WARN: "⚠️",
    AlertLevel.ERROR: "🚨",
}
41
+
42
+
43
def format_alert_terminal(
    level: AlertLevel, title: str, text: str | None, step: int | None
) -> str:
    """
    Build the single-line, colourised terminal representation of an alert.

    The result has the shape `[TRACKIO <LEVEL>] <title>[: <text>][ (step N)]`,
    with the bracketed tag wrapped in the ANSI colour for `level`. A level
    without a configured colour is left uncoloured.
    """
    colour = ALERT_COLORS.get(level, "")
    tag = f"{colour}[TRACKIO {level.value.upper()}]{RESET_COLOR}"
    message = f"{title}: {text}" if text else title
    suffix = "" if step is None else f" (step {step})"
    return f"{tag} {message}{suffix}"
51
+
52
+
53
def _is_slack_url(url: str) -> bool:
    """Return whether `url` contains the Slack incoming-webhook host name."""
    slack_host = "hooks.slack.com"
    return slack_host in url
55
+
56
+
57
def _is_discord_url(url: str) -> bool:
    """Return whether `url` contains one of the Discord webhook path prefixes."""
    discord_markers = ("discord.com/api/webhooks", "discordapp.com/api/webhooks")
    return any(marker in url for marker in discord_markers)
59
+
60
+
61
def _build_slack_payload(
    level: AlertLevel,
    title: str,
    text: str | None,
    project: str,
    run: str,
    step: int | None,
) -> dict:
    """
    Build a Slack Block Kit payload for an alert.

    The payload has a header section (emoji, level, title), an optional
    section holding `text`, and a context block naming the project, the run
    and, when given, the step.
    """

    def _section(markdown: str) -> dict:
        return {"type": "section", "text": {"type": "mrkdwn", "text": markdown}}

    emoji = LEVEL_EMOJI.get(level, "")
    context = f"Project: {project} • Run: {run}"
    if step is not None:
        context += f" • Step {step}"

    blocks = [_section(f"{emoji} *[{level.value.upper()}] {title}*")]
    if text:
        blocks.append(_section(text))
    blocks.append(
        {"type": "context", "elements": [{"type": "mrkdwn", "text": context}]}
    )
    return {"blocks": blocks}
82
+
83
+
84
def _build_discord_payload(
    level: AlertLevel,
    title: str,
    text: str | None,
    project: str,
    run: str,
    step: int | None,
) -> dict:
    """
    Build a Discord webhook payload holding a single embed for an alert.

    The embed carries the title (emoji, level, title), a per-level colour
    (0 for an unknown level), a footer naming project, run and optional step,
    and a description only when `text` is non-empty.
    """
    # Decimal RGB values used as the embed colour.
    embed_colours = {
        AlertLevel.INFO: 3447003,
        AlertLevel.WARN: 16776960,
        AlertLevel.ERROR: 15158332,
    }
    emoji = LEVEL_EMOJI.get(level, "")

    footer_text = f"Project: {project} • Run: {run}"
    if step is not None:
        footer_text += f" • Step {step}"

    embed = {
        "title": f"{emoji} [{level.value.upper()}] {title}",
        "color": embed_colours.get(level, 0),
        "footer": {"text": footer_text},
    }
    if text:
        embed["description"] = text
    return {"embeds": [embed]}
107
+
108
+
109
def _build_generic_payload(
    level: AlertLevel,
    title: str,
    text: str | None,
    project: str,
    run: str,
    step: int | None,
    timestamp: str | None,
) -> dict:
    """
    Build the plain JSON payload sent to webhooks that are neither Slack nor
    Discord. The level is serialised as its string value; all other fields
    are passed through unchanged, including `None` values.
    """
    return dict(
        level=level.value,
        title=title,
        text=text,
        project=project,
        run=run,
        step=step,
        timestamp=timestamp,
    )
127
+
128
+
129
def parse_alert_level(level: AlertLevel | str) -> AlertLevel:
    """
    Coerce `level` into an `AlertLevel`.

    An `AlertLevel` is returned as is. A string is lowercased and stripped of
    surrounding whitespace before being looked up by value.

    Raises:
        ValueError: If the string does not name a known level; the message
            lists the accepted values.
    """
    if isinstance(level, AlertLevel):
        return level

    candidate = level.lower().strip()
    try:
        parsed = AlertLevel(candidate)
    except ValueError as e:
        allowed = ", ".join(member.value for member in AlertLevel)
        raise ValueError(
            f"Invalid alert level '{level}'. Expected one of: {allowed}."
        ) from e
    return parsed
140
+
141
+
142
def resolve_webhook_min_level(
    webhook_min_level: AlertLevel | str | None,
) -> AlertLevel | None:
    """
    Normalise an optional minimum webhook level: `None` stays `None`, anything
    else is parsed with `parse_alert_level()` (which raises `ValueError` on an
    unknown level).
    """
    return (
        None if webhook_min_level is None else parse_alert_level(webhook_min_level)
    )
148
+
149
+
150
def should_send_webhook(
    level: AlertLevel, webhook_min_level: AlertLevel | None
) -> bool:
    """
    Decide whether an alert of `level` should be delivered to the webhook.

    With no minimum configured every alert is delivered; otherwise the alert
    must rank at least as high as `webhook_min_level` in `ALERT_LEVEL_ORDER`.
    """
    if webhook_min_level is not None:
        return ALERT_LEVEL_ORDER[level] >= ALERT_LEVEL_ORDER[webhook_min_level]
    return True
156
+
157
+
158
def send_webhook(
    url: str,
    level: AlertLevel,
    title: str,
    text: str | None,
    project: str,
    run: str,
    step: int | None,
    timestamp: str | None = None,
) -> None:
    """
    POST an alert to a webhook as JSON.

    The payload format is chosen from the URL: Slack Block Kit for Slack
    webhooks, an embed for Discord webhooks, and a flat JSON object (the only
    format that includes `timestamp`) for anything else.

    Delivery is best effort: any failure is logged as a warning and never
    raised to the caller.
    """
    if _is_slack_url(url):
        payload = _build_slack_payload(level, title, text, project, run, step)
    elif _is_discord_url(url):
        payload = _build_discord_payload(level, title, text, project, run, step)
    else:
        payload = _build_generic_payload(
            level, title, text, project, run, step, timestamp
        )

    data = json.dumps(payload).encode("utf-8")
    # Supplying `data` makes urllib issue a POST request.
    req = urllib.request.Request(
        url, data=data, headers={"Content-Type": "application/json"}
    )
    try:
        # Close the response explicitly so the connection is released right
        # away instead of lingering until garbage collection.
        with urllib.request.urlopen(req, timeout=10, context=_SSL_CONTEXT):
            pass
    except Exception as e:
        logger.warning("Failed to send webhook to %s: %s", url, e)
trackio/api.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Iterator
2
+
3
+ from trackio.sqlite_storage import SQLiteStorage
4
+
5
+
6
+ class Run:
7
+ def __init__(self, project: str, name: str):
8
+ self.project = project
9
+ self.name = name
10
+ self._config = None
11
+
12
+ @property
13
+ def id(self) -> str:
14
+ return self.name
15
+
16
+ @property
17
+ def config(self) -> dict | None:
18
+ if self._config is None:
19
+ self._config = SQLiteStorage.get_run_config(self.project, self.name)
20
+ return self._config
21
+
22
+ def alerts(self, level: str | None = None, since: str | None = None) -> list[dict]:
23
+ return SQLiteStorage.get_alerts(
24
+ self.project, run_name=self.name, level=level, since=since
25
+ )
26
+
27
+ def delete(self) -> bool:
28
+ return SQLiteStorage.delete_run(self.project, self.name)
29
+
30
+ def move(self, new_project: str) -> bool:
31
+ success = SQLiteStorage.move_run(self.project, self.name, new_project)
32
+ if success:
33
+ self.project = new_project
34
+ return success
35
+
36
+ def rename(self, new_name: str) -> "Run":
37
+ SQLiteStorage.rename_run(self.project, self.name, new_name)
38
+ self.name = new_name
39
+ return self
40
+
41
+ def __repr__(self) -> str:
42
+ return f"<Run {self.name} in project {self.project}>"
43
+
44
+
45
class Runs:
    """
    Lazily loaded, list-like collection of the `Run` handles of one project.
    Run names are read from storage on first use and then cached.
    """

    def __init__(self, project: str):
        self.project = project
        self._runs = None

    def _load_runs(self):
        """Populate the cache from storage unless it is already filled."""
        if self._runs is not None:
            return
        self._runs = [
            Run(self.project, run_name)
            for run_name in SQLiteStorage.get_runs(self.project)
        ]

    def __iter__(self) -> Iterator[Run]:
        self._load_runs()
        return iter(self._runs)

    def __getitem__(self, index: int) -> Run:
        self._load_runs()
        return self._runs[index]

    def __len__(self) -> int:
        self._load_runs()
        return len(self._runs)

    def __repr__(self) -> str:
        self._load_runs()
        return f"<Runs project={self.project} count={len(self._runs)}>"
70
+
71
+
72
class Api:
    """Read-oriented entry point for querying the runs and alerts of local projects."""

    def runs(self, project: str) -> Runs:
        """
        Return the runs of `project`.

        Raises:
            ValueError: If the project's database file does not exist.
        """
        if SQLiteStorage.get_project_db_path(project).exists():
            return Runs(project)
        raise ValueError(f"Project '{project}' does not exist")

    def alerts(
        self,
        project: str,
        run: str | None = None,
        level: str | None = None,
        since: str | None = None,
    ) -> list[dict]:
        """
        Return the alerts of `project`, optionally filtered by run name,
        level and `since`.

        Raises:
            ValueError: If the project's database file does not exist.
        """
        if SQLiteStorage.get_project_db_path(project).exists():
            return SQLiteStorage.get_alerts(
                project, run_name=run, level=level, since=since
            )
        raise ValueError(f"Project '{project}' does not exist")
trackio/apple_gpu.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import platform
2
+ import subprocess
3
+ import sys
4
+ import threading
5
+ import warnings
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ if TYPE_CHECKING:
9
+ from trackio.run import Run
10
+
11
# Module-level lock; not used by the functions defined directly below.
_monitor_lock = threading.Lock()

# `psutil` is imported lazily by `_ensure_psutil()`; until that succeeds the
# name is bound to `None` and the flag stays `False`.
PSUTIL_AVAILABLE = False
psutil: Any = None
14
+
15
+
16
def _ensure_psutil():
    """
    Import `psutil` on first use and cache it in the module globals.

    Returns:
        The `psutil` module.

    Raises:
        ImportError: If `psutil` is not installed.
    """
    global PSUTIL_AVAILABLE, psutil
    if not PSUTIL_AVAILABLE:
        try:
            import psutil as _psutil
        except ImportError:
            raise ImportError(
                "psutil is required for Apple Silicon monitoring. "
                "Install it with: pip install psutil"
            )
        psutil = _psutil
        PSUTIL_AVAILABLE = True
    return psutil
31
+
32
+
33
def is_apple_silicon() -> bool:
    """
    Check if running on Apple Silicon (M1/M2/M3/M4).

    Returns `True` only on macOS when the CPU brand string reported by
    `sysctl` contains "Apple". Any failure to query `sysctl` yields `False`.
    """
    if platform.system() == "Darwin":
        try:
            completed = subprocess.run(
                ["sysctl", "-n", "machdep.cpu.brand_string"],
                capture_output=True,
                text=True,
                timeout=1,
            )
            return "Apple" in completed.stdout.strip()
        except Exception:
            return False
    return False
49
+
50
+
51
def get_gpu_info() -> dict[str, Any]:
    """
    Get Apple GPU information, trying `ioreg` first and `system_profiler` second.

    Returns:
        `{"detected": True, "type": <name>}` when a GPU is found, otherwise
        `{"detected": False}`. Any exception raised while probing is
        swallowed and reported as "not detected".
    """
    ioreg_cmd = ["ioreg", "-r", "-d", "1", "-w", "0", "-c", "IOAccelerator"]
    profiler_cmd = ["system_profiler", "SPDisplaysDataType"]

    try:
        ioreg = subprocess.run(
            ioreg_cmd,
            capture_output=True,
            text=True,
            timeout=2,
        )

        if ioreg.returncode == 0 and ioreg.stdout:
            for row in ioreg.stdout.strip().split("\n"):
                if "IOAccelerator" in row and "class" in row:
                    return {"detected": True, "type": "Apple GPU"}
        else:
            # ioreg failed or printed nothing: dump its output for diagnosis.
            print("Error collecting Apple GPU info. ioreg stdout was:", file=sys.stderr)
            print(ioreg.stdout, file=sys.stderr)
            print("ioreg stderr was:", file=sys.stderr)
            print(ioreg.stderr, file=sys.stderr)

        profiler = subprocess.run(
            profiler_cmd,
            capture_output=True,
            text=True,
            timeout=3,
        )

        if profiler.returncode == 0 and "Apple" in profiler.stdout:
            for row in profiler.stdout.split("\n"):
                if "Chipset Model:" in row:
                    return {"detected": True, "type": row.split(":")[-1].strip()}

    except Exception:
        pass

    return {"detected": False}
89
+
90
+
91
def apple_gpu_available() -> bool:
    """
    Check if Apple GPU monitoring is available.

    Returns True if running on Apple Silicon (M-series chips) and psutil is
    installed. Never raises: any error during the check yields False.
    """
    try:
        _ensure_psutil()
        available = is_apple_silicon()
    except Exception:  # also covers the ImportError raised by _ensure_psutil()
        available = False
    return available
104
+
105
+
106
def collect_apple_metrics() -> dict:
    """
    Collect system metrics for Apple Silicon.

    Each metric group is read independently; a group that fails is silently
    left out so the remaining groups are still reported.

    Returns:
        Dictionary of system metrics: overall and per-core CPU utilization,
        CPU frequency, memory and swap usage (byte counts divided by 1024**3),
        temperatures where `psutil` exposes them, and `gpu/detected` when a
        GPU is found. Empty if `psutil` is not installed.
    """
    if not PSUTIL_AVAILABLE:
        try:
            _ensure_psutil()
        except ImportError:
            return {}

    gib = 1024**3
    metrics = {}

    # Each cpu_percent() call blocks for `interval` seconds while sampling.
    try:
        metrics["cpu/utilization"] = psutil.cpu_percent(interval=0.1, percpu=False)
    except Exception:
        pass

    try:
        per_core = psutil.cpu_percent(interval=0.1, percpu=True)
        for core, percent in enumerate(per_core):
            metrics[f"cpu/{core}/utilization"] = percent
    except Exception:
        pass

    try:
        cpu_freq = psutil.cpu_freq()
        if cpu_freq:
            metrics["cpu/frequency"] = cpu_freq.current
            if cpu_freq.max > 0:
                metrics["cpu/frequency_max"] = cpu_freq.max
    except Exception:
        pass

    try:
        mem = psutil.virtual_memory()
        metrics["memory/used"] = mem.used / gib
        metrics["memory/total"] = mem.total / gib
        metrics["memory/available"] = mem.available / gib
        metrics["memory/percent"] = mem.percent
    except Exception:
        pass

    try:
        swap = psutil.swap_memory()
        metrics["swap/used"] = swap.used / gib
        metrics["swap/total"] = swap.total / gib
        metrics["swap/percent"] = swap.percent
    except Exception:
        pass

    try:
        sensors_temps = psutil.sensors_temperatures()
        if sensors_temps:
            for name, entries in sensors_temps.items():
                for i, entry in enumerate(entries):
                    # Unlabelled sensors get a "<name>_<index>" key.
                    label = entry.label or f"{name}_{i}"
                    metrics[f"temp/{label}"] = entry.current
    except Exception:
        pass

    # Only the detection flag is logged; the GPU type string is not a metric.
    if get_gpu_info().get("detected"):
        metrics["gpu/detected"] = 1

    return metrics
177
+
178
+
179
class AppleGpuMonitor:
    """
    Background sampler that periodically logs Apple Silicon system metrics to
    a run from a daemon thread.
    """

    def __init__(self, run: "Run", interval: float = 10.0):
        self._run = run
        self._interval = interval
        self._stop_flag = threading.Event()
        self._thread: "threading.Thread | None" = None

    def start(self):
        """
        Start the sampling thread. Emits a warning and does nothing when not
        on Apple Silicon or when `psutil` cannot be imported.
        """
        if not is_apple_silicon():
            warnings.warn(
                "auto_log_gpu=True but not running on Apple Silicon. "
                "Apple GPU logging disabled."
            )
            return

        if not PSUTIL_AVAILABLE:
            try:
                _ensure_psutil()
            except ImportError:
                warnings.warn(
                    "auto_log_gpu=True but psutil not installed. "
                    "Install with: pip install psutil"
                )
                return

        worker = threading.Thread(target=self._monitor_loop, daemon=True)
        self._thread = worker
        worker.start()

    def stop(self):
        """Signal the sampling loop to exit and wait up to 2 seconds for it."""
        self._stop_flag.set()
        worker = self._thread
        if worker is not None:
            worker.join(timeout=2.0)

    def _monitor_loop(self):
        """Sample and log metrics every `interval` seconds until stopped."""
        while not self._stop_flag.is_set():
            # Errors from one sample are ignored; the loop keeps running.
            try:
                sample = collect_apple_metrics()
                if sample:
                    self._run.log_system(sample)
            except Exception:
                pass

            # Doubles as the sleep between samples; returns early on stop().
            self._stop_flag.wait(timeout=self._interval)
222
+
223
+
224
def log_apple_gpu(run: "Run | None" = None) -> dict:
    """
    Log Apple Silicon system metrics to the current or specified run.

    Args:
        run: Optional Run instance. If None, uses current run from context.

    Returns:
        dict: The system metrics that were collected. Nothing is logged when
        the dict is empty.

    Raises:
        RuntimeError: If no run was passed and there is no current run.

    Example:
        ```python
        import trackio

        run = trackio.init(project="my-project")
        trackio.log({"loss": 0.5})
        trackio.log_apple_gpu()
        ```
    """
    from trackio import context_vars

    target_run = run if run is not None else context_vars.current_run.get()
    if target_run is None:
        raise RuntimeError("Call trackio.init() before trackio.log_apple_gpu().")

    collected = collect_apple_metrics()
    if collected:
        target_run.log_system(collected)
    return collected
trackio/assets/badge.png ADDED

Git LFS Details

  • SHA256: 206b7847247e83279f498510a2760338a03116bb5141a658d71ec14429f9ea9e
  • Pointer size: 131 Bytes
  • Size of remote file: 170 kB
trackio/assets/trackio_logo_dark.png ADDED
trackio/assets/trackio_logo_light.png ADDED
trackio/assets/trackio_logo_old.png ADDED

Git LFS Details

  • SHA256: 3922c4d1e465270ad4d8abb12023f3beed5d9f7f338528a4c0ac21dcf358a1c8
  • Pointer size: 131 Bytes
  • Size of remote file: 487 kB
trackio/assets/trackio_logo_type_dark.png ADDED
trackio/assets/trackio_logo_type_dark_transparent.png ADDED
trackio/assets/trackio_logo_type_light.png ADDED
trackio/assets/trackio_logo_type_light_transparent.png ADDED
trackio/bucket_storage.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import shutil
2
+ import tempfile
3
+ from pathlib import Path
4
+
5
+ import huggingface_hub
6
+ from huggingface_hub import copy_files, sync_bucket
7
+
8
+ from trackio.sqlite_storage import SQLiteStorage
9
+ from trackio.utils import MEDIA_DIR, TRACKIO_DIR
10
+
11
+
12
def create_bucket_if_not_exists(bucket_id: str, private: bool | None = None) -> None:
    """
    Create the HF bucket `bucket_id`, tolerating one that already exists.

    Args:
        bucket_id: ID of the bucket to create.
        private: Passed through to `huggingface_hub.create_bucket`.
    """
    huggingface_hub.create_bucket(
        bucket_id,
        exist_ok=True,
        private=private,
    )
14
+
15
+
16
def _list_bucket_file_paths(bucket_id: str, prefix: str | None = None) -> list[str]:
    """
    Return the paths of all file entries in a bucket, listed recursively.

    Args:
        bucket_id: Bucket to list.
        prefix: Optional prefix passed through to the listing call.

    Returns:
        Paths of entries whose `type` is "file" and whose `path` is non-empty.
    """
    file_paths = []
    for entry in huggingface_hub.list_bucket_tree(
        bucket_id, prefix=prefix, recursive=True
    ):
        if getattr(entry, "type", None) != "file":
            continue
        if not getattr(entry, "path", None):
            continue
        file_paths.append(entry.path)
    return file_paths
23
+
24
+
25
def download_bucket_to_trackio_dir(bucket_id: str) -> None:
    """
    Sync the contents of an HF bucket onto the local disk.

    `TRACKIO_DIR` is created if missing, then the bucket is synced into its
    parent directory.

    NOTE(review): syncing into the parent presumably relies on bucket objects
    living under a `trackio/` prefix and on `TRACKIO_DIR` itself being named
    `trackio` -- confirm for custom `TRACKIO_DIR` locations.

    Args:
        bucket_id: Bucket to download from.
    """
    TRACKIO_DIR.mkdir(parents=True, exist_ok=True)
    bucket_uri = f"hf://buckets/{bucket_id}"
    local_root = str(TRACKIO_DIR.parent)
    sync_bucket(source=bucket_uri, dest=local_root, quiet=True)
32
+
33
+
34
def upload_project_to_bucket(project: str, bucket_id: str) -> None:
    """
    Upload a project's SQLite database and media files to an HF bucket.

    The database is stored at `trackio/<db file name>`. Every file under the
    project's media directory is stored at `trackio/<path relative to
    TRACKIO_DIR>`.

    Args:
        project: Name of the local project.
        bucket_id: Destination bucket.

    Raises:
        FileNotFoundError: If the project has no local database file.
    """
    db_path = SQLiteStorage.get_project_db_path(project)
    if not db_path.exists():
        raise FileNotFoundError(f"No database found for project '{project}'")

    # Checkpoint the write-ahead log first so the single database file
    # uploaded below contains the data that was still sitting in the WAL.
    with SQLiteStorage._get_connection(
        db_path, configure_pragmas=False, row_factory=None
    ) as conn:
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")

    files_to_add = [(str(db_path), f"trackio/{db_path.name}")]

    media_dir = MEDIA_DIR / project
    if media_dir.exists():
        for media_file in media_dir.rglob("*"):
            if media_file.is_file():
                # Remote keys must use "/" separators; str() of a relative
                # path would yield backslashes on Windows.
                rel = media_file.relative_to(TRACKIO_DIR).as_posix()
                files_to_add.append((str(media_file), f"trackio/{rel}"))

    huggingface_hub.batch_bucket_files(bucket_id, add=files_to_add)
54
+
55
+
56
def _download_db_from_bucket(
    project: str, bucket_id: str, dest_path: Path | None = None
) -> bool:
    """
    Download a project's database file from a bucket.

    Args:
        project: Project whose database should be fetched.
        bucket_id: Bucket to download from.
        dest_path: Where to write the file. Defaults to the project's local
            database path.

    Returns:
        True if the download completed and the destination file exists,
        False if anything raised during the download.
    """
    remote_key = f"trackio/{SQLiteStorage.get_project_db_filename(project)}"
    if dest_path:
        target = dest_path
    else:
        target = SQLiteStorage.get_project_db_path(project)
    target.parent.mkdir(parents=True, exist_ok=True)

    try:
        huggingface_hub.download_bucket_files(
            bucket_id,
            files=[(remote_key, str(target))],
            token=huggingface_hub.utils.get_token(),
        )
        downloaded = target.exists()
    except Exception:
        # Any failure (missing object, auth, network, ...) means "not available".
        downloaded = False
    return downloaded
72
+
73
+
74
def _local_db_has_data(project: str) -> bool:
    """
    Report whether the project's local database has at least one metrics row.

    Args:
        project: Project name.

    Returns:
        False when the database file is missing, is empty, or cannot be
        queried; otherwise whether the `metrics` table has any rows.
    """
    db_file = SQLiteStorage.get_project_db_path(project)
    if not db_file.exists():
        return False
    if db_file.stat().st_size == 0:
        return False

    try:
        with SQLiteStorage._get_connection(
            db_file, configure_pragmas=False, row_factory=None
        ) as conn:
            row = conn.execute("SELECT COUNT(*) FROM metrics").fetchone()
            return row[0] > 0
    except Exception:
        # An unreadable database or missing table is treated as "no data".
        return False
86
+
87
+
88
def _export_and_upload_static(
    project: str,
    dest_bucket_id: str,
    db_path: Path,
    media_dir: Path | None = None,
) -> None:
    """
    Export a project for a static Space and upload the result to a bucket.

    The export is written to a temporary directory. If `media_dir` is given
    and exists, it is copied in as `media/`. Every resulting file is then
    uploaded under its path relative to the export root.

    Args:
        project: Project name to export.
        dest_bucket_id: Bucket that receives the exported files.
        db_path: Database file to export from.
        media_dir: Optional local media directory to include.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        output_dir = Path(tmp_dir)
        SQLiteStorage.export_for_static_space(
            project, output_dir, db_path_override=db_path
        )

        if media_dir and media_dir.exists():
            shutil.copytree(media_dir, output_dir / "media")

        # Remote keys must use "/" separators; str() of a relative path
        # would yield backslashes on Windows.
        files_to_add = [
            (str(f), f.relative_to(output_dir).as_posix())
            for f in output_dir.rglob("*")
            if f.is_file()
        ]

        # Upload while the temporary directory still exists.
        huggingface_hub.batch_bucket_files(dest_bucket_id, add=files_to_add)
110
+
111
+
112
def _copy_project_media_between_buckets(
    source_bucket_id: str, dest_bucket_id: str, project: str
) -> None:
    """
    Copy a project's media from one bucket into another bucket's `media/`.

    Does nothing when the source bucket has no files under
    `trackio/media/<project>/`.

    Args:
        source_bucket_id: Bucket holding the project's media.
        dest_bucket_id: Bucket that receives the files under `media/`.
        project: Project whose media is copied.
    """
    prefix = f"trackio/media/{project}/"
    if not _list_bucket_file_paths(source_bucket_id, prefix=prefix):
        return

    source_uri = f"hf://buckets/{source_bucket_id}/{prefix}"
    dest_uri = f"hf://buckets/{dest_bucket_id}/media/"
    copy_files(source_uri, dest_uri)
126
+
127
+
128
def upload_project_to_bucket_for_static(project: str, bucket_id: str) -> None:
    """
    Export a project in static-Space form and upload it to `bucket_id`.

    When the local database has no data, a download of the database from
    the same bucket is attempted first; its result is not checked.

    Args:
        project: Project name.
        bucket_id: Bucket used both as the fallback source and the destination.
    """
    has_local_data = _local_db_has_data(project)
    if not has_local_data:
        _download_db_from_bucket(project, bucket_id)

    _export_and_upload_static(
        project,
        bucket_id,
        SQLiteStorage.get_project_db_path(project),
        MEDIA_DIR / project,
    )
134
+
135
+
136
def export_from_bucket_for_static(
    source_bucket_id: str,
    dest_bucket_id: str,
    project: str,
) -> None:
    """
    Build a static export of a project from one bucket into another.

    The project's database is downloaded from the source bucket into a
    temporary directory, exported and uploaded to the destination bucket,
    and the project's media is then copied bucket-to-bucket.

    Args:
        source_bucket_id: Bucket holding the project's database and media.
        dest_bucket_id: Bucket that receives the static export.
        project: Project name.

    Raises:
        FileNotFoundError: If the database cannot be downloaded.
    """
    with tempfile.TemporaryDirectory() as scratch:
        local_db = Path(scratch) / SQLiteStorage.get_project_db_filename(project)

        fetched = _download_db_from_bucket(
            project, source_bucket_id, dest_path=local_db
        )
        if not fetched:
            raise FileNotFoundError(
                f"Could not download database for project '{project}' "
                f"from bucket '{source_bucket_id}'."
            )

        _export_and_upload_static(project, dest_bucket_id, local_db)
        _copy_project_media_between_buckets(source_bucket_id, dest_bucket_id, project)
trackio/cli.py ADDED
@@ -0,0 +1,1272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+
4
+ from trackio import freeze, show, sync
5
+ from trackio.cli_helpers import (
6
+ error_exit,
7
+ format_alerts,
8
+ format_json,
9
+ format_list,
10
+ format_metric_values,
11
+ format_project_summary,
12
+ format_run_summary,
13
+ format_snapshot,
14
+ format_system_metric_names,
15
+ format_system_metrics,
16
+ )
17
+ from trackio.markdown import Markdown
18
+ from trackio.server import get_project_summary, get_run_summary
19
+ from trackio.sqlite_storage import SQLiteStorage
20
+
21
+
22
+ def _get_space(args):
23
+ return getattr(args, "space", None)
24
+
25
+
26
def _get_remote(args):
    """
    Build a client for the remote Space selected on the command line.

    Args:
        args: Parsed CLI namespace. `space` and `hf_token` are read if present.

    Returns:
        A `RemoteClient` for the Space, or None when no Space was given.
    """
    space = _get_space(args)
    if not space:
        return None

    # Imported only once a Space is actually requested, so local-only
    # commands neither pay for nor can fail on loading the remote client.
    from trackio.remote_client import RemoteClient

    hf_token = getattr(args, "hf_token", None)
    return RemoteClient(space, hf_token=hf_token)
34
+
35
+
36
def _handle_status():
    """
    Print a sync-status summary of all local Trackio projects.

    Each project is classified as local-only (no Space ID), synced, or
    unsynced (has a Space ID and pending data). Counts are printed per
    group, and unsynced projects are listed with a hint on how to sync.
    """
    print("Reading local Trackio projects...\n")
    all_projects = SQLiteStorage.get_projects()
    if not all_projects:
        print("No Trackio projects found.")
        return

    local_only, in_sync, pending = [], [], []
    for name in all_projects:
        if SQLiteStorage.get_space_id(name) is None:
            group = local_only
        elif SQLiteStorage.has_pending_data(name):
            group = pending
        else:
            group = in_sync
        group.append(name)

    print("Finished reading Trackio projects")
    if local_only:
        print(f" * {len(local_only)} local trackio project(s) [OK]")
    if in_sync:
        print(f" * {len(in_sync)} trackio project(s) synced to Spaces [OK]")
    if not pending:
        return

    print(
        f" * {len(pending)} trackio project(s) with unsynced changes [WARNING]:"
    )
    for name in pending:
        print(f" - {name}")

    print(
        f"\nRun `trackio sync --project {pending[0]}` to sync. "
        "Or run `trackio sync --all` to sync all unsynced changes."
    )
73
+
74
+
75
def _handle_sync(args):
    """
    Run the `sync` subcommand.

    Exactly one of `--all` and `--project` must be given. With `--project`,
    the project is synced to `--space-id`, or to its stored Space ID when
    that flag is omitted. With `--all`, every project that has a Space ID
    and pending data is synced incrementally.

    Args:
        args: Parsed CLI namespace for the `sync` subcommand.
    """
    from trackio.deploy import sync_incremental

    sync_everything = args.sync_all
    if sync_everything and args.project:
        error_exit("Cannot use --all and --project together.")
    if not sync_everything and not args.project:
        error_exit("Must provide either --project or --all.")

    if not sync_everything:
        target_space = args.space_id
        if target_space is None:
            target_space = SQLiteStorage.get_space_id(args.project)
        sync(
            project=args.project,
            space_id=target_space,
            private=args.private,
            force=args.force,
            sdk=args.sdk,
        )
        return

    synced_count = 0
    for name in SQLiteStorage.get_projects():
        linked_space = SQLiteStorage.get_space_id(name)
        if not linked_space or not SQLiteStorage.has_pending_data(name):
            continue
        sync_incremental(name, linked_space, private=args.private, pending_only=True)
        synced_count += 1
    if synced_count == 0:
        print("No projects with unsynced data found.")
106
+
107
+
108
+ def _extract_reports(
109
+ run: str, logs: list[dict], report_name: str | None = None
110
+ ) -> list[dict]:
111
+ reports = []
112
+ for log in logs:
113
+ timestamp = log.get("timestamp")
114
+ step = log.get("step")
115
+ for key, value in log.items():
116
+ if report_name is not None and key != report_name:
117
+ continue
118
+ if isinstance(value, dict) and value.get("_type") == Markdown.TYPE:
119
+ content = value.get("_value")
120
+ if isinstance(content, str):
121
+ reports.append(
122
+ {
123
+ "run": run,
124
+ "report": key,
125
+ "step": step,
126
+ "timestamp": timestamp,
127
+ "content": content,
128
+ }
129
+ )
130
+ return reports
131
+
132
+
133
+ def main():
134
+ parser = argparse.ArgumentParser(description="Trackio CLI")
135
+ parser.add_argument(
136
+ "--space",
137
+ required=False,
138
+ help="HF Space ID (e.g. 'user/space') or Space URL to query remotely.",
139
+ )
140
+ parser.add_argument(
141
+ "--hf-token",
142
+ required=False,
143
+ help="HF token for accessing private Spaces.",
144
+ )
145
+ subparsers = parser.add_subparsers(dest="command")
146
+
147
+ ui_parser = subparsers.add_parser(
148
+ "show", help="Show the Trackio dashboard UI for a project"
149
+ )
150
+ ui_parser.add_argument(
151
+ "--project", required=False, help="Project name to show in the dashboard"
152
+ )
153
+ ui_parser.add_argument(
154
+ "--theme",
155
+ required=False,
156
+ default="default",
157
+ help="A Gradio Theme to use for the dashboard instead of the default, can be a built-in theme (e.g. 'soft', 'citrus'), or a theme from the Hub (e.g. 'gstaff/xkcd').",
158
+ )
159
+ ui_parser.add_argument(
160
+ "--mcp-server",
161
+ action="store_true",
162
+ help="Enable MCP server functionality. The Trackio dashboard will be set up as an MCP server and certain functions will be exposed as MCP tools.",
163
+ )
164
+ ui_parser.add_argument(
165
+ "--footer",
166
+ action="store_true",
167
+ default=True,
168
+ help="Show the Gradio footer. Use --no-footer to hide it.",
169
+ )
170
+ ui_parser.add_argument(
171
+ "--no-footer",
172
+ dest="footer",
173
+ action="store_false",
174
+ help="Hide the Gradio footer.",
175
+ )
176
+ ui_parser.add_argument(
177
+ "--color-palette",
178
+ required=False,
179
+ help="Comma-separated list of hex color codes for plot lines (e.g. '#FF0000,#00FF00,#0000FF'). If not provided, the TRACKIO_COLOR_PALETTE environment variable will be used, or the default palette if not set.",
180
+ )
181
+ ui_parser.add_argument(
182
+ "--host",
183
+ required=False,
184
+ help="Host to bind the server to (e.g. '0.0.0.0' for remote access). If not provided, defaults to '127.0.0.1' (localhost only).",
185
+ )
186
+
187
+ subparsers.add_parser(
188
+ "status",
189
+ help="Show the status of all local Trackio projects, including sync status.",
190
+ )
191
+
192
+ sync_parser = subparsers.add_parser(
193
+ "sync",
194
+ help="Sync a local project's database to a Hugging Face Space. If the Space does not exist, it will be created.",
195
+ )
196
+ sync_parser.add_argument(
197
+ "--project",
198
+ required=False,
199
+ help="The name of the local project.",
200
+ )
201
+ sync_parser.add_argument(
202
+ "--space-id",
203
+ required=False,
204
+ help="The Hugging Face Space ID where the project will be synced (e.g. username/space_id). If not provided, uses the previously-configured Space.",
205
+ )
206
+ sync_parser.add_argument(
207
+ "--all",
208
+ action="store_true",
209
+ dest="sync_all",
210
+ help="Sync all projects that have unsynced data to their configured Spaces.",
211
+ )
212
+ sync_parser.add_argument(
213
+ "--private",
214
+ action="store_true",
215
+ help="Make the Hugging Face Space private if creating a new Space. By default, the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.",
216
+ )
217
+ sync_parser.add_argument(
218
+ "--force",
219
+ action="store_true",
220
+ help="Overwrite the existing database without prompting for confirmation.",
221
+ )
222
+ sync_parser.add_argument(
223
+ "--sdk",
224
+ choices=["gradio", "static"],
225
+ default="gradio",
226
+ help="The type of Space to deploy. 'gradio' (default) deploys a live Gradio server. 'static' deploys a static Space that reads from an HF Bucket.",
227
+ )
228
+
229
+ freeze_parser = subparsers.add_parser(
230
+ "freeze",
231
+ help="Create a one-time static Space snapshot from a project's data.",
232
+ )
233
+ freeze_parser.add_argument(
234
+ "--space-id",
235
+ required=True,
236
+ help="The source Gradio Space ID (e.g. username/space_id).",
237
+ )
238
+ freeze_parser.add_argument(
239
+ "--project",
240
+ required=True,
241
+ help="The name of the project to freeze into a static snapshot.",
242
+ )
243
+ freeze_parser.add_argument(
244
+ "--new-space-id",
245
+ required=False,
246
+ help="The Space ID for the new static Space. Defaults to {space_id}_static.",
247
+ )
248
+ freeze_parser.add_argument(
249
+ "--private",
250
+ action="store_true",
251
+ help="Make the new static Space private.",
252
+ )
253
+
254
+ list_parser = subparsers.add_parser(
255
+ "list",
256
+ help="List projects, runs, or metrics",
257
+ )
258
+ list_subparsers = list_parser.add_subparsers(dest="list_type", required=True)
259
+
260
+ list_projects_parser = list_subparsers.add_parser(
261
+ "projects",
262
+ help="List all projects",
263
+ )
264
+ list_projects_parser.add_argument(
265
+ "--json",
266
+ action="store_true",
267
+ help="Output in JSON format",
268
+ )
269
+
270
+ list_runs_parser = list_subparsers.add_parser(
271
+ "runs",
272
+ help="List runs for a project",
273
+ )
274
+ list_runs_parser.add_argument(
275
+ "--project",
276
+ required=True,
277
+ help="Project name",
278
+ )
279
+ list_runs_parser.add_argument(
280
+ "--json",
281
+ action="store_true",
282
+ help="Output in JSON format",
283
+ )
284
+
285
+ list_metrics_parser = list_subparsers.add_parser(
286
+ "metrics",
287
+ help="List metrics for a run",
288
+ )
289
+ list_metrics_parser.add_argument(
290
+ "--project",
291
+ required=True,
292
+ help="Project name",
293
+ )
294
+ list_metrics_parser.add_argument(
295
+ "--run",
296
+ required=True,
297
+ help="Run name",
298
+ )
299
+ list_metrics_parser.add_argument(
300
+ "--json",
301
+ action="store_true",
302
+ help="Output in JSON format",
303
+ )
304
+
305
+ list_system_metrics_parser = list_subparsers.add_parser(
306
+ "system-metrics",
307
+ help="List system metrics for a run",
308
+ )
309
+ list_system_metrics_parser.add_argument(
310
+ "--project",
311
+ required=True,
312
+ help="Project name",
313
+ )
314
+ list_system_metrics_parser.add_argument(
315
+ "--run",
316
+ required=True,
317
+ help="Run name",
318
+ )
319
+ list_system_metrics_parser.add_argument(
320
+ "--json",
321
+ action="store_true",
322
+ help="Output in JSON format",
323
+ )
324
+
325
+ list_alerts_parser = list_subparsers.add_parser(
326
+ "alerts",
327
+ help="List alerts for a project or run",
328
+ )
329
+ list_alerts_parser.add_argument(
330
+ "--project",
331
+ required=True,
332
+ help="Project name",
333
+ )
334
+ list_alerts_parser.add_argument(
335
+ "--run",
336
+ required=False,
337
+ help="Run name (optional)",
338
+ )
339
+ list_alerts_parser.add_argument(
340
+ "--level",
341
+ required=False,
342
+ help="Filter by alert level (info, warn, error)",
343
+ )
344
+ list_alerts_parser.add_argument(
345
+ "--json",
346
+ action="store_true",
347
+ help="Output in JSON format",
348
+ )
349
+ list_alerts_parser.add_argument(
350
+ "--since",
351
+ required=False,
352
+ help="Only show alerts after this ISO 8601 timestamp",
353
+ )
354
+
355
+ list_reports_parser = list_subparsers.add_parser(
356
+ "reports",
357
+ help="List markdown reports for a project or run",
358
+ )
359
+ list_reports_parser.add_argument(
360
+ "--project",
361
+ required=True,
362
+ help="Project name",
363
+ )
364
+ list_reports_parser.add_argument(
365
+ "--run",
366
+ required=False,
367
+ help="Run name (optional)",
368
+ )
369
+ list_reports_parser.add_argument(
370
+ "--json",
371
+ action="store_true",
372
+ help="Output in JSON format",
373
+ )
374
+
375
+ get_parser = subparsers.add_parser(
376
+ "get",
377
+ help="Get project, run, or metric information",
378
+ )
379
+ get_subparsers = get_parser.add_subparsers(dest="get_type", required=True)
380
+
381
+ get_project_parser = get_subparsers.add_parser(
382
+ "project",
383
+ help="Get project summary",
384
+ )
385
+ get_project_parser.add_argument(
386
+ "--project",
387
+ required=True,
388
+ help="Project name",
389
+ )
390
+ get_project_parser.add_argument(
391
+ "--json",
392
+ action="store_true",
393
+ help="Output in JSON format",
394
+ )
395
+
396
+ get_run_parser = get_subparsers.add_parser(
397
+ "run",
398
+ help="Get run summary",
399
+ )
400
+ get_run_parser.add_argument(
401
+ "--project",
402
+ required=True,
403
+ help="Project name",
404
+ )
405
+ get_run_parser.add_argument(
406
+ "--run",
407
+ required=True,
408
+ help="Run name",
409
+ )
410
+ get_run_parser.add_argument(
411
+ "--json",
412
+ action="store_true",
413
+ help="Output in JSON format",
414
+ )
415
+
416
+ get_metric_parser = get_subparsers.add_parser(
417
+ "metric",
418
+ help="Get metric values for a run",
419
+ )
420
+ get_metric_parser.add_argument(
421
+ "--project",
422
+ required=True,
423
+ help="Project name",
424
+ )
425
+ get_metric_parser.add_argument(
426
+ "--run",
427
+ required=True,
428
+ help="Run name",
429
+ )
430
+ get_metric_parser.add_argument(
431
+ "--metric",
432
+ required=True,
433
+ help="Metric name",
434
+ )
435
+ get_metric_parser.add_argument(
436
+ "--step",
437
+ type=int,
438
+ required=False,
439
+ help="Get metric at exactly this step",
440
+ )
441
+ get_metric_parser.add_argument(
442
+ "--around",
443
+ type=int,
444
+ required=False,
445
+ help="Get metrics around this step (use with --window)",
446
+ )
447
+ get_metric_parser.add_argument(
448
+ "--at-time",
449
+ required=False,
450
+ help="Get metrics around this ISO 8601 timestamp (use with --window)",
451
+ )
452
+ get_metric_parser.add_argument(
453
+ "--window",
454
+ type=int,
455
+ required=False,
456
+ default=10,
457
+ help="Window size: ±steps for --around, ±seconds for --at-time (default: 10)",
458
+ )
459
+ get_metric_parser.add_argument(
460
+ "--json",
461
+ action="store_true",
462
+ help="Output in JSON format",
463
+ )
464
+
465
+ get_snapshot_parser = get_subparsers.add_parser(
466
+ "snapshot",
467
+ help="Get all metrics at/around a step or timestamp",
468
+ )
469
+ get_snapshot_parser.add_argument(
470
+ "--project",
471
+ required=True,
472
+ help="Project name",
473
+ )
474
+ get_snapshot_parser.add_argument(
475
+ "--run",
476
+ required=True,
477
+ help="Run name",
478
+ )
479
+ get_snapshot_parser.add_argument(
480
+ "--step",
481
+ type=int,
482
+ required=False,
483
+ help="Get all metrics at exactly this step",
484
+ )
485
+ get_snapshot_parser.add_argument(
486
+ "--around",
487
+ type=int,
488
+ required=False,
489
+ help="Get all metrics around this step (use with --window)",
490
+ )
491
+ get_snapshot_parser.add_argument(
492
+ "--at-time",
493
+ required=False,
494
+ help="Get all metrics around this ISO 8601 timestamp (use with --window)",
495
+ )
496
+ get_snapshot_parser.add_argument(
497
+ "--window",
498
+ type=int,
499
+ required=False,
500
+ default=10,
501
+ help="Window size: ±steps for --around, ±seconds for --at-time (default: 10)",
502
+ )
503
+ get_snapshot_parser.add_argument(
504
+ "--json",
505
+ action="store_true",
506
+ help="Output in JSON format",
507
+ )
508
+
509
+ get_system_metric_parser = get_subparsers.add_parser(
510
+ "system-metric",
511
+ help="Get system metric values for a run",
512
+ )
513
+ get_system_metric_parser.add_argument(
514
+ "--project",
515
+ required=True,
516
+ help="Project name",
517
+ )
518
+ get_system_metric_parser.add_argument(
519
+ "--run",
520
+ required=True,
521
+ help="Run name",
522
+ )
523
+ get_system_metric_parser.add_argument(
524
+ "--metric",
525
+ required=False,
526
+ help="System metric name (optional, if not provided returns all system metrics)",
527
+ )
528
+ get_system_metric_parser.add_argument(
529
+ "--json",
530
+ action="store_true",
531
+ help="Output in JSON format",
532
+ )
533
+
534
+ get_alerts_parser = get_subparsers.add_parser(
535
+ "alerts",
536
+ help="Get alerts for a project or run",
537
+ )
538
+ get_alerts_parser.add_argument(
539
+ "--project",
540
+ required=True,
541
+ help="Project name",
542
+ )
543
+ get_alerts_parser.add_argument(
544
+ "--run",
545
+ required=False,
546
+ help="Run name (optional)",
547
+ )
548
+ get_alerts_parser.add_argument(
549
+ "--level",
550
+ required=False,
551
+ help="Filter by alert level (info, warn, error)",
552
+ )
553
+ get_alerts_parser.add_argument(
554
+ "--json",
555
+ action="store_true",
556
+ help="Output in JSON format",
557
+ )
558
+ get_alerts_parser.add_argument(
559
+ "--since",
560
+ required=False,
561
+ help="Only show alerts after this ISO 8601 timestamp",
562
+ )
563
+
564
+ get_report_parser = get_subparsers.add_parser(
565
+ "report",
566
+ help="Get markdown report entries for a run",
567
+ )
568
+ get_report_parser.add_argument(
569
+ "--project",
570
+ required=True,
571
+ help="Project name",
572
+ )
573
+ get_report_parser.add_argument(
574
+ "--run",
575
+ required=True,
576
+ help="Run name",
577
+ )
578
+ get_report_parser.add_argument(
579
+ "--report",
580
+ required=True,
581
+ help="Report metric name",
582
+ )
583
+ get_report_parser.add_argument(
584
+ "--json",
585
+ action="store_true",
586
+ help="Output in JSON format",
587
+ )
588
+
589
+ skills_parser = subparsers.add_parser(
590
+ "skills",
591
+ help="Manage Trackio skills for AI coding assistants",
592
+ )
593
+ skills_subparsers = skills_parser.add_subparsers(
594
+ dest="skills_action", required=True
595
+ )
596
+ skills_add_parser = skills_subparsers.add_parser(
597
+ "add",
598
+ help="Download and install the Trackio skill for an AI assistant",
599
+ )
600
+ skills_add_parser.add_argument(
601
+ "--cursor",
602
+ action="store_true",
603
+ help="Install for Cursor",
604
+ )
605
+ skills_add_parser.add_argument(
606
+ "--claude",
607
+ action="store_true",
608
+ help="Install for Claude Code",
609
+ )
610
+ skills_add_parser.add_argument(
611
+ "--codex",
612
+ action="store_true",
613
+ help="Install for Codex",
614
+ )
615
+ skills_add_parser.add_argument(
616
+ "--opencode",
617
+ action="store_true",
618
+ help="Install for OpenCode",
619
+ )
620
+ skills_add_parser.add_argument(
621
+ "--global",
622
+ dest="global_",
623
+ action="store_true",
624
+ help="Install globally (user-level) instead of in the current project directory",
625
+ )
626
+ skills_add_parser.add_argument(
627
+ "--dest",
628
+ type=str,
629
+ required=False,
630
+ help="Install into a custom destination (path to skills directory)",
631
+ )
632
+ skills_add_parser.add_argument(
633
+ "--force",
634
+ action="store_true",
635
+ help="Overwrite existing skill if it already exists",
636
+ )
637
+
638
+ args, unknown_args = parser.parse_known_args()
639
+ if unknown_args:
640
+ trailing_global_parser = argparse.ArgumentParser(add_help=False)
641
+ trailing_global_parser.add_argument("--space", required=False)
642
+ trailing_global_parser.add_argument("--hf-token", required=False)
643
+ trailing_globals, remaining_unknown = trailing_global_parser.parse_known_args(
644
+ unknown_args
645
+ )
646
+ if remaining_unknown:
647
+ parser.error(f"unrecognized arguments: {' '.join(remaining_unknown)}")
648
+ if trailing_globals.space is not None:
649
+ args.space = trailing_globals.space
650
+ if trailing_globals.hf_token is not None:
651
+ args.hf_token = trailing_globals.hf_token
652
+
653
+ if args.command in ("show", "status", "sync", "freeze", "skills") and _get_space(
654
+ args
655
+ ):
656
+ error_exit(
657
+ f"The '{args.command}' command does not support --space (remote mode)."
658
+ )
659
+
660
+ if args.command == "show":
661
+ color_palette = None
662
+ if args.color_palette:
663
+ color_palette = [color.strip() for color in args.color_palette.split(",")]
664
+ show(
665
+ project=args.project,
666
+ theme=args.theme,
667
+ mcp_server=args.mcp_server,
668
+ footer=args.footer,
669
+ color_palette=color_palette,
670
+ host=args.host,
671
+ )
672
+ elif args.command == "status":
673
+ _handle_status()
674
+ elif args.command == "sync":
675
+ _handle_sync(args)
676
+ elif args.command == "freeze":
677
+ freeze(
678
+ space_id=args.space_id,
679
+ project=args.project,
680
+ new_space_id=args.new_space_id,
681
+ private=args.private,
682
+ )
683
+ elif args.command == "list":
684
+ remote = _get_remote(args)
685
+ if args.list_type == "projects":
686
+ if remote:
687
+ projects = remote.predict(api_name="/get_all_projects")
688
+ else:
689
+ projects = SQLiteStorage.get_projects()
690
+ if args.json:
691
+ print(format_json({"projects": projects}))
692
+ else:
693
+ print(format_list(projects, "Projects"))
694
+ elif args.list_type == "runs":
695
+ if remote:
696
+ runs = remote.predict(args.project, api_name="/get_runs_for_project")
697
+ else:
698
+ db_path = SQLiteStorage.get_project_db_path(args.project)
699
+ if not db_path.exists():
700
+ error_exit(f"Project '{args.project}' not found.")
701
+ runs = SQLiteStorage.get_runs(args.project)
702
+ if args.json:
703
+ print(format_json({"project": args.project, "runs": runs}))
704
+ else:
705
+ print(format_list(runs, f"Runs in '{args.project}'"))
706
+ elif args.list_type == "metrics":
707
+ if remote:
708
+ metrics = remote.predict(
709
+ args.project, args.run, api_name="/get_metrics_for_run"
710
+ )
711
+ else:
712
+ db_path = SQLiteStorage.get_project_db_path(args.project)
713
+ if not db_path.exists():
714
+ error_exit(f"Project '{args.project}' not found.")
715
+ runs = SQLiteStorage.get_runs(args.project)
716
+ if args.run not in runs:
717
+ error_exit(
718
+ f"Run '{args.run}' not found in project '{args.project}'."
719
+ )
720
+ metrics = SQLiteStorage.get_all_metrics_for_run(args.project, args.run)
721
+ if args.json:
722
+ print(
723
+ format_json(
724
+ {"project": args.project, "run": args.run, "metrics": metrics}
725
+ )
726
+ )
727
+ else:
728
+ print(
729
+ format_list(
730
+ metrics, f"Metrics for '{args.run}' in '{args.project}'"
731
+ )
732
+ )
733
+ elif args.list_type == "system-metrics":
734
+ if remote:
735
+ system_metrics = remote.predict(
736
+ args.project, args.run, api_name="/get_system_metrics_for_run"
737
+ )
738
+ else:
739
+ db_path = SQLiteStorage.get_project_db_path(args.project)
740
+ if not db_path.exists():
741
+ error_exit(f"Project '{args.project}' not found.")
742
+ runs = SQLiteStorage.get_runs(args.project)
743
+ if args.run not in runs:
744
+ error_exit(
745
+ f"Run '{args.run}' not found in project '{args.project}'."
746
+ )
747
+ system_metrics = SQLiteStorage.get_all_system_metrics_for_run(
748
+ args.project, args.run
749
+ )
750
+ if args.json:
751
+ print(
752
+ format_json(
753
+ {
754
+ "project": args.project,
755
+ "run": args.run,
756
+ "system_metrics": system_metrics,
757
+ }
758
+ )
759
+ )
760
+ else:
761
+ print(format_system_metric_names(system_metrics))
762
+ elif args.list_type == "alerts":
763
+ if remote:
764
+ alerts = remote.predict(
765
+ args.project,
766
+ args.run,
767
+ args.level,
768
+ args.since,
769
+ api_name="/get_alerts",
770
+ )
771
+ else:
772
+ db_path = SQLiteStorage.get_project_db_path(args.project)
773
+ if not db_path.exists():
774
+ error_exit(f"Project '{args.project}' not found.")
775
+ alerts = SQLiteStorage.get_alerts(
776
+ args.project,
777
+ run_name=args.run,
778
+ level=args.level,
779
+ since=args.since,
780
+ )
781
+ if args.json:
782
+ print(
783
+ format_json(
784
+ {
785
+ "project": args.project,
786
+ "run": args.run,
787
+ "level": args.level,
788
+ "since": args.since,
789
+ "alerts": alerts,
790
+ }
791
+ )
792
+ )
793
+ else:
794
+ print(format_alerts(alerts))
795
+ elif args.list_type == "reports":
796
+ if remote:
797
+ runs = remote.predict(args.project, api_name="/get_runs_for_project")
798
+ else:
799
+ db_path = SQLiteStorage.get_project_db_path(args.project)
800
+ if not db_path.exists():
801
+ error_exit(f"Project '{args.project}' not found.")
802
+ runs = SQLiteStorage.get_runs(args.project)
803
+ if args.run and args.run not in runs:
804
+ error_exit(f"Run '{args.run}' not found in project '{args.project}'.")
805
+
806
+ target_runs = [args.run] if args.run else runs
807
+ all_reports = []
808
+ for run_name in target_runs:
809
+ if remote:
810
+ logs = remote.predict(args.project, run_name, api_name="/get_logs")
811
+ else:
812
+ logs = SQLiteStorage.get_logs(args.project, run_name)
813
+ all_reports.extend(_extract_reports(run_name, logs))
814
+
815
+ if args.json:
816
+ print(
817
+ format_json(
818
+ {
819
+ "project": args.project,
820
+ "run": args.run,
821
+ "reports": all_reports,
822
+ }
823
+ )
824
+ )
825
+ else:
826
+ report_lines = [
827
+ f"{entry['run']} | {entry['report']} | step={entry['step']} | {entry['timestamp']}"
828
+ for entry in all_reports
829
+ ]
830
+ if args.run:
831
+ print(
832
+ format_list(
833
+ report_lines,
834
+ f"Reports for '{args.run}' in '{args.project}'",
835
+ )
836
+ )
837
+ else:
838
+ print(format_list(report_lines, f"Reports in '{args.project}'"))
839
+ elif args.command == "get":
840
+ remote = _get_remote(args)
841
+ if args.get_type == "project":
842
+ if remote:
843
+ summary = remote.predict(args.project, api_name="/get_project_summary")
844
+ else:
845
+ db_path = SQLiteStorage.get_project_db_path(args.project)
846
+ if not db_path.exists():
847
+ error_exit(f"Project '{args.project}' not found.")
848
+ summary = get_project_summary(args.project)
849
+ if args.json:
850
+ print(format_json(summary))
851
+ else:
852
+ print(format_project_summary(summary))
853
+ elif args.get_type == "run":
854
+ if remote:
855
+ summary = remote.predict(
856
+ args.project, args.run, api_name="/get_run_summary"
857
+ )
858
+ else:
859
+ db_path = SQLiteStorage.get_project_db_path(args.project)
860
+ if not db_path.exists():
861
+ error_exit(f"Project '{args.project}' not found.")
862
+ runs = SQLiteStorage.get_runs(args.project)
863
+ if args.run not in runs:
864
+ error_exit(
865
+ f"Run '{args.run}' not found in project '{args.project}'."
866
+ )
867
+ summary = get_run_summary(args.project, args.run)
868
+ if args.json:
869
+ print(format_json(summary))
870
+ else:
871
+ print(format_run_summary(summary))
872
+ elif args.get_type == "metric":
873
+ at_time = getattr(args, "at_time", None)
874
+ if remote:
875
+ values = remote.predict(
876
+ args.project,
877
+ args.run,
878
+ args.metric,
879
+ args.step,
880
+ args.around,
881
+ at_time,
882
+ args.window,
883
+ api_name="/get_metric_values",
884
+ )
885
+ else:
886
+ db_path = SQLiteStorage.get_project_db_path(args.project)
887
+ if not db_path.exists():
888
+ error_exit(f"Project '{args.project}' not found.")
889
+ runs = SQLiteStorage.get_runs(args.project)
890
+ if args.run not in runs:
891
+ error_exit(
892
+ f"Run '{args.run}' not found in project '{args.project}'."
893
+ )
894
+ metrics = SQLiteStorage.get_all_metrics_for_run(args.project, args.run)
895
+ if args.metric not in metrics:
896
+ error_exit(
897
+ f"Metric '{args.metric}' not found in run '{args.run}' of project '{args.project}'."
898
+ )
899
+ values = SQLiteStorage.get_metric_values(
900
+ args.project,
901
+ args.run,
902
+ args.metric,
903
+ step=args.step,
904
+ around_step=args.around,
905
+ at_time=at_time,
906
+ window=args.window,
907
+ )
908
+ if args.json:
909
+ print(
910
+ format_json(
911
+ {
912
+ "project": args.project,
913
+ "run": args.run,
914
+ "metric": args.metric,
915
+ "values": values,
916
+ }
917
+ )
918
+ )
919
+ else:
920
+ print(format_metric_values(values))
921
+ elif args.get_type == "snapshot":
922
+ if not args.step and not args.around and not getattr(args, "at_time", None):
923
+ error_exit(
924
+ "Provide --step, --around (with --window), or --at-time (with --window)."
925
+ )
926
+ at_time = getattr(args, "at_time", None)
927
+ if remote:
928
+ snapshot = remote.predict(
929
+ args.project,
930
+ args.run,
931
+ args.step,
932
+ args.around,
933
+ at_time,
934
+ args.window,
935
+ api_name="/get_snapshot",
936
+ )
937
+ else:
938
+ db_path = SQLiteStorage.get_project_db_path(args.project)
939
+ if not db_path.exists():
940
+ error_exit(f"Project '{args.project}' not found.")
941
+ runs = SQLiteStorage.get_runs(args.project)
942
+ if args.run not in runs:
943
+ error_exit(
944
+ f"Run '{args.run}' not found in project '{args.project}'."
945
+ )
946
+ snapshot = SQLiteStorage.get_snapshot(
947
+ args.project,
948
+ args.run,
949
+ step=args.step,
950
+ around_step=args.around,
951
+ at_time=at_time,
952
+ window=args.window,
953
+ )
954
+ if args.json:
955
+ result = {
956
+ "project": args.project,
957
+ "run": args.run,
958
+ "metrics": snapshot,
959
+ }
960
+ if args.step is not None:
961
+ result["step"] = args.step
962
+ if args.around is not None:
963
+ result["around"] = args.around
964
+ result["window"] = args.window
965
+ if at_time is not None:
966
+ result["at_time"] = at_time
967
+ result["window"] = args.window
968
+ print(format_json(result))
969
+ else:
970
+ print(format_snapshot(snapshot))
971
+ elif args.get_type == "system-metric":
972
+ if remote:
973
+ system_metrics = remote.predict(
974
+ args.project, args.run, api_name="/get_system_logs"
975
+ )
976
+ if args.metric:
977
+ all_system_metric_names = remote.predict(
978
+ args.project,
979
+ args.run,
980
+ api_name="/get_system_metrics_for_run",
981
+ )
982
+ if args.metric not in all_system_metric_names:
983
+ error_exit(
984
+ f"System metric '{args.metric}' not found in run '{args.run}' of project '{args.project}'."
985
+ )
986
+ filtered_metrics = [
987
+ {
988
+ k: v
989
+ for k, v in entry.items()
990
+ if k == "timestamp" or k == args.metric
991
+ }
992
+ for entry in system_metrics
993
+ if args.metric in entry
994
+ ]
995
+ if args.json:
996
+ print(
997
+ format_json(
998
+ {
999
+ "project": args.project,
1000
+ "run": args.run,
1001
+ "metric": args.metric,
1002
+ "values": filtered_metrics,
1003
+ }
1004
+ )
1005
+ )
1006
+ else:
1007
+ print(format_system_metrics(filtered_metrics))
1008
+ else:
1009
+ if args.json:
1010
+ print(
1011
+ format_json(
1012
+ {
1013
+ "project": args.project,
1014
+ "run": args.run,
1015
+ "system_metrics": system_metrics,
1016
+ }
1017
+ )
1018
+ )
1019
+ else:
1020
+ print(format_system_metrics(system_metrics))
1021
+ else:
1022
+ db_path = SQLiteStorage.get_project_db_path(args.project)
1023
+ if not db_path.exists():
1024
+ error_exit(f"Project '{args.project}' not found.")
1025
+ runs = SQLiteStorage.get_runs(args.project)
1026
+ if args.run not in runs:
1027
+ error_exit(
1028
+ f"Run '{args.run}' not found in project '{args.project}'."
1029
+ )
1030
+ if args.metric:
1031
+ system_metrics = SQLiteStorage.get_system_logs(
1032
+ args.project, args.run
1033
+ )
1034
+ all_system_metric_names = (
1035
+ SQLiteStorage.get_all_system_metrics_for_run(
1036
+ args.project, args.run
1037
+ )
1038
+ )
1039
+ if args.metric not in all_system_metric_names:
1040
+ error_exit(
1041
+ f"System metric '{args.metric}' not found in run '{args.run}' of project '{args.project}'."
1042
+ )
1043
+ filtered_metrics = [
1044
+ {
1045
+ k: v
1046
+ for k, v in entry.items()
1047
+ if k == "timestamp" or k == args.metric
1048
+ }
1049
+ for entry in system_metrics
1050
+ if args.metric in entry
1051
+ ]
1052
+ if args.json:
1053
+ print(
1054
+ format_json(
1055
+ {
1056
+ "project": args.project,
1057
+ "run": args.run,
1058
+ "metric": args.metric,
1059
+ "values": filtered_metrics,
1060
+ }
1061
+ )
1062
+ )
1063
+ else:
1064
+ print(format_system_metrics(filtered_metrics))
1065
+ else:
1066
+ system_metrics = SQLiteStorage.get_system_logs(
1067
+ args.project, args.run
1068
+ )
1069
+ if args.json:
1070
+ print(
1071
+ format_json(
1072
+ {
1073
+ "project": args.project,
1074
+ "run": args.run,
1075
+ "system_metrics": system_metrics,
1076
+ }
1077
+ )
1078
+ )
1079
+ else:
1080
+ print(format_system_metrics(system_metrics))
1081
+ elif args.get_type == "alerts":
1082
+ if remote:
1083
+ alerts = remote.predict(
1084
+ args.project,
1085
+ args.run,
1086
+ args.level,
1087
+ args.since,
1088
+ api_name="/get_alerts",
1089
+ )
1090
+ else:
1091
+ db_path = SQLiteStorage.get_project_db_path(args.project)
1092
+ if not db_path.exists():
1093
+ error_exit(f"Project '{args.project}' not found.")
1094
+ alerts = SQLiteStorage.get_alerts(
1095
+ args.project,
1096
+ run_name=args.run,
1097
+ level=args.level,
1098
+ since=args.since,
1099
+ )
1100
+ if args.json:
1101
+ print(
1102
+ format_json(
1103
+ {
1104
+ "project": args.project,
1105
+ "run": args.run,
1106
+ "level": args.level,
1107
+ "since": args.since,
1108
+ "alerts": alerts,
1109
+ }
1110
+ )
1111
+ )
1112
+ else:
1113
+ print(format_alerts(alerts))
1114
+ elif args.get_type == "report":
1115
+ if remote:
1116
+ logs = remote.predict(args.project, args.run, api_name="/get_logs")
1117
+ else:
1118
+ db_path = SQLiteStorage.get_project_db_path(args.project)
1119
+ if not db_path.exists():
1120
+ error_exit(f"Project '{args.project}' not found.")
1121
+ runs = SQLiteStorage.get_runs(args.project)
1122
+ if args.run not in runs:
1123
+ error_exit(
1124
+ f"Run '{args.run}' not found in project '{args.project}'."
1125
+ )
1126
+ logs = SQLiteStorage.get_logs(args.project, args.run)
1127
+
1128
+ reports = _extract_reports(args.run, logs, report_name=args.report)
1129
+ if not reports:
1130
+ error_exit(
1131
+ f"Report '{args.report}' not found in run '{args.run}' of project '{args.project}'."
1132
+ )
1133
+
1134
+ if args.json:
1135
+ print(
1136
+ format_json(
1137
+ {
1138
+ "project": args.project,
1139
+ "run": args.run,
1140
+ "report": args.report,
1141
+ "values": reports,
1142
+ }
1143
+ )
1144
+ )
1145
+ else:
1146
+ output = []
1147
+ for idx, entry in enumerate(reports, start=1):
1148
+ output.append(
1149
+ f"Entry {idx} | step={entry['step']} | timestamp={entry['timestamp']}"
1150
+ )
1151
+ output.append(entry["content"])
1152
+ if idx < len(reports):
1153
+ output.append("-" * 80)
1154
+ print("\n".join(output))
1155
+ elif args.command == "skills":
1156
+ if args.skills_action == "add":
1157
+ _handle_skills_add(args)
1158
+ else:
1159
+ parser.print_help()
1160
+
1161
+
1162
def _handle_skills_add(args):
    """Install the Trackio agent skill files (`trackio skills add`).

    The skill files are downloaded from the Trackio GitHub repository. With
    `--dest`, they are written to `<dest>/trackio`. Otherwise they are written
    to the central skills location (global or local, depending on `--global`)
    and a relative symlink to that copy is created in the skills directory of
    every selected agent.

    All files are downloaded *before* an existing installation is removed, so
    a failed download never destroys a working skill directory.

    Args:
        args: Parsed CLI namespace. Reads `cursor`, `claude`, `codex`,
            `opencode`, `dest`, `global_` and `force`.
    """
    import shutil
    from pathlib import Path

    try:
        from huggingface_hub.cli.skills import (
            CENTRAL_GLOBAL,
            CENTRAL_LOCAL,
            GLOBAL_TARGETS,
            LOCAL_TARGETS,
        )
    except (ImportError, ModuleNotFoundError):
        error_exit(
            "The 'trackio skills' command requires huggingface_hub >= 1.4.0.\n"
            "Please upgrade: pip install --upgrade huggingface_hub"
        )

    SKILL_ID = "trackio"
    GITHUB_RAW = "https://raw.githubusercontent.com/gradio-app/trackio/main"
    SKILL_PREFIX = ".agents/skills/trackio"
    SKILL_FILES = [
        "SKILL.md",
        "alerts.md",
        "logging_metrics.md",
        "retrieving_metrics.md",
    ]

    if not (args.cursor or args.claude or args.codex or args.opencode or args.dest):
        error_exit(
            "Pick a destination via --cursor, --claude, --codex, --opencode, or --dest."
        )

    def download(url: str) -> str:
        """Return the body of `url` as text, exiting with an error on failure."""
        from huggingface_hub.utils import get_session

        try:
            response = get_session().get(url)
            response.raise_for_status()
        except Exception as e:
            error_exit(
                f"Failed to download {url}\n{e}\n\n"
                "Make sure you have internet access. The skill files are fetched from "
                "the Trackio GitHub repository."
            )
        return response.text

    def ensure_replaceable(path: Path, force: bool) -> None:
        """Exit with an error if `path` exists and overwriting was not requested."""
        if (path.exists() or path.is_symlink()) and not force:
            error_exit(
                f"Skill already exists at {path}.\nRe-run with --force to overwrite."
            )

    def remove_existing(path: Path, force: bool):
        """Delete `path` (directory, file or symlink) if present and `force` is set."""
        if not (path.exists() or path.is_symlink()):
            return
        ensure_replaceable(path, force)
        # A symlink to a directory must be unlinked, not recursively deleted.
        if path.is_dir() and not path.is_symlink():
            shutil.rmtree(path)
        else:
            path.unlink()

    def install_to(skills_dir: Path, force: bool) -> Path:
        """Download the skill files into `<skills_dir>/trackio` and return that path."""
        skills_dir = skills_dir.expanduser().resolve()
        dest = skills_dir / SKILL_ID
        # Fail fast (before any network traffic) when overwriting is not allowed.
        ensure_replaceable(dest, force)
        # Fetch everything first: a download error exits here, leaving any
        # existing installation untouched instead of deleted or half-written.
        contents = {
            fname: download(f"{GITHUB_RAW}/{SKILL_PREFIX}/{fname}")
            for fname in SKILL_FILES
        }
        skills_dir.mkdir(parents=True, exist_ok=True)
        remove_existing(dest, force)
        dest.mkdir()
        for fname, content in contents.items():
            (dest / fname).write_text(content, encoding="utf-8")
        return dest

    def create_symlink(
        agent_skills_dir: Path, central_skill_path: Path, force: bool
    ) -> Path:
        """Create `<agent_skills_dir>/trackio` as a relative symlink to the central copy."""
        agent_skills_dir = agent_skills_dir.expanduser().resolve()
        agent_skills_dir.mkdir(parents=True, exist_ok=True)
        link_path = agent_skills_dir / SKILL_ID
        remove_existing(link_path, force)
        link_path.symlink_to(os.path.relpath(central_skill_path, agent_skills_dir))
        return link_path

    # Extend the huggingface_hub target tables with a Cursor entry.
    global_targets = {**GLOBAL_TARGETS, "cursor": Path("~/.cursor/skills")}
    local_targets = {**LOCAL_TARGETS, "cursor": Path(".cursor/skills")}
    targets_dict = global_targets if args.global_ else local_targets

    if args.dest:
        if args.cursor or args.claude or args.codex or args.opencode or args.global_:
            error_exit("--dest cannot be combined with agent flags or --global.")
        skill_dest = install_to(Path(args.dest), args.force)
        print(f"Installed '{SKILL_ID}' to {skill_dest}")
        return

    agent_targets = []
    if args.cursor:
        agent_targets.append(targets_dict["cursor"])
    if args.claude:
        agent_targets.append(targets_dict["claude"])
    if args.codex:
        agent_targets.append(targets_dict["codex"])
    if args.opencode:
        agent_targets.append(targets_dict["opencode"])

    central_path = CENTRAL_GLOBAL if args.global_ else CENTRAL_LOCAL
    central_skill_path = install_to(central_path, args.force)
    print(f"Installed '{SKILL_ID}' to central location: {central_skill_path}")

    for agent_target in agent_targets:
        link_path = create_symlink(agent_target, central_skill_path, args.force)
        print(f"Created symlink: {link_path}")
1269
+
1270
+
1271
# Run the CLI entry point when this module is executed as a script.
if __name__ == "__main__":
    main()
trackio/cli_helpers.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import sys
3
+ from typing import Any
4
+
5
+
6
def format_json(data: Any) -> str:
    """Serialize `data` to a JSON string indented by two spaces."""
    rendered = json.dumps(data, indent=2)
    return rendered
9
+
10
+
11
+ def format_list(items: list[str], title: str | None = None) -> str:
12
+ """Format a list of items in human-readable format."""
13
+ if not items:
14
+ return f"No {title.lower() if title else 'items'} found."
15
+
16
+ output = []
17
+ if title:
18
+ output.append(f"{title}:")
19
+
20
+ for item in items:
21
+ output.append(f" - {item}")
22
+
23
+ return "\n".join(output)
24
+
25
+
26
def format_project_summary(summary: dict) -> str:
    """Format a project summary in human-readable form.

    Args:
        summary: Mapping with the keys `project`, `num_runs` and `runs`, and an
            optional `last_activity` (reported as the max step).

    Returns:
        A multi-line string listing the project name, run count, the runs, and
        the last activity when one is present.
    """
    lines = [
        f"Project: {summary['project']}",
        f"Number of runs: {summary['num_runs']}",
    ]

    runs = summary["runs"]
    if runs:
        lines.append("\nRuns:")
        lines.extend(f" - {run}" for run in runs)
    else:
        lines.append("\nNo runs found.")

    # Compare against None rather than truthiness: a max step of 0 is a valid
    # value and must still be reported (same rule as `last_step` in run summaries).
    last_activity = summary.get("last_activity")
    if last_activity is not None:
        lines.append(f"\nLast activity (max step): {last_activity}")

    return "\n".join(lines)
42
+
43
+
44
def format_run_summary(summary: dict) -> str:
    """Render a run summary (project, run, log count, metrics, config) as text.

    Config keys starting with an underscore are hidden from the output.
    """
    lines = [
        f"Project: {summary['project']}",
        f"Run: {summary['run']}",
        f"Number of logs: {summary['num_logs']}",
    ]

    last_step = summary.get("last_step")
    if last_step is not None:
        lines.append(f"Last step: {summary['last_step']}")

    metric_names = summary.get("metrics")
    if not metric_names:
        lines.append("\nNo metrics found.")
    else:
        lines.append("\nMetrics:")
        lines.extend(f" - {name}" for name in summary["metrics"])

    config = summary.get("config")
    if not config:
        lines.append("\nConfig: (no config)")
        return "\n".join(lines)

    lines.append("\nConfig:")
    visible = [(key, val) for key, val in config.items() if not key.startswith("_")]
    if visible:
        lines.extend(f" {key}: {val}" for key, val in visible)
    else:
        lines.append(" (no config)")

    return "\n".join(lines)
73
+
74
+
75
def format_metric_values(values: list[dict]) -> str:
    """Render metric records as a `Step | Timestamp | Value` table.

    Missing `step`, `timestamp` or `value` keys are shown as "N/A".
    """
    if not values:
        return "No metric values found."

    header = [
        f"Found {len(values)} value(s):\n",
        "Step | Timestamp | Value",
        "-" * 50,
    ]
    rows = [
        f"{rec.get('step', 'N/A')} | {rec.get('timestamp', 'N/A')} | {rec.get('value', 'N/A')}"
        for rec in values
    ]
    return "\n".join(header + rows)
91
+
92
+
93
def format_system_metrics(metrics: list[dict]) -> str:
    """Render system metric entries, one numbered section per entry.

    Each section is headed by the entry's timestamp ("N/A" when absent) and
    lists every other key/value pair of the entry.
    """
    if not metrics:
        return "No system metrics found."

    lines = [f"Found {len(metrics)} system metric entry/entries:\n"]
    for position, record in enumerate(metrics, start=1):
        stamp = record.get("timestamp", "N/A")
        lines.append(f"\nEntry {position} (Timestamp: {stamp}):")
        lines.extend(
            f" {name}: {reading}"
            for name, reading in record.items()
            if name != "timestamp"
        )

    return "\n".join(lines)
108
+
109
+
110
def format_system_metric_names(names: list[str]) -> str:
    """Render system metric names as a list titled "System Metrics"."""
    heading = "System Metrics"
    return format_list(names, heading)
113
+
114
+
115
def format_snapshot(snapshot: dict[str, list[dict]]) -> str:
    """Render a metrics snapshot as one small table per metric, sorted by name.

    Missing `step`, `timestamp` or `value` keys are shown as "N/A".
    """
    if not snapshot:
        return "No metrics found in the specified range."

    lines = []
    # Keys are unique, so ordering by key alone matches ordering the items.
    for name in sorted(snapshot):
        lines.append(f"\n{name}:")
        lines.append(" Step | Timestamp | Value")
        lines.append(" " + "-" * 48)
        lines.extend(
            f" {rec.get('step', 'N/A')} | {rec.get('timestamp', 'N/A')} | {rec.get('value', 'N/A')}"
            for rec in snapshot[name]
        )

    return "\n".join(lines)
132
+
133
+
134
def format_alerts(alerts: list[dict]) -> str:
    """Format alerts in human-readable format.

    Args:
        alerts: Alert records. The keys `timestamp`, `run`, `level`, `title`,
            `text` and `step` are read; missing keys fall back to "N/A"
            (or an empty string for `title` and `text`).

    Returns:
        A table with one `Timestamp | Run | Level | Title | Text | Step` row
        per alert, or "No alerts found." when `alerts` is empty.
    """
    if not alerts:
        return "No alerts found."

    output = [f"Found {len(alerts)} alert(s):\n"]
    output.append("Timestamp | Run | Level | Title | Text | Step")
    output.append("-" * 80)

    for a in alerts:
        ts = a.get("timestamp", "N/A")
        run = a.get("run", "N/A")
        # The key may be present with a None value (e.g. JSON null); calling
        # .upper() on it directly would raise AttributeError.
        raw_level = a.get("level", "N/A")
        level = "N/A" if raw_level is None else str(raw_level).upper()
        title = a.get("title", "")
        text = a.get("text", "") or ""
        step = a.get("step", "N/A")
        output.append(f"{ts} | {run} | {level} | {title} | {text} | {step}")

    return "\n".join(output)
153
+
154
+
155
def error_exit(message: str, code: int = 1) -> None:
    """Write `Error: <message>` to stderr and terminate with exit status `code`."""
    report = f"Error: {message}"
    print(report, file=sys.stderr)
    # Equivalent to sys.exit(code), which raises SystemExit.
    raise SystemExit(code)
+ sys.exit(code)
trackio/commit_scheduler.py ADDED
@@ -0,0 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Originally copied from https://github.com/huggingface/huggingface_hub/blob/d0a948fc2a32ed6e557042a95ef3e4af97ec4a7c/src/huggingface_hub/_commit_scheduler.py
2
+
3
+ import atexit
4
+ import logging
5
+ import time
6
+ from concurrent.futures import Future
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from threading import Lock, Thread
10
+ from typing import Callable, Dict, List, Union
11
+
12
+ from huggingface_hub.hf_api import (
13
+ DEFAULT_IGNORE_PATTERNS,
14
+ CommitInfo,
15
+ CommitOperationAdd,
16
+ HfApi,
17
+ )
18
+ from huggingface_hub.utils import filter_repo_objects
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
@dataclass(frozen=True)
class _FileToUpload:
    """Temporary dataclass to store info about files to upload. Not meant to be used directly."""

    # Absolute path of the file on disk.
    local_path: Path
    # Destination path inside the repo (repo prefix + path relative to the folder).
    path_in_repo: str
    # File size (`st_size`) recorded when the file was listed.
    size_limit: int
    # Modification time (`st_mtime`) recorded when the file was listed.
    last_modified: float
31
+
32
+
33
class CommitScheduler:
    """
    Scheduler to upload a local folder to the Hub at regular intervals (e.g. push to hub every 5 minutes).

    The recommended way to use the scheduler is to use it as a context manager. This ensures that the scheduler is
    properly stopped and the last commit is triggered when the script ends. The scheduler can also be stopped manually
    with the `stop` method. Check out the [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#scheduled-uploads)
    to learn more about how to use it.

    Args:
        repo_id (`str`):
            The id of the repo to commit to.
        folder_path (`str` or `Path`):
            Path to the local folder to upload regularly.
        every (`int` or `float`, *optional*):
            The number of minutes between each commit. Defaults to 5 minutes.
        path_in_repo (`str`, *optional*):
            Relative path of the directory in the repo, for example: `"checkpoints/"`. Defaults to the root folder
            of the repository.
        repo_type (`str`, *optional*):
            The type of the repo to commit to. Defaults to `model`.
        revision (`str`, *optional*):
            The revision of the repo to commit to. Defaults to `main`.
        private (`bool`, *optional*):
            Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
        token (`str`, *optional*):
            The token to use to commit to the repo. Defaults to the token saved on the machine.
        allow_patterns (`List[str]` or `str`, *optional*):
            If provided, only files matching at least one pattern are uploaded.
        ignore_patterns (`List[str]` or `str`, *optional*):
            If provided, files matching any of the patterns are not uploaded.
        squash_history (`bool`, *optional*):
            Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
            useful to avoid degraded performances on the repo when it grows too large.
        hf_api (`HfApi`, *optional*):
            The [`HfApi`] client to use to commit to the Hub. Can be set with custom settings (user agent, token,...).
        on_before_commit (`Callable[[], None]`, *optional*):
            If specified, a function that will be called before the CommitScheduler lists files to create a commit.

    Example:
    ```py
    >>> from pathlib import Path
    >>> from huggingface_hub import CommitScheduler

    # Scheduler uploads every 10 minutes
    >>> csv_path = Path("watched_folder/data.csv")
    >>> CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path=csv_path.parent, every=10)

    >>> with csv_path.open("a") as f:
    ...     f.write("first line")

    # Some time later (...)
    >>> with csv_path.open("a") as f:
    ...     f.write("second line")
    ```

    Example using a context manager:
    ```py
    >>> from pathlib import Path
    >>> from huggingface_hub import CommitScheduler

    >>> with CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path="watched_folder", every=10) as scheduler:
    ...     csv_path = Path("watched_folder/data.csv")
    ...     with csv_path.open("a") as f:
    ...         f.write("first line")
    ...     (...)
    ...     with csv_path.open("a") as f:
    ...         f.write("second line")

    # Scheduler is now stopped and last commit has been triggered
    ```
    """

    def __init__(
        self,
        *,
        repo_id: str,
        folder_path: Union[str, Path],
        every: Union[int, float] = 5,
        path_in_repo: str | None = None,
        repo_type: str | None = None,
        revision: str | None = None,
        private: bool | None = None,
        token: str | None = None,
        allow_patterns: list[str] | str | None = None,
        ignore_patterns: list[str] | str | None = None,
        squash_history: bool = False,
        hf_api: HfApi | None = None,
        on_before_commit: Callable[[], None] | None = None,
    ) -> None:
        """Create the repo if needed and start the background scheduler thread.

        Raises:
            ValueError: If `folder_path` points to a file, or `every` is not
                strictly positive.
        """
        self.api = hf_api or HfApi(token=token)
        self.on_before_commit = on_before_commit

        # Folder
        self.folder_path = Path(folder_path).expanduser().resolve()
        self.path_in_repo = path_in_repo or ""
        self.allow_patterns = allow_patterns

        if ignore_patterns is None:
            ignore_patterns = []
        elif isinstance(ignore_patterns, str):
            ignore_patterns = [ignore_patterns]
        self.ignore_patterns = ignore_patterns + DEFAULT_IGNORE_PATTERNS

        if self.folder_path.is_file():
            raise ValueError(
                f"'folder_path' must be a directory, not a file: '{self.folder_path}'."
            )
        self.folder_path.mkdir(parents=True, exist_ok=True)

        # Repository
        repo_url = self.api.create_repo(
            repo_id=repo_id, private=private, repo_type=repo_type, exist_ok=True
        )
        self.repo_id = repo_url.repo_id
        self.repo_type = repo_type
        self.revision = revision
        self.token = token

        # Maps each uploaded file to the mtime it had when last committed.
        self.last_uploaded: Dict[Path, float] = {}
        self.last_push_time: float | None = None

        if not every > 0:
            raise ValueError(f"'every' must be a positive integer, not '{every}'.")
        self.lock = Lock()
        self.every = every
        self.squash_history = squash_history

        # Must be set BEFORE the thread starts and the atexit hook is registered:
        # both end up in `_push_to_hub`, which reads this flag and would raise
        # AttributeError if it ran before the attribute existed.
        self.__stopped = False

        logger.info(
            f"Scheduled job to push '{self.folder_path}' to '{self.repo_id}' every {self.every} minutes."
        )
        self._scheduler_thread = Thread(target=self._run_scheduler, daemon=True)
        self._scheduler_thread.start()
        atexit.register(self._push_to_hub)

    def stop(self) -> None:
        """Stop the scheduler.

        A stopped scheduler cannot be restarted. Mostly for tests purposes.
        """
        self.__stopped = True

    def __enter__(self) -> "CommitScheduler":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        # Upload last changes before exiting
        self.trigger().result()
        self.stop()
        return

    def _run_scheduler(self) -> None:
        """Dumb thread waiting between each scheduled push to Hub."""
        while True:
            self.last_future = self.trigger()
            time.sleep(self.every * 60)
            if self.__stopped:
                break

    def trigger(self) -> Future:
        """Trigger a `push_to_hub` and return a future.

        This method is automatically called every `every` minutes. You can also call it manually to trigger a commit
        immediately, without waiting for the next scheduled commit.
        """
        return self.api.run_as_future(self._push_to_hub)

    def _push_to_hub(self) -> CommitInfo | None:
        """Run `push_to_hub` (and the optional history squash), logging any error before re-raising."""
        if self.__stopped:  # If stopped, already scheduled commits are ignored
            return None

        logger.info("(Background) scheduled commit triggered.")
        try:
            value = self.push_to_hub()
            if self.squash_history:
                logger.info("(Background) squashing repo history.")
                self.api.super_squash_history(
                    repo_id=self.repo_id, repo_type=self.repo_type, branch=self.revision
                )
            return value
        except Exception as e:
            logger.error(
                f"Error while pushing to Hub: {e}"
            )  # Depending on the setup, error might be silenced
            raise

    def push_to_hub(self) -> CommitInfo | None:
        """
        Push folder to the Hub and return the commit info.

        <Tip warning={true}>

        This method is not meant to be called directly. It is run in the background by the scheduler, respecting a
        queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency
        issues.

        </Tip>

        The default behavior of `push_to_hub` is to assume an append-only folder. It lists all files in the folder and
        uploads only changed files. If no changes are found, the method returns without committing anything. If you want
        to change this behavior, you can inherit from [`CommitScheduler`] and override this method. This can be useful
        for example to compress data together in a single file before committing. For more details and examples, check
        out our [integration guide](https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#scheduled-uploads).
        """
        # Check files to upload (with lock)
        with self.lock:
            if self.on_before_commit is not None:
                self.on_before_commit()

            logger.debug("Listing files to upload for scheduled commit.")

            # List files from folder (taken from `_prepare_upload_folder_additions`)
            relpath_to_abspath = {
                path.relative_to(self.folder_path).as_posix(): path
                for path in sorted(
                    self.folder_path.glob("**/*")
                )  # sorted to be deterministic
                if path.is_file()
            }
            prefix = f"{self.path_in_repo.strip('/')}/" if self.path_in_repo else ""

            # Filter with pattern + filter out unchanged files + retrieve current file size
            files_to_upload: List[_FileToUpload] = []
            for relpath in filter_repo_objects(
                relpath_to_abspath.keys(),
                allow_patterns=self.allow_patterns,
                ignore_patterns=self.ignore_patterns,
            ):
                local_path = relpath_to_abspath[relpath]
                stat = local_path.stat()
                # A file is uploaded when it was never committed or its mtime changed.
                if (
                    self.last_uploaded.get(local_path) is None
                    or self.last_uploaded[local_path] != stat.st_mtime
                ):
                    files_to_upload.append(
                        _FileToUpload(
                            local_path=local_path,
                            path_in_repo=prefix + relpath,
                            size_limit=stat.st_size,
                            last_modified=stat.st_mtime,
                        )
                    )

        # Return if nothing to upload
        if len(files_to_upload) == 0:
            logger.debug("Dropping schedule commit: no changed file to upload.")
            return None

        # Convert `_FileToUpload` as `CommitOperationAdd` (=> compute file shas + limit to file size)
        logger.debug("Removing unchanged files since previous scheduled commit.")
        add_operations = [
            CommitOperationAdd(
                # TODO: Cap the file to its current size, even if the user append data to it while a scheduled commit is happening
                # (requires an upstream fix for XET-535: `hf_xet` should support `BinaryIO` for upload)
                path_or_fileobj=file_to_upload.local_path,
                path_in_repo=file_to_upload.path_in_repo,
            )
            for file_to_upload in files_to_upload
        ]

        # Upload files (append mode expected - no need for lock)
        logger.debug("Uploading files for scheduled commit.")
        commit_info = self.api.create_commit(
            repo_id=self.repo_id,
            repo_type=self.repo_type,
            operations=add_operations,
            commit_message="Scheduled Commit",
            revision=self.revision,
        )

        # Record the committed mtimes only after the commit call returned.
        for file in files_to_upload:
            self.last_uploaded[file.local_path] = file.last_modified

        self.last_push_time = time.time()

        return commit_info
trackio/context_vars.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import contextvars
2
+ from typing import TYPE_CHECKING
3
+
4
+ if TYPE_CHECKING:
5
+ from trackio.run import Run
6
+
7
# Context-local state holders. Each defaults to None, i.e. "nothing set in this context".

# The active `Run` object, if any (type is only imported under TYPE_CHECKING).
current_run: contextvars.ContextVar["Run | None"] = contextvars.ContextVar(
    "current_run", default=None
)
# Presumably the name of the active project — confirm against the code that sets it.
current_project: contextvars.ContextVar[str | None] = contextvars.ContextVar(
    "current_project", default=None
)
# Presumably the server URL/identifier currently in use — confirm against callers.
current_server: contextvars.ContextVar[str | None] = contextvars.ContextVar(
    "current_server", default=None
)
# Presumably the Hugging Face Space id currently in use — confirm against callers.
current_space_id: contextvars.ContextVar[str | None] = contextvars.ContextVar(
    "current_space_id", default=None
)
trackio/deploy.py ADDED
@@ -0,0 +1,991 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib.metadata
2
+ import io
3
+ import json as json_mod
4
+ import os
5
+ import shutil
6
+ import sys
7
+ import tempfile
8
+ import threading
9
+ import time
10
+ from collections import Counter
11
+ from importlib.resources import files
12
+ from pathlib import Path
13
+
14
+ if sys.version_info >= (3, 11):
15
+ import tomllib
16
+ else:
17
+ import tomli as tomllib
18
+
19
+ import gradio
20
+ import httpx
21
+ import huggingface_hub
22
+ from gradio_client import Client, handle_file
23
+ from httpx import ReadTimeout
24
+ from huggingface_hub import Volume
25
+ from huggingface_hub.errors import HfHubHTTPError, RepositoryNotFoundError
26
+
27
+ import trackio
28
+ from trackio.bucket_storage import (
29
+ create_bucket_if_not_exists,
30
+ export_from_bucket_for_static,
31
+ upload_project_to_bucket,
32
+ upload_project_to_bucket_for_static,
33
+ )
34
+ from trackio.sqlite_storage import SQLiteStorage
35
+ from trackio.utils import (
36
+ MEDIA_DIR,
37
+ get_or_create_project_hash,
38
+ preprocess_space_and_dataset_ids,
39
+ )
40
+
41
# URL template for a Space's app host; expects `user_name` and `space_name`.
SPACE_HOST_URL = "https://{user_name}-{space_name}.hf.space/"
# URL template for a Space's page on the Hub; expects `space_id`.
SPACE_URL = "https://huggingface.co/spaces/{space_id}"
# ANSI escape sequences wrapped around Space URLs in console messages.
_BOLD_ORANGE = "\033[1m\033[38;5;208m"
_RESET = "\033[0m"
45
+
46
+
47
def raise_if_space_is_frozen_for_logging(space_id: str) -> None:
    """
    Refuses to log to a Space that uses the static SDK.

    The Space is looked up on the Hub. A Space that does not exist is accepted
    silently, as is any Space whose `sdk` is not `"static"`.

    Args:
        space_id (`str`):
            The ID of the Space that logs would be sent to.

    Raises:
        `RuntimeError`: If the Space exists and its `sdk` is `"static"`.
    """
    api = huggingface_hub.HfApi()
    try:
        space = api.space_info(space_id)
    except RepositoryNotFoundError:
        # Nothing to check: the Space has not been created.
        return

    if getattr(space, "sdk", None) != "static":
        return

    message = (
        f"Cannot log to Hugging Face Space '{space_id}' because it has been frozen "
        f"(it uses the static SDK: a read-only dashboard with no live Trackio server).\n\n"
        f"Use a different space_id for training, or create a new Gradio Trackio Space. "
        f"Freezing converts a live Gradio Space to static after a run; a frozen Space "
        f'cannot accept new logs. See trackio.sync(..., sdk="static") in the Trackio docs.'
    )
    raise RuntimeError(message)
60
+
61
+
62
+ def _readme_linked_hub_yaml(dataset_id: str | None) -> str:
63
+ if dataset_id is not None:
64
+ return f"datasets:\n - {dataset_id}\n"
65
+ return ""
66
+
67
+
68
# Source text of the `app.py` that `deploy_as_space` uploads to a Gradio Space.
_SPACE_APP_PY = "import trackio\ntrackio.show()\n"
69
+
70
+
71
+ def _retry_hf_write(op_name: str, fn, retries: int = 4, initial_delay: float = 1.5):
72
+ delay = initial_delay
73
+ for attempt in range(1, retries + 1):
74
+ try:
75
+ return fn()
76
+ except ReadTimeout:
77
+ if attempt == retries:
78
+ raise
79
+ print(
80
+ f"* {op_name} timed out (attempt {attempt}/{retries}). Retrying in {delay:.1f}s..."
81
+ )
82
+ time.sleep(delay)
83
+ delay = min(delay * 2, 12)
84
+ except HfHubHTTPError as e:
85
+ status = e.response.status_code if e.response is not None else None
86
+ if status is None or status < 500 or attempt == retries:
87
+ raise
88
+ print(
89
+ f"* {op_name} failed with HTTP {status} (attempt {attempt}/{retries}). Retrying in {delay:.1f}s..."
90
+ )
91
+ time.sleep(delay)
92
+ delay = min(delay * 2, 12)
93
+
94
+
95
def _get_space_volumes(space_id: str) -> list[Volume]:
    """
    Return mounted volumes for a Space.

    `HfApi.get_space_runtime()` does not always populate `volumes`, even when the
    mount exists, so `space_info().runtime.volumes` is consulted as a fallback.
    An empty list is returned when neither source reports any volume.
    """
    api = huggingface_hub.HfApi()

    primary = api.get_space_runtime(space_id).volumes
    if primary:
        return list(primary)

    fallback_runtime = api.space_info(space_id).runtime
    if fallback_runtime and fallback_runtime.volumes:
        return list(fallback_runtime.volumes)
    return []
113
+
114
+
115
def _get_source_install_dependencies() -> str:
    """
    Get trackio dependencies from pyproject.toml for source installs.

    Reads the `pyproject.toml` located next to the `trackio` package directory
    and joins `project.dependencies` with the optional `spaces` extra, one
    requirement per line.
    """
    package_dir = Path(files("trackio"))
    with open(package_dir.parent / "pyproject.toml", "rb") as fh:
        project_table = tomllib.load(fh)["project"]

    base_requirements = project_table["dependencies"]
    extras = project_table.get("optional-dependencies", {})
    return "\n".join(base_requirements + extras.get("spaces", []))
126
+
127
+
128
+ def _is_trackio_installed_from_source() -> bool:
129
+ """Check if trackio is installed from source/editable install vs PyPI."""
130
+ try:
131
+ trackio_file = trackio.__file__
132
+ if "site-packages" not in trackio_file and "dist-packages" not in trackio_file:
133
+ return True
134
+
135
+ dist = importlib.metadata.distribution("trackio")
136
+ if dist.files:
137
+ files = list(dist.files)
138
+ has_pth = any(".pth" in str(f) for f in files)
139
+ if has_pth:
140
+ return True
141
+
142
+ return False
143
+ except (
144
+ AttributeError,
145
+ importlib.metadata.PackageNotFoundError,
146
+ importlib.metadata.MetadataError,
147
+ ValueError,
148
+ TypeError,
149
+ ):
150
+ return True
151
+
152
+
153
def deploy_as_space(
    space_id: str,
    space_storage: huggingface_hub.SpaceStorage | None = None,
    dataset_id: str | None = None,
    bucket_id: str | None = None,
    private: bool | None = None,
):
    """
    Creates (or reuses) a Gradio Space and uploads the files and settings it needs.

    Uploads `README.md`, `requirements.txt` and `app.py`, plus the local `trackio`
    package folder when trackio is a source install. Then sets the Space secret
    and variables, and attaches `bucket_id` at `/data` when one is given.
    Does nothing when the `SYSTEM` environment variable equals `"spaces"`.

    Args:
        space_id (`str`):
            The ID of the Space to deploy to.
        space_storage ([`~huggingface_hub.SpaceStorage`], *optional*):
            Passed through to `huggingface_hub.create_repo`.
        dataset_id (`str`, *optional*):
            Dataset exposed to the Space as `TRACKIO_DATASET_ID`. Mutually
            exclusive with `bucket_id`.
        bucket_id (`str`, *optional*):
            Bucket to create if needed and mount at `/data`; `TRACKIO_DIR` is set
            to `/data/trackio`. Mutually exclusive with `dataset_id`.
        private (`bool`, *optional*):
            Passed through to `create_repo` and `create_bucket_if_not_exists`.

    Raises:
        `ValueError`: If both `dataset_id` and `bucket_id` are given, if the Space
        cannot be created for a reason other than HTTP 401/403, or if the
        frontend build is missing for a source install.
    """
    if (
        os.getenv("SYSTEM") == "spaces"
    ):  # in case a repo with this function is uploaded to spaces
        return

    if dataset_id is not None and bucket_id is not None:
        raise ValueError(
            "Cannot use bucket volume options together with dataset_id; use one persistence mode."
        )

    trackio_path = files("trackio")

    hf_api = huggingface_hub.HfApi()

    def _create_space_repo() -> None:
        # Single definition of the create call, shared by the first attempt and
        # the retry after login.
        huggingface_hub.create_repo(
            space_id,
            private=private,
            space_sdk="gradio",
            space_storage=space_storage,
            repo_type="space",
            exist_ok=True,
        )

    try:
        _create_space_repo()
    except HfHubHTTPError as e:
        # `response` is guarded the same way `_retry_hf_write` guards it.
        status = e.response.status_code if e.response is not None else None
        if status not in (401, 403):  # anything but unauthorized or forbidden
            raise ValueError(f"Failed to create Space: {e}") from e
        print("Need 'write' access token to create a Spaces repo.")
        huggingface_hub.login(add_to_git_credential=False)
        _create_space_repo()

    # We can assume pandas, gradio, and huggingface-hub are already installed in a Gradio Space.
    # Make sure necessary dependencies are installed by creating a requirements.txt.
    is_source_install = _is_trackio_installed_from_source()

    if bucket_id is not None:
        create_bucket_if_not_exists(bucket_id, private=private)

    # Decode explicitly as UTF-8: the content is re-encoded as UTF-8 below, and
    # the platform default encoding is not guaranteed to match.
    with open(Path(trackio_path, "README.md"), "r", encoding="utf-8") as f:
        readme_content = f.read()
    readme_content = readme_content.replace("{GRADIO_VERSION}", gradio.__version__)
    readme_content = readme_content.replace("{APP_FILE}", "app.py")
    readme_content = readme_content.replace(
        "{LINKED_HUB_METADATA}", _readme_linked_hub_yaml(dataset_id)
    )
    readme_buffer = io.BytesIO(readme_content.encode("utf-8"))
    hf_api.upload_file(
        path_or_fileobj=readme_buffer,
        path_in_repo="README.md",
        repo_id=space_id,
        repo_type="space",
    )

    if is_source_install:
        requirements_content = _get_source_install_dependencies()
    else:
        requirements_content = f"trackio[spaces]=={trackio.__version__}"

    requirements_buffer = io.BytesIO(requirements_content.encode("utf-8"))
    hf_api.upload_file(
        path_or_fileobj=requirements_buffer,
        path_in_repo="requirements.txt",
        repo_id=space_id,
        repo_type="space",
    )

    huggingface_hub.utils.disable_progress_bars()

    if is_source_install:
        # A source install ships the local package, so the built frontend must
        # exist before the folder is uploaded.
        dist_index = (
            Path(trackio.__file__).resolve().parent / "frontend" / "dist" / "index.html"
        )
        if not dist_index.is_file():
            raise ValueError(
                "The Trackio frontend build is missing. From the repository root run "
                "`cd trackio/frontend && npm ci && npm run build`, then deploy again."
            )
        hf_api.upload_folder(
            repo_id=space_id,
            repo_type="space",
            folder_path=trackio_path,
            path_in_repo="trackio",
            ignore_patterns=[
                "README.md",
                "frontend/node_modules/**",
                "frontend/src/**",
                "frontend/.gitignore",
                "frontend/package.json",
                "frontend/package-lock.json",
                "frontend/vite.config.js",
                "frontend/svelte.config.js",
                "**/__pycache__/**",
                "*.pyc",
            ],
        )

    app_file_content = _SPACE_APP_PY
    app_file_buffer = io.BytesIO(app_file_content.encode("utf-8"))
    hf_api.upload_file(
        path_or_fileobj=app_file_buffer,
        path_in_repo="app.py",
        repo_id=space_id,
        repo_type="space",
    )

    if hf_token := huggingface_hub.utils.get_token():
        huggingface_hub.add_space_secret(space_id, "HF_TOKEN", hf_token)
    if bucket_id is not None:
        existing = _get_space_volumes(space_id)
        already_mounted = any(
            v.type == "bucket" and v.source == bucket_id and v.mount_path == "/data"
            for v in existing
        )
        if not already_mounted:
            # Keep every other volume; drop any mount of this same bucket at a
            # different path before re-adding it at '/data'.
            non_bucket = [
                v
                for v in existing
                if not (v.type == "bucket" and v.source == bucket_id)
            ]
            hf_api.set_space_volumes(
                space_id,
                non_bucket
                + [Volume(type="bucket", source=bucket_id, mount_path="/data")],
            )
            print(f"* Attached bucket {bucket_id} at '/data'")
        huggingface_hub.add_space_variable(space_id, "TRACKIO_DIR", "/data/trackio")
    elif dataset_id is not None:
        huggingface_hub.add_space_variable(space_id, "TRACKIO_DATASET_ID", dataset_id)
    # Forward optional UI settings from the local environment to the Space.
    if logo_light_url := os.environ.get("TRACKIO_LOGO_LIGHT_URL"):
        huggingface_hub.add_space_variable(
            space_id, "TRACKIO_LOGO_LIGHT_URL", logo_light_url
        )
    if logo_dark_url := os.environ.get("TRACKIO_LOGO_DARK_URL"):
        huggingface_hub.add_space_variable(
            space_id, "TRACKIO_LOGO_DARK_URL", logo_dark_url
        )
    if plot_order := os.environ.get("TRACKIO_PLOT_ORDER"):
        huggingface_hub.add_space_variable(space_id, "TRACKIO_PLOT_ORDER", plot_order)
    if theme := os.environ.get("TRACKIO_THEME"):
        huggingface_hub.add_space_variable(space_id, "TRACKIO_THEME", theme)
    huggingface_hub.add_space_variable(space_id, "GRADIO_MCP_SERVER", "True")
308
+
309
+
310
def create_space_if_not_exists(
    space_id: str,
    space_storage: huggingface_hub.SpaceStorage | None = None,
    dataset_id: str | None = None,
    bucket_id: str | None = None,
    private: bool | None = None,
) -> None:
    """
    Creates a new Hugging Face Space if it does not exist.

    Args:
        space_id (`str`):
            The ID of the Space to create.
        space_storage ([`~huggingface_hub.SpaceStorage`], *optional*):
            Choice of persistent storage tier for the Space.
        dataset_id (`str`, *optional*):
            Deprecated. Use `bucket_id` instead.
        bucket_id (`str`, *optional*):
            Full Hub bucket id (`namespace/name`) to attach via the Hub volumes API (platform mount).
            Sets `TRACKIO_DIR` to the mount path.
        private (`bool`, *optional*):
            Whether to make the Space private. If `None` (default), the repo will be
            public unless the organization's default is private. This value is ignored
            if the repo already exists.

    Raises:
        `ValueError`: If any ID is not of the form `namespace/name`, or if looking
        up the Space fails with an HTTP error other than 401/403.
    """
    if "/" not in space_id:
        raise ValueError(
            f"Invalid space ID: {space_id}. Must be in the format: username/reponame or orgname/reponame."
        )
    if dataset_id is not None and "/" not in dataset_id:
        raise ValueError(
            f"Invalid dataset ID: {dataset_id}. Must be in the format: username/datasetname or orgname/datasetname."
        )
    if bucket_id is not None and "/" not in bucket_id:
        raise ValueError(
            f"Invalid bucket ID: {bucket_id}. Must be in the format: username/bucketname or orgname/bucketname."
        )
    try:
        huggingface_hub.repo_info(space_id, repo_type="space")
        print(
            f"* Found existing space: {_BOLD_ORANGE}{SPACE_URL.format(space_id=space_id)}{_RESET}"
        )
        return
    except RepositoryNotFoundError:
        # Must stay ahead of the broader HfHubHTTPError handler below.
        pass
    except HfHubHTTPError as e:
        # `response` is guarded the same way `_retry_hf_write` guards it.
        status = e.response.status_code if e.response is not None else None
        if status not in (401, 403):  # anything but unauthorized or forbidden
            raise ValueError(f"Failed to create Space: {e}") from e
        print("Need 'write' access token to create a Spaces repo.")
        huggingface_hub.login(add_to_git_credential=False)

    print(
        f"* Creating new space: {_BOLD_ORANGE}{SPACE_URL.format(space_id=space_id)}{_RESET}"
    )
    deploy_as_space(
        space_id,
        space_storage,
        dataset_id,
        bucket_id,
        private,
    )
    print("* Waiting for Space to be ready...")
    _wait_until_space_running(space_id)
374
+
375
+
376
def _wait_until_space_running(space_id: str, timeout: int = 300) -> None:
    """
    Polls the Space until its runtime stage is `RUNNING`.

    Hub HTTP errors and request errors during a poll are ignored and the poll is
    repeated. The pause between polls starts at 2 seconds and grows by 1.5x up to
    15 seconds.

    Args:
        space_id (`str`):
            The ID of the Space to poll.
        timeout (`int`, *optional*, defaults to `300`):
            Seconds after which polling stops.

    Raises:
        `RuntimeError`: If the Space reaches one of the failure stages.
        `TimeoutError`: If `RUNNING` is not observed within `timeout` seconds.
    """
    api = huggingface_hub.HfApi()
    started_at = time.time()
    pause = 2
    per_request_timeout = 45.0
    terminal_stages = frozenset(
        ("NO_APP_FILE", "CONFIG_ERROR", "BUILD_ERROR", "RUNTIME_ERROR")
    )

    while time.time() - started_at < timeout:
        try:
            space = api.space_info(space_id, timeout=per_request_timeout)
        except RuntimeError:
            raise
        except (huggingface_hub.utils.HfHubHTTPError, httpx.RequestError):
            # Transient lookup failure: poll again after the pause.
            space = None

        if space is not None and space.runtime:
            stage = str(space.runtime.stage)
            if stage in terminal_stages:
                raise RuntimeError(
                    f"Space {space_id} entered terminal stage {stage}. "
                    "Fix README.md or app files; see build logs on the Hub."
                )
            if stage == "RUNNING":
                return

        time.sleep(pause)
        pause = min(pause * 1.5, 15)

    raise TimeoutError(
        f"Space {space_id} did not reach RUNNING within {timeout}s. "
        "Check status and build logs on the Hub."
    )
406
+
407
+
408
def wait_until_space_exists(
    space_id: str,
) -> None:
    """
    Blocks the current thread until the Space exists.

    Makes up to 30 lookups. After each failed lookup it sleeps, starting at
    1 second and doubling up to 60 seconds.

    Args:
        space_id (`str`):
            The ID of the Space to wait for.

    Raises:
        `TimeoutError`: If waiting for the Space takes longer than expected.
    """
    api = huggingface_hub.HfApi()
    retryable = (huggingface_hub.utils.HfHubHTTPError, httpx.RequestError)
    pause = 1
    attempts_left = 30

    while attempts_left > 0:
        attempts_left -= 1
        try:
            api.space_info(space_id)
        except retryable:
            time.sleep(pause)
            pause = min(pause * 2, 60)
        else:
            return

    raise TimeoutError("Waiting for space to exist took longer than expected")
431
+
432
+
433
def upload_db_to_space(project: str, space_id: str, force: bool = False) -> None:
    """
    Uploads the database of a local Trackio project to a Hugging Face Space.

    The upload goes through the Gradio Client rather than
    `huggingface_hub.upload_file`, because the latter would trigger a new build of
    the Space.

    Args:
        project (`str`):
            The name of the project to upload.
        space_id (`str`):
            The ID of the Space to upload to.
        force (`bool`, *optional*, defaults to `False`):
            If `True`, overwrites the existing database without prompting. If `False`,
            prompts for confirmation when the project already exists on the Space.
    """
    db_path = SQLiteStorage.get_project_db_path(project)
    client = Client(space_id, verbose=False, httpx_kwargs={"timeout": 90})

    cancelled = False
    if not force:
        try:
            remote_projects = client.predict(api_name="/get_all_projects")
            if project in remote_projects:
                answer = input(
                    f"Database for project '{project}' already exists on Space '{space_id}'. "
                    f"Overwrite it? (y/N): "
                )
                if answer.lower() not in ("y", "yes"):
                    print("* Upload cancelled.")
                    cancelled = True
        except Exception as e:
            # Best effort: a failed existence check must not block the upload.
            print(f"* Warning: Could not check if project exists on Space: {e}")
            print("* Proceeding with upload...")
    if cancelled:
        return

    client.predict(
        api_name="/upload_db_to_space",
        project=project,
        uploaded_db=handle_file(db_path),
        hf_token=huggingface_hub.utils.get_token(),
    )
473
+
474
+
475
# Maximum number of log rows sent per bulk API call in `sync_incremental`.
SYNC_BATCH_SIZE = 500
476
+
477
+
478
def sync_incremental(
    project: str,
    space_id: str,
    private: bool | None = None,
    pending_only: bool = False,
) -> None:
    """
    Syncs a local Trackio project to a Space via the bulk_log API endpoints
    instead of uploading the entire DB file. Supports incremental sync.

    Rows are sent in batches of `SYNC_BATCH_SIZE`. In pending-only mode each
    pending set is cleared locally after it has been sent. The call then waits
    for the Space to report the synced runs and records `space_id` in the
    project metadata.

    Args:
        project: The name of the project to sync.
        space_id: The HF Space ID to sync to.
        private: Whether to make the Space private if creating.
        pending_only: If True, only sync rows tagged with space_id (pending data).
    """
    print(
        f"* Syncing project '{project}' to: {SPACE_URL.format(space_id=space_id)} (please wait...)"
    )
    create_space_if_not_exists(space_id, private=private)
    wait_until_space_exists(space_id)

    client = Client(space_id, verbose=False, httpx_kwargs={"timeout": 90})
    hf_token = huggingface_hub.utils.get_token()
    expected_run_counts: Counter[str] = Counter()

    def _send_in_batches(label: str, api_name: str, rows) -> None:
        # Push `rows` to `api_name` one slice at a time, reporting progress.
        total = len(rows)
        for start in range(0, total, SYNC_BATCH_SIZE):
            stop = start + SYNC_BATCH_SIZE
            print(f" Syncing {label}: {min(stop, total)}/{total}...")
            client.predict(
                api_name=api_name, logs=rows[start:stop], hf_token=hf_token
            )

    if pending_only:
        pending_logs = SQLiteStorage.get_pending_logs(project)
        if pending_logs:
            metric_rows = pending_logs["logs"]
            expected_run_counts.update(row["run"] for row in metric_rows)
            _send_in_batches("metrics", "/bulk_log", metric_rows)
            SQLiteStorage.clear_pending_logs(project, pending_logs["ids"])

        pending_sys = SQLiteStorage.get_pending_system_logs(project)
        if pending_sys:
            _send_in_batches("system metrics", "/bulk_log_system", pending_sys["logs"])
            SQLiteStorage.clear_pending_system_logs(project, pending_sys["ids"])

        pending_uploads = SQLiteStorage.get_pending_uploads(project)
        if pending_uploads:
            # Files that no longer exist locally are skipped.
            media_entries = [
                {
                    "project": item["project"],
                    "run": item["run"],
                    "step": item["step"],
                    "relative_path": item["relative_path"],
                    "uploaded_file": handle_file(item["file_path"]),
                }
                for item in pending_uploads["uploads"]
                if os.path.exists(item["file_path"])
            ]
            if media_entries:
                print(f" Syncing {len(media_entries)} media files...")
                client.predict(
                    api_name="/bulk_upload_media",
                    uploads=media_entries,
                    hf_token=hf_token,
                )
            SQLiteStorage.clear_pending_uploads(project, pending_uploads["ids"])
    else:
        all_logs = SQLiteStorage.get_all_logs_for_sync(project)
        if all_logs:
            expected_run_counts.update(row["run"] for row in all_logs)
            _send_in_batches("metrics", "/bulk_log", all_logs)

        all_sys_logs = SQLiteStorage.get_all_system_logs_for_sync(project)
        if all_sys_logs:
            _send_in_batches("system metrics", "/bulk_log_system", all_sys_logs)

    _wait_for_remote_sync(client, project, expected_run_counts)
    SQLiteStorage.set_project_metadata(project, "space_id", space_id)
    print(
        f"* Synced successfully to space: {_BOLD_ORANGE}{SPACE_URL.format(space_id=space_id)}{_RESET}"
    )
580
+
581
+
582
+ def _wait_for_remote_sync(
583
+ client: Client,
584
+ project: str,
585
+ expected_run_counts: Counter[str],
586
+ timeout: int = 180,
587
+ ) -> None:
588
+ if not expected_run_counts:
589
+ return
590
+
591
+ deadline = time.time() + timeout
592
+ delay = 2
593
+ last_error: Exception | None = None
594
+ pending = dict(expected_run_counts)
595
+
596
+ while time.time() < deadline and pending:
597
+ completed = []
598
+ for run_name, expected_num_logs in pending.items():
599
+ try:
600
+ summary = client.predict(
601
+ project=project, run=run_name, api_name="/get_run_summary"
602
+ )
603
+ if summary.get("num_logs") == expected_num_logs:
604
+ completed.append(run_name)
605
+ except Exception as e:
606
+ last_error = e
607
+ for run_name in completed:
608
+ pending.pop(run_name, None)
609
+ if pending:
610
+ time.sleep(delay)
611
+ delay = min(delay * 1.5, 15)
612
+
613
+ if pending:
614
+ raise TimeoutError(
615
+ f"Remote sync for project '{project}' did not become visible for runs "
616
+ f"{sorted(pending.items())} within {timeout}s. "
617
+ f"Last error: {last_error!r}"
618
+ )
619
+
620
+
621
def upload_dataset_for_static(
    project: str,
    dataset_id: str,
    private: bool | None = None,
) -> None:
    """
    Exports a project for a static Space and uploads it to a Hub dataset repo.

    The dataset repo is created if needed. The project is exported into a
    temporary directory, the project's media folder (if any) is copied next to
    it under `media/`, and the directory is uploaded with retries.

    Args:
        project (`str`):
            The name of the project to export.
        dataset_id (`str`):
            The ID of the dataset repo to upload to.
        private (`bool`, *optional*):
            Passed through to `huggingface_hub.create_repo`.

    Raises:
        `ValueError`: If the dataset repo cannot be created for a reason other
        than HTTP 401/403.
    """
    hf_api = huggingface_hub.HfApi()

    def _create_dataset_repo() -> None:
        # Single definition of the create call, shared by the first attempt and
        # the retry after login.
        huggingface_hub.create_repo(
            dataset_id,
            private=private,
            repo_type="dataset",
            exist_ok=True,
        )

    try:
        _create_dataset_repo()
    except HfHubHTTPError as e:
        # `response` is guarded the same way `_retry_hf_write` guards it.
        status = e.response.status_code if e.response is not None else None
        if status not in (401, 403):
            raise ValueError(f"Failed to create Dataset: {e}") from e
        print("Need 'write' access token to create a Dataset repo.")
        huggingface_hub.login(add_to_git_credential=False)
        _create_dataset_repo()

    with tempfile.TemporaryDirectory() as tmp_dir:
        output_dir = Path(tmp_dir)
        SQLiteStorage.export_for_static_space(project, output_dir)

        media_dir = MEDIA_DIR / project
        if media_dir.exists():
            dest = output_dir / "media"
            shutil.copytree(media_dir, dest)

        # The upload must finish before the temporary directory is removed.
        _retry_hf_write(
            "Dataset upload",
            lambda: hf_api.upload_folder(
                repo_id=dataset_id,
                repo_type="dataset",
                folder_path=str(output_dir),
            ),
        )

    print(f"* Dataset uploaded: https://huggingface.co/datasets/{dataset_id}")
667
+
668
+
669
def deploy_as_static_space(
    space_id: str,
    dataset_id: str | None,
    project: str,
    bucket_id: str | None = None,
    private: bool | None = None,
    hf_token: str | None = None,
) -> None:
    """
    Creates (or reuses) a static Space and uploads the dashboard files to it.

    Uploads a generated `README.md`, the built frontend (`frontend/dist`), a
    `config.json` describing the project, and the package `assets` folder when
    present. Every upload goes through `_retry_hf_write`.
    Does nothing when the `SYSTEM` environment variable equals `"spaces"`.

    Args:
        space_id (`str`):
            The ID of the static Space to deploy to.
        dataset_id (`str`, *optional*):
            Dataset listed in the README metadata and stored in `config.json`.
        project (`str`):
            The name of the project, stored in `config.json`.
        bucket_id (`str`, *optional*):
            Bucket ID stored in `config.json`.
        private (`bool`, *optional*):
            Passed through to `create_repo`; also stored (as a bool) in `config.json`.
        hf_token (`str`, *optional*):
            Token stored in `config.json`, only when `private` is truthy.

    Raises:
        `ValueError`: If the Space cannot be created for a reason other than HTTP
        401/403, or if the frontend build directory is missing.
    """
    if os.getenv("SYSTEM") == "spaces":
        return

    hf_api = huggingface_hub.HfApi()

    def _create_static_space_repo() -> None:
        # Single definition of the create call, shared by the first attempt and
        # the retry after login.
        huggingface_hub.create_repo(
            space_id,
            private=private,
            space_sdk="static",
            repo_type="space",
            exist_ok=True,
        )

    try:
        _create_static_space_repo()
    except HfHubHTTPError as e:
        # `response` is guarded the same way `_retry_hf_write` guards it.
        status = e.response.status_code if e.response is not None else None
        if status not in (401, 403):
            raise ValueError(f"Failed to create Space: {e}") from e
        print("Need 'write' access token to create a Spaces repo.")
        huggingface_hub.login(add_to_git_credential=False)
        _create_static_space_repo()

    linked = _readme_linked_hub_yaml(dataset_id)
    readme_content = (
        f"---\nsdk: static\npinned: false\ntags:\n - trackio\n{linked}---\n"
    )
    # The buffer is built inside the lambda so each retry uploads a fresh,
    # unread stream.
    _retry_hf_write(
        "Static Space README upload",
        lambda: hf_api.upload_file(
            path_or_fileobj=io.BytesIO(readme_content.encode("utf-8")),
            path_in_repo="README.md",
            repo_id=space_id,
            repo_type="space",
        ),
    )

    trackio_path = files("trackio")
    dist_dir = Path(trackio_path).parent / "trackio" / "frontend" / "dist"
    if not dist_dir.is_dir():
        # Fall back to the location of the imported package itself.
        dist_dir = Path(trackio.__file__).resolve().parent / "frontend" / "dist"
    if not dist_dir.is_dir():
        raise ValueError(
            "The Trackio frontend build is missing. From the repository root run "
            "`cd trackio/frontend && npm ci && npm run build`, then deploy again."
        )

    _retry_hf_write(
        "Static Space frontend upload",
        lambda: hf_api.upload_folder(
            repo_id=space_id,
            repo_type="space",
            folder_path=str(dist_dir),
        ),
    )

    config = {
        "mode": "static",
        "project": project,
        "private": bool(private),
    }
    if bucket_id is not None:
        config["bucket_id"] = bucket_id
    if dataset_id is not None:
        config["dataset_id"] = dataset_id
    if hf_token and private:
        # NOTE(review): this writes the token in plaintext into `config.json`
        # inside the Space repo, so anyone able to read the repo can read the
        # token. Confirm this is intended and that a narrowly scoped token is used.
        config["hf_token"] = hf_token

    _retry_hf_write(
        "Static Space config upload",
        lambda: hf_api.upload_file(
            path_or_fileobj=io.BytesIO(json_mod.dumps(config).encode("utf-8")),
            path_in_repo="config.json",
            repo_id=space_id,
            repo_type="space",
        ),
    )

    assets_dir = Path(trackio.__file__).resolve().parent / "assets"
    if assets_dir.is_dir():
        _retry_hf_write(
            "Static Space assets upload",
            lambda: hf_api.upload_folder(
                repo_id=space_id,
                repo_type="space",
                folder_path=str(assets_dir),
                path_in_repo="assets",
            ),
        )

    print(
        f"* Static Space deployed: {_BOLD_ORANGE}{SPACE_URL.format(space_id=space_id)}{_RESET}"
    )
774
+
775
+
776
def sync(
    project: str,
    space_id: str | None = None,
    private: bool | None = None,
    force: bool = False,
    run_in_background: bool = False,
    sdk: str = "gradio",
    dataset_id: str | None = None,
    bucket_id: str | None = None,
) -> str:
    """
    Syncs a local Trackio project's database to a Hugging Face Space.
    If the Space does not exist, it will be created. Local data is never deleted.

    **Freezing:** Passing ``sdk="static"`` deploys a static Space backed by an HF Bucket
    (read-only dashboard, no Gradio server). The same project can be synced again later
    to refresh that static Space. For a one-time snapshot of an existing Gradio Space,
    use ``freeze()`` instead.

    Args:
        project (`str`): The name of the project to upload.
        space_id (`str`, *optional*): The ID of the Space to upload to (e.g., `"username/space_id"`).
            If not provided, checks project metadata first, then generates a random space_id.
        private (`bool`, *optional*):
            Whether to make the Space private. If None (default), the repo will be
            public unless the organization's default is private. This value is ignored
            if the repo already exists.
        force (`bool`, *optional*, defaults to `False`):
            If `True`, overwrite the existing database without prompting for confirmation.
            If `False`, prompt the user before overwriting an existing database.
        run_in_background (`bool`, *optional*, defaults to `False`):
            If `True`, the Space creation and database upload will be run in a background thread.
            If `False`, all the steps will be run synchronously.
        sdk (`str`, *optional*, defaults to `"gradio"`):
            The type of Space to deploy. `"gradio"` deploys a Gradio Space with a live
            server. `"static"` freezes the Space: deploys a static Space that reads from an HF Bucket
            (no server needed).
        dataset_id (`str`, *optional*):
            Deprecated. Use `bucket_id` instead.
        bucket_id (`str`, *optional*):
            The ID of the HF Bucket to sync to. By default, a bucket is auto-generated
            from the space_id.
    Returns:
        `str`: The Space ID of the synced project.

    Raises:
        `ValueError`: If `sdk` is not `"gradio"` or `"static"`, or (when run
        synchronously) if the existing Space uses a different sdk.
    """
    if sdk not in ("gradio", "static"):
        raise ValueError(f"sdk must be 'gradio' or 'static', got '{sdk}'")

    # Resolve the target Space: explicit argument, then stored metadata, then a
    # name derived from the project.
    if space_id is None:
        space_id = SQLiteStorage.get_space_id(project)
    if space_id is None:
        space_id = f"{project}-{get_or_create_project_hash(project)}"
    space_id, dataset_id, bucket_id = preprocess_space_and_dataset_ids(
        space_id, dataset_id, bucket_id
    )

    def _ensure_sdk_matches_existing_space() -> None:
        # A Space that does not exist yet cannot conflict with the requested sdk.
        try:
            existing_sdk = huggingface_hub.HfApi().space_info(space_id).sdk
        except RepositoryNotFoundError:
            return
        if existing_sdk and existing_sdk != sdk:
            raise ValueError(
                f"Space '{space_id}' is a '{existing_sdk}' Space but sdk='{sdk}' was requested. "
                f"The sdk must match the existing Space type."
            )

    def _sync_static() -> None:
        # A dataset takes precedence over a bucket when both are available.
        if dataset_id is not None:
            upload_dataset_for_static(project, dataset_id, private=private)
            token = huggingface_hub.utils.get_token() if private else None
            deploy_as_static_space(
                space_id,
                dataset_id,
                project,
                private=private,
                hf_token=token,
            )
            return
        if bucket_id is not None:
            create_bucket_if_not_exists(bucket_id, private=private)
            upload_project_to_bucket_for_static(project, bucket_id)
            print(
                f"* Project data uploaded to bucket: https://huggingface.co/buckets/{bucket_id}"
            )
            deploy_as_static_space(
                space_id,
                None,
                project,
                bucket_id=bucket_id,
                private=private,
                hf_token=huggingface_hub.utils.get_token() if private else None,
            )

    def _sync_gradio() -> None:
        if bucket_id is None:
            sync_incremental(project, space_id, private=private, pending_only=False)
            return
        create_bucket_if_not_exists(bucket_id, private=private)
        upload_project_to_bucket(project, bucket_id)
        print(
            f"* Project data uploaded to bucket: https://huggingface.co/buckets/{bucket_id}"
        )
        create_space_if_not_exists(space_id, bucket_id=bucket_id, private=private)
        remote = Client(space_id, verbose=False, httpx_kwargs={"timeout": 90})
        expected = Counter(
            row["run"] for row in SQLiteStorage.get_all_logs_for_sync(project)
        )
        _wait_for_remote_sync(remote, project, expected)

    def _do_sync():
        _ensure_sdk_matches_existing_space()
        if sdk == "static":
            _sync_static()
        else:
            _sync_gradio()
        SQLiteStorage.set_project_metadata(project, "space_id", space_id)

    if run_in_background:
        threading.Thread(target=_do_sync).start()
    else:
        _do_sync()
    return space_id
895
+
896
+
897
def _get_source_bucket(space_id: str) -> str:
    """
    Returns the source of the bucket volume mounted at `/data` on a Space.

    Args:
        space_id (`str`):
            The ID of the Space whose volumes are inspected.

    Returns:
        `str`: The `source` of the first volume whose type is `"bucket"` and
        whose mount path is `/data`.

    Raises:
        `ValueError`: If none of the Space's volumes is a bucket mounted at
            `/data`.
    """
    for volume in _get_space_volumes(space_id):
        # Skip anything that is not the bucket backing `/data`.
        if volume.type != "bucket" or volume.mount_path != "/data":
            continue
        return volume.source

    message = (
        f"Space '{space_id}' has no bucket mounted at '/data'. "
        f"freeze() requires the source Space to use bucket storage."
    )
    raise ValueError(message)
906
+
907
+
908
def freeze(
    space_id: str,
    project: str,
    new_space_id: str | None = None,
    private: bool | None = None,
    bucket_id: str | None = None,
) -> str:
    """
    Creates a new static Hugging Face Space containing a read-only snapshot of
    the data for the specified project from the source Gradio Space. The data is
    read from the bucket attached to the source Space at freeze time. The original
    Space is not modified, and the new static Space does not automatically reflect
    metrics uploaded to the original Gradio Space after the freeze completes.

    Args:
        space_id (`str`):
            The ID of the source Gradio Space (e.g., `"username/my-space"` or a
            short repo name with the logged-in namespace inferred, like `init()`).
            Must be a Gradio Space with a bucket mounted at `/data`.
        project (`str`):
            The name of the project whose data to include in the frozen Space.
        new_space_id (`str`, *optional*):
            The ID for the new static Space. If not provided, defaults to
            `"{space_id}_static"`.
        private (`bool`, *optional*):
            Whether to make the new Space private. If None (default), the repo
            will be public unless the organization's default is private.
        bucket_id (`str`, *optional*):
            The ID of the HF Bucket for the new static Space's data storage.
            If not provided, one is auto-generated from the new Space ID.

    Returns:
        `str`: The Space ID of the newly created static Space.

    Raises:
        `ValueError`: If the source Space does not exist, is not a Gradio Space,
            or has no bucket mounted at `/data`; or if `new_space_id` already
            exists and is not a Trackio static Space.
    """
    space_id, _, _ = preprocess_space_and_dataset_ids(space_id, None, None)

    # One client is enough for both the source and the destination lookups.
    hf_api = huggingface_hub.HfApi()

    # Keep the `try` limited to the lookup itself so that only a missing repo is
    # translated, and chain the original error so the cause stays visible.
    try:
        info = hf_api.space_info(space_id)
    except RepositoryNotFoundError as err:
        raise ValueError(
            f"Space '{space_id}' not found. Provide an existing Gradio Space ID."
        ) from err
    if info.sdk != "gradio":
        raise ValueError(
            f"Space '{space_id}' is not a Gradio Space (sdk='{info.sdk}'). "
            f"freeze() requires a Gradio Space as the source."
        )

    source_bucket_id = _get_source_bucket(space_id)
    print(f"* Reading project data from bucket: {source_bucket_id}")

    if new_space_id is None:
        new_space_id = f"{space_id}_static"
    new_space_id, _, bucket_id = preprocess_space_and_dataset_ids(
        new_space_id, None, bucket_id
    )

    try:
        dest_info = hf_api.space_info(new_space_id)
    except RepositoryNotFoundError:
        # The destination does not exist yet, so there is nothing to validate.
        pass
    else:
        # Only an existing Trackio static Space may be reused as the target.
        tags = dest_info.tags or []
        if dest_info.sdk != "static" or "trackio" not in tags:
            raise ValueError(
                f"Space '{new_space_id}' already exists and is not a Trackio static Space "
                f"(sdk='{dest_info.sdk}', tags={tags}). Choose a different new_space_id "
                f"or delete the existing Space first."
            )

    create_bucket_if_not_exists(bucket_id, private=private)
    export_from_bucket_for_static(source_bucket_id, bucket_id, project)
    print(
        f"* Project data uploaded to bucket: https://huggingface.co/buckets/{bucket_id}"
    )
    deploy_as_static_space(
        new_space_id,
        None,
        project,
        bucket_id=bucket_id,
        private=private,
        # A token is only passed along when `private` is truthy.
        hf_token=huggingface_hub.utils.get_token() if private else None,
    )
    return new_space_id
trackio/dummy_commit_scheduler.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from concurrent.futures import Future
2
+
3
+
4
class DummyCommitSchedulerLock:
    """
    No-op context manager: entering yields `None` and exiting does nothing, so
    exceptions raised inside the `with` body propagate unchanged.
    """

    def __enter__(self):
        """Enters the context without acquiring anything; yields `None`."""
        return

    def __exit__(self, exception_type, exception_value, exception_traceback):
        """Leaves the context; returns `None`, so exceptions are not suppressed."""
        return None
10
+
11
+
12
class DummyCommitScheduler:
    """
    Scheduler stand-in whose `lock` is a `DummyCommitSchedulerLock` and whose
    `trigger()` returns a future that is already resolved.
    """

    def __init__(self):
        """Attaches a no-op lock as `self.lock`."""
        self.lock = DummyCommitSchedulerLock()

    def trigger(self) -> Future:
        """
        Returns:
            `Future`: A new future that is already completed with result `None`.
        """
        completed: Future = Future()
        completed.set_result(None)
        return completed
trackio/frontend/dist/assets/index-6kGqI2Bm.js ADDED
The diff for this file is too large to render. See raw diff
 
trackio/frontend/dist/assets/index-BjAwVTtr.css ADDED
@@ -0,0 +1 @@
 
 
1
+ :root{--primary-50: #fff7ed;--primary-100: #ffedd5;--primary-200: #fed7aa;--primary-300: #fdba74;--primary-400: #fb923c;--primary-500: #f97316;--primary-600: #ea580c;--primary-700: #c2410c;--primary-800: #9a3412;--primary-900: #7c2d12;--primary-950: #6c2e12;--secondary-50: #eff6ff;--secondary-100: #dbeafe;--secondary-200: #bfdbfe;--secondary-300: #93c5fd;--secondary-400: #60a5fa;--secondary-500: #3b82f6;--secondary-600: #2563eb;--secondary-700: #1d4ed8;--secondary-800: #1e40af;--secondary-900: #1e3a8a;--secondary-950: #1d3660;--neutral-50: #f9fafb;--neutral-100: #f3f4f6;--neutral-200: #e5e7eb;--neutral-300: #d1d5db;--neutral-400: #9ca3af;--neutral-500: #6b7280;--neutral-600: #4b5563;--neutral-700: #374151;--neutral-800: #1f2937;--neutral-900: #111827;--neutral-950: #0b0f19;--size-0-5: 2px;--size-1: 4px;--size-2: 8px;--size-3: 12px;--size-4: 16px;--size-5: 20px;--size-6: 24px;--size-8: 32px;--size-14: 56px;--size-16: 64px;--size-28: 112px;--size-full: 100%;--spacing-xxs: 1px;--spacing-xs: 2px;--spacing-sm: 4px;--spacing-md: 6px;--spacing-lg: 8px;--spacing-xl: 10px;--spacing-xxl: 16px;--radius-xxs: 1px;--radius-xs: 2px;--radius-sm: 3px;--radius-md: 4px;--radius-lg: 5px;--radius-xl: 8px;--radius-xxl: 12px;--text-xxs: 9px;--text-xs: 10px;--text-sm: 12px;--text-md: 14px;--text-lg: 16px;--text-xl: 22px;--text-xxl: 26px;--line-sm: 1.4;--background-fill-primary: white;--background-fill-secondary: var(--neutral-50);--body-text-color: var(--neutral-900);--body-text-color-subdued: var(--neutral-600);--border-color-primary: var(--neutral-200);--color-accent: var(--primary-500);--color-accent-soft: var(--primary-50);--shadow-drop: rgba(0, 0, 0, .05) 0px 1px 2px 0px;--shadow-drop-lg: 0 1px 3px 0 rgb(0 0 0 / .1), 0 1px 2px -1px rgb(0 0 0 / .1);--shadow-inset: rgba(0, 0, 0, .05) 0px 2px 4px 0px inset;--shadow-spread: 3px;--block-title-text-color: var(--neutral-500);--block-title-text-size: var(--text-md);--block-title-text-weight: 400;--block-info-text-color: 
var(--body-text-color-subdued);--block-info-text-size: var(--text-sm);--input-background-fill: white;--input-background-fill-focus: var(--primary-500);--input-border-color: var(--border-color-primary);--input-border-color-focus: var(--primary-300);--input-border-width: 1px;--input-padding: var(--spacing-xl);--input-placeholder-color: var(--neutral-400);--input-radius: var(--radius-lg);--input-shadow: 0 0 0 var(--shadow-spread) transparent, var(--shadow-inset);--input-shadow-focus: 0 0 0 var(--shadow-spread) var(--primary-50), var(--shadow-inset);--input-text-size: var(--text-md);--checkbox-background-color: var(--background-fill-primary);--checkbox-background-color-focus: var(--checkbox-background-color);--checkbox-background-color-hover: var(--checkbox-background-color);--checkbox-background-color-selected: var(--primary-600);--checkbox-border-color: var(--neutral-300);--checkbox-border-color-focus: var(--primary-500);--checkbox-border-color-hover: var(--neutral-300);--checkbox-border-color-selected: var(--primary-600);--checkbox-border-radius: var(--radius-sm);--checkbox-border-width: var(--input-border-width);--checkbox-label-gap: var(--spacing-lg);--checkbox-label-padding: var(--spacing-md) calc(2 * var(--spacing-md));--checkbox-label-text-size: var(--text-md);--checkbox-shadow: var(--input-shadow);--checkbox-check: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3cpath d='M12.207 4.793a1 1 0 010 1.414l-5 5a1 1 0 01-1.414 0l-2-2a1 1 0 011.414-1.414L6.5 9.086l4.293-4.293a1 1 0 011.414 0z'/%3e%3c/svg%3e");--slider-color: var(--primary-500);--container-radius: var(--radius-lg);--layer-top: 9999}.navbar.svelte-d8j1hi{display:flex;align-items:stretch;border-bottom:1px solid var(--border-color-primary, #e5e7eb);background:var(--background-fill-primary, white);padding:0;flex-shrink:0;min-height:44px}.nav-spacer.svelte-d8j1hi{flex:1 1 
0;min-width:0}.nav-tabs.svelte-d8j1hi{display:flex;gap:0;flex-shrink:0;padding-right:8px}.nav-link.svelte-d8j1hi{padding:10px 16px;border:none;background:none;color:var(--body-text-color-subdued, #6b7280);font-size:var(--text-md, 14px);cursor:pointer;white-space:nowrap;border-bottom:2px solid transparent;transition:color .15s;font-weight:400}.nav-link.svelte-d8j1hi:hover{color:var(--body-text-color, #1f2937)}.nav-link.active.svelte-d8j1hi{color:var(--body-text-color, #1f2937);border-bottom-color:var(--body-text-color, #1f2937);font-weight:500}.settings-btn.svelte-d8j1hi{display:flex;align-items:center;gap:6px}.settings-btn.svelte-d8j1hi svg:where(.svelte-d8j1hi){flex-shrink:0}.checkbox-group.svelte-17gmtkf{display:flex;flex-direction:column}.checkbox-item.svelte-17gmtkf{display:flex;align-items:center;gap:8px;padding:3px 0;cursor:pointer;font-size:13px}.checkbox-item.svelte-17gmtkf input[type=checkbox]:where(.svelte-17gmtkf){-moz-appearance:none;appearance:none;-webkit-appearance:none;width:16px;height:16px;margin:0;border:1px solid var(--checkbox-border-color, #d1d5db);border-radius:var(--checkbox-border-radius, 4px);background-color:var(--checkbox-background-color, white);box-shadow:var(--checkbox-shadow);cursor:pointer;flex-shrink:0;transition:background-color .15s,border-color .15s}.checkbox-item.svelte-17gmtkf input[type=checkbox]:where(.svelte-17gmtkf):checked{background-image:var(--checkbox-check);background-color:var(--checkbox-background-color-selected, #f97316);border-color:var(--checkbox-border-color-selected, #f97316)}.checkbox-item.svelte-17gmtkf input[type=checkbox]:where(.svelte-17gmtkf):hover{border-color:var(--checkbox-border-color-hover, #d1d5db)}.color-dot.svelte-17gmtkf{width:10px;height:10px;border-radius:50%;flex-shrink:0}.run-name.svelte-17gmtkf{overflow:hidden;text-overflow:ellipsis;white-space:nowrap;color:var(--body-text-color, 
#1f2937)}.dropdown-container.svelte-kgylqb{width:100%}.label.svelte-kgylqb{display:block;font-size:13px;font-weight:500;color:var(--body-text-color-subdued, #6b7280);margin-bottom:6px}.info.svelte-kgylqb{display:block;font-size:12px;color:var(--body-text-color-subdued, #9ca3af);margin-bottom:4px}.wrap.svelte-kgylqb{position:relative;border-radius:var(--input-radius, 8px);background:var(--input-background-fill, white);border:1px solid var(--border-color-primary, #e5e7eb);transition:border-color .15s,box-shadow .15s}.wrap.focused.svelte-kgylqb{border-color:var(--input-border-color-focus, #fdba74);box-shadow:0 0 0 2px var(--primary-50, #fff7ed)}.wrap-inner.svelte-kgylqb{display:flex;position:relative;align-items:center;padding:0 10px}.secondary-wrap.svelte-kgylqb{display:flex;flex:1;align-items:center}input.svelte-kgylqb{margin:0;outline:none;border:none;background:inherit;width:100%;color:var(--body-text-color, #1f2937);font-size:13px;font-family:inherit;padding:7px 0}input.svelte-kgylqb::placeholder{color:var(--input-placeholder-color, #9ca3af)}input[readonly].svelte-kgylqb{cursor:pointer}.icon-wrap.svelte-kgylqb{color:var(--body-text-color-subdued, #9ca3af);width:16px;flex-shrink:0;pointer-events:none}.options.svelte-kgylqb{position:fixed;z-index:var(--layer-top, 9999);margin:0;padding:4px 0;box-shadow:0 4px 12px #0000001f;border-radius:var(--input-radius, 8px);border:1px solid var(--border-color-primary, #e5e7eb);background:var(--background-fill-primary, white);min-width:fit-content;overflow:auto;color:var(--body-text-color, #1f2937);list-style:none}.item.svelte-kgylqb{display:flex;cursor:pointer;padding:6px 10px;font-size:13px;word-break:break-word}.item.svelte-kgylqb:hover,.item.active.svelte-kgylqb{background:var(--background-fill-secondary, 
#f9fafb)}.item.selected.svelte-kgylqb{font-weight:500}.check-mark.svelte-kgylqb{padding-right:6px;min-width:16px;font-size:12px}.check-mark.hide.svelte-kgylqb{visibility:hidden}.checkbox-container.svelte-oj84db{display:flex;align-items:center;gap:8px;cursor:pointer;margin:8px 0}.label-text.svelte-oj84db{color:var(--body-text-color, #1f2937);font-size:13px;line-height:1.4}input[type=checkbox].svelte-oj84db{--ring-color: transparent;position:relative;-moz-appearance:none;appearance:none;-webkit-appearance:none;width:16px;height:16px;box-shadow:var(--checkbox-shadow);border:1px solid var(--checkbox-border-color, #d1d5db);border-radius:var(--checkbox-border-radius, 4px);background-color:var(--checkbox-background-color, white);flex-shrink:0;cursor:pointer;transition:background-color .15s,border-color .15s}input[type=checkbox].svelte-oj84db:checked,input[type=checkbox].svelte-oj84db:checked:hover,input[type=checkbox].svelte-oj84db:checked:focus{background-image:var(--checkbox-check);background-color:var(--checkbox-background-color-selected, #f97316);border-color:var(--checkbox-border-color-selected, #f97316)}input[type=checkbox].svelte-oj84db:hover{border-color:var(--checkbox-border-color-hover, #d1d5db);background-color:var(--checkbox-background-color-hover, white)}input[type=checkbox].svelte-oj84db:focus{border-color:var(--checkbox-border-color-focus, #f97316);background-color:var(--checkbox-background-color-focus, white);outline:none}.slider-wrap.svelte-wei6ev{display:flex;flex-direction:column;width:100%}.head.svelte-wei6ev{margin-bottom:4px;display:flex;justify-content:space-between;align-items:center;width:100%}.label.svelte-wei6ev{flex:1;font-size:13px;font-weight:500;color:var(--body-text-color-subdued, #6b7280)}.info.svelte-wei6ev{display:block;font-size:12px;color:var(--body-text-color-subdued, 
#9ca3af);margin-bottom:4px}.slider-input-container.svelte-wei6ev{display:flex;align-items:center;gap:6px}input[type=range].svelte-wei6ev{-webkit-appearance:none;-moz-appearance:none;appearance:none;width:100%;cursor:pointer;outline:none;border-radius:var(--radius-xl, 12px);min-width:var(--size-28, 112px);background:transparent}input[type=range].svelte-wei6ev::-webkit-slider-runnable-track{height:6px;border-radius:var(--radius-xl, 12px);background:linear-gradient(to right,var(--slider-color, #f97316) var(--range_progress, 50%),var(--neutral-200, #e5e7eb) var(--range_progress, 50%))}input[type=range].svelte-wei6ev::-webkit-slider-thumb{-webkit-appearance:none;-moz-appearance:none;appearance:none;height:16px;width:16px;background-color:var(--slider-color, #f97316);border:2px solid var(--background-fill-primary, white);border-radius:50%;margin-top:-5px;box-shadow:0 0 0 1px var(--border-color-primary, rgba(0, 0, 0, .08)),0 1px 3px #0003}input[type=range].svelte-wei6ev::-moz-range-track{height:6px;background:var(--neutral-200, #e5e7eb);border-radius:var(--radius-xl, 12px)}input[type=range].svelte-wei6ev::-moz-range-thumb{-webkit-appearance:none;-moz-appearance:none;appearance:none;height:16px;width:16px;background-color:var(--slider-color, #f97316);border:2px solid var(--background-fill-primary, white);border-radius:50%;box-shadow:0 0 0 1px var(--border-color-primary, rgba(0, 0, 0, .08)),0 1px 3px #0003}input[type=range].svelte-wei6ev::-moz-range-progress{height:6px;background-color:var(--slider-color, #f97316);border-radius:var(--radius-xl, 12px)}.bound.svelte-wei6ev{font-size:11px;color:var(--body-text-color-subdued, #9ca3af);min-width:12px;text-align:center}.textbox-container.svelte-6yncpg{width:100%}.label.svelte-6yncpg{display:block;font-size:13px;font-weight:500;color:var(--body-text-color-subdued, #6b7280);margin-bottom:6px}.info.svelte-6yncpg{display:block;font-size:12px;color:var(--body-text-color-subdued, 
#9ca3af);margin-bottom:4px}.input-wrap.svelte-6yncpg{border-radius:var(--input-radius, 8px);background:var(--input-background-fill, white);border:1px solid var(--border-color-primary, #e5e7eb);transition:border-color .15s,box-shadow .15s}.input-wrap.svelte-6yncpg:focus-within{border-color:var(--input-border-color-focus, #fdba74);box-shadow:0 0 0 2px var(--primary-50, #fff7ed)}input.svelte-6yncpg{width:100%;padding:7px 10px;outline:none;border:none;background:transparent;color:var(--body-text-color, #1f2937);font-size:13px;font-family:inherit;border-radius:var(--input-radius, 8px)}input.svelte-6yncpg::placeholder{color:var(--input-placeholder-color, #9ca3af)}.sidebar.svelte-181dlmc{width:290px;min-width:290px;background:var(--background-fill-primary, white);border-right:1px solid var(--border-color-primary, #e5e7eb);display:flex;flex-direction:column;position:relative;overflow:hidden;transition:width .2s,min-width .2s}.sidebar.collapsed.svelte-181dlmc{width:40px;min-width:40px}.toggle-btn.svelte-181dlmc{position:absolute;top:12px;right:8px;z-index:10;border:none;background:none;color:var(--body-text-color-subdued, #9ca3af);cursor:pointer;padding:4px;display:flex;align-items:center;justify-content:center;border-radius:var(--radius-sm, 4px);transition:color .15s,background-color .15s}.toggle-btn.svelte-181dlmc:hover{color:var(--body-text-color, #1f2937);background-color:var(--background-fill-secondary, #f9fafb)}.sidebar-content.svelte-181dlmc{padding:16px;flex:1;min-height:0;display:flex;flex-direction:column}.sidebar-scroll.svelte-181dlmc{overflow-y:auto;flex:1;min-height:0}.oauth-footer.svelte-181dlmc{flex-shrink:0;margin-top:12px;padding-top:12px;border-top:1px solid var(--border-color-primary, #e5e7eb)}.readonly-footer.svelte-181dlmc{flex-shrink:0;margin-top:12px;padding-top:12px;border-top:1px solid var(--border-color-primary, 
#e5e7eb);display:flex;align-items:center;gap:8px;flex-wrap:wrap}.readonly-badge.svelte-181dlmc{display:inline-flex;align-items:center;border:1px solid var(--border-color-primary, #e5e7eb);border-radius:999px;padding:2px 8px;font-size:10px;letter-spacing:.06em;font-weight:600;color:var(--body-text-color-subdued, #6b7280);background:var(--background-fill-secondary, #f9fafb)}.readonly-link.svelte-181dlmc{font-size:12px;color:var(--body-text-color-subdued, #6b7280);text-decoration:none;max-width:100%;overflow-wrap:anywhere}.readonly-link.svelte-181dlmc:hover{color:var(--body-text-color, #1f2937);text-decoration:underline}.oauth-line.svelte-181dlmc{margin:0;font-size:12px;line-height:1.4;color:var(--body-text-color-subdued, #6b7280)}.oauth-warn.svelte-181dlmc{color:var(--body-text-color, #92400e)}.hf-login-btn.svelte-181dlmc{display:inline-flex;align-items:center;justify-content:center;gap:8px;width:100%;padding:8px 12px;font-size:13px;font-weight:600;color:#fff;background:#141c2e;border-radius:var(--radius-lg, 8px);text-decoration:none;border:none;cursor:pointer;box-sizing:border-box}.hf-login-btn.svelte-181dlmc:hover{background:#283042}.hf-logo.svelte-181dlmc{width:20px;height:20px;flex-shrink:0}.oauth-hint.svelte-181dlmc{margin:8px 0 0;font-size:11px;line-height:1.35;color:var(--body-text-color-subdued, #9ca3af)}.oauth-signed-in.svelte-181dlmc{margin:0;font-size:12px;color:var(--body-text-color-subdued, #6b7280)}.oauth-logout.svelte-181dlmc{font-size:12px;color:var(--body-text-color-subdued, #9ca3af);text-decoration:none;cursor:pointer}.oauth-logout.svelte-181dlmc:hover{text-decoration:underline;color:var(--body-text-color, #1f2937)}.logo-section.svelte-181dlmc{margin-bottom:20px}.logo.svelte-181dlmc{width:80%;max-width:200px}.section.svelte-181dlmc{margin-bottom:18px}.section-label.svelte-181dlmc{font-size:13px;font-weight:500;color:var(--body-text-color-subdued, 
#6b7280)}.locked-project.svelte-181dlmc{margin-top:4px;font-size:13px;font-weight:500;color:var(--body-text-color, #1f2937);padding:8px 10px;border:1px solid var(--border-color-primary, #e5e7eb);border-radius:var(--radius-md, 6px);background:var(--background-fill-secondary, #f9fafb)}.runs-header.svelte-181dlmc{display:flex;align-items:center;justify-content:space-between;margin-bottom:6px}.select-all-label.svelte-181dlmc{display:flex;align-items:center;gap:6px;cursor:pointer}.select-all-cb.svelte-181dlmc{-moz-appearance:none;appearance:none;-webkit-appearance:none;width:16px;height:16px;border:1px solid var(--checkbox-border-color, #d1d5db);border-radius:var(--checkbox-border-radius, 4px);background-color:var(--checkbox-background-color, white);cursor:pointer;flex-shrink:0;position:relative;transition:background-color .15s,border-color .15s}.select-all-cb.svelte-181dlmc:checked{background-color:var(--checkbox-background-color-selected, var(--color-accent, #f97316));border-color:var(--checkbox-background-color-selected, var(--color-accent, #f97316));background-image:var(--checkbox-check)}.select-all-cb.svelte-181dlmc:indeterminate{background-color:var(--checkbox-background-color-selected, var(--color-accent, #f97316));border-color:var(--checkbox-background-color-selected, var(--color-accent, #f97316));background-image:url("data:image/svg+xml,%3Csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3E%3Crect x='3' y='7' width='10' height='2' rx='1'/%3E%3C/svg%3E");background-size:12px;background-position:center;background-repeat:no-repeat}.latest-toggle.svelte-181dlmc{display:flex;align-items:center;gap:6px;font-size:12px;color:var(--body-text-color-subdued, #6b7280);cursor:pointer}.latest-toggle.svelte-181dlmc input[type=checkbox]:where(.svelte-181dlmc){-moz-appearance:none;appearance:none;-webkit-appearance:none;width:16px;height:16px;margin:0;border:1px solid var(--checkbox-border-color, #d1d5db);border-radius:var(--checkbox-border-radius, 
4px);background-color:var(--checkbox-background-color, white);box-shadow:var(--checkbox-shadow);cursor:pointer;flex-shrink:0;transition:background-color .15s,border-color .15s}.latest-toggle.svelte-181dlmc input[type=checkbox]:where(.svelte-181dlmc):checked{background-image:var(--checkbox-check);background-color:var(--checkbox-background-color-selected, #f97316);border-color:var(--checkbox-border-color-selected, #f97316)}.checkbox-list.svelte-181dlmc{max-height:300px;overflow-y:auto;margin-top:8px}.alert-panel.svelte-x5aqew{position:fixed;bottom:16px;right:16px;width:380px;max-height:400px;background:var(--background-fill-primary, white);border:1px solid var(--border-color-primary, #e5e7eb);border-radius:var(--radius-lg, 8px);box-shadow:var(--shadow-drop-lg);z-index:1000;overflow:hidden;display:flex;flex-direction:column}.alert-panel.collapsed.svelte-x5aqew{max-height:none}.alert-header.svelte-x5aqew{padding:10px 12px;border:none;border-bottom:1px solid var(--border-color-primary, #e5e7eb);background:none;width:100%;display:flex;align-items:center;justify-content:space-between;cursor:pointer;gap:8px}.alert-panel.collapsed.svelte-x5aqew .alert-header:where(.svelte-x5aqew){border-bottom:none}.collapse-icon.svelte-x5aqew{color:var(--body-text-color-subdued, #9ca3af);flex-shrink:0;transition:transform .15s}.collapse-icon.rotated.svelte-x5aqew{transform:rotate(-90deg)}.alert-title.svelte-x5aqew{font-size:13px;font-weight:600;color:var(--body-text-color, #1f2937)}.filter-pills.svelte-x5aqew{display:flex;gap:4px}.pill.svelte-x5aqew{border:1px solid var(--border-color-primary, #e5e7eb);border-radius:var(--radius-xxl, 22px);padding:2px 8px;font-size:11px;background:var(--background-fill-secondary, #f9fafb);color:var(--body-text-color-subdued, #6b7280);cursor:pointer}.pill.active.svelte-x5aqew{background:var(--color-accent, #f97316);color:#fff;border-color:var(--color-accent, 
#f97316)}.alert-list.svelte-x5aqew{overflow-y:auto;flex:1}.alert-item.svelte-x5aqew{border-bottom:1px solid var(--neutral-100, #f3f4f6)}.alert-row.svelte-x5aqew{display:flex;align-items:center;gap:8px;width:100%;padding:8px 12px;border:none;background:none;text-align:left;cursor:pointer;font-size:var(--text-sm, 12px)}.alert-row.svelte-x5aqew:hover{background:var(--background-fill-secondary, #f9fafb)}.alert-text.svelte-x5aqew{flex:1;color:var(--body-text-color, #1f2937)}.alert-meta.svelte-x5aqew{font-size:var(--text-xs, 10px);color:var(--body-text-color-subdued, #9ca3af);white-space:nowrap}.alert-detail.svelte-x5aqew{padding:4px 12px 8px 32px;font-size:var(--text-sm, 12px);color:var(--body-text-color-subdued, #6b7280)}.plot-container.svelte-9thu1j{min-width:350px;flex:1;background:var(--background-fill-primary, white);border:1px solid var(--border-color-primary, #e5e7eb);border-radius:var(--radius-lg, 8px);padding:12px;overflow:hidden;position:relative}.plot-container[draggable=true].svelte-9thu1j{cursor:grab}.plot-container[draggable=true].svelte-9thu1j:active{cursor:grabbing}.hidden-plot.svelte-9thu1j{visibility:hidden;height:0;padding:0;margin:0;border:none;overflow:hidden;pointer-events:none}.drag-handle.svelte-9thu1j{position:absolute;top:8px;left:8px;color:var(--body-text-color-subdued, #9ca3af);opacity:0;transition:opacity .15s;z-index:5}.plot-container.svelte-9thu1j:hover .drag-handle:where(.svelte-9thu1j){opacity:.5}.drag-handle.svelte-9thu1j:hover{opacity:1!important}.plot-toolbar.svelte-9thu1j{position:absolute;top:8px;right:8px;display:flex;gap:4px;z-index:5;opacity:0;transition:opacity .15s}.plot-container.svelte-9thu1j:hover .plot-toolbar:where(.svelte-9thu1j){opacity:1}.toolbar-btn.svelte-9thu1j{border:1px solid var(--border-color-primary, #e5e7eb);background:var(--background-fill-primary, white);color:var(--body-text-color-subdued, #6b7280);cursor:pointer;padding:4px 6px;border-radius:var(--radius-sm, 
4px);display:flex;align-items:center;justify-content:center}.toolbar-btn.svelte-9thu1j:hover{background:var(--neutral-100, #f3f4f6);color:var(--body-text-color, #1f2937)}.plot-chart-wrap.svelte-9thu1j{position:relative;width:100%}.plot-chart-wrap--fs.svelte-9thu1j{flex:1;min-height:0;display:flex;flex-direction:column}.reset-zoom-btn.svelte-9thu1j{position:absolute;bottom:1px;right:1px;z-index:6;display:inline-flex;align-items:center;justify-content:center;margin:0;min-width:52px;padding:5px 12px 5px 10px;border:none;border-radius:4px;background:transparent;color:var(--body-text-color-subdued, #334155);cursor:pointer;opacity:.92;transform:translateY(6px);transition:opacity .15s ease,color .15s ease,background .15s ease;box-shadow:none}.reset-zoom-btn.svelte-9thu1j:hover{opacity:1;color:var(--body-text-color, #0f172a);background:var(--background-fill-secondary, rgba(226, 232, 240, .85));transform:translateY(6px)}.reset-zoom-btn.svelte-9thu1j svg:where(.svelte-9thu1j){display:block;flex-shrink:0;filter:drop-shadow(0 0 .5px rgba(255,255,255,.95))}.plot.svelte-9thu1j{width:100%}.plot.svelte-9thu1j .vega-embed{width:100%!important}.plot.svelte-9thu1j .vega-embed summary{display:none}.fullscreen-host.svelte-9thu1j{position:fixed;top:0;right:0;bottom:0;left:0;z-index:10000;box-sizing:border-box;display:flex;flex-direction:column;background:var(--background-fill-primary, white);padding:12px;gap:8px;pointer-events:auto}.fullscreen-host.svelte-9thu1j:fullscreen{width:100%;height:100%}.fullscreen-host.svelte-9thu1j:-webkit-full-screen{width:100%;height:100%}.fullscreen-toolbar.svelte-9thu1j{flex-shrink:0;display:flex;justify-content:flex-end;gap:4px;z-index:5}.fullscreen-chart-wrap.svelte-9thu1j{flex:1;min-height:0;display:flex;flex-direction:column}.fullscreen-legend.svelte-9thu1j{flex-shrink:0}.fullscreen-plot.svelte-9thu1j{flex:1;min-height:0;width:100%;overflow:hidden}.fullscreen-plot.svelte-9thu1j 
.vega-embed{width:100%!important;height:100%!important;min-height:0;display:flex;flex-direction:column}.fullscreen-plot.svelte-9thu1j .vega-embed .vega-view{flex:1;min-height:0}.fullscreen-plot.svelte-9thu1j .vega-embed summary{display:none}.custom-legend.svelte-9thu1j{display:flex;align-items:center;justify-content:center;gap:12px;padding:6px 0 0;flex-wrap:wrap}.legend-title.svelte-9thu1j{font-size:11px;color:var(--body-text-color-subdued, #6b7280);font-weight:600}.legend-item.svelte-9thu1j{display:flex;align-items:center;gap:4px}.legend-dot.svelte-9thu1j{width:10px;height:10px;border-radius:50%;flex-shrink:0}.legend-label.svelte-9thu1j{font-size:11px;color:var(--body-text-color-subdued, #6b7280)}.plot-container.svelte-1swghqy{min-width:350px;flex:1;background:var(--background-fill-primary, white);border:1px solid var(--border-color-primary, #e5e7eb);border-radius:var(--radius-lg, 8px);padding:12px;overflow:hidden;position:relative}.plot-container[draggable=true].svelte-1swghqy{cursor:grab}.plot-container[draggable=true].svelte-1swghqy:active{cursor:grabbing}.hidden-plot.svelte-1swghqy{visibility:hidden;height:0;padding:0;margin:0;border:none;overflow:hidden;pointer-events:none}.drag-handle.svelte-1swghqy{position:absolute;top:8px;left:8px;color:var(--body-text-color-subdued, #9ca3af);opacity:0;transition:opacity .15s;z-index:5}.plot-container.svelte-1swghqy:hover .drag-handle:where(.svelte-1swghqy){opacity:.5}.drag-handle.svelte-1swghqy:hover{opacity:1!important}.plot-toolbar.svelte-1swghqy{position:absolute;top:8px;right:8px;display:flex;gap:4px;z-index:5;opacity:0;transition:opacity .15s}.plot-container.svelte-1swghqy:hover .plot-toolbar:where(.svelte-1swghqy){opacity:1}.toolbar-btn.svelte-1swghqy{border:1px solid var(--border-color-primary, #e5e7eb);background:var(--background-fill-primary, white);color:var(--body-text-color-subdued, #6b7280);cursor:pointer;padding:4px 6px;border-radius:var(--radius-sm, 
4px);display:flex;align-items:center;justify-content:center}.toolbar-btn.svelte-1swghqy:hover{background:var(--neutral-100, #f3f4f6);color:var(--body-text-color, #1f2937)}.plot-chart-wrap.svelte-1swghqy{position:relative;width:100%}.plot-chart-wrap--fs.svelte-1swghqy{flex:1;min-height:0;display:flex;flex-direction:column}.plot.svelte-1swghqy{width:100%}.plot.svelte-1swghqy .vega-embed{width:100%!important}.plot.svelte-1swghqy .vega-embed summary{display:none}.fullscreen-host.svelte-1swghqy{position:fixed;top:0;right:0;bottom:0;left:0;z-index:10000;box-sizing:border-box;display:flex;flex-direction:column;background:var(--background-fill-primary, white);padding:12px;gap:8px;pointer-events:auto}.fullscreen-host.svelte-1swghqy:fullscreen{width:100%;height:100%}.fullscreen-host.svelte-1swghqy:-webkit-full-screen{width:100%;height:100%}.fullscreen-toolbar.svelte-1swghqy{flex-shrink:0;display:flex;justify-content:flex-end;gap:4px;z-index:5}.fullscreen-chart-wrap.svelte-1swghqy{flex:1;min-height:0;display:flex;flex-direction:column}.fullscreen-legend.svelte-1swghqy{flex-shrink:0}.fullscreen-plot.svelte-1swghqy{flex:1;min-height:0;width:100%;overflow:hidden}.fullscreen-plot.svelte-1swghqy .vega-embed{width:100%!important;height:100%!important;min-height:0;display:flex;flex-direction:column}.fullscreen-plot.svelte-1swghqy .vega-embed .vega-view{flex:1;min-height:0}.fullscreen-plot.svelte-1swghqy .vega-embed summary{display:none}.custom-legend.svelte-1swghqy{display:flex;align-items:center;justify-content:center;gap:12px;padding:6px 0 0;flex-wrap:wrap}.legend-title.svelte-1swghqy{font-size:11px;color:var(--body-text-color-subdued, #6b7280);font-weight:600}.legend-item.svelte-1swghqy{display:flex;align-items:center;gap:4px}.legend-dot.svelte-1swghqy{width:10px;height:10px;border-radius:50%;flex-shrink:0}.legend-label.svelte-1swghqy{font-size:11px;color:var(--body-text-color-subdued, #6b7280)}.accordion.svelte-1jep0a{margin-bottom:12px;border:1px solid 
var(--border-color-primary, #e5e7eb);border-radius:var(--radius-lg, 8px);background:var(--background-fill-primary, white);overflow:hidden}.accordion-hidden.svelte-1jep0a{margin-bottom:8px}.accordion-header.svelte-1jep0a{display:flex;align-items:center;gap:8px;width:100%;padding:10px 14px;border:none;background:var(--background-fill-primary, white);color:var(--body-text-color, #1f2937);font-size:var(--text-md, 14px);font-weight:600;cursor:pointer;text-align:left}.accordion-header.svelte-1jep0a:hover{background:var(--background-fill-secondary, #f9fafb)}.arrow.svelte-1jep0a{font-size:14px;transition:transform .15s;color:var(--body-text-color, #1f2937);display:inline-block}.arrow.svelte-1jep0a:not(.rotated){transform:rotate(-90deg)}.accordion-body.svelte-1jep0a{padding:0 14px 14px}.trackio-loading.svelte-1kc6b2l{display:flex;align-items:center;justify-content:center;width:100%;min-height:min(70vh,640px);padding:32px 24px;box-sizing:border-box;background:transparent}.logo-stack.svelte-1kc6b2l{position:relative;width:min(100%,200px);max-width:min(92vw,200px);line-height:0;background:transparent;isolation:isolate}.logo-base.svelte-1kc6b2l{display:block;background:transparent}.logo-img.svelte-1kc6b2l{width:100%;height:auto;display:block;background:transparent}.logo-overlay.svelte-1kc6b2l{position:absolute;left:0;top:0;width:100%;animation:svelte-1kc6b2l-trackio-logo-sweep 4s linear infinite;pointer-events:none;background:transparent}.logo-overlay.svelte-1kc6b2l .logo-img:where(.svelte-1kc6b2l){width:100%;height:auto;object-position:left center}.logo-img--gray.svelte-1kc6b2l{filter:grayscale(1)}@keyframes svelte-1kc6b2l-trackio-logo-sweep{0%{clip-path:inset(0 0 0 0)}50%{clip-path:inset(0 0 0 100%)}to{clip-path:inset(0 0 0 
0)}}@media(prefers-reduced-motion:reduce){.logo-overlay.svelte-1kc6b2l{display:none}}.sr-only.svelte-1kc6b2l{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0,0,0,0);white-space:nowrap;border:0}.metrics-page.svelte-2bul55{padding:20px 24px;overflow-y:auto;flex:1;min-height:0}.plot-grid.svelte-2bul55{display:flex;flex-wrap:wrap;gap:16px}.subgroup-list.svelte-2bul55{margin-top:16px}.empty-state.svelte-2bul55{max-width:640px;padding:40px 24px;color:var(--body-text-color, #1f2937)}.empty-state.svelte-2bul55 h2:where(.svelte-2bul55){margin:0 0 8px;font-size:20px;font-weight:700}.empty-state.svelte-2bul55 p:where(.svelte-2bul55){margin:12px 0 8px;color:var(--body-text-color-subdued, #6b7280)}.empty-state.svelte-2bul55 pre:where(.svelte-2bul55){background:var(--background-fill-secondary, #f9fafb);padding:16px;border-radius:var(--radius-lg, 8px);border:1px solid var(--border-color-primary, #e5e7eb);font-size:13px;overflow-x:auto}.empty-state.svelte-2bul55 code:where(.svelte-2bul55){background:var(--background-fill-secondary, #f0f0f0);padding:1px 5px;border-radius:var(--radius-sm, 4px);font-size:13px}.empty-state.svelte-2bul55 pre:where(.svelte-2bul55) code:where(.svelte-2bul55){background:none;padding:0}.system-page.svelte-nv5os4{padding:20px 24px;overflow-y:auto;flex:1;min-height:0}.plot-grid.svelte-nv5os4{display:flex;flex-wrap:wrap;gap:12px}.subgroup-list.svelte-nv5os4{margin-top:16px}.empty-state.svelte-nv5os4{max-width:640px;padding:40px 24px;color:var(--body-text-color, #1f2937)}.empty-state.svelte-nv5os4 h2:where(.svelte-nv5os4){margin:0 0 8px;font-size:20px;font-weight:700}.empty-state.svelte-nv5os4 p:where(.svelte-nv5os4){margin:12px 0 8px;color:var(--body-text-color-subdued, #6b7280)}.empty-state.svelte-nv5os4 pre:where(.svelte-nv5os4){background:var(--background-fill-secondary, #f9fafb);padding:16px;border-radius:var(--radius-lg, 8px);border:1px solid var(--border-color-primary, 
#e5e7eb);font-size:13px;overflow-x:auto}.empty-state.svelte-nv5os4 ul:where(.svelte-nv5os4){list-style:disc;padding-left:20px;margin:4px 0 0}.empty-state.svelte-nv5os4 li:where(.svelte-nv5os4){margin:4px 0;color:var(--body-text-color, #1f2937)}.empty-state.svelte-nv5os4 code:where(.svelte-nv5os4){background:var(--background-fill-secondary, #f0f0f0);padding:1px 5px;border-radius:var(--radius-sm, 4px);font-size:13px}.empty-state.svelte-nv5os4 pre:where(.svelte-nv5os4) code:where(.svelte-nv5os4){background:none;padding:0}.table-container.svelte-1cp60rw{display:flex;flex-direction:column;gap:var(--size-2, 8px);position:relative}.header-row.svelte-1cp60rw{display:flex;justify-content:flex-end;align-items:center;min-height:var(--size-6, 24px);width:100%}.header-row.svelte-1cp60rw .label:where(.svelte-1cp60rw){flex:1;margin:0;color:var(--block-label-text-color, var(--neutral-500, #6b7280));font-size:var(--block-label-text-size, 12px);line-height:var(--line-sm, 1.4)}.table-wrap.svelte-1cp60rw{position:relative;overflow:auto;border:1px solid var(--border-color-primary, #e5e7eb);border-radius:var(--table-radius, var(--radius-lg, 8px))}table.svelte-1cp60rw{width:100%;table-layout:auto;color:var(--body-text-color, #1f2937);font-size:var(--input-text-size, 14px);line-height:var(--line-sm, 1.4);border-spacing:0;border-collapse:separate}thead.svelte-1cp60rw{position:sticky;top:0;z-index:5;box-shadow:var(--shadow-drop, rgba(0,0,0,.05) 0px 1px 2px 0px)}th.svelte-1cp60rw{padding:0;background:var(--table-even-background-fill, white);border-right-width:0px;border-left-width:1px;border-bottom-width:1px;border-style:solid;border-color:var(--border-color-primary, #e5e7eb);text-align:left;cursor:pointer;-webkit-user-select:none;user-select:none}th.first.svelte-1cp60rw{border-left-width:0;border-top-left-radius:var(--table-radius, var(--radius-lg, 8px))}th.last.svelte-1cp60rw{border-top-right-radius:var(--table-radius, var(--radius-lg, 8px))}.th-inner.svelte-1cp60rw{padding:var(--size-2, 
8px);display:flex;align-items:center;gap:4px;font-weight:600;font-size:var(--text-sm, 12px);white-space:nowrap}.sort-arrow.svelte-1cp60rw{font-size:10px;color:var(--body-text-color-subdued, #9ca3af);visibility:hidden}.sort-arrow.visible.svelte-1cp60rw{visibility:visible}td.svelte-1cp60rw{padding:var(--size-2, 8px);border-right-width:0px;border-left-width:1px;border-bottom-width:1px;border-style:solid;border-color:var(--border-color-primary, #e5e7eb);font-size:var(--text-sm, 12px)}td.first.svelte-1cp60rw{border-left-width:0}tr.svelte-1cp60rw{background:var(--table-even-background-fill, white);border-bottom:1px solid var(--border-color-primary, #e5e7eb);text-align:left}tr.row-odd.svelte-1cp60rw{background:var(--table-odd-background-fill, var(--neutral-50, #f9fafb))}tr.selected.svelte-1cp60rw{background:var(--color-accent-soft, var(--primary-50, #fff7ed))}tr.svelte-1cp60rw:last-child td.first:where(.svelte-1cp60rw){border-bottom-left-radius:var(--table-radius, var(--radius-lg, 8px))}tr.svelte-1cp60rw:last-child td.last:where(.svelte-1cp60rw){border-bottom-right-radius:var(--table-radius, var(--radius-lg, 8px))}.check-col.svelte-1cp60rw{width:40px;text-align:center;padding:var(--size-2, 8px);border-left-width:0}.check-col.svelte-1cp60rw input[type=checkbox]:where(.svelte-1cp60rw){-moz-appearance:none;appearance:none;-webkit-appearance:none;width:16px;height:16px;border:1px solid var(--checkbox-border-color, #d1d5db);border-radius:var(--checkbox-border-radius, 4px);background-color:var(--checkbox-background-color, white);cursor:pointer;flex-shrink:0;transition:background-color .15s,border-color .15s}.check-col.svelte-1cp60rw input[type=checkbox]:where(.svelte-1cp60rw):checked{background-image:var(--checkbox-check);background-color:var(--checkbox-background-color-selected, #2563eb);border-color:var(--checkbox-border-color-selected, #2563eb)}.media-page.svelte-outb32{padding:20px 24px;overflow-y:auto;flex:1}.section-title.svelte-outb32{font-size:var(--text-lg, 
16px);font-weight:600;color:var(--body-text-color, #1f2937);margin:16px 0 8px}.gallery.svelte-outb32{display:grid;grid-template-columns:repeat(auto-fill,minmax(200px,1fr));gap:12px}.gallery-item.svelte-outb32{border:1px solid var(--border-color-primary, #e5e7eb);border-radius:var(--radius-lg, 8px);overflow:hidden;background:var(--background-fill-secondary, #f9fafb)}.gallery-item.svelte-outb32 img:where(.svelte-outb32),.gallery-item.svelte-outb32 video:where(.svelte-outb32){width:100%;display:block}.caption.svelte-outb32{padding:4px 8px;font-size:var(--text-sm, 12px);color:var(--body-text-color-subdued, #9ca3af)}.step-label.svelte-outb32{padding:4px 8px;font-size:var(--text-xs, 10px);color:var(--body-text-color-subdued, #9ca3af)}.audio-list.svelte-outb32{display:flex;flex-direction:column;gap:8px}.audio-item.svelte-outb32{display:flex;align-items:center;gap:12px;padding:8px;border:1px solid var(--border-color-primary, #e5e7eb);border-radius:var(--radius-lg, 8px)}.audio-label.svelte-outb32{font-size:var(--text-sm, 12px);color:var(--body-text-color-subdued, #9ca3af);min-width:120px}.table-section.svelte-outb32{margin-bottom:16px}.empty-state.svelte-outb32{max-width:640px;padding:40px 24px;color:var(--body-text-color, #1f2937)}.empty-state.svelte-outb32 h2:where(.svelte-outb32){margin:0 0 8px;font-size:20px;font-weight:700}.empty-state.svelte-outb32 p:where(.svelte-outb32){margin:12px 0 8px;color:var(--body-text-color-subdued, #6b7280)}.empty-state.svelte-outb32 pre:where(.svelte-outb32){background:var(--background-fill-secondary, #f9fafb);padding:16px;border-radius:var(--radius-lg, 8px);border:1px solid var(--border-color-primary, #e5e7eb);font-size:13px;overflow-x:auto}.empty-state.svelte-outb32 code:where(.svelte-outb32){background:var(--background-fill-secondary, #f0f0f0);padding:1px 5px;border-radius:var(--radius-sm, 4px);font-size:13px}.empty-state.svelte-outb32 pre:where(.svelte-outb32) 
code:where(.svelte-outb32){background:none;padding:0}.reports-page.svelte-iufsej{padding:20px 24px;overflow-y:auto;flex:1}.controls.svelte-iufsej{display:flex;gap:16px;margin-bottom:16px;flex-wrap:wrap;align-items:flex-end}.control.svelte-iufsej{min-width:200px}.filter-pills.svelte-iufsej{display:flex;gap:4px}.pill.svelte-iufsej{border:1px solid var(--border-color-primary, #e5e7eb);border-radius:var(--radius-xxl, 22px);padding:4px 12px;font-size:var(--text-sm, 12px);background:var(--background-fill-secondary, #f9fafb);color:var(--body-text-color-subdued, #6b7280);cursor:pointer;transition:background-color .15s,color .15s}.pill.svelte-iufsej:hover{background:var(--neutral-100, #f3f4f6)}.pill.active.svelte-iufsej{background:var(--color-accent, #f97316);color:#fff;border-color:var(--color-accent, #f97316)}.empty-state.svelte-iufsej{max-width:640px;padding:40px 24px;color:var(--body-text-color, #1f2937)}.empty-state.svelte-iufsej h2:where(.svelte-iufsej){margin:0 0 8px;font-size:20px;font-weight:700}.empty-state.svelte-iufsej p:where(.svelte-iufsej){margin:12px 0 8px;color:var(--body-text-color-subdued, #6b7280)}.empty-state.svelte-iufsej pre:where(.svelte-iufsej){background:var(--background-fill-secondary, #f9fafb);padding:16px;border-radius:var(--radius-lg, 8px);border:1px solid var(--border-color-primary, #e5e7eb);font-size:13px;overflow-x:auto}.empty-state.svelte-iufsej code:where(.svelte-iufsej){background:var(--background-fill-secondary, #f0f0f0);padding:1px 5px;border-radius:var(--radius-sm, 4px);font-size:13px}.empty-state.svelte-iufsej pre:where(.svelte-iufsej) code:where(.svelte-iufsej){background:none;padding:0}.alerts-table.svelte-iufsej{width:100%;border-collapse:collapse;font-size:var(--text-md, 14px)}.alerts-table.svelte-iufsej th:where(.svelte-iufsej){text-align:left;padding:8px 12px;border-bottom:2px solid var(--border-color-primary, #e5e7eb);color:var(--body-text-color-subdued, #6b7280);font-weight:600;font-size:var(--text-sm, 
12px);text-transform:uppercase;letter-spacing:.05em}.alerts-table.svelte-iufsej td:where(.svelte-iufsej){padding:8px 12px;border-bottom:1px solid var(--border-color-primary, #e5e7eb);color:var(--body-text-color, #1f2937)}.alerts-table.svelte-iufsej tbody:where(.svelte-iufsej) tr:where(.svelte-iufsej):nth-child(odd){background:var(--table-odd-background-fill, var(--background-fill-primary, white))}.alerts-table.svelte-iufsej tbody:where(.svelte-iufsej) tr:where(.svelte-iufsej):nth-child(2n){background:var(--table-even-background-fill, var(--background-fill-secondary, #f9fafb))}.alerts-table.svelte-iufsej tr:where(.svelte-iufsej):hover{background:var(--background-fill-secondary, #f3f4f6)}.section-title.svelte-iufsej{font-size:16px;font-weight:700;margin:0 0 12px;color:var(--body-text-color, #1f2937)}.reports-section.svelte-iufsej,.alerts-section.svelte-iufsej{margin-bottom:32px}.report-card.svelte-iufsej{border:1px solid var(--border-color-primary, #e5e7eb);border-radius:var(--radius-lg, 8px);padding:16px 20px;margin-bottom:12px;background:var(--background-fill-primary, white)}.report-meta.svelte-iufsej{font-size:var(--text-sm, 12px);color:var(--body-text-color-subdued, #6b7280);margin-bottom:8px}.report-content.svelte-iufsej{font-size:var(--text-md, 14px);color:var(--body-text-color, #1f2937);line-height:1.6}.report-content.svelte-iufsej h2{font-size:18px;font-weight:700;margin:0 0 8px}.report-content.svelte-iufsej h3{font-size:16px;font-weight:600;margin:12px 0 6px}.report-content.svelte-iufsej h4{font-size:14px;font-weight:600;margin:10px 0 4px}.report-content.svelte-iufsej code{background:var(--background-fill-secondary, #f0f0f0);padding:1px 5px;border-radius:var(--radius-sm, 4px);font-size:13px}.report-content.svelte-iufsej ul{margin:4px 0;padding-left:20px}.report-content.svelte-iufsej li{margin:2px 0}.report-content.svelte-iufsej p{margin:6px 0}.filter-empty.svelte-iufsej{color:var(--body-text-color-subdued, #6b7280);font-size:var(--text-md, 
14px)}.runs-page.svelte-1yb6d54{padding:20px 24px;overflow-y:auto;flex:1}.empty-state.svelte-1yb6d54{max-width:640px;padding:40px 24px;color:var(--body-text-color, #1f2937)}.empty-state.svelte-1yb6d54 h2:where(.svelte-1yb6d54){margin:0 0 8px;font-size:20px;font-weight:700}.empty-state.svelte-1yb6d54 p:where(.svelte-1yb6d54){margin:12px 0 8px;color:var(--body-text-color-subdued, #6b7280)}.empty-state.svelte-1yb6d54 pre:where(.svelte-1yb6d54){background:var(--background-fill-secondary, #f9fafb);padding:16px;border-radius:var(--radius-lg, 8px);border:1px solid var(--border-color-primary, #e5e7eb);font-size:13px;overflow-x:auto}.empty-state.svelte-1yb6d54 code:where(.svelte-1yb6d54){background:var(--background-fill-secondary, #f0f0f0);padding:1px 5px;border-radius:var(--radius-sm, 4px);font-size:13px}.empty-state.svelte-1yb6d54 pre:where(.svelte-1yb6d54) code:where(.svelte-1yb6d54){background:none;padding:0}.runs-table.svelte-1yb6d54{width:100%;border-collapse:collapse;font-size:var(--text-md, 14px)}.runs-table.svelte-1yb6d54 th:where(.svelte-1yb6d54){text-align:left;padding:8px 12px;border-bottom:2px solid var(--border-color-primary, #e5e7eb);color:var(--body-text-color-subdued, #6b7280);font-weight:600;font-size:var(--text-sm, 12px);text-transform:uppercase;letter-spacing:.05em}.runs-table.svelte-1yb6d54 td:where(.svelte-1yb6d54){padding:8px 12px;border-bottom:1px solid var(--border-color-primary, #e5e7eb);color:var(--body-text-color, #1f2937)}.runs-table.svelte-1yb6d54 tbody:where(.svelte-1yb6d54) tr:where(.svelte-1yb6d54):nth-child(odd){background:var(--table-odd-background-fill, var(--background-fill-primary, white))}.runs-table.svelte-1yb6d54 tbody:where(.svelte-1yb6d54) tr:where(.svelte-1yb6d54):nth-child(2n){background:var(--table-even-background-fill, var(--background-fill-secondary, #f9fafb))}.runs-table.svelte-1yb6d54 tr:where(.svelte-1yb6d54):hover{background:var(--background-fill-secondary, 
#f3f4f6)}.run-name-cell.svelte-1yb6d54{font-weight:500}.run-name-with-dot.svelte-1yb6d54{display:inline-flex;align-items:center;gap:8px;max-width:100%}.run-dot.svelte-1yb6d54{width:10px;height:10px;border-radius:50%;flex-shrink:0}.link-btn.svelte-1yb6d54{background:none;border:none;color:var(--color-accent, #f97316);cursor:pointer;font:inherit;font-weight:500;padding:0;text-align:left}.link-btn.svelte-1yb6d54:hover{text-decoration:underline}.rename-input.svelte-1yb6d54{font:inherit;padding:2px 6px;border:1px solid var(--color-accent, #f97316);border-radius:var(--radius-sm, 4px);outline:none;width:100%}.actions-cell.svelte-1yb6d54{display:flex;gap:4px}.action-btn.svelte-1yb6d54{background:none;border:1px solid transparent;color:var(--body-text-color-subdued, #6b7280);cursor:pointer;padding:4px;border-radius:var(--radius-sm, 4px);display:flex;align-items:center}.action-btn.svelte-1yb6d54:hover{background:var(--background-fill-secondary, #f9fafb);border-color:var(--border-color-primary, #e5e7eb);color:var(--body-text-color, #1f2937)}.delete-btn.svelte-1yb6d54:hover{color:#dc2626;border-color:#fecaca;background:#fef2f2}.action-btn.svelte-1yb6d54:disabled{opacity:.45;cursor:not-allowed;pointer-events:none}.run-detail-page.svelte-1bpgsx2{padding:20px 24px;overflow-y:auto;flex:1}.detail-card.svelte-1bpgsx2{background:var(--background-fill-primary, white);border:1px solid var(--border-color-primary, #e5e7eb);border-radius:var(--radius-lg, 8px);padding:24px;max-width:800px}.detail-card.svelte-1bpgsx2 h2:where(.svelte-1bpgsx2){color:var(--body-text-color, #1f2937);margin:0 0 16px;font-size:var(--text-xl, 22px)}.detail-card.svelte-1bpgsx2 h3:where(.svelte-1bpgsx2){color:var(--body-text-color, #1f2937);margin:20px 0 8px;font-size:var(--text-lg, 
16px)}.detail-grid.svelte-1bpgsx2{display:grid;grid-template-columns:repeat(auto-fill,minmax(200px,1fr));gap:12px}.detail-item.svelte-1bpgsx2{display:flex;flex-direction:column;gap:2px}.detail-label.svelte-1bpgsx2{font-size:var(--text-xs, 10px);font-weight:600;color:var(--body-text-color-subdued, #9ca3af);text-transform:uppercase}.detail-value.svelte-1bpgsx2{font-size:var(--text-md, 14px);color:var(--body-text-color, #1f2937)}.config-block.svelte-1bpgsx2{background:var(--background-fill-secondary, #f9fafb);padding:12px;border-radius:var(--radius-lg, 8px);border:1px solid var(--border-color-primary, #e5e7eb);font-size:var(--text-sm, 12px);color:var(--body-text-color, #1f2937);overflow-x:auto}.empty-state.svelte-1bpgsx2{max-width:640px;padding:40px 24px;color:var(--body-text-color, #1f2937)}.empty-state.svelte-1bpgsx2 h2:where(.svelte-1bpgsx2){margin:0 0 8px;font-size:20px;font-weight:700}.empty-state.svelte-1bpgsx2 p:where(.svelte-1bpgsx2){margin:12px 0 8px;color:var(--body-text-color-subdued, #6b7280)}.empty-state.svelte-1bpgsx2 pre:where(.svelte-1bpgsx2){background:var(--background-fill-secondary, #f9fafb);padding:16px;border-radius:var(--radius-lg, 8px);border:1px solid var(--border-color-primary, #e5e7eb);font-size:13px;overflow-x:auto}.empty-state.svelte-1bpgsx2 code:where(.svelte-1bpgsx2){background:var(--background-fill-secondary, #f0f0f0);padding:1px 5px;border-radius:var(--radius-sm, 4px);font-size:13px}.empty-state.svelte-1bpgsx2 pre:where(.svelte-1bpgsx2) code:where(.svelte-1bpgsx2){background:none;padding:0}.files-page.svelte-1xvfk9n{padding:20px 24px;overflow-y:auto;flex:1}.page-title.svelte-1xvfk9n{color:var(--body-text-color, #1f2937);font-size:16px;font-weight:700;margin:0 0 4px}.page-subtitle.svelte-1xvfk9n{color:var(--body-text-color-subdued, #6b7280);font-size:var(--text-sm, 12px);margin:0 0 16px}.file-list.svelte-1xvfk9n{display:flex;flex-direction:column;gap:4px}.file-item.svelte-1xvfk9n{border:1px solid var(--border-color-primary, 
#e5e7eb);border-radius:var(--radius-lg, 8px);background:var(--background-fill-primary, white);overflow:hidden}.file-item.expanded.svelte-1xvfk9n{border-color:var(--color-accent, #f97316)}.file-row.svelte-1xvfk9n{display:flex;align-items:center;justify-content:space-between;padding:10px 14px;gap:12px}.file-name.svelte-1xvfk9n{display:flex;align-items:center;gap:8px;background:none;border:none;padding:0;font-size:var(--text-md, 14px);color:var(--body-text-color, #1f2937);cursor:pointer;text-align:left}.file-name.svelte-1xvfk9n:hover{color:var(--color-accent, #f97316)}.file-icon.svelte-1xvfk9n{font-size:14px;flex-shrink:0}.file-actions.svelte-1xvfk9n{display:flex;align-items:center;gap:12px;flex-shrink:0}.file-size.svelte-1xvfk9n{font-size:var(--text-sm, 12px);color:var(--body-text-color-subdued, #6b7280);white-space:nowrap}.download-btn.svelte-1xvfk9n{display:flex;align-items:center;justify-content:center;width:28px;height:28px;border-radius:var(--radius-md, 6px);color:var(--body-text-color-subdued, #6b7280);transition:background-color .15s,color .15s}.download-btn.svelte-1xvfk9n:hover{background:var(--background-fill-secondary, #f3f4f6);color:var(--body-text-color, #1f2937)}.file-preview.svelte-1xvfk9n{border-top:1px solid var(--border-color-primary, #e5e7eb);padding:12px 14px;background:var(--background-fill-secondary, #f9fafb)}.preview-code.svelte-1xvfk9n{margin:0;font-size:12px;line-height:1.5;max-height:400px;overflow:auto;white-space:pre-wrap;word-break:break-all;color:var(--body-text-color, #1f2937)}.preview-loading.svelte-1xvfk9n,.preview-unavailable.svelte-1xvfk9n{color:var(--body-text-color-subdued, #6b7280);font-size:var(--text-sm, 12px);padding:8px 0}.preview-unavailable.svelte-1xvfk9n a:where(.svelte-1xvfk9n){color:var(--color-accent, #f97316);text-decoration:none}.preview-unavailable.svelte-1xvfk9n a:where(.svelte-1xvfk9n):hover{text-decoration:underline}.empty-state.svelte-1xvfk9n{max-width:640px;padding:40px 24px;color:var(--body-text-color, 
#1f2937)}.empty-state.svelte-1xvfk9n h2:where(.svelte-1xvfk9n){margin:0 0 8px;font-size:20px;font-weight:700}.empty-state.svelte-1xvfk9n p:where(.svelte-1xvfk9n){margin:12px 0 8px;color:var(--body-text-color-subdued, #6b7280)}.empty-state.svelte-1xvfk9n pre:where(.svelte-1xvfk9n){background:var(--background-fill-secondary, #f9fafb);padding:16px;border-radius:var(--radius-lg, 8px);border:1px solid var(--border-color-primary, #e5e7eb);font-size:13px;overflow-x:auto}.empty-state.svelte-1xvfk9n code:where(.svelte-1xvfk9n){background:var(--background-fill-secondary, #f0f0f0);padding:1px 5px;border-radius:var(--radius-sm, 4px);font-size:13px}.empty-state.svelte-1xvfk9n pre:where(.svelte-1xvfk9n) code:where(.svelte-1xvfk9n){background:none;padding:0}.settings-page.svelte-1ozf5k3{padding:24px 32px;overflow-y:auto;flex:1}.page-title.svelte-1ozf5k3{color:var(--body-text-color, #1f2937);font-size:18px;font-weight:700;margin:0 0 24px}.two-col.svelte-1ozf5k3{display:grid;grid-template-columns:minmax(0,1fr) minmax(0,1fr);gap:32px;align-items:start}@media(max-width:900px){.two-col.svelte-1ozf5k3{grid-template-columns:1fr}}.settings-section.svelte-1ozf5k3{margin-bottom:32px}.section-title.svelte-1ozf5k3{color:var(--body-text-color, #1f2937);font-size:15px;font-weight:600;margin:0 0 4px}.section-desc.svelte-1ozf5k3{color:var(--body-text-color-subdued, #6b7280);font-size:var(--text-sm, 12px);margin:0 0 12px;line-height:1.5}.section-desc.svelte-1ozf5k3 code:where(.svelte-1ozf5k3){background:var(--background-fill-secondary, #f3f4f6);padding:1px 5px;border-radius:var(--radius-sm, 3px);font-size:11px}.section-desc.svelte-1ozf5k3 strong:where(.svelte-1ozf5k3){color:var(--color-accent, #f97316)}.theme-switcher.svelte-1ozf5k3{display:inline-flex;border:1px solid var(--border-color-primary, #e5e7eb);border-radius:var(--radius-lg, 8px);overflow:hidden}.theme-option.svelte-1ozf5k3{display:inline-flex;align-items:center;gap:6px;padding:8px 
20px;border:none;background:var(--background-fill-primary, white);color:var(--body-text-color-subdued, #6b7280);font-size:var(--text-md, 14px);cursor:pointer;transition:all .15s;border-right:1px solid var(--border-color-primary, #e5e7eb)}.theme-option.svelte-1ozf5k3:last-child{border-right:none}.theme-option.svelte-1ozf5k3:hover{color:var(--body-text-color, #1f2937);background:var(--background-fill-secondary, #f9fafb)}.theme-option.selected.svelte-1ozf5k3{background:var(--color-accent, #f97316);color:#fff;font-weight:500}.project-selector.svelte-1ozf5k3{display:flex;align-items:center;gap:10px;margin-bottom:12px}.selector-label.svelte-1ozf5k3{font-size:var(--text-sm, 12px);color:var(--body-text-color-subdued, #6b7280);flex-shrink:0}.selector-select.svelte-1ozf5k3{padding:6px 10px;border:1px solid var(--border-color-primary, #e5e7eb);border-radius:var(--radius-md, 4px);background:var(--background-fill-primary, white);color:var(--body-text-color, #1f2937);font-size:var(--text-sm, 12px);min-width:160px;cursor:pointer}.selector-select.svelte-1ozf5k3:focus{outline:none;border-color:var(--color-accent, #f97316)}.commands-table.svelte-1ozf5k3{border:1px solid var(--border-color-primary, #e5e7eb);border-radius:var(--radius-lg, 8px);overflow:hidden}.command-row.svelte-1ozf5k3{display:flex;align-items:center;gap:16px;padding:10px 14px;border-bottom:1px solid var(--border-color-primary, #e5e7eb)}.command-row.svelte-1ozf5k3:last-child{border-bottom:none}.command-label.svelte-1ozf5k3{width:180px;flex-shrink:0;font-size:var(--text-sm, 12px);color:var(--body-text-color-subdued, #6b7280)}.command-value.svelte-1ozf5k3{flex:1;display:flex;align-items:center;gap:8px;min-width:0}.command-value.svelte-1ozf5k3 code:where(.svelte-1ozf5k3){flex:1;font-family:SFMono-Regular,Consolas,Liberation Mono,Menlo,monospace;font-size:12px;color:var(--body-text-color, 
#1f2937);white-space:nowrap;overflow:hidden;text-overflow:ellipsis}.copy-btn.svelte-1ozf5k3{display:flex;align-items:center;justify-content:center;width:26px;height:26px;flex-shrink:0;border:none;background:none;border-radius:var(--radius-md, 4px);color:var(--body-text-color-subdued, #6b7280);cursor:pointer;transition:background-color .15s,color .15s}.copy-btn.svelte-1ozf5k3:hover{background:var(--background-fill-secondary, #f3f4f6);color:var(--body-text-color, #1f2937)}.copy-btn.copied.svelte-1ozf5k3{color:var(--color-accent, #f97316)}.agent-tabs.svelte-1ozf5k3{display:flex;border-bottom:1px solid var(--border-color-primary, #e5e7eb);gap:0;margin-bottom:0}.agent-tab.svelte-1ozf5k3{padding:8px 16px;border:none;background:none;color:var(--body-text-color-subdued, #6b7280);font-size:var(--text-sm, 12px);cursor:pointer;border-bottom:2px solid transparent;transition:all .15s;white-space:nowrap}.agent-tab.svelte-1ozf5k3:hover{color:var(--body-text-color, #1f2937)}.agent-tab.active.svelte-1ozf5k3{color:var(--color-accent, #f97316);border-bottom-color:var(--color-accent, #f97316);font-weight:500}.agent-panel.svelte-1ozf5k3{border:1px solid var(--border-color-primary, #e5e7eb);border-top:none;border-radius:0 0 var(--radius-lg, 8px) var(--radius-lg, 8px);padding:16px}.install-block.svelte-1ozf5k3{margin-bottom:16px}.install-label.svelte-1ozf5k3{display:block;font-size:11px;font-weight:500;color:var(--body-text-color-subdued, #6b7280);text-transform:uppercase;letter-spacing:.04em;margin-bottom:6px}.install-cmd.svelte-1ozf5k3{display:flex;align-items:center;gap:8px;background:var(--background-fill-secondary, #f3f4f6);border-radius:var(--radius-md, 4px);padding:8px 10px}.install-cmd.svelte-1ozf5k3 code:where(.svelte-1ozf5k3){flex:1;font-family:SFMono-Regular,Consolas,Liberation Mono,Menlo,monospace;font-size:12px;color:var(--body-text-color, 
#1f2937);white-space:nowrap;overflow:hidden;text-overflow:ellipsis}.example-block.svelte-1ozf5k3{background:var(--background-fill-secondary, #f9fafb);border:1px solid var(--border-color-primary, #e5e7eb);border-radius:var(--radius-md, 4px);padding:12px}.example-header.svelte-1ozf5k3{display:flex;align-items:center;justify-content:space-between;margin-bottom:8px}.example-label.svelte-1ozf5k3{font-size:11px;font-weight:500;color:var(--body-text-color-subdued, #6b7280);text-transform:uppercase;letter-spacing:.04em}.example-text.svelte-1ozf5k3{margin:0;font-size:var(--text-sm, 12px);color:var(--body-text-color, #1f2937);line-height:1.6;font-style:italic}*{margin:0;padding:0;box-sizing:border-box}body{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,sans-serif;background:var(--background-fill-primary, #fff);color:var(--body-text-color, #1f2937);font-size:var(--text-md, 14px);-webkit-font-smoothing:antialiased}.app.svelte-1n46o8q{display:flex;height:100vh;overflow:hidden}.main.svelte-1n46o8q{flex:1;display:flex;flex-direction:column;overflow:hidden;min-width:0}.page-content.svelte-1n46o8q{flex:1;overflow:hidden;display:flex;background:var(--bg-primary)}
trackio/frontend/dist/index.html ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Trackio Dashboard</title>
7
+ <link rel="icon" type="image/png" href="/static/trackio/trackio_logo_light.png" />
8
+ <script type="module" crossorigin src="/assets/index-6kGqI2Bm.js"></script>
9
+ <link rel="stylesheet" crossorigin href="/assets/index-BjAwVTtr.css">
10
+ </head>
11
+ <body>
12
+ <div id="app"></div>
13
+ </body>
14
+ </html>
trackio/frontend/eslint.config.js ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import js from "@eslint/js";
2
+ import svelte from "eslint-plugin-svelte";
3
+ import svelteParser from "svelte-eslint-parser";
4
+ import globals from "globals";
5
+
6
+ export default [
7
+ { ignores: ["dist/**", "node_modules/**"] },
8
+ {
9
+ files: ["**/*.js"],
10
+ languageOptions: {
11
+ globals: {
12
+ ...globals.browser,
13
+ ...globals.es2021,
14
+ $state: "readonly",
15
+ $derived: "readonly",
16
+ $effect: "readonly",
17
+ $props: "readonly",
18
+ $bindable: "readonly",
19
+ $inspect: "readonly",
20
+ },
21
+ },
22
+ rules: {
23
+ ...js.configs.recommended.rules,
24
+ "no-unused-vars": ["error", { argsIgnorePattern: "^_" }],
25
+ "no-empty": "off",
26
+ },
27
+ },
28
+ {
29
+ files: ["**/*.svelte"],
30
+ languageOptions: {
31
+ parser: svelteParser,
32
+ globals: { ...globals.browser, ...globals.es2021 },
33
+ },
34
+ plugins: { svelte },
35
+ rules: {
36
+ ...js.configs.recommended.rules,
37
+ ...svelte.configs.recommended.rules,
38
+ "no-unused-vars": ["error", { argsIgnorePattern: "^_", varsIgnorePattern: "^\\$" }],
39
+ "no-empty": "off",
40
+ },
41
+ },
42
+ ];
trackio/frontend/index.html ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Trackio Dashboard</title>
7
+ <link rel="icon" type="image/png" href="/static/trackio/trackio_logo_light.png" />
8
+ </head>
9
+ <body>
10
+ <div id="app"></div>
11
+ <script type="module" src="/src/main.js"></script>
12
+ </body>
13
+ </html>
trackio/frontend_server.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Serves the built Svelte frontend alongside the Gradio API."""
2
+
3
+ import logging
4
+ import re
5
+ from pathlib import Path
6
+
7
+ from starlette.responses import HTMLResponse
8
+ from starlette.routing import Mount, Route
9
+ from starlette.staticfiles import StaticFiles
10
+
11
+ FRONTEND_DIR = Path(__file__).parent / "frontend" / "dist"
12
+ ASSETS_DIR = Path(__file__).parent / "assets"
13
+
14
+ _logger = logging.getLogger(__name__)
15
+
16
+ _SPA_SEGMENTS = (
17
+ "metrics",
18
+ "system",
19
+ "media",
20
+ "reports",
21
+ "runs",
22
+ "run",
23
+ "files",
24
+ "settings",
25
+ )
26
+
27
+
28
def mount_frontend(app):
    """
    Mount the built Svelte dashboard onto an existing Starlette-style app.

    If the build directory or its ``index.html`` is missing, a warning is
    logged and the app is left untouched. Otherwise the function registers,
    at the front of ``app.routes``:

    - ``/static/trackio``: static files from ``ASSETS_DIR``
    - ``/assets/app``: the Vite build output in ``FRONTEND_DIR / "assets"``
    - ``/`` plus ``/<segment>`` and ``/<segment>/`` for every entry in
      ``_SPA_SEGMENTS``: all serve the same patched ``index.html``

    Args:
        app: Application object exposing a mutable ``routes`` list.
    """
    if not FRONTEND_DIR.exists():
        _logger.warning(
            "Trackio dashboard UI was not mounted: %s is missing. "
            "Build the frontend with `npm ci && npm run build` in trackio/frontend.",
            FRONTEND_DIR,
        )
        return

    index_html_path = FRONTEND_DIR / "index.html"
    if not index_html_path.exists():
        _logger.warning(
            "Trackio dashboard UI was not mounted: %s is missing.",
            index_html_path,
        )
        return

    # The built page declares <meta charset="UTF-8">, so decode it as UTF-8
    # explicitly instead of relying on the platform's locale encoding.
    index_html_content = index_html_path.read_text(encoding="utf-8")

    # The build references its bundles as /assets/index-<hash>.*, but they are
    # mounted under /assets/app below, so rewrite those URLs once up front.
    patched_html = re.sub(
        r'/assets/(index-[^"]+)',
        r"/assets/app/\1",
        index_html_content,
    )

    async def serve_frontend(request):
        # Every SPA route returns the same pre-patched document.
        return HTMLResponse(patched_html)

    vite_assets = StaticFiles(directory=str(FRONTEND_DIR / "assets"))
    static_assets = StaticFiles(directory=str(ASSETS_DIR))

    # Routes are inserted at index 0 so they are matched before any routes
    # already registered on the app.
    app.routes.insert(0, Mount("/static/trackio", app=static_assets))
    app.routes.insert(0, Mount("/assets/app", app=vite_assets))

    for seg in reversed(_SPA_SEGMENTS):
        app.routes.insert(0, Route(f"/{seg}/", serve_frontend, methods=["GET"]))
        app.routes.insert(0, Route(f"/{seg}", serve_frontend, methods=["GET"]))
    app.routes.insert(0, Route("/", serve_frontend, methods=["GET"]))
trackio/gpu.py ADDED
@@ -0,0 +1,381 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import threading
3
+ import warnings
4
+ from typing import TYPE_CHECKING, Any
5
+
6
+ if TYPE_CHECKING:
7
+ from trackio.run import Run
8
+
9
+ pynvml: Any = None
10
+ PYNVML_AVAILABLE = False
11
+ _nvml_initialized = False
12
+ _nvml_lock = threading.Lock()
13
+ _energy_baseline: dict[int, float] = {}
14
+
15
+
16
def _ensure_pynvml():
    """
    Import ``pynvml`` on first use and cache it in the module-level global.

    Returns:
        The imported ``pynvml`` module.

    Raises:
        ImportError: If ``pynvml`` cannot be imported. The original import
            failure is attached as the cause.
    """
    global PYNVML_AVAILABLE, pynvml
    if PYNVML_AVAILABLE:
        return pynvml
    # Keep the try body to the import alone so only a failed import is
    # translated into the installation hint.
    try:
        import pynvml as _pynvml
    except ImportError as err:
        raise ImportError(
            "nvidia-ml-py is required for GPU monitoring. "
            "Install it with: pip install nvidia-ml-py"
        ) from err

    pynvml = _pynvml
    PYNVML_AVAILABLE = True
    return pynvml
31
+
32
+
33
def _init_nvml() -> bool:
    """
    Initialize NVML once, guarded by ``_nvml_lock``.

    Returns:
        True if NVML was already initialized or ``nvmlInit()`` succeeded;
        False if importing ``pynvml`` or calling ``nvmlInit()`` raised.
    """
    global _nvml_initialized
    with _nvml_lock:
        if not _nvml_initialized:
            try:
                _ensure_pynvml().nvmlInit()
            except Exception:
                # Best effort: any failure simply means "no GPU monitoring".
                return False
            _nvml_initialized = True
        return True
45
+
46
+
47
def get_gpu_count() -> tuple[int, list[int]]:
    """
    Report the GPUs visible to this process, honoring CUDA_VISIBLE_DEVICES.

    When CUDA_VISIBLE_DEVICES is set to a non-blank, comma-separated list of
    integers, those integers are returned as-is. If it is unset, blank, or
    contains a non-integer entry, every device reported by NVML is returned.

    Returns:
        Tuple of (count, physical_indices) where:
            - count: Number of visible GPUs
            - physical_indices: List mapping logical index to physical GPU
              index. e.g., CUDA_VISIBLE_DEVICES=2,3 yields (2, [2, 3]):
              logical GPU 0 = physical GPU 2, logical GPU 1 = physical GPU 3.
        Returns (0, []) if NVML cannot be initialized or queried.
    """
    if not _init_nvml():
        return 0, []

    env_value = os.environ.get("CUDA_VISIBLE_DEVICES", "")
    if env_value.strip():
        tokens = (tok.strip() for tok in env_value.split(","))
        try:
            physical = [int(tok) for tok in tokens if tok]
        except ValueError:
            # Not a plain integer list; fall back to NVML below.
            physical = None
        if physical is not None:
            return len(physical), physical

    try:
        device_total = pynvml.nvmlDeviceGetCount()
        return device_total, list(range(device_total))
    except Exception:
        return 0, []
75
+
76
+
77
def get_all_gpu_count() -> tuple[int, list[int]]:
    """
    Report every GPU NVML can see, without consulting CUDA_VISIBLE_DEVICES.

    Returns:
        Tuple of (count, physical_indices) for ALL GPUs on the machine,
        e.g. (4, [0, 1, 2, 3]) on a 4-GPU machine. Returns (0, []) if NVML
        cannot be initialized or the device count query raises.
    """
    if _init_nvml():
        try:
            device_total = pynvml.nvmlDeviceGetCount()
            return device_total, list(range(device_total))
        except Exception:
            pass
    return 0, []
94
+
95
+
96
def gpu_available() -> bool:
    """
    Report whether GPU monitoring can be used.

    Returns `True` only if nvidia-ml-py can be imported and `get_gpu_count()`
    reports at least one GPU. Any failure along the way (missing package, NVML
    error, ...) is treated as "not available" rather than raised, which makes
    this safe to call for auto-detection of GPU logging.
    """
    try:
        _ensure_pynvml()
        visible_count, _indices = get_gpu_count()
        return visible_count > 0
    except Exception:
        # ImportError from a missing nvidia-ml-py lands here as well.
        return False
111
+
112
+
113
def reset_energy_baseline():
    """
    Forget the recorded per-GPU energy baselines.

    Called when a new run starts: the module-level `_energy_baseline` mapping is
    replaced with a fresh empty dict.
    """
    global _energy_baseline
    _energy_baseline = dict()
117
+
118
+
119
def collect_gpu_metrics(device: int | None = None, all_gpus: bool = False) -> dict:
    """
    Collect GPU metrics for visible GPUs.

    Every NVML query is attempted independently: a query that raises (for
    example because the GPU does not support it) is skipped and the remaining
    metrics are still collected. A GPU whose handle cannot be obtained is
    skipped entirely.

    Args:
        device: CUDA device index to collect metrics from. If None, collects
            from all GPUs visible to this process (respects CUDA_VISIBLE_DEVICES).
            The device index is the logical CUDA index (0, 1, 2...), not the
            physical GPU index.
        all_gpus: If True and device is None, collect metrics for ALL physical GPUs
            on the machine, ignoring CUDA_VISIBLE_DEVICES. Used by GpuMonitor
            to report system-wide GPU metrics in distributed training.

    Returns:
        Dictionary of GPU metrics. Keys use device indices (gpu/0/, gpu/1/, etc.).
        Aggregate keys `gpu/mean_utilization`, `gpu/total_allocated_memory`,
        `gpu/total_power` and `gpu/max_temp` are added when the corresponding
        per-device values were collected and are non-zero (the mean utilization
        only requires at least one successful utilization read). Returns an
        empty dict if NVML is unavailable, no GPU is found, or `device` is out
        of range.
    """
    if not _init_nvml():
        return {}

    if all_gpus and device is None:
        gpu_count, visible_gpus = get_all_gpu_count()
    else:
        gpu_count, visible_gpus = get_gpu_count()
    if gpu_count == 0:
        return {}

    if device is not None:
        if device < 0 or device >= gpu_count:
            return {}
        gpu_indices = [(device, visible_gpus[device])]
    else:
        gpu_indices = list(enumerate(visible_gpus))

    metrics = {}
    total_util = 0.0
    total_mem_used_gib = 0.0
    total_power = 0.0
    max_temp = 0.0
    valid_util_count = 0

    def record(key, getter):
        """Store `getter()` under `key`; leave `metrics` untouched if it raises."""
        try:
            metrics[key] = getter()
        except Exception:
            pass

    for logical_idx, physical_idx in gpu_indices:
        prefix = f"gpu/{logical_idx}"
        try:
            handle = pynvml.nvmlDeviceGetHandleByIndex(physical_idx)
        except Exception:
            continue

        try:
            util = pynvml.nvmlDeviceGetUtilizationRates(handle)
            metrics[f"{prefix}/utilization"] = util.gpu
            metrics[f"{prefix}/memory_utilization"] = util.memory
            total_util += util.gpu
            valid_util_count += 1
        except Exception:
            pass

        try:
            mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
            mem_used_gib = mem.used / (1024**3)
            mem_total_gib = mem.total / (1024**3)
            metrics[f"{prefix}/allocated_memory"] = mem_used_gib
            metrics[f"{prefix}/total_memory"] = mem_total_gib
            if mem.total > 0:
                metrics[f"{prefix}/memory_usage"] = mem.used / mem.total
            total_mem_used_gib += mem_used_gib
        except Exception:
            pass

        try:
            power_mw = pynvml.nvmlDeviceGetPowerUsage(handle)
            power_w = power_mw / 1000.0
            metrics[f"{prefix}/power"] = power_w
            total_power += power_w
        except Exception:
            pass

        try:
            power_limit_mw = pynvml.nvmlDeviceGetPowerManagementLimit(handle)
            power_limit_w = power_limit_mw / 1000.0
            metrics[f"{prefix}/power_limit"] = power_limit_w
            # The percentage is only meaningful if the power draw was read too.
            if power_limit_w > 0 and f"{prefix}/power" in metrics:
                metrics[f"{prefix}/power_percent"] = (
                    metrics[f"{prefix}/power"] / power_limit_w
                ) * 100
        except Exception:
            pass

        try:
            temp = pynvml.nvmlDeviceGetTemperature(
                handle, pynvml.NVML_TEMPERATURE_GPU
            )
            metrics[f"{prefix}/temp"] = temp
            max_temp = max(max_temp, temp)
        except Exception:
            pass

        # Constants are looked up inside the lambdas so that a pynvml build
        # lacking one of them only drops that single metric.
        record(
            f"{prefix}/sm_clock",
            lambda: pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_SM),
        )
        record(
            f"{prefix}/memory_clock",
            lambda: pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_MEM),
        )
        record(f"{prefix}/fan_speed", lambda: pynvml.nvmlDeviceGetFanSpeed(handle))
        record(
            f"{prefix}/performance_state",
            lambda: pynvml.nvmlDeviceGetPerformanceState(handle),
        )

        try:
            energy_mj = pynvml.nvmlDeviceGetTotalEnergyConsumption(handle)
            # Key the baseline by the PHYSICAL index: the same logical index
            # refers to different GPUs depending on whether CUDA_VISIBLE_DEVICES
            # is honoured (log_gpu) or ignored (GpuMonitor), and a baseline taken
            # from one GPU must never be subtracted from another GPU's counter.
            baseline_mj = _energy_baseline.setdefault(physical_idx, energy_mj)
            metrics[f"{prefix}/energy_consumed"] = (energy_mj - baseline_mj) / 1000.0
        except Exception:
            pass

        try:
            pcie_tx = pynvml.nvmlDeviceGetPcieThroughput(
                handle, pynvml.NVML_PCIE_UTIL_TX_BYTES
            )
            pcie_rx = pynvml.nvmlDeviceGetPcieThroughput(
                handle, pynvml.NVML_PCIE_UTIL_RX_BYTES
            )
            metrics[f"{prefix}/pcie_tx"] = pcie_tx / 1024.0
            metrics[f"{prefix}/pcie_rx"] = pcie_rx / 1024.0
        except Exception:
            pass

        try:
            throttle = pynvml.nvmlDeviceGetCurrentClocksThrottleReasons(handle)
            metrics[f"{prefix}/throttle_thermal"] = int(
                bool(throttle & pynvml.nvmlClocksThrottleReasonSwThermalSlowdown)
            )
            metrics[f"{prefix}/throttle_power"] = int(
                bool(throttle & pynvml.nvmlClocksThrottleReasonSwPowerCap)
            )
            metrics[f"{prefix}/throttle_hw_slowdown"] = int(
                bool(throttle & pynvml.nvmlClocksThrottleReasonHwSlowdown)
            )
            metrics[f"{prefix}/throttle_apps"] = int(
                bool(
                    throttle
                    & pynvml.nvmlClocksThrottleReasonApplicationsClocksSetting
                )
            )
        except Exception:
            pass

        record(
            f"{prefix}/corrected_memory_errors",
            lambda: pynvml.nvmlDeviceGetTotalEccErrors(
                handle,
                pynvml.NVML_MEMORY_ERROR_TYPE_CORRECTED,
                pynvml.NVML_VOLATILE_ECC,
            ),
        )
        record(
            f"{prefix}/uncorrected_memory_errors",
            lambda: pynvml.nvmlDeviceGetTotalEccErrors(
                handle,
                pynvml.NVML_MEMORY_ERROR_TYPE_UNCORRECTED,
                pynvml.NVML_VOLATILE_ECC,
            ),
        )

    if valid_util_count > 0:
        metrics["gpu/mean_utilization"] = total_util / valid_util_count
    if total_mem_used_gib > 0:
        metrics["gpu/total_allocated_memory"] = total_mem_used_gib
    if total_power > 0:
        metrics["gpu/total_power"] = total_power
    if max_temp > 0:
        metrics["gpu/max_temp"] = max_temp

    return metrics
311
+
312
+
313
class GpuMonitor:
    """
    Background sampler that periodically logs system-wide GPU metrics to a run.

    Args:
        run: The run whose `log_system()` receives each metrics snapshot.
        interval: Seconds to wait between two samples.
    """

    def __init__(self, run: "Run", interval: float = 10.0):
        self._run = run
        self._interval = interval
        self._stop_flag = threading.Event()
        self._thread: "threading.Thread | None" = None

    def start(self):
        """Start the daemon sampling thread, or warn and do nothing without GPUs."""
        gpu_total, _indices = get_all_gpu_count()
        if gpu_total == 0:
            warnings.warn(
                "auto_log_gpu=True but no NVIDIA GPUs detected. GPU logging disabled."
            )
            return

        reset_energy_baseline()
        worker = threading.Thread(target=self._monitor_loop, daemon=True)
        self._thread = worker
        worker.start()

    def stop(self):
        """Signal the sampling thread to exit and wait up to 2 seconds for it."""
        self._stop_flag.set()
        worker = self._thread
        if worker is None:
            return
        worker.join(timeout=2.0)

    def _monitor_loop(self):
        """Sample and log metrics until the stop flag is set."""
        while not self._stop_flag.is_set():
            try:
                snapshot = collect_gpu_metrics(all_gpus=True)
                if snapshot:
                    self._run.log_system(snapshot)
            except Exception:
                # Best effort: a failed sample must not kill the monitor thread.
                pass

            # Sleeps for the interval but wakes up immediately on stop().
            self._stop_flag.wait(timeout=self._interval)
347
+
348
+
349
def log_gpu(run: "Run | None" = None, device: int | None = None) -> dict:
    """
    Collect GPU metrics and log them to a run as system metrics.

    Args:
        run: Optional Run instance. If None, the current run is taken from
            `context_vars.current_run`.
        device: CUDA device index to collect metrics from. If None, collects
            from all GPUs visible to this process (respects CUDA_VISIBLE_DEVICES).

    Returns:
        dict: The GPU metrics that were collected. Nothing is logged when this
        dict is empty.

    Raises:
        RuntimeError: If no run was passed and there is no current run.

    Example:
        ```python
        import trackio

        run = trackio.init(project="my-project")
        trackio.log({"loss": 0.5})
        trackio.log_gpu()  # logs all visible GPUs
        trackio.log_gpu(device=0)  # logs only CUDA device 0
        ```
    """
    from trackio import context_vars

    target_run = run if run is not None else context_vars.current_run.get()
    if target_run is None:
        raise RuntimeError("Call trackio.init() before trackio.log_gpu().")

    gpu_metrics = collect_gpu_metrics(device=device)
    if gpu_metrics:
        target_run.log_system(gpu_metrics)
    return gpu_metrics
trackio/histogram.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Sequence
2
+
3
+ import numpy as np
4
+
5
+
6
+ class Histogram:
7
+ """
8
+ Histogram data type for Trackio, compatible with wandb.Histogram.
9
+
10
+ Args:
11
+ sequence (`np.ndarray` or `Sequence[float]` or `Sequence[int]`, *optional*):
12
+ Sequence of values to create the histogram from.
13
+ np_histogram (`tuple`, *optional*):
14
+ Pre-computed NumPy histogram as a `(hist, bins)` tuple.
15
+ num_bins (`int`, *optional*, defaults to `64`):
16
+ Number of bins for the histogram (maximum `512`).
17
+
18
+ Example:
19
+ ```python
20
+ import trackio
21
+ import numpy as np
22
+
23
+ # Create histogram from sequence
24
+ data = np.random.randn(1000)
25
+ trackio.log({"distribution": trackio.Histogram(data)})
26
+
27
+ # Create histogram from numpy histogram
28
+ hist, bins = np.histogram(data, bins=30)
29
+ trackio.log({"distribution": trackio.Histogram(np_histogram=(hist, bins))})
30
+
31
+ # Specify custom number of bins
32
+ trackio.log({"distribution": trackio.Histogram(data, num_bins=50)})
33
+ ```
34
+ """
35
+
36
+ TYPE = "trackio.histogram"
37
+
38
+ def __init__(
39
+ self,
40
+ sequence: np.ndarray | Sequence[float] | Sequence[int] | None = None,
41
+ np_histogram: tuple | None = None,
42
+ num_bins: int = 64,
43
+ ):
44
+ if sequence is None and np_histogram is None:
45
+ raise ValueError("Must provide either sequence or np_histogram")
46
+
47
+ if sequence is not None and np_histogram is not None:
48
+ raise ValueError("Cannot provide both sequence and np_histogram")
49
+
50
+ num_bins = min(num_bins, 512)
51
+
52
+ if np_histogram is not None:
53
+ self.histogram, self.bins = np_histogram
54
+ self.histogram = np.asarray(self.histogram)
55
+ self.bins = np.asarray(self.bins)
56
+ else:
57
+ data = np.asarray(sequence).flatten()
58
+ data = data[np.isfinite(data)]
59
+ if len(data) == 0:
60
+ self.histogram = np.array([])
61
+ self.bins = np.array([])
62
+ else:
63
+ self.histogram, self.bins = np.histogram(data, bins=num_bins)
64
+
65
+ def _to_dict(self) -> dict:
66
+ """Convert histogram to dictionary for storage."""
67
+ return {
68
+ "_type": self.TYPE,
69
+ "bins": self.bins.tolist(),
70
+ "values": self.histogram.tolist(),
71
+ }
trackio/imports.py ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+
6
+ from trackio import deploy, utils
7
+ from trackio.sqlite_storage import SQLiteStorage
8
+
9
+
10
def import_csv(
    csv_path: str | Path,
    project: str,
    name: str | None = None,
    space_id: str | None = None,
    dataset_id: str | None = None,
    private: bool | None = None,
    force: bool = False,
) -> None:
    """
    Imports a CSV file into a Trackio project. The CSV file must contain a `"step"`
    column, may optionally contain a `"timestamp"` column, and any other columns will be
    treated as metrics. It should also include a header row with the column names.

    Only columns whose values can all be parsed as numbers are imported as
    metrics. Rows in which every metric is missing are skipped.

    TODO: call init() and return a Run object so that the user can continue to log metrics to it.

    Args:
        csv_path (`str` or `Path`):
            The str or Path to the CSV file to import.
        project (`str`):
            The name of the project to import the CSV file into. Must not be an existing
            project.
        name (`str`, *optional*):
            The name of the Run to import the CSV file into. If not provided, the
            CSV file name without its extension is used.
        space_id (`str`, *optional*):
            If provided, the project will be logged to a Hugging Face Space instead of a
            local directory. Should be a complete Space name like `"username/reponame"`
            or `"orgname/reponame"`, or just `"reponame"` in which case the Space will
            be created in the currently-logged-in Hugging Face user's namespace. If the
            Space does not exist, it will be created. If the Space already exists, the
            project will be logged to it.
        dataset_id (`str`, *optional*):
            Deprecated. Use `bucket_id` instead.
        private (`bool`, *optional*):
            Whether to make the Space private. If None (default), the repo will be
            public unless the organization's default is private. This value is ignored
            if the repo already exists.
        force (`bool`, *optional*, defaults to `False`):
            Forwarded to `deploy.upload_db_to_space` when `space_id` is provided.

    Raises:
        ValueError: If the project already exists, the CSV file is empty, it has
            no `"step"` column, or it contains no numeric metric values.
        FileNotFoundError: If `csv_path` does not exist.
    """
    if SQLiteStorage.get_runs(project):
        raise ValueError(
            f"Project '{project}' already exists. Cannot import CSV into existing project."
        )

    csv_path = Path(csv_path)
    if not csv_path.exists():
        raise FileNotFoundError(f"CSV file not found: {csv_path}")

    df = pd.read_csv(csv_path)
    if df.empty:
        raise ValueError("CSV file is empty")

    column_mapping = utils.simplify_column_names(df.columns.tolist())
    df = df.rename(columns=column_mapping)

    # The step column is matched case-insensitively ("step", "Step", ...).
    step_column = next((col for col in df.columns if col.lower() == "step"), None)
    if step_column is None:
        raise ValueError("CSV file must contain a 'step' or 'Step' column")

    if name is None:
        name = csv_path.stem

    metrics_list = []
    steps = []
    timestamps = []

    numeric_columns = []
    for column in df.columns:
        if column == step_column:
            continue
        if column == "timestamp":
            continue

        try:
            pd.to_numeric(df[column], errors="raise")
            numeric_columns.append(column)
        except (ValueError, TypeError):
            continue

    has_timestamp = "timestamp" in df.columns
    for _, row in df.iterrows():
        metrics = {}
        for column in numeric_columns:
            value = row[column]
            if bool(pd.notna(value)):
                metrics[column] = float(value)

        if metrics:
            # The three lists are appended to together so they stay aligned.
            metrics_list.append(metrics)
            steps.append(int(row[step_column]))

            if has_timestamp and bool(pd.notna(row["timestamp"])):
                timestamps.append(str(row["timestamp"]))
            else:
                timestamps.append("")

    if not metrics_list:
        # Fail with a clear message before anything is written or uploaded,
        # instead of crashing on `metrics_list[0]` in the summary below.
        raise ValueError(f"No numeric metric values found in CSV file: {csv_path}")

    SQLiteStorage.bulk_log(
        project=project,
        run=name,
        metrics_list=metrics_list,
        steps=steps,
        timestamps=timestamps,
    )

    print(
        f"* Imported {len(metrics_list)} rows from {csv_path} into project '{project}' as run '{name}'"
    )
    print(f"* Metrics found: {', '.join(metrics_list[0].keys())}")

    space_id, dataset_id, _ = utils.preprocess_space_and_dataset_ids(
        space_id, dataset_id
    )
    if dataset_id is not None:
        os.environ["TRACKIO_DATASET_ID"] = dataset_id
        print(f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}")

    if space_id is None:
        utils.print_dashboard_instructions(project)
    else:
        deploy.create_space_if_not_exists(
            space_id=space_id, dataset_id=dataset_id, private=private
        )
        deploy.wait_until_space_exists(space_id=space_id)
        deploy.upload_db_to_space(project=project, space_id=space_id, force=force)
        print(
            f"* View dashboard by going to: {deploy.SPACE_URL.format(space_id=space_id)}"
        )
144
+
145
+
146
def import_tf_events(
    log_dir: str | Path,
    project: str,
    name: str | None = None,
    space_id: str | None = None,
    dataset_id: str | None = None,
    private: bool | None = None,
    force: bool = False,
) -> None:
    """
    Imports TensorFlow Events files from a directory into a Trackio project. Each
    subdirectory in the log directory will be imported as a separate run; events
    found directly in the root directory are imported as a run called `"main"`.

    A directory that fails to import is reported on stdout and skipped; the
    import only fails as a whole if no run could be imported.

    Args:
        log_dir (`str` or `Path`):
            The str or Path to the directory containing TensorFlow Events files.
        project (`str`):
            The name of the project to import the TensorFlow Events files into. Must not
            be an existing project.
        name (`str`, *optional*):
            The name prefix for runs (if not provided, will use directory names). Each
            subdirectory will create a separate run.
        space_id (`str`, *optional*):
            If provided, the project will be logged to a Hugging Face Space instead of a
            local directory. Should be a complete Space name like `"username/reponame"`
            or `"orgname/reponame"`, or just `"reponame"` in which case the Space will
            be created in the currently-logged-in Hugging Face user's namespace. If the
            Space does not exist, it will be created. If the Space already exists, the
            project will be logged to it.
        dataset_id (`str`, *optional*):
            Deprecated. Use `bucket_id` instead.
        private (`bool`, *optional*):
            Whether to make the Space private. If None (default), the repo will be
            public unless the organization's default is private. This value is ignored
            if the repo already exists.
        force (`bool`, *optional*, defaults to `False`):
            Forwarded to `deploy.upload_db_to_space` when `space_id` is provided.

    Raises:
        ImportError: If the `tbparse` package is not installed.
        ValueError: If the project already exists or no scalar data could be imported.
        FileNotFoundError: If `log_dir` does not exist.
    """
    try:
        from tbparse import SummaryReader
    except ImportError:
        raise ImportError(
            "The `tbparse` package is not installed but is required for `import_tf_events`. Please install trackio with the `tensorboard` extra: `pip install trackio[tensorboard]`."
        )

    if SQLiteStorage.get_runs(project):
        raise ValueError(
            f"Project '{project}' already exists. Cannot import TF events into existing project."
        )

    path = Path(log_dir)
    if not path.exists():
        raise FileNotFoundError(f"TF events directory not found: {path}")

    # tbparse walks the whole directory tree; `dir_name` tells the runs apart.
    # NOTE(review): only `dir_name` is requested via `extra_columns`, so a
    # `wall_time` column may never be present below - confirm against tbparse.
    scalars = SummaryReader(str(path), extra_columns={"dir_name"}).scalars

    if scalars.empty:
        raise ValueError(f"No TensorFlow events data found in {path}")

    total_imported = 0
    imported_runs = []

    # One run per source directory.
    for dir_name, group_df in scalars.groupby("dir_name"):
        try:
            # Files in the root directory have an empty dir_name.
            run_name = "main" if dir_name == "" else dir_name
            if name:
                run_name = f"{name}_{run_name}"

            if group_df.empty:
                print(f"* Skipping directory {dir_name}: no scalar data found")
                continue

            has_wall_time = "wall_time" in group_df.columns
            metrics_list = []
            steps = []
            timestamps = []

            # Every scalar event becomes its own single-metric log entry.
            for _, event in group_df.iterrows():
                tag = str(event["tag"])
                value = float(event["value"])
                step = int(event["step"])

                metrics_list.append({tag: value})
                steps.append(step)

                # Use wall_time if present, else fall back to an empty timestamp
                if has_wall_time and not bool(pd.isna(event["wall_time"])):
                    timestamps.append(str(event["wall_time"]))
                else:
                    timestamps.append("")

            if not metrics_list:
                continue

            SQLiteStorage.bulk_log(
                project=project,
                run=str(run_name),
                metrics_list=metrics_list,
                steps=steps,
                timestamps=timestamps,
            )

            total_imported += len(metrics_list)
            imported_runs.append(run_name)

            print(
                f"* Imported {len(metrics_list)} scalar events from directory '{dir_name}' as run '{run_name}'"
            )
            print(f"* Metrics in this run: {', '.join(set(group_df['tag']))}")

        except Exception as e:
            # Best effort: one broken directory must not abort the whole import.
            print(f"* Error processing directory {dir_name}: {e}")
            continue

    if not imported_runs:
        raise ValueError("No valid TensorFlow events data could be imported")

    print(f"* Total imported events: {total_imported}")
    print(f"* Created runs: {', '.join(imported_runs)}")

    space_id, dataset_id, _ = utils.preprocess_space_and_dataset_ids(
        space_id, dataset_id
    )
    if dataset_id is not None:
        os.environ["TRACKIO_DATASET_ID"] = dataset_id
        print(f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}")

    if space_id is None:
        utils.print_dashboard_instructions(project)
        return

    deploy.create_space_if_not_exists(
        space_id, dataset_id=dataset_id, private=private
    )
    deploy.wait_until_space_exists(space_id)
    deploy.upload_db_to_space(project, space_id, force=force)
    print(
        f"* View dashboard by going to: {deploy.SPACE_URL.format(space_id=space_id)}"
    )
trackio/markdown.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class Markdown:
    """
    Markdown report data type for Trackio.

    Args:
        text (`str`):
            Markdown content to log. Defaults to an empty string; any non-string
            value raises a `ValueError`.
    """

    TYPE = "trackio.markdown"

    def __init__(self, text: str = ""):
        if isinstance(text, str):
            self.text = text
        else:
            raise ValueError("Markdown text must be a string")

    def _to_dict(self) -> dict:
        """Serialize the report as a `_type`/`_value` mapping."""
        return dict(_type=self.TYPE, _value=self.text)
trackio/media/__init__.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Media module for Trackio.
3
+
4
+ This module contains all media-related functionality including:
5
+ - TrackioImage, TrackioVideo, TrackioAudio classes
6
+ - Video writing utilities
7
+ - Audio conversion utilities
8
+ """
9
+
10
+ from trackio.media.audio import TrackioAudio
11
+ from trackio.media.image import TrackioImage
12
+ from trackio.media.media import TrackioMedia
13
+ from trackio.media.utils import get_project_media_path
14
+ from trackio.media.video import TrackioVideo
15
+
16
+ write_audio = TrackioAudio.write_audio
17
+ write_video = TrackioVideo.write_video
18
+
19
+ __all__ = [
20
+ "TrackioMedia",
21
+ "TrackioImage",
22
+ "TrackioVideo",
23
+ "TrackioAudio",
24
+ "get_project_media_path",
25
+ "write_video",
26
+ "write_audio",
27
+ ]
trackio/media/audio.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import warnings
4
+ from pathlib import Path
5
+ from typing import Literal
6
+
7
+ import numpy as np
8
+ from pydub import AudioSegment
9
+
10
+ from trackio.media.media import TrackioMedia
11
+ from trackio.media.utils import check_ffmpeg_installed, check_path
12
+
13
+ SUPPORTED_FORMATS = ["wav", "mp3"]
14
+ AudioFormatType = Literal["wav", "mp3"]
15
+ TrackioAudioSourceType = str | Path | np.ndarray
16
+
17
+
18
class TrackioAudio(TrackioMedia):
    """
    Initializes an Audio object.

    Example:
        ```python
        import trackio
        import numpy as np

        # Generate a 1-second 440 Hz sine wave (mono)
        sr = 16000
        t = np.linspace(0, 1, sr, endpoint=False)
        wave = 0.2 * np.sin(2 * np.pi * 440 * t)
        audio = trackio.Audio(wave, caption="A4 sine", sample_rate=sr, format="wav")
        trackio.log({"tone": audio})

        # Stereo from numpy array (shape: samples, 2)
        stereo = np.stack([wave, wave], axis=1)
        audio = trackio.Audio(stereo, caption="Stereo", sample_rate=sr, format="mp3")
        trackio.log({"stereo": audio})

        # From an existing file
        audio = trackio.Audio("path/to/audio.wav", caption="From file")
        trackio.log({"file_audio": audio})
        ```

    Args:
        value (`str`, `Path`, or `numpy.ndarray`, *optional*):
            A path to an audio file, or a numpy array.
            The array should be shaped `(samples,)` for mono or `(samples, 2)` for stereo.
            Float arrays will be peak-normalized and converted to 16-bit PCM; integer arrays will be converted to 16-bit PCM as needed.
        caption (`str`, *optional*):
            A string caption for the audio.
        sample_rate (`int`, *optional*):
            Sample rate in Hz. Required when `value` is a numpy array.
        format (`Literal["wav", "mp3"]`, *optional*):
            Audio format used when `value` is a numpy array. Default is "wav".
    """

    TYPE = "trackio.audio"

    def __init__(
        self,
        value: TrackioAudioSourceType,
        caption: str | None = None,
        sample_rate: int | None = None,
        format: AudioFormatType | None = None,
    ):
        super().__init__(value, caption)
        if isinstance(value, np.ndarray):
            if sample_rate is None:
                raise ValueError("Sample rate is required when value is an ndarray")
            if format is None:
                format = "wav"
            # Encoding parameters are only needed for in-memory arrays; files
            # are copied as they are.
            self._format = format
            self._sample_rate = sample_rate

    def _save_media(self, file_path: Path):
        """Write the audio to `file_path`: encode an array, or copy a source file."""
        if isinstance(self._value, np.ndarray):
            TrackioAudio.write_audio(
                data=self._value,
                sample_rate=self._sample_rate,
                filename=file_path,
                format=self._format,
            )
        elif isinstance(self._value, str | Path):
            if os.path.isfile(self._value):
                shutil.copy(self._value, file_path)
            else:
                raise ValueError(f"File not found: {self._value}")

    @staticmethod
    def ensure_int16_pcm(data: np.ndarray) -> np.ndarray:
        """
        Convert input audio array to contiguous int16 PCM.
        Peak normalization is applied to floating inputs.

        The input array is never modified: NaN/inf clean-up of floating data is
        done on a copy, so read-only arrays are accepted too.

        Args:
            data: Audio samples, 1D (mono) or 2D (`[samples, channels]`).
                Supported dtypes are floating types, `int16`, `int32`, `uint16`,
                `uint8` and `int8`.

        Returns:
            A C-contiguous `int16` array with the same shape as `data`.

        Raises:
            ValueError: If `data` is not 1D or 2D.
            TypeError: If the dtype is not supported.
        """
        arr = np.asarray(data)
        if arr.ndim not in (1, 2):
            raise ValueError("Audio data must be 1D (mono) or 2D ([samples, channels])")

        if arr.dtype != np.int16:
            warnings.warn(
                f"Converting {arr.dtype} audio to int16 PCM; pass int16 to avoid conversion.",
                stacklevel=2,
            )

        # Floating types: normalize to peak 1.0, then scale to int16
        if np.issubdtype(arr.dtype, np.floating):
            # `nan_to_num` copies by default; an in-place clean-up would
            # silently rewrite the caller's samples and fail on read-only input.
            arr = np.nan_to_num(arr)
            max_abs = float(np.max(np.abs(arr))) if arr.size else 0.0
            if max_abs > 0.0:
                arr = arr / max_abs
            out = (arr * 32767.0).clip(-32768, 32767).astype(np.int16, copy=False)
            return np.ascontiguousarray(out)

        # Integer types: rescale each supported width/signedness to int16.
        converters = {
            np.dtype(np.int16): lambda a: a,
            np.dtype(np.int32): lambda a: (a.astype(np.int32) // 65536).astype(
                np.int16, copy=False
            ),
            np.dtype(np.uint16): lambda a: (a.astype(np.int32) - 32768).astype(
                np.int16, copy=False
            ),
            np.dtype(np.uint8): lambda a: (a.astype(np.int32) * 257 - 32768).astype(
                np.int16, copy=False
            ),
            np.dtype(np.int8): lambda a: (a.astype(np.int32) * 256).astype(
                np.int16, copy=False
            ),
        }

        conv = converters.get(arr.dtype)
        if conv is not None:
            out = conv(arr)
            return np.ascontiguousarray(out)
        raise TypeError(f"Unsupported audio dtype: {arr.dtype}")

    @staticmethod
    def write_audio(
        data: np.ndarray,
        sample_rate: int,
        filename: str | Path,
        format: AudioFormatType = "wav",
    ) -> None:
        """
        Encode an audio array and write it to `filename`.

        Args:
            data: Audio samples, converted with `ensure_int16_pcm`.
            sample_rate: Sample rate in Hz; must be a positive `int`.
            filename: Destination path.
            format: Output format, one of `SUPPORTED_FORMATS`. Any format other
                than `"wav"` requires ffmpeg.

        Raises:
            ValueError: If `sample_rate` or `format` is invalid.
        """
        if not isinstance(sample_rate, int) or sample_rate <= 0:
            raise ValueError(f"Invalid sample_rate: {sample_rate}")
        if format not in SUPPORTED_FORMATS:
            raise ValueError(
                f"Unsupported format: {format}. Supported: {SUPPORTED_FORMATS}"
            )

        check_path(filename)

        pcm = TrackioAudio.ensure_int16_pcm(data)

        if format != "wav":
            check_ffmpeg_installed()

        channels = 1 if pcm.ndim == 1 else pcm.shape[1]
        audio = AudioSegment(
            pcm.tobytes(),
            frame_rate=sample_rate,
            sample_width=2,  # int16
            channels=channels,
        )

        file = audio.export(str(filename), format=format)
        file.close()
trackio/media/image.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from pathlib import Path
4
+
5
+ import numpy as np
6
+ from PIL import Image as PILImage
7
+
8
+ from trackio.media.media import TrackioMedia
9
+
10
+ TrackioImageSourceType = str | Path | np.ndarray | PILImage.Image
11
+
12
+
13
class TrackioImage(TrackioMedia):
    """
    Initializes an Image object.

    Arrays and PIL images are converted to RGBA and stored as PNG; images given
    as a file path are copied unchanged.

    Example:
        ```python
        import trackio
        import numpy as np
        from PIL import Image

        # Create an image from numpy array
        image_data = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)
        image = trackio.Image(image_data, caption="Random image")
        trackio.log({"my_image": image})

        # Create an image from PIL Image
        pil_image = Image.new('RGB', (100, 100), color='red')
        image = trackio.Image(pil_image, caption="Red square")
        trackio.log({"red_image": image})

        # Create an image from file path
        image = trackio.Image("path/to/image.jpg", caption="Photo from file")
        trackio.log({"file_image": image})
        ```

    Args:
        value (`str`, `Path`, `numpy.ndarray`, or `PIL.Image`, *optional*):
            A path to an image, a PIL Image, or a numpy array of shape (height, width, channels).
            If numpy array, should be of type `np.uint8` with RGB values in the range `[0, 255]`.
        caption (`str`, *optional*):
            A string caption for the image.
    """

    TYPE = "trackio.image"

    def __init__(self, value: TrackioImageSourceType, caption: str | None = None):
        super().__init__(value, caption)
        self._format: str | None = None

        source = self._value
        if not isinstance(source, TrackioImageSourceType):
            raise ValueError(
                f"Invalid value type, expected {TrackioImageSourceType}, got {type(self._value)}"
            )

        is_array = isinstance(source, np.ndarray)
        if is_array and source.dtype != np.uint8:
            raise ValueError(
                f"Invalid value dtype, expected np.uint8, got {self._value.dtype}"
            )

        # In-memory images have no file extension of their own.
        if is_array or isinstance(source, PILImage.Image):
            self._format = "png"

    def _as_pil(self) -> PILImage.Image | None:
        """Return the value as an RGBA PIL image, or `None` for file-path values."""
        source = self._value
        try:
            if isinstance(source, np.ndarray):
                pixels = np.asarray(source).astype("uint8")
                return PILImage.fromarray(pixels).convert("RGBA")
            if isinstance(source, PILImage.Image):
                return source.convert("RGBA")
        except Exception as e:
            raise ValueError(f"Failed to process image data: {self._value}") from e
        return None

    def _save_media(self, file_path: Path):
        """Write the image to `file_path`: encode in-memory data, or copy a source file."""
        pil = self._as_pil()
        if pil:
            pil.save(file_path, format=self._format)
            return
        if not isinstance(self._value, str | Path):
            return
        if not os.path.isfile(self._value):
            raise ValueError(f"File not found: {self._value}")
        shutil.copy(self._value, file_path)
trackio/media/media.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uuid
3
+ from abc import ABC, abstractmethod
4
+ from pathlib import Path
5
+
6
+ from trackio.media.utils import get_project_media_path
7
+ from trackio.utils import MEDIA_DIR
8
+
9
+
10
+ class TrackioMedia(ABC):
11
+ """
12
+ Abstract base class for Trackio media objects
13
+ Provides shared functionality for file handling and serialization.
14
+ """
15
+
16
+ TYPE: str
17
+
18
+ def __init_subclass__(cls, **kwargs):
19
+ """Ensure subclasses define the TYPE attribute."""
20
+ super().__init_subclass__(**kwargs)
21
+ if not hasattr(cls, "TYPE") or cls.TYPE is None:
22
+ raise TypeError(f"Class {cls.__name__} must define TYPE attribute")
23
+
24
+ def __init__(self, value, caption: str | None = None):
25
+ """
26
+ Saves the value and caption, and if the value is a file path, checks if the file exists.
27
+ """
28
+ self.caption = caption
29
+ self._value = value
30
+ self._file_path: Path | None = None
31
+
32
+ if isinstance(self._value, str | Path):
33
+ if not os.path.isfile(self._value):
34
+ raise ValueError(f"File not found: {self._value}")
35
+
36
+ def _file_extension(self) -> str:
37
+ if self._file_path:
38
+ return self._file_path.suffix[1:].lower()
39
+ if isinstance(self._value, str | Path):
40
+ path = Path(self._value)
41
+ return path.suffix[1:].lower()
42
+ if hasattr(self, "_format") and self._format:
43
+ return self._format
44
+ return "unknown"
45
+
46
+ def _get_relative_file_path(self) -> Path | None:
47
+ return self._file_path
48
+
49
+ def _get_absolute_file_path(self) -> Path | None:
50
+ if self._file_path:
51
+ return MEDIA_DIR / self._file_path
52
+ return None
53
+
54
+ def _save(self, project: str, run: str, step: int = 0):
55
+ if self._file_path:
56
+ return
57
+
58
+ media_dir = get_project_media_path(project=project, run=run, step=step)
59
+ filename = f"{uuid.uuid4()}.{self._file_extension()}"
60
+ file_path = media_dir / filename
61
+
62
+ self._save_media(file_path)
63
+ self._file_path = file_path.relative_to(MEDIA_DIR)
64
+
65
+ @abstractmethod
66
+ def _save_media(self, file_path: Path):
67
+ """
68
+ Performs the actual media saving logic.
69
+ """
70
+ pass
71
+
72
+ def _to_dict(self) -> dict:
73
+ if not self._file_path:
74
+ raise ValueError("Media must be saved to file before serialization")
75
+ return {
76
+ "_type": self.TYPE,
77
+ "file_path": str(self._get_relative_file_path()),
78
+ "caption": self.caption,
79
+ }
trackio/media/utils.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import shutil
2
+ from pathlib import Path
3
+
4
+ from trackio.utils import MEDIA_DIR
5
+
6
+
7
+ def check_path(file_path: str | Path) -> None:
8
+ """Raise an error if the parent directory does not exist."""
9
+ file_path = Path(file_path)
10
+ if not file_path.parent.exists():
11
+ try:
12
+ file_path.parent.mkdir(parents=True, exist_ok=True)
13
+ except OSError as e:
14
+ raise ValueError(
15
+ f"Failed to create parent directory {file_path.parent}: {e}"
16
+ )
17
+
18
+
19
def check_ffmpeg_installed() -> None:
    """Raise a `RuntimeError` unless an `ffmpeg` executable is found on the system PATH."""
    ffmpeg_path = shutil.which("ffmpeg")
    if ffmpeg_path is not None:
        return
    message = (
        "ffmpeg is required to write video but was not found on your system. "
        "Please install ffmpeg and ensure it is available on your PATH."
    )
    raise RuntimeError(message)
26
+
27
+
28
def get_project_media_path(
    project: str,
    run: str | None = None,
    step: int | None = None,
    relative_path: str | Path | None = None,
) -> Path:
    """
    Build the directory where media files for a Trackio project are stored, creating it if needed.

    With a run, the directory is `MEDIA_DIR/<project>/<run>` (plus `/<step>`
    when a step is given). Without a run, it is `MEDIA_DIR/<project>/files`
    (plus `/<relative_path>` when one is given).

    Args:
        project: The project name
        run: The run name
        step: The step number (requires `run`)
        relative_path: Sub-path inside the project-level `files` directory
            (only used if run is not provided)

    Returns:
        The path of the (now existing) directory

    Raises:
        ValueError: If `step` is given without `run`.
    """
    if run is None and step is not None:
        raise ValueError("Uploading files at a specific step requires a run")

    segments = [project]
    if run:
        segments.append(run)
        if step is not None:
            segments.append(str(step))
    else:
        segments.append("files")
        if relative_path:
            segments.append(relative_path)

    path = MEDIA_DIR.joinpath(*segments)
    path.mkdir(parents=True, exist_ok=True)
    return path
trackio/media/video.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import subprocess
4
+ from pathlib import Path
5
+ from typing import Literal
6
+
7
+ import numpy as np
8
+
9
+ from trackio.media.media import TrackioMedia
10
+ from trackio.media.utils import check_ffmpeg_installed, check_path
11
+
12
+ TrackioVideoSourceType = str | Path | np.ndarray
13
+ TrackioVideoFormatType = Literal["gif", "mp4", "webm"]
14
+ VideoCodec = Literal["h264", "vp9", "gif"]
15
+
16
+
17
+ class TrackioVideo(TrackioMedia):
18
+ """
19
+ Initializes a Video object.
20
+
21
+ Example:
22
+ ```python
23
+ import trackio
24
+ import numpy as np
25
+
26
+ # Create a simple video from numpy array
27
+ frames = np.random.randint(0, 255, (10, 3, 64, 64), dtype=np.uint8)
28
+ video = trackio.Video(frames, caption="Random video", fps=30)
29
+
30
+ # Create a batch of videos
31
+ batch_frames = np.random.randint(0, 255, (3, 10, 3, 64, 64), dtype=np.uint8)
32
+ batch_video = trackio.Video(batch_frames, caption="Batch of videos", fps=15)
33
+
34
+ # Create video from file path
35
+ video = trackio.Video("path/to/video.mp4", caption="Video from file")
36
+ ```
37
+
38
+ Args:
39
+ value (`str`, `Path`, or `numpy.ndarray`, *optional*):
40
+ A path to a video file, or a numpy array.
41
+ If numpy array, should be of type `np.uint8` with RGB values in the range `[0, 255]`.
42
+ It is expected to have shape of either (frames, channels, height, width) or (batch, frames, channels, height, width).
43
+ For the latter, the videos will be tiled into a grid.
44
+ caption (`str`, *optional*):
45
+ A string caption for the video.
46
+ fps (`int`, *optional*):
47
+ Frames per second for the video. Only used when value is an ndarray. Default is `24`.
48
+ format (`Literal["gif", "mp4", "webm"]`, *optional*):
49
+ Video format ("gif", "mp4", or "webm"). Only used when value is an ndarray. Default is "gif".
50
+ """
51
+
52
+ TYPE = "trackio.video"
53
+
54
+ def __init__(
55
+ self,
56
+ value: TrackioVideoSourceType,
57
+ caption: str | None = None,
58
+ fps: int | None = None,
59
+ format: TrackioVideoFormatType | None = None,
60
+ ):
61
+ super().__init__(value, caption)
62
+
63
+ if not isinstance(self._value, TrackioVideoSourceType):
64
+ raise ValueError(
65
+ f"Invalid value type, expected {TrackioVideoSourceType}, got {type(self._value)}"
66
+ )
67
+ if isinstance(self._value, np.ndarray):
68
+ if self._value.dtype != np.uint8:
69
+ raise ValueError(
70
+ f"Invalid value dtype, expected np.uint8, got {self._value.dtype}"
71
+ )
72
+ if format is None:
73
+ format = "gif"
74
+ if fps is None:
75
+ fps = 24
76
+ self._fps = fps
77
+ self._format = format
78
+
79
+ @staticmethod
80
+ def _check_array_format(video: np.ndarray) -> None:
81
+ """Raise an error if the array is not in the expected format."""
82
+ if not (video.ndim == 4 and video.shape[-1] == 3):
83
+ raise ValueError(
84
+ f"Expected RGB input shaped (F, H, W, 3), got {video.shape}. "
85
+ f"Input has {video.ndim} dimensions, expected 4."
86
+ )
87
+ if video.dtype != np.uint8:
88
+ raise TypeError(
89
+ f"Expected dtype=uint8, got {video.dtype}. "
90
+ "Please convert your video data to uint8 format."
91
+ )
92
+
93
+ @staticmethod
94
+ def write_video(
95
+ file_path: str | Path, video: np.ndarray, fps: float, codec: VideoCodec
96
+ ) -> None:
97
+ """RGB uint8 only, shape (F, H, W, 3)."""
98
+ check_ffmpeg_installed()
99
+ check_path(file_path)
100
+
101
+ if codec not in {"h264", "vp9", "gif"}:
102
+ raise ValueError("Unsupported codec. Use h264, vp9, or gif.")
103
+
104
+ arr = np.asarray(video)
105
+ TrackioVideo._check_array_format(arr)
106
+
107
+ frames = np.ascontiguousarray(arr)
108
+ _, height, width, _ = frames.shape
109
+ out_path = str(file_path)
110
+
111
+ cmd = [
112
+ "ffmpeg",
113
+ "-y",
114
+ "-f",
115
+ "rawvideo",
116
+ "-s",
117
+ f"{width}x{height}",
118
+ "-pix_fmt",
119
+ "rgb24",
120
+ "-r",
121
+ str(fps),
122
+ "-i",
123
+ "-",
124
+ "-an",
125
+ ]
126
+
127
+ if codec == "gif":
128
+ video_filter = "split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse"
129
+ cmd += [
130
+ "-vf",
131
+ video_filter,
132
+ "-loop",
133
+ "0",
134
+ ]
135
+ elif codec == "h264":
136
+ cmd += [
137
+ "-vcodec",
138
+ "libx264",
139
+ "-pix_fmt",
140
+ "yuv420p",
141
+ "-movflags",
142
+ "+faststart",
143
+ ]
144
+ elif codec == "vp9":
145
+ bpp = 0.08
146
+ bps = int(width * height * fps * bpp)
147
+ if bps >= 1_000_000:
148
+ bitrate = f"{round(bps / 1_000_000)}M"
149
+ elif bps >= 1_000:
150
+ bitrate = f"{round(bps / 1_000)}k"
151
+ else:
152
+ bitrate = str(max(bps, 1))
153
+ cmd += [
154
+ "-vcodec",
155
+ "libvpx-vp9",
156
+ "-b:v",
157
+ bitrate,
158
+ "-pix_fmt",
159
+ "yuv420p",
160
+ ]
161
+ cmd += [out_path]
162
+ proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
163
+ try:
164
+ for frame in frames:
165
+ proc.stdin.write(frame.tobytes())
166
+ finally:
167
+ if proc.stdin:
168
+ proc.stdin.close()
169
+ stderr = (
170
+ proc.stderr.read().decode("utf-8", errors="ignore")
171
+ if proc.stderr
172
+ else ""
173
+ )
174
+ ret = proc.wait()
175
+ if ret != 0:
176
+ raise RuntimeError(f"ffmpeg failed with code {ret}\n{stderr}")
177
+
178
+ @property
179
+ def _codec(self) -> str:
180
+ match self._format:
181
+ case "gif":
182
+ return "gif"
183
+ case "mp4":
184
+ return "h264"
185
+ case "webm":
186
+ return "vp9"
187
+ case _:
188
+ raise ValueError(f"Unsupported format: {self._format}")
189
+
190
+ def _save_media(self, file_path: Path):
191
+ if isinstance(self._value, np.ndarray):
192
+ video = TrackioVideo._process_ndarray(self._value)
193
+ TrackioVideo.write_video(file_path, video, fps=self._fps, codec=self._codec)
194
+ elif isinstance(self._value, str | Path):
195
+ if os.path.isfile(self._value):
196
+ shutil.copy(self._value, file_path)
197
+ else:
198
+ raise ValueError(f"File not found: {self._value}")
199
+
200
+ @staticmethod
201
+ def _process_ndarray(value: np.ndarray) -> np.ndarray:
202
+ # Verify value is either 4D (single video) or 5D array (batched videos).
203
+ # Expected format: (frames, channels, height, width) or (batch, frames, channels, height, width)
204
+ if value.ndim < 4:
205
+ raise ValueError(
206
+ "Video requires at least 4 dimensions (frames, channels, height, width)"
207
+ )
208
+ if value.ndim > 5:
209
+ raise ValueError(
210
+ "Videos can have at most 5 dimensions (batch, frames, channels, height, width)"
211
+ )
212
+ if value.ndim == 4:
213
+ # Reshape to 5D with single batch: (1, frames, channels, height, width)
214
+ value = value[np.newaxis, ...]
215
+
216
+ value = TrackioVideo._tile_batched_videos(value)
217
+ return value
218
+
219
+ @staticmethod
220
+ def _tile_batched_videos(video: np.ndarray) -> np.ndarray:
221
+ """
222
+ Tiles a batch of videos into a grid of videos.
223
+
224
+ Input format: (batch, frames, channels, height, width) - original FCHW format
225
+ Output format: (frames, total_height, total_width, channels)
226
+ """
227
+ batch_size, frames, channels, height, width = video.shape
228
+
229
+ next_pow2 = 1 << (batch_size - 1).bit_length()
230
+ if batch_size != next_pow2:
231
+ pad_len = next_pow2 - batch_size
232
+ pad_shape = (pad_len, frames, channels, height, width)
233
+ padding = np.zeros(pad_shape, dtype=video.dtype)
234
+ video = np.concatenate((video, padding), axis=0)
235
+ batch_size = next_pow2
236
+
237
+ n_rows = 1 << ((batch_size.bit_length() - 1) // 2)
238
+ n_cols = batch_size // n_rows
239
+
240
+ # Reshape to grid layout: (n_rows, n_cols, frames, channels, height, width)
241
+ video = video.reshape(n_rows, n_cols, frames, channels, height, width)
242
+
243
+ # Rearrange dimensions to (frames, total_height, total_width, channels)
244
+ video = video.transpose(2, 0, 4, 1, 5, 3)
245
+ video = video.reshape(frames, n_rows * height, n_cols * width, channels)
246
+ return video
trackio/package.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "name": "trackio",
3
+ "version": "0.22.0",
4
+ "description": "",
5
+ "python": "true"
6
+ }
trackio/py.typed ADDED
File without changes
trackio/remote_client.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from gradio_client import Client
4
+
5
+
6
+ class RemoteClient:
7
+ def __init__(self, space: str, hf_token: str | None = None):
8
+ self._space = space
9
+ kwargs: dict = {"verbose": False}
10
+ if hf_token:
11
+ kwargs["hf_token"] = hf_token
12
+ try:
13
+ self._client = Client(space, **kwargs)
14
+ except Exception as e:
15
+ raise ConnectionError(
16
+ f"Could not connect to Space '{space}'. Is it running?\n{e}"
17
+ )
18
+
19
+ def predict(self, *args, api_name: str):
20
+ try:
21
+ return self._client.predict(*args, api_name=api_name)
22
+ except Exception as e:
23
+ if "API Not Found" in str(e) or "api_name" in str(e):
24
+ raise RuntimeError(
25
+ f"Space '{self._space}' does not support '{api_name}'. "
26
+ "Redeploy with `trackio sync`."
27
+ )
28
+ raise
trackio/run.py ADDED
@@ -0,0 +1,739 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import threading
4
+ import uuid
5
+ import warnings
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+
9
+ import huggingface_hub
10
+ from gradio_client import Client, handle_file
11
+
12
+ from trackio import utils
13
+ from trackio.alerts import (
14
+ AlertLevel,
15
+ format_alert_terminal,
16
+ resolve_webhook_min_level,
17
+ send_webhook,
18
+ should_send_webhook,
19
+ )
20
+ from trackio.apple_gpu import AppleGpuMonitor, apple_gpu_available
21
+ from trackio.gpu import GpuMonitor, gpu_available
22
+ from trackio.histogram import Histogram
23
+ from trackio.markdown import Markdown
24
+ from trackio.media import TrackioMedia, get_project_media_path
25
+ from trackio.sqlite_storage import SQLiteStorage
26
+ from trackio.table import Table
27
+ from trackio.typehints import AlertEntry, LogEntry, SystemLogEntry, UploadEntry
28
+ from trackio.utils import _get_default_namespace
29
+
30
+ BATCH_SEND_INTERVAL = 0.5
31
+ MAX_BACKOFF = 30
32
+
33
+
34
+ class Run:
35
def __init__(
    self,
    url: str | None,
    project: str,
    client: Client | None,
    name: str | None = None,
    group: str | None = None,
    config: dict | None = None,
    space_id: str | None = None,
    auto_log_gpu: bool = False,
    gpu_log_interval: float = 10.0,
    webhook_url: str | None = None,
    webhook_min_level: AlertLevel | str | None = None,
):
    """
    Initialize a Run for logging metrics to Trackio.

    Sets up the run state and queues, then starts a daemon sender thread:
    a local batch sender when no `space_id` is given, otherwise a thread that
    initializes the client in the background.

    Args:
        url: The URL of the Trackio server (local Gradio app or HF Space).
        project: The name of the project to log metrics to.
        client: A pre-configured gradio_client.Client instance, or None to
            create one automatically in a background thread with retry logic.
            Passing None is recommended for normal usage. Passing a client
            is useful for testing (e.g., injecting a mock client).
        name: The name of this run. If None, a name is generated with
            `utils.generate_readable_name`.
        group: Optional group name to organize related runs together.
        config: A dictionary of configuration/hyperparameters for this run.
            Keys starting with '_' are reserved for internal use.
        space_id: The HF Space ID if logging to a Space (e.g., "user/space").
            If provided, media files will be uploaded to the Space.
        auto_log_gpu: Whether to automatically log GPU metrics at regular
            intervals.
        gpu_log_interval: The interval in seconds between GPU metric logs.
            Only used when auto_log_gpu is True.
        webhook_url: A webhook URL to POST alert payloads to. Can also be set
            via the TRACKIO_WEBHOOK_URL environment variable.
        webhook_min_level: Minimum alert level that should trigger webhook
            delivery. Can also be set via `TRACKIO_WEBHOOK_MIN_LEVEL`.

    Raises:
        ValueError: If a config key starts with '_'.
    """
    self.url = url
    self.project = project
    self._client_lock = threading.Lock()
    self._client_thread = None
    self._client = client
    self._space_id = space_id
    self.name = name or utils.generate_readable_name(
        SQLiteStorage.get_runs(project), space_id
    )
    self.group = group
    self.config = utils.to_json_safe(config or {})

    # User-supplied keys may not collide with the internal "_"-prefixed keys
    # that are added right below.
    if isinstance(self.config, dict):
        for key in self.config:
            if key.startswith("_"):
                raise ValueError(
                    f"Config key '{key}' is reserved (keys starting with '_' are reserved for internal use)"
                )

    self.config["_Username"] = self._get_username()
    self.config["_Created"] = datetime.now(timezone.utc).isoformat()
    self.config["_Group"] = self.group

    self._queued_logs: list[LogEntry] = []
    self._queued_system_logs: list[SystemLogEntry] = []
    self._queued_uploads: list[UploadEntry] = []
    self._queued_alerts: list[AlertEntry] = []
    self._stop_flag = threading.Event()
    self._config_logged = False

    # Continue step numbering after the highest step already stored for this run.
    max_step = SQLiteStorage.get_max_step_for_run(self.project, self.name)
    self._next_step = 0 if max_step is None else max_step + 1
    self._has_local_buffer = False

    self._is_local = space_id is None
    self._webhook_url = webhook_url or os.environ.get("TRACKIO_WEBHOOK_URL")
    min_level = webhook_min_level or os.environ.get("TRACKIO_WEBHOOK_MIN_LEVEL")
    self._webhook_min_level = resolve_webhook_min_level(min_level)

    if self._is_local:
        self._local_sender_thread = threading.Thread(
            target=self._local_batch_sender, daemon=True
        )
        self._local_sender_thread.start()
    else:
        self._client_thread = threading.Thread(
            target=self._init_client_background, daemon=True
        )
        self._client_thread.start()

    self._gpu_monitor: "GpuMonitor | AppleGpuMonitor | None" = None
    if auto_log_gpu:
        monitor_cls = None
        if gpu_available():
            monitor_cls = GpuMonitor
        elif apple_gpu_available():
            monitor_cls = AppleGpuMonitor
        if monitor_cls is not None:
            self._gpu_monitor = monitor_cls(self, interval=gpu_log_interval)
            self._gpu_monitor.start()
137
+
138
+ def _get_username(self) -> str | None:
139
+ try:
140
+ return _get_default_namespace()
141
+ except Exception:
142
+ return None
143
+
144
+ def _local_batch_sender(self):
145
+ while (
146
+ not self._stop_flag.is_set()
147
+ or len(self._queued_logs) > 0
148
+ or len(self._queued_system_logs) > 0
149
+ or len(self._queued_alerts) > 0
150
+ ):
151
+ if not self._stop_flag.is_set():
152
+ self._stop_flag.wait(timeout=BATCH_SEND_INTERVAL)
153
+
154
+ with self._client_lock:
155
+ if self._queued_logs:
156
+ logs_to_send = self._queued_logs.copy()
157
+ self._queued_logs.clear()
158
+ self._write_logs_to_sqlite(logs_to_send)
159
+
160
+ if self._queued_system_logs:
161
+ system_logs_to_send = self._queued_system_logs.copy()
162
+ self._queued_system_logs.clear()
163
+ self._write_system_logs_to_sqlite(system_logs_to_send)
164
+
165
+ if self._queued_alerts:
166
+ alerts_to_send = self._queued_alerts.copy()
167
+ self._queued_alerts.clear()
168
+ self._write_alerts_to_sqlite(alerts_to_send)
169
+
170
def _write_logs_to_sqlite(self, logs: list[LogEntry]):
    """
    Groups log entries by (project, run) and writes each group with `SQLiteStorage.bulk_log`.

    The first truthy `config` seen for a run is the one passed along. Log ids
    are passed only when at least one entry in the group has one.
    """
    grouped: dict[tuple, dict] = {}
    for entry in logs:
        bucket = grouped.setdefault(
            (entry["project"], entry["run"]),
            {"metrics": [], "steps": [], "log_ids": [], "config": None},
        )
        bucket["metrics"].append(entry["metrics"])
        bucket["steps"].append(entry.get("step"))
        bucket["log_ids"].append(entry.get("log_id"))
        if bucket["config"] is None and entry.get("config"):
            bucket["config"] = entry["config"]

    for (project, run), bucket in grouped.items():
        log_ids = bucket["log_ids"]
        if all(log_id is None for log_id in log_ids):
            log_ids = None
        SQLiteStorage.bulk_log(
            project=project,
            run=run,
            metrics_list=bucket["metrics"],
            steps=bucket["steps"],
            config=bucket["config"],
            log_ids=log_ids,
        )
197
+
198
def _write_system_logs_to_sqlite(self, logs: list[SystemLogEntry]):
    """
    Groups system log entries by (project, run) and writes each group with `SQLiteStorage.bulk_log_system`.

    Log ids are passed only when at least one entry in the group has one.
    """
    grouped: dict[tuple, dict] = {}
    for entry in logs:
        bucket = grouped.setdefault(
            (entry["project"], entry["run"]),
            {"metrics": [], "timestamps": [], "log_ids": []},
        )
        bucket["metrics"].append(entry["metrics"])
        bucket["timestamps"].append(entry.get("timestamp"))
        bucket["log_ids"].append(entry.get("log_id"))

    for (project, run), bucket in grouped.items():
        log_ids = bucket["log_ids"]
        if all(log_id is None for log_id in log_ids):
            log_ids = None
        SQLiteStorage.bulk_log_system(
            project=project,
            run=run,
            metrics_list=bucket["metrics"],
            timestamps=bucket["timestamps"],
            log_ids=log_ids,
        )
217
+
218
def _write_alerts_to_sqlite(self, alerts: list[AlertEntry]):
    """
    Groups alert entries by (project, run) and writes each group with `SQLiteStorage.bulk_alert`.

    Alert ids are passed only when at least one entry in the group has one.
    """
    grouped: dict[tuple, dict] = {}
    for entry in alerts:
        bucket = grouped.setdefault(
            (entry["project"], entry["run"]),
            {
                "titles": [],
                "texts": [],
                "levels": [],
                "steps": [],
                "timestamps": [],
                "alert_ids": [],
            },
        )
        bucket["titles"].append(entry["title"])
        bucket["texts"].append(entry.get("text"))
        bucket["levels"].append(entry["level"])
        bucket["steps"].append(entry.get("step"))
        bucket["timestamps"].append(entry.get("timestamp"))
        bucket["alert_ids"].append(entry.get("alert_id"))

    for (project, run), bucket in grouped.items():
        alert_ids = bucket["alert_ids"]
        if all(alert_id is None for alert_id in alert_ids):
            alert_ids = None
        SQLiteStorage.bulk_alert(
            project=project,
            run=run,
            titles=bucket["titles"],
            texts=bucket["texts"],
            levels=bucket["levels"],
            steps=bucket["steps"],
            timestamps=bucket["timestamps"],
            alert_ids=alert_ids,
        )
250
+
251
def _batch_sender(self):
    """
    Background loop that sends queued logs, system logs, uploads and alerts through `self._client`.

    Runs until the stop flag is set, every queue is empty, and no local
    buffer is pending. Batches that fail to send are handed to the matching
    `_persist_*_locally` helper (alerts go to `_write_alerts_to_sqlite`), and
    the wait between iterations backs off exponentially up to `MAX_BACKOFF`.
    """
    # Number of consecutive iterations in which at least one send failed.
    consecutive_failures = 0
    while (
        not self._stop_flag.is_set()
        or len(self._queued_logs) > 0
        or len(self._queued_system_logs) > 0
        or len(self._queued_uploads) > 0
        or len(self._queued_alerts) > 0
        or self._has_local_buffer
    ):
        if not self._stop_flag.is_set():
            if consecutive_failures:
                # Exponential backoff, capped at MAX_BACKOFF seconds.
                sleep_time = min(
                    BATCH_SEND_INTERVAL * (2**consecutive_failures), MAX_BACKOFF
                )
            else:
                sleep_time = BATCH_SEND_INTERVAL
            self._stop_flag.wait(timeout=sleep_time)
        elif self._has_local_buffer:
            self._stop_flag.wait(timeout=BATCH_SEND_INTERVAL)

        with self._client_lock:
            if self._client is None:
                # No client available: when stopping, hand whatever is still
                # queued to the local persistence helpers before giving up.
                if self._stop_flag.is_set():
                    if self._queued_logs:
                        self._persist_logs_locally(self._queued_logs)
                        self._queued_logs.clear()
                    if self._queued_system_logs:
                        self._persist_system_logs_locally(self._queued_system_logs)
                        self._queued_system_logs.clear()
                    if self._queued_uploads:
                        self._persist_uploads_locally(self._queued_uploads)
                        self._queued_uploads.clear()
                    if self._queued_alerts:
                        self._write_alerts_to_sqlite(self._queued_alerts)
                        self._queued_alerts.clear()
                # NOTE(review): nesting reconstructed from a flattened diff;
                # this return is assumed to apply whenever the client is None.
                return

            # Set when any of the four sends below raises.
            failed = False

            if self._queued_logs:
                # Snapshot and clear first so a failed batch is persisted
                # locally instead of being re-queued.
                logs_to_send = self._queued_logs.copy()
                self._queued_logs.clear()
                try:
                    self._client.predict(
                        api_name="/bulk_log",
                        logs=logs_to_send,
                        hf_token=huggingface_hub.utils.get_token(),
                    )
                except Exception:
                    self._persist_logs_locally(logs_to_send)
                    failed = True

            if self._queued_system_logs:
                system_logs_to_send = self._queued_system_logs.copy()
                self._queued_system_logs.clear()
                try:
                    self._client.predict(
                        api_name="/bulk_log_system",
                        logs=system_logs_to_send,
                        hf_token=huggingface_hub.utils.get_token(),
                    )
                except Exception:
                    self._persist_system_logs_locally(system_logs_to_send)
                    failed = True

            if self._queued_uploads:
                uploads_to_send = self._queued_uploads.copy()
                self._queued_uploads.clear()
                try:
                    self._client.predict(
                        api_name="/bulk_upload_media",
                        uploads=uploads_to_send,
                        hf_token=huggingface_hub.utils.get_token(),
                    )
                except Exception:
                    self._persist_uploads_locally(uploads_to_send)
                    failed = True

            if self._queued_alerts:
                alerts_to_send = self._queued_alerts.copy()
                self._queued_alerts.clear()
                try:
                    self._client.predict(
                        api_name="/bulk_alert",
                        alerts=alerts_to_send,
                        hf_token=huggingface_hub.utils.get_token(),
                    )
                except Exception:
                    # Failed alerts are written straight to SQLite.
                    self._write_alerts_to_sqlite(alerts_to_send)
                    failed = True

            if failed:
                consecutive_failures += 1
            else:
                consecutive_failures = 0
                # NOTE(review): assumed to run only after a fully successful
                # iteration — confirm nesting against the original file.
                if self._has_local_buffer:
                    self._flush_local_buffer()
349
+
350
def _persist_logs_locally(self, logs: list[LogEntry]):
    """
    Stores log entries via `SQLiteStorage.bulk_log`, tagged with this run's `space_id`.

    Does nothing when the run has no `space_id`. Entries are grouped by
    (project, run); the first truthy `config` seen for a run is the one
    stored. Afterwards the run is flagged as having a local buffer.
    """
    if not self._space_id:
        return

    grouped: dict[tuple, dict] = {}
    for entry in logs:
        bucket = grouped.setdefault(
            (entry["project"], entry["run"]),
            {"metrics": [], "steps": [], "log_ids": [], "config": None},
        )
        bucket["metrics"].append(entry["metrics"])
        bucket["steps"].append(entry.get("step"))
        bucket["log_ids"].append(entry.get("log_id"))
        if bucket["config"] is None and entry.get("config"):
            bucket["config"] = entry["config"]

    for (project, run), bucket in grouped.items():
        SQLiteStorage.bulk_log(
            project=project,
            run=run,
            metrics_list=bucket["metrics"],
            steps=bucket["steps"],
            log_ids=bucket["log_ids"],
            config=bucket["config"],
            space_id=self._space_id,
        )
    self._has_local_buffer = True
380
+
381
def _persist_system_logs_locally(self, logs: list[SystemLogEntry]):
    """
    Stores system log entries via `SQLiteStorage.bulk_log_system`, tagged with this run's `space_id`.

    Does nothing when the run has no `space_id`. Entries are grouped by
    (project, run). Afterwards the run is flagged as having a local buffer.
    """
    if not self._space_id:
        return

    grouped: dict[tuple, dict] = {}
    for entry in logs:
        bucket = grouped.setdefault(
            (entry["project"], entry["run"]),
            {"metrics": [], "timestamps": [], "log_ids": []},
        )
        bucket["metrics"].append(entry["metrics"])
        bucket["timestamps"].append(entry.get("timestamp"))
        bucket["log_ids"].append(entry.get("log_id"))

    for (project, run), bucket in grouped.items():
        SQLiteStorage.bulk_log_system(
            project=project,
            run=run,
            metrics_list=bucket["metrics"],
            timestamps=bucket["timestamps"],
            log_ids=bucket["log_ids"],
            space_id=self._space_id,
        )
    self._has_local_buffer = True
403
+
404
def _persist_uploads_locally(self, uploads: list[UploadEntry]):
    """
    Records each upload with `SQLiteStorage.add_pending_upload`.

    Does nothing when the run has no `space_id`. The file path is taken from
    the entry's `uploaded_file`: its "path" key if it is a dict, its `path`
    attribute if it has one, otherwise its string form. Afterwards the run is
    flagged as having a local buffer.
    """
    if not self._space_id:
        return

    for entry in uploads:
        uploaded = entry.get("uploaded_file")
        if isinstance(uploaded, dict):
            local_path = uploaded.get("path", "")
        elif hasattr(uploaded, "path"):
            local_path = str(uploaded.path)
        else:
            local_path = str(uploaded)
        SQLiteStorage.add_pending_upload(
            project=entry["project"],
            space_id=self._space_id,
            run_name=entry.get("run"),
            step=entry.get("step"),
            file_path=local_path,
            relative_path=entry.get("relative_path"),
        )
    self._has_local_buffer = True
425
+
426
def _flush_local_buffer(self):
    """
    Sends locally buffered logs, system logs and uploads through `self._client`.

    Each kind of pending data is read from `SQLiteStorage`, sent, and then
    cleared. `_has_local_buffer` is reset only if every step succeeds; any
    exception is swallowed and leaves the flag unchanged.
    """
    try:
        buffered_logs = SQLiteStorage.get_pending_logs(self.project)
        if buffered_logs:
            self._client.predict(
                api_name="/bulk_log",
                logs=buffered_logs["logs"],
                hf_token=huggingface_hub.utils.get_token(),
            )
            # Clear only after the send call returned without raising.
            SQLiteStorage.clear_pending_logs(self.project, buffered_logs["ids"])

        buffered_sys = SQLiteStorage.get_pending_system_logs(self.project)
        if buffered_sys:
            self._client.predict(
                api_name="/bulk_log_system",
                logs=buffered_sys["logs"],
                hf_token=huggingface_hub.utils.get_token(),
            )
            SQLiteStorage.clear_pending_system_logs(
                self.project, buffered_sys["ids"]
            )

        buffered_uploads = SQLiteStorage.get_pending_uploads(self.project)
        if buffered_uploads:
            upload_entries = []
            for u in buffered_uploads["uploads"]:
                fp = u["file_path"]
                # Files that no longer exist on disk are not sent.
                if Path(fp).exists():
                    upload_entries.append(
                        {
                            "project": u["project"],
                            "run": u["run"],
                            "step": u["step"],
                            "relative_path": u["relative_path"],
                            "uploaded_file": handle_file(fp),
                        }
                    )
            if upload_entries:
                self._client.predict(
                    api_name="/bulk_upload_media",
                    uploads=upload_entries,
                    hf_token=huggingface_hub.utils.get_token(),
                )
            # NOTE(review): nesting reconstructed from a flattened diff; this
            # is assumed to clear every pending id, including missing files.
            SQLiteStorage.clear_pending_uploads(
                self.project, buffered_uploads["ids"]
            )

        self._has_local_buffer = False
    except Exception:
        # Best effort: the flag stays set, so `_batch_sender` can call this again.
        pass
476
+
477
def _init_client_background(self):
    """Connect to the remote server with backoff, then run the batch sender.

    While no client exists, connection attempts are repeated with a delay
    of ``0.1 * coefficient`` seconds (capped at ``MAX_BACKOFF``), where the
    coefficients come from ``utils.fibo()``. The loop ends on the first
    successful connection or once the stop flag is set.
    """
    if self._client is None:
        for coefficient in utils.fibo():
            if self._stop_flag.is_set():
                break
            try:
                connected = Client(self.url, verbose=False)

                with self._client_lock:
                    self._client = connected
                break
            except Exception:
                # Connection failures are expected here; retry after a wait.
                pass
            # Waiting on the stop flag lets the delay be cut short.
            self._stop_flag.wait(timeout=min(0.1 * coefficient, MAX_BACKOFF))

    self._batch_sender()
495
+
496
def _queue_upload(
    self,
    file_path,
    step: int | None,
    relative_path: str | None = None,
    use_run_name: bool = True,
):
    """Store a file locally (local mode) or queue it for remote upload.

    Args:
        file_path: Path of the file to store or upload.
        step: Step the file belongs to, if any.
        relative_path: Optional path of the file relative to its media dir.
        use_run_name: When False the entry is not tied to this run's name.
    """
    if self._is_local:
        self._save_upload_locally(file_path, step, relative_path, use_run_name)
        return

    run_name = self.name if use_run_name else None
    queued: UploadEntry = {
        "project": self.project,
        "run": run_name,
        "step": step,
        "relative_path": relative_path,
        "uploaded_file": handle_file(file_path),
    }
    with self._client_lock:
        self._queued_uploads.append(queued)
515
+
516
def _save_upload_locally(
    self,
    file_path,
    step: int | None,
    relative_path: str | None = None,
    use_run_name: bool = True,
):
    """Copy a file to its project media path.

    Nothing is copied when the source does not exist or already resolves
    to the destination path.
    """
    destination = get_project_media_path(
        project=self.project,
        run=self.name if use_run_name else None,
        step=step,
        relative_path=relative_path,
    )
    source = Path(file_path)
    if not source.exists():
        return
    if str(source.resolve()) == str(Path(destination).resolve()):
        return
    shutil.copy(str(source), str(destination))
532
+
533
def _process_media(self, value: TrackioMedia, step: int | None) -> dict:
    """Save a media value, queue its upload when a Space is set, return its dict.

    A missing ``step`` is saved under step 0, while the upload is queued
    with the original ``step`` value.
    """
    save_step = 0 if step is None else step
    value._save(self.project, self.name, save_step)
    if self._space_id:
        absolute_path = value._get_absolute_file_path()
        self._queue_upload(absolute_path, step)
    return value._to_dict()
538
+
539
def _scan_and_queue_media_uploads(self, table_dict: dict, step: int | None):
    """Queue uploads for media referenced by the cells of a serialized table.

    Cells are read from ``table_dict["_value"]``. A cell, or an item of a
    list cell, is treated as media when it is a dict whose ``_type`` is one
    of the trackio image/video/audio types and it has a ``file_path``.
    Does nothing when no Space is configured.
    """
    if not self._space_id:
        return

    media_types = ["trackio.image", "trackio.video", "trackio.audio"]

    def queue_if_media(candidate):
        # Plain values and dicts that are not media are ignored.
        if not isinstance(candidate, dict):
            return
        if candidate.get("_type") not in media_types:
            return
        stored_path = candidate.get("file_path")
        if stored_path:
            from trackio.utils import MEDIA_DIR

            self._queue_upload(MEDIA_DIR / stored_path, step)

    for row in table_dict.get("_value", []):
        for cell in row.values():
            if isinstance(cell, list):
                for element in cell:
                    queue_if_media(element)
            else:
                queue_if_media(cell)
570
+
571
+ def _ensure_sender_alive(self):
572
+ if self._is_local:
573
+ if (
574
+ hasattr(self, "_local_sender_thread")
575
+ and not self._local_sender_thread.is_alive()
576
+ and not self._stop_flag.is_set()
577
+ ):
578
+ self._local_sender_thread = threading.Thread(
579
+ target=self._local_batch_sender
580
+ )
581
+ self._local_sender_thread.daemon = True
582
+ self._local_sender_thread.start()
583
+ else:
584
+ if (
585
+ self._client_thread is not None
586
+ and not self._client_thread.is_alive()
587
+ and not self._stop_flag.is_set()
588
+ ):
589
+ self._client_thread = threading.Thread(
590
+ target=self._init_client_background
591
+ )
592
+ self._client_thread.daemon = True
593
+ self._client_thread.start()
594
+
595
def log(self, metrics: dict, step: int | None = None):
    """Queue one log entry of metrics for this run.

    Keys that are reserved or start with ``__`` are prefixed with ``__``
    (with a warning). Table, Histogram, Markdown and media values are
    converted to dicts, and media found along the way is queued for upload.

    Args:
        metrics: Mapping of metric name to value.
        step: Step to log at; defaults to the run's next step.
    """
    renamed = []
    prepared = {}
    for name, val in metrics.items():
        if name in utils.RESERVED_KEYS or name.startswith("__"):
            renamed.append(name)
            prepared[f"__{name}"] = val
        else:
            prepared[name] = val

    if renamed:
        warnings.warn(f"Reserved keys renamed: {renamed} → '__{{key}}'")

    # Only existing keys are reassigned, so iterating the dict stays safe.
    for name, val in prepared.items():
        if isinstance(val, Table):
            table_dict = val._to_dict(
                project=self.project, run=self.name, step=step
            )
            prepared[name] = table_dict
            self._scan_and_queue_media_uploads(table_dict, step)
        elif isinstance(val, (Histogram, Markdown)):
            prepared[name] = val._to_dict()
        elif isinstance(val, TrackioMedia):
            prepared[name] = self._process_media(val, step)
    metrics = utils.serialize_values(prepared)

    if step is None:
        step = self._next_step
    self._next_step = max(self._next_step, step + 1)

    # The config travels with the first entry that is logged.
    config_to_log = None
    if self.config and not self._config_logged:
        config_to_log = utils.to_json_safe(self.config)
        self._config_logged = True

    entry: LogEntry = {
        "project": self.project,
        "run": self.name,
        "metrics": metrics,
        "step": step,
        "config": config_to_log,
        "log_id": uuid.uuid4().hex,
    }

    with self._client_lock:
        self._queued_logs.append(entry)
    self._ensure_sender_alive()
646
+
647
def alert(
    self,
    title: str,
    text: str | None = None,
    level: AlertLevel = AlertLevel.WARN,
    step: int | None = None,
    webhook_url: str | None = None,
):
    """Print an alert, queue it for storage and optionally post a webhook.

    Args:
        title: Alert title.
        text: Optional alert body.
        level: Alert severity.
        step: Step to attach; defaults to the last logged step (minimum 0).
        webhook_url: Overrides the run-level webhook URL for this alert.
    """
    if step is None:
        step = max(self._next_step - 1, 0)
    timestamp = datetime.now(timezone.utc).isoformat()

    print(format_alert_terminal(level, title, text, step))

    entry: AlertEntry = {
        "project": self.project,
        "run": self.name,
        "title": title,
        "text": text,
        "level": level.value,
        "step": step,
        "timestamp": timestamp,
        "alert_id": uuid.uuid4().hex,
    }

    with self._client_lock:
        self._queued_alerts.append(entry)
    self._ensure_sender_alive()

    target_url = webhook_url or self._webhook_url
    if not target_url:
        return
    if not should_send_webhook(level, self._webhook_min_level):
        return
    # The webhook is sent from a daemon thread, off the caller's thread.
    webhook_args = (
        target_url,
        level,
        title,
        text,
        self.project,
        self.name,
        step,
        timestamp,
    )
    threading.Thread(target=send_webhook, args=webhook_args, daemon=True).start()
693
+
694
def log_system(self, metrics: dict):
    """Queue one system-metrics entry stamped with the current UTC time."""
    entry: SystemLogEntry = {
        "project": self.project,
        "run": self.name,
        "metrics": utils.serialize_values(metrics),
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "log_id": uuid.uuid4().hex,
    }

    with self._client_lock:
        self._queued_system_logs.append(entry)
    self._ensure_sender_alive()
709
+
710
def finish(self):
    """Stop background work and wait up to 30 seconds for the sender thread.

    Stops the GPU monitor if there is one, sets the stop flag, then joins
    the local or remote sender thread. Warnings are emitted when the
    thread is still alive after the timeout and, in remote mode, when
    pending data remains in local storage.
    """
    if self._gpu_monitor is not None:
        self._gpu_monitor.stop()

    self._stop_flag.set()

    flush_timeout_msg = (
        "Could not flush all logs within 30s. Some data may be buffered locally."
    )

    if self._is_local:
        if hasattr(self, "_local_sender_thread"):
            print("* Run finished. Uploading logs to Trackio (please wait...)")
            self._local_sender_thread.join(timeout=30)
            if self._local_sender_thread.is_alive():
                warnings.warn(flush_timeout_msg)
        return

    if self._client_thread is not None:
        print("* Run finished. Uploading logs to Trackio Space (please wait...)")
        self._client_thread.join(timeout=30)
        if self._client_thread.is_alive():
            warnings.warn(flush_timeout_msg)
    # NOTE(review): assumed to run for every remote finish, regardless of
    # whether a client thread existed - confirm against the original nesting.
    if SQLiteStorage.has_pending_data(self.project):
        warnings.warn(
            f"* Some logs could not be sent to the Space (it may still be starting up). "
            f"They have been saved locally and will be sent automatically next time you call: "
            f'trackio.init(project="{self.project}", space_id="{self._space_id}")'
        )
trackio/server.py ADDED
@@ -0,0 +1,743 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """The main API layer for the Trackio UI."""
2
+
3
+ import base64
4
+ import logging
5
+ import os
6
+ import re
7
+ import secrets
8
+ import shutil
9
+ import sqlite3
10
+ import threading
11
+ import time
12
+ from collections import deque
13
+ from functools import lru_cache
14
+ from typing import Any
15
+ from urllib.parse import urlencode
16
+
17
+ import gradio as gr
18
+ import httpx
19
+ import huggingface_hub as hf
20
+ from starlette.requests import Request
21
+ from starlette.responses import RedirectResponse
22
+
23
+ import trackio.utils as utils
24
+ from trackio.media import get_project_media_path
25
+ from trackio.sqlite_storage import SQLiteStorage
26
+ from trackio.typehints import AlertEntry, LogEntry, SystemLogEntry, UploadEntry
27
+
28
+ HfApi = hf.HfApi()
29
+
30
+ logger = logging.getLogger("trackio")
31
+
32
+ _write_queue: deque[tuple[str, Any]] = deque()
33
+ _flush_thread: threading.Thread | None = None
34
+ _flush_lock = threading.Lock()
35
+ _FLUSH_INTERVAL = 2.0
36
+ _MAX_RETRIES = 30
37
+
38
+
39
def _enqueue_write(kind: str, payload: Any) -> None:
    """Append a deferred write to the queue and make sure a flusher runs."""
    pending = (kind, payload)
    _write_queue.append(pending)
    _ensure_flush_thread()
42
+
43
+
44
def _ensure_flush_thread() -> None:
    """Start the daemon flush thread unless one is already alive."""
    global _flush_thread
    with _flush_lock:
        already_running = _flush_thread is not None and _flush_thread.is_alive()
        if not already_running:
            _flush_thread = threading.Thread(target=_flush_loop, daemon=True)
            _flush_thread.start()
51
+
52
+
53
def _flush_loop() -> None:
    """Drain the deferred-write queue, retrying transient SQLite failures.

    The head entry is written through the matching ``SQLiteStorage`` bulk
    method. ``sqlite3.OperationalError`` messages containing
    "disk i/o error" or "readonly" are retried with a growing sleep, up to
    ``_MAX_RETRIES`` consecutive times. Every other failure drops the
    entry, so one bad payload cannot block the entries queued behind it.
    When the retry budget is exhausted the remaining entries are discarded.
    """
    known_kinds = ("bulk_log", "bulk_log_system", "bulk_alert")
    retries = 0
    while _write_queue and retries < _MAX_RETRIES:
        kind, payload = _write_queue[0]
        try:
            if kind in known_kinds:
                getattr(SQLiteStorage, kind)(**payload)
            else:
                logger.error(
                    "write queue: unknown entry kind %r, dropping entry", kind
                )
        except sqlite3.OperationalError as e:
            msg = str(e).lower()
            if "disk i/o error" in msg or "readonly" in msg:
                retries += 1
                logger.warning(
                    "write queue: flush failed (%s), retry %d/%d",
                    e,
                    retries,
                    _MAX_RETRIES,
                )
                time.sleep(min(_FLUSH_INTERVAL * retries, 15.0))
                # Keep the entry at the head and try it again.
                continue
            logger.error("write queue: non-retryable error (%s), dropping entry", e)
        except Exception:
            # Without this, any other exception would end the thread and
            # leave the same entry at the head to fail again on restart.
            logger.exception("write queue: unexpected error, dropping entry")
        _write_queue.popleft()
        retries = 0
    if _write_queue:
        logger.error(
            "write queue: giving up after %d retries, %d entries dropped",
            _MAX_RETRIES,
            len(_write_queue),
        )
        _write_queue.clear()
88
+
89
+
90
+ write_token = secrets.token_urlsafe(32)
91
+
92
+ OAUTH_CALLBACK_PATH = "/login/callback"
93
+ OAUTH_START_PATH = "/oauth/hf/start"
94
+
95
+
96
def _hf_access_token(request: gr.Request) -> str | None:
    """Return the Hugging Face access token attached to a request, if any.

    The ``x-trackio-oauth-session`` header is checked first: a known,
    unexpired session yields its stored token, and an expired one is
    removed. Otherwise the ``trackio_hf_access_token`` cookie is used.

    Returns:
        The token string, or None when none is found or the cookie is empty.
    """
    try:
        session_id = request.headers.get("x-trackio-oauth-session")
    except (AttributeError, TypeError):
        session_id = None
    if session_id:
        # .get/.pop instead of "in" + index + del: another request may
        # remove the same session between the separate steps.
        session = _oauth_sessions.get(session_id)
        if session is not None:
            token, created = session
            if time.monotonic() - created <= _OAUTH_SESSION_TTL:
                return token
            _oauth_sessions.pop(session_id, None)

    try:
        cookie_header = request.headers.get("cookie", "")
    except (AttributeError, TypeError):
        cookie_header = ""
    if cookie_header:
        for cookie in cookie_header.split(";"):
            parts = cookie.strip().split("=", 1)
            if len(parts) == 2 and parts[0] == "trackio_hf_access_token":
                return parts[1] or None
    return None
118
+
119
+
120
def _oauth_redirect_uri(request: Request) -> str:
    """Build the OAuth callback URL for this deployment.

    When ``SPACE_HOST`` is set, its first comma-separated entry is used
    with ``https``; otherwise the request's base URL is used.
    """
    space_host = os.getenv("SPACE_HOST")
    if not space_host:
        return str(request.base_url).rstrip("/") + OAUTH_CALLBACK_PATH
    primary_host = space_host.split(",")[0]
    return f"https://{primary_host}{OAUTH_CALLBACK_PATH}"
126
+
127
+
128
class TrackioServer(gr.Server):
    """Gradio server whose ``close`` tolerates a missing or stopped app."""

    def close(self, verbose: bool = True) -> None:
        """Close the underlying blocks only if they exist and are running."""
        if self.blocks is not None and self.blocks.is_running:
            self.blocks.close(verbose=verbose)
134
+
135
+
136
+ _OAUTH_STATE_TTL = 86400
137
+ _OAUTH_SESSION_TTL = 86400 * 30
138
+ _pending_oauth_states: dict[str, float] = {}
139
+ _oauth_sessions: dict[str, tuple[str, float]] = {}
140
+
141
+
142
def _evict_expired_oauth():
    """Remove pending OAuth states and sessions older than their TTLs."""
    now = time.monotonic()
    # Snapshot the items so entries can be deleted while looping.
    for state, created in list(_pending_oauth_states.items()):
        if now - created > _OAUTH_STATE_TTL:
            del _pending_oauth_states[state]
    for session_id, (_, created) in list(_oauth_sessions.items()):
        if now - created > _OAUTH_SESSION_TTL:
            del _oauth_sessions[session_id]
154
+
155
+
156
def oauth_hf_start(request: Request):
    """Redirect the browser to the Hugging Face OAuth authorization page.

    Redirects to "/" when ``OAUTH_CLIENT_ID`` is not set. Otherwise a new
    random state is recorded and sent along with the authorization request.
    """
    client_id = os.getenv("OAUTH_CLIENT_ID")
    if not client_id:
        return RedirectResponse(url="/", status_code=302)

    _evict_expired_oauth()
    state = secrets.token_urlsafe(32)
    _pending_oauth_states[state] = time.monotonic()

    query = urlencode(
        {
            "client_id": client_id,
            "redirect_uri": _oauth_redirect_uri(request),
            "response_type": "code",
            "scope": os.getenv("OAUTH_SCOPES", "openid profile").strip(),
            "state": state,
        }
    )
    authorize_url = "https://huggingface.co/oauth/authorize?" + query
    return RedirectResponse(url=authorize_url, status_code=302)
175
+
176
+
177
def oauth_hf_callback(request: Request):
    """Finish the OAuth flow: validate state, exchange the code, set the cookie.

    Any failure (missing credentials, unknown or expired state, missing
    code, failed token exchange) redirects to ``/?oauth_error=1``. On
    success a server-side session is stored and the response both carries
    the session id in the redirect URL and sets the token cookie.
    """
    failure_url = "/?oauth_error=1"

    def fail():
        return RedirectResponse(url=failure_url, status_code=302)

    client_id = os.getenv("OAUTH_CLIENT_ID")
    client_secret = os.getenv("OAUTH_CLIENT_SECRET")
    if not client_id or not client_secret:
        return fail()

    got_state = request.query_params.get("state")
    code = request.query_params.get("code")
    if not got_state or got_state not in _pending_oauth_states or not code:
        return fail()
    # Popping the state means it cannot be used a second time.
    state_created = _pending_oauth_states.pop(got_state)
    if time.monotonic() - state_created > _OAUTH_STATE_TTL:
        return fail()

    redirect_uri = _oauth_redirect_uri(request)
    credentials = f"{client_id}:{client_secret}".encode()
    auth_b64 = base64.b64encode(credentials).decode()
    try:
        with httpx.Client() as client:
            token_resp = client.post(
                "https://huggingface.co/oauth/token",
                headers={"Authorization": f"Basic {auth_b64}"},
                data={
                    "grant_type": "authorization_code",
                    "code": code,
                    "redirect_uri": redirect_uri,
                    "client_id": client_id,
                },
            )
            token_resp.raise_for_status()
            access_token = token_resp.json()["access_token"]
    except Exception:
        return fail()

    session_id = secrets.token_urlsafe(32)
    _oauth_sessions[session_id] = (access_token, time.monotonic())

    on_spaces = os.getenv("SYSTEM") == "spaces"
    response = RedirectResponse(
        url=f"/?oauth_session={session_id}", status_code=302
    )
    response.set_cookie(
        key="trackio_hf_access_token",
        value=access_token,
        httponly=True,
        samesite="none" if on_spaces else "lax",
        max_age=86400 * 30,
        path="/",
        secure=on_spaces,
    )
    return response
222
+
223
+
224
def oauth_logout(request: Request):
    """Delete the access-token cookie and redirect to the dashboard root."""
    on_spaces = os.getenv("SYSTEM") == "spaces"
    same_site = "none" if on_spaces else "lax"
    response = RedirectResponse(url="/", status_code=302)
    response.delete_cookie(
        "trackio_hf_access_token",
        path="/",
        samesite=same_site,
        secure=on_spaces,
    )
    return response
234
+
235
+
236
+ @lru_cache(maxsize=32)
237
+ def check_hf_token_has_write_access(hf_token: str | None) -> None:
238
+ """
239
+ Checks if the provided hf_token has write access to the space. If it does not
240
+ have write access, a PermissionError is raised. Otherwise, the function returns None.
241
+
242
+ The function is cached in two separate caches to avoid unnecessary API calls to /whoami-v2 which is heavily rate-limited:
243
+ - A cache of the whoami response for the hf_token using .whoami(token=hf_token, cache=True).
244
+ - This entire function is cached using @lru_cache(maxsize=32).
245
+ """
246
+ if os.getenv("SYSTEM") == "spaces":
247
+ if hf_token is None:
248
+ raise PermissionError(
249
+ "Expected a HF_TOKEN to be provided when logging to a Space"
250
+ )
251
+ space_token = os.getenv("HF_TOKEN")
252
+ if space_token and hf_token == space_token:
253
+ # If the HF_TOKEN is the same as the space token, we can assume that the user has write access.
254
+ # This avoids unnecessary API calls to /whoami-v2 which is heavily rate-limited.
255
+ return
256
+ who = HfApi.whoami(token=hf_token, cache=True)
257
+ owner_name = os.getenv("SPACE_AUTHOR_NAME")
258
+ repo_name = os.getenv("SPACE_REPO_NAME")
259
+ orgs = [o["name"] for o in who["orgs"]]
260
+ if owner_name != who["name"] and owner_name not in orgs:
261
+ raise PermissionError(
262
+ "Expected the provided hf_token to be the user owner of the space, or be a member of the org owner of the space"
263
+ )
264
+ access_token = who["auth"]["accessToken"]
265
+ if access_token["role"] == "fineGrained":
266
+ matched = False
267
+ for item in access_token["fineGrained"]["scoped"]:
268
+ if (
269
+ item["entity"]["type"] == "space"
270
+ and item["entity"]["name"] == f"{owner_name}/{repo_name}"
271
+ and "repo.write" in item["permissions"]
272
+ ):
273
+ matched = True
274
+ break
275
+ if (
276
+ (
277
+ item["entity"]["type"] == "user"
278
+ or item["entity"]["type"] == "org"
279
+ )
280
+ and item["entity"]["name"] == owner_name
281
+ and "repo.write" in item["permissions"]
282
+ ):
283
+ matched = True
284
+ break
285
+ if not matched:
286
+ raise PermissionError(
287
+ "Expected the provided hf_token with fine grained permissions to provide write access to the space"
288
+ )
289
+ elif access_token["role"] != "write":
290
+ raise PermissionError(
291
+ "Expected the provided hf_token to provide write permissions"
292
+ )
293
+
294
+
295
+ _oauth_write_cache: dict[str, tuple[bool, float]] = {}
296
+ _OAUTH_WRITE_CACHE_TTL = 300
297
+
298
+
299
+ def check_oauth_token_has_write_access(oauth_token: str | None) -> None:
300
+ if not os.getenv("SYSTEM") == "spaces":
301
+ return
302
+ if oauth_token is None:
303
+ raise PermissionError(
304
+ "Expected an oauth to be provided when logging to a Space"
305
+ )
306
+ now = time.monotonic()
307
+ cached = _oauth_write_cache.get(oauth_token)
308
+ if cached is not None:
309
+ allowed, ts = cached
310
+ if now - ts < _OAUTH_WRITE_CACHE_TTL:
311
+ if not allowed:
312
+ raise PermissionError(
313
+ "Expected the oauth token to be the user owner of the space, or be a member of the org owner of the space"
314
+ )
315
+ return
316
+ who = HfApi.whoami(oauth_token, cache=True)
317
+ user_name = who["name"]
318
+ owner_name = os.getenv("SPACE_AUTHOR_NAME")
319
+ if user_name == owner_name:
320
+ _oauth_write_cache[oauth_token] = (True, now)
321
+ return
322
+ for org in who["orgs"]:
323
+ if org["name"] == owner_name and org["roleInOrg"] == "write":
324
+ _oauth_write_cache[oauth_token] = (True, now)
325
+ return
326
+ _oauth_write_cache[oauth_token] = (False, now)
327
+ raise PermissionError(
328
+ "Expected the oauth token to be the user owner of the space, or be a member of the org owner of the space"
329
+ )
330
+
331
+
332
def check_write_access(request: gr.Request, token: str) -> bool:
    """Return True when the request carries the expected write token.

    The token is accepted from the ``trackio_write_token`` cookie or from
    the ``write_token`` query parameter. A non-matching cookie no longer
    ends the check, so a stale cookie cannot shadow a valid query
    parameter.

    Args:
        request: Incoming request exposing ``headers`` and, optionally,
            ``query_params``.
        token: The expected write token.
    """

    def matches(candidate) -> bool:
        if not isinstance(candidate, str) or not isinstance(token, str):
            return False
        # Constant-time comparison; bytes avoid the ASCII-only limit that
        # compare_digest places on str arguments.
        return secrets.compare_digest(
            candidate.encode("utf-8"), token.encode("utf-8")
        )

    cookies = request.headers.get("cookie", "")
    if cookies:
        for cookie in cookies.split(";"):
            parts = cookie.strip().split("=", 1)
            if (
                len(parts) == 2
                and parts[0] == "trackio_write_token"
                and matches(parts[1])
            ):
                return True
    if hasattr(request, "query_params") and request.query_params:
        return matches(request.query_params.get("write_token"))
    return False
343
+
344
+
345
def assert_can_mutate_runs(request: gr.Request) -> None:
    """Raise ``gr.Error`` unless the request may delete or rename runs.

    Outside of Spaces a valid write token is required. On Spaces a
    Hugging Face token, when present, decides the outcome on its own;
    only when none is present is the write token considered.
    """
    on_spaces = os.getenv("SYSTEM") == "spaces"
    if not on_spaces:
        if not check_write_access(request, write_token):
            raise gr.Error(
                "A write_token is required to delete or rename runs. "
                "Open the dashboard using the link that includes the write_token query parameter."
            )
        return

    hf_tok = _hf_access_token(request)
    if hf_tok is not None:
        try:
            check_oauth_token_has_write_access(hf_tok)
        except PermissionError as e:
            raise gr.Error(str(e)) from e
        return

    if not check_write_access(request, write_token):
        raise gr.Error(
            "Sign in with Hugging Face to delete or rename runs. You need write access to this Space, "
            "or open the dashboard using a link that includes the write_token query parameter."
        )
366
+
367
+
368
def get_run_mutation_status(request: gr.Request) -> dict[str, Any]:
    """Report whether the request may mutate runs, and by which mechanism.

    Returns:
        A dict with ``spaces`` (running on Spaces), ``allowed`` and ``auth``
        ("local", "oauth", "oauth_insufficient", "write_token" or "none").
    """

    def status(spaces: bool, allowed: bool, auth: str) -> dict[str, Any]:
        return {"spaces": spaces, "allowed": allowed, "auth": auth}

    if os.getenv("SYSTEM") != "spaces":
        if check_write_access(request, write_token):
            return status(False, True, "local")
        return status(False, False, "none")

    hf_tok = _hf_access_token(request)
    if hf_tok is not None:
        try:
            check_oauth_token_has_write_access(hf_tok)
        except PermissionError:
            return status(True, False, "oauth_insufficient")
        return status(True, True, "oauth")

    if check_write_access(request, write_token):
        return status(True, True, "write_token")
    return status(True, False, "none")
383
+
384
+
385
def upload_db_to_space(
    project: str, uploaded_db: gr.FileData, hf_token: str | None
) -> None:
    """Copy an uploaded database file to the project's database path.

    The token is checked for write access first, and the parent directory
    of the destination is created if missing.
    """
    check_hf_token_has_write_access(hf_token)
    destination = SQLiteStorage.get_project_db_path(project)
    parent_dir = os.path.dirname(destination)
    os.makedirs(parent_dir, exist_ok=True)
    shutil.copy(uploaded_db["path"], destination)
392
+
393
+
394
def bulk_upload_media(uploads: list[UploadEntry], hf_token: str | None) -> None:
    """Copy each uploaded media file to its project media path.

    The token is checked for write access before any file is copied.
    """
    check_hf_token_has_write_access(hf_token)
    for entry in uploads:
        destination = get_project_media_path(
            project=entry["project"],
            run=entry["run"],
            step=entry["step"],
            relative_path=entry["relative_path"],
        )
        source = entry["uploaded_file"]["path"]
        shutil.copy(source, destination)
404
+
405
+
406
def log(
    project: str,
    run: str,
    metrics: dict[str, Any],
    step: int | None,
    hf_token: str | None,
) -> None:
    """Store one metrics entry for a run after checking write access."""
    check_hf_token_has_write_access(hf_token)
    SQLiteStorage.log(
        project=project,
        run=run,
        metrics=metrics,
        step=step,
    )
415
+
416
+
417
def bulk_log(
    logs: list[LogEntry],
    hf_token: str | None,
) -> None:
    """Store a batch of metric entries, grouped by (project, run).

    The first non-empty ``config`` seen for a run is kept. Log ids are
    passed on only when at least one entry of the run has one. A write
    that fails with ``sqlite3.OperationalError`` is queued for a
    background retry.
    """
    check_hf_token_has_write_access(hf_token)

    grouped = {}
    for entry in logs:
        bucket = grouped.setdefault(
            (entry["project"], entry["run"]),
            {"metrics": [], "steps": [], "log_ids": [], "config": None},
        )
        bucket["metrics"].append(entry["metrics"])
        bucket["steps"].append(entry.get("step"))
        bucket["log_ids"].append(entry.get("log_id"))
        if entry.get("config") and bucket["config"] is None:
            bucket["config"] = entry["config"]

    for (project, run), bucket in grouped.items():
        log_ids = bucket["log_ids"]
        if all(log_id is None for log_id in log_ids):
            log_ids = None
        payload = {
            "project": project,
            "run": run,
            "metrics_list": bucket["metrics"],
            "steps": bucket["steps"],
            "config": bucket["config"],
            "log_ids": log_ids,
        }
        try:
            SQLiteStorage.bulk_log(**payload)
        except sqlite3.OperationalError:
            _enqueue_write("bulk_log", payload)
453
+
454
+
455
def bulk_log_system(
    logs: list[SystemLogEntry],
    hf_token: str | None,
) -> None:
    """Store a batch of system-metric entries, grouped by (project, run).

    Log ids are passed on only when at least one entry of the run has one.
    A write that fails with ``sqlite3.OperationalError`` is queued for a
    background retry.
    """
    check_hf_token_has_write_access(hf_token)

    grouped = {}
    for entry in logs:
        bucket = grouped.setdefault(
            (entry["project"], entry["run"]),
            {"metrics": [], "timestamps": [], "log_ids": []},
        )
        bucket["metrics"].append(entry["metrics"])
        bucket["timestamps"].append(entry.get("timestamp"))
        bucket["log_ids"].append(entry.get("log_id"))

    for (project, run), bucket in grouped.items():
        log_ids = bucket["log_ids"]
        if all(log_id is None for log_id in log_ids):
            log_ids = None
        payload = {
            "project": project,
            "run": run,
            "metrics_list": bucket["metrics"],
            "timestamps": bucket["timestamps"],
            "log_ids": log_ids,
        }
        try:
            SQLiteStorage.bulk_log_system(**payload)
        except sqlite3.OperationalError:
            _enqueue_write("bulk_log_system", payload)
483
+
484
+
485
def bulk_alert(
    alerts: list[AlertEntry],
    hf_token: str | None,
) -> None:
    """Store a batch of alerts, grouped by (project, run).

    Alert ids are passed on only when at least one alert of the run has
    one. A write that fails with ``sqlite3.OperationalError`` is queued
    for a background retry.
    """
    check_hf_token_has_write_access(hf_token)

    grouped: dict[tuple, dict] = {}
    for entry in alerts:
        bucket = grouped.setdefault(
            (entry["project"], entry["run"]),
            {
                "titles": [],
                "texts": [],
                "levels": [],
                "steps": [],
                "timestamps": [],
                "alert_ids": [],
            },
        )
        bucket["titles"].append(entry["title"])
        bucket["texts"].append(entry.get("text"))
        bucket["levels"].append(entry["level"])
        bucket["steps"].append(entry.get("step"))
        bucket["timestamps"].append(entry.get("timestamp"))
        bucket["alert_ids"].append(entry.get("alert_id"))

    for (project, run), bucket in grouped.items():
        alert_ids = bucket["alert_ids"]
        if all(alert_id is None for alert_id in alert_ids):
            alert_ids = None
        payload = {
            "project": project,
            "run": run,
            "titles": bucket["titles"],
            "texts": bucket["texts"],
            "levels": bucket["levels"],
            "steps": bucket["steps"],
            "timestamps": bucket["timestamps"],
            "alert_ids": alert_ids,
        }
        try:
            SQLiteStorage.bulk_alert(**payload)
        except sqlite3.OperationalError:
            _enqueue_write("bulk_alert", payload)
526
+
527
+
528
def get_alerts(
    project: str,
    run: str | None = None,
    level: str | None = None,
    since: str | None = None,
) -> list[dict]:
    """Return a project's alerts, optionally filtered by run, level and time."""
    alerts = SQLiteStorage.get_alerts(
        project,
        run_name=run,
        level=level,
        since=since,
    )
    return alerts
535
+
536
+
537
def get_metric_values(
    project: str,
    run: str,
    metric_name: str,
    step: int | None = None,
    around_step: int | None = None,
    at_time: str | None = None,
    window: int | None = None,
) -> list[dict]:
    """Return values of one metric for a run, forwarding all selectors to storage."""
    selectors = {
        "step": step,
        "around_step": around_step,
        "at_time": at_time,
        "window": window,
    }
    return SQLiteStorage.get_metric_values(project, run, metric_name, **selectors)
555
+
556
+
557
def get_runs_for_project(project: str) -> list[str]:
    """Return the runs stored for ``project``."""
    runs = SQLiteStorage.get_runs(project)
    return runs
559
+
560
+
561
def get_metrics_for_run(project: str, run: str) -> list[str]:
    """Return the metrics recorded for ``run`` in ``project``."""
    metrics = SQLiteStorage.get_all_metrics_for_run(project, run)
    return metrics
563
+
564
+
565
def filter_metrics_by_regex(metrics: list[str], filter_pattern: str) -> list[str]:
    """Keep the metrics that match ``filter_pattern``, ignoring case.

    A blank pattern returns ``metrics`` unchanged. A pattern that is not
    a valid regular expression is matched as a plain substring instead.
    """
    if not filter_pattern.strip():
        return metrics
    try:
        matcher = re.compile(filter_pattern, re.IGNORECASE)
    except re.error:
        needle = filter_pattern.lower()
        return [name for name in metrics if needle in name.lower()]
    return [name for name in metrics if matcher.search(name)]
575
+
576
+
577
def get_all_projects() -> list[str]:
    """Return the projects known to storage."""
    projects = SQLiteStorage.get_projects()
    return projects
579
+
580
+
581
def get_project_summary(project: str) -> dict:
    """Summarize a project: its runs, their count and the highest max step.

    ``last_activity`` is the largest value returned by
    ``SQLiteStorage.get_max_steps_for_runs``, or None when there are no
    runs or no step data.
    """
    runs = SQLiteStorage.get_runs(project)
    summary = {"project": project, "num_runs": 0, "runs": [], "last_activity": None}
    if not runs:
        return summary

    max_steps = SQLiteStorage.get_max_steps_for_runs(project)
    summary["num_runs"] = len(runs)
    summary["runs"] = runs
    if max_steps:
        summary["last_activity"] = max(max_steps.values())
    return summary
594
+
595
+
596
def get_run_summary(project: str, run: str) -> dict:
    """Summarize a run: log count, metric names, config and last step.

    For a run without logs, the metrics list is empty and config and
    last step are None; storage is not queried for them.
    """
    num_logs = SQLiteStorage.get_log_count(project, run)
    summary = {
        "project": project,
        "run": run,
        "num_logs": num_logs,
        "metrics": [],
        "config": None,
        "last_step": None,
    }
    if num_logs == 0:
        summary["num_logs"] = 0
        return summary

    summary["metrics"] = SQLiteStorage.get_all_metrics_for_run(project, run)
    summary["config"] = SQLiteStorage.get_run_config(project, run)
    summary["last_step"] = SQLiteStorage.get_last_step(project, run)
    return summary
620
+
621
+
622
def get_system_metrics_for_run(project: str, run: str) -> list[str]:
    """Return the system metrics recorded for ``run`` in ``project``."""
    system_metrics = SQLiteStorage.get_all_system_metrics_for_run(project, run)
    return system_metrics
624
+
625
+
626
def get_system_logs(project: str, run: str) -> list[dict]:
    """Return the system log entries stored for ``run`` in ``project``."""
    system_logs = SQLiteStorage.get_system_logs(project, run)
    return system_logs
628
+
629
+
630
def get_snapshot(
    project: str,
    run: str,
    step: int | None = None,
    around_step: int | None = None,
    at_time: str | None = None,
    window: int | None = None,
) -> dict:
    """Return a snapshot for a run, forwarding all selectors to storage."""
    selectors = {
        "step": step,
        "around_step": around_step,
        "at_time": at_time,
        "window": window,
    }
    return SQLiteStorage.get_snapshot(project, run, **selectors)
641
+
642
+
643
def get_logs(project: str, run: str) -> list[dict]:
    """Return a run's logs, requesting at most 1500 points from storage."""
    point_limit = 1500
    return SQLiteStorage.get_logs(project, run, max_points=point_limit)
645
+
646
+
647
def get_settings() -> dict:
    """Return dashboard settings gathered from utils and the environment.

    ``TRACKIO_PLOT_ORDER`` is split on commas with blank items removed.
    ``TRACKIO_TABLE_TRUNCATE_LENGTH`` defaults to 250; a value that is
    not an integer is logged and replaced by the default rather than
    failing the whole request.
    """
    default_truncate = 250
    raw_truncate = os.environ.get(
        "TRACKIO_TABLE_TRUNCATE_LENGTH", str(default_truncate)
    )
    try:
        table_truncate_length = int(raw_truncate)
    except ValueError:
        logger.warning(
            "Ignoring invalid TRACKIO_TABLE_TRUNCATE_LENGTH=%r; using %d",
            raw_truncate,
            default_truncate,
        )
        table_truncate_length = default_truncate

    return {
        "logo_urls": utils.get_logo_urls(),
        "color_palette": utils.get_color_palette(),
        "plot_order": [
            item.strip()
            for item in os.environ.get("TRACKIO_PLOT_ORDER", "").split(",")
            if item.strip()
        ],
        "table_truncate_length": table_truncate_length,
        "media_dir": str(utils.MEDIA_DIR),
        "space_id": os.getenv("SPACE_ID"),
    }
662
+
663
+
664
def get_project_files(project: str) -> list[dict]:
    """List the files stored under ``<MEDIA_DIR>/<project>/files``.

    ``project`` comes from the API caller, so a directory that resolves
    outside the media directory (via ``..`` or an absolute path) yields
    an empty list instead of being listed.

    Returns:
        One dict per file with ``name`` (path relative to the files
        directory), ``path`` and ``size``, sorted by path.
    """
    files_dir = utils.MEDIA_DIR / project / "files"
    # Resolved paths are used for the containment check only, so the
    # reported paths keep the same form as before.
    media_root = utils.MEDIA_DIR.resolve()
    if not files_dir.resolve().is_relative_to(media_root):
        return []
    if not files_dir.exists():
        return []
    return [
        {
            "name": str(file_path.relative_to(files_dir)),
            "path": str(file_path),
            "size": file_path.stat().st_size,
        }
        for file_path in sorted(files_dir.rglob("*"))
        if file_path.is_file()
    ]
680
+
681
+
682
def delete_run(request: gr.Request, project: str, run: str) -> bool:
    """Delete a run if the request is authorized; return storage's result."""
    assert_can_mutate_runs(request)
    deleted = SQLiteStorage.delete_run(project, run)
    return deleted
685
+
686
+
687
def rename_run(
    request: gr.Request,
    project: str,
    old_name: str,
    new_name: str,
) -> bool:
    """Rename a run if the request is authorized; always returns True."""
    assert_can_mutate_runs(request)
    SQLiteStorage.rename_run(
        project,
        old_name,
        new_name,
    )
    return True
696
+
697
+
698
def force_sync() -> bool:
    """Export to parquet and wait for one scheduler run; always returns True.

    Nothing is done when ``TRACKIO_BUCKET_ID`` is set.
    """
    if not os.environ.get("TRACKIO_BUCKET_ID"):
        SQLiteStorage._dataset_import_attempted = True
        SQLiteStorage.export_to_parquet()
        pending = SQLiteStorage.get_scheduler().trigger()
        # Block until the triggered run has finished.
        pending.result()
    return True
706
+
707
+
708
+ CSS = ""
709
+ HEAD = ""
710
+
711
+ gr.set_static_paths(paths=[utils.MEDIA_DIR])
712
+
713
+
714
def make_trackio_server() -> TrackioServer:
    """Create the dashboard server with its OAuth routes and API endpoints.

    Every API endpoint is registered under the name of its function, and
    the module's write token is attached as ``server.write_token``.
    """
    server = TrackioServer(title="Trackio Dashboard")

    oauth_routes = (
        (OAUTH_START_PATH, oauth_hf_start),
        (OAUTH_CALLBACK_PATH, oauth_hf_callback),
        ("/oauth/logout", oauth_logout),
    )
    for path, handler in oauth_routes:
        server.add_api_route(path, handler, methods=["GET"])

    endpoints = (
        ("get_run_mutation_status", get_run_mutation_status),
        ("upload_db_to_space", upload_db_to_space),
        ("bulk_upload_media", bulk_upload_media),
        ("log", log),
        ("bulk_log", bulk_log),
        ("bulk_log_system", bulk_log_system),
        ("bulk_alert", bulk_alert),
        ("get_alerts", get_alerts),
        ("get_metric_values", get_metric_values),
        ("get_runs_for_project", get_runs_for_project),
        ("get_metrics_for_run", get_metrics_for_run),
        ("get_all_projects", get_all_projects),
        ("get_project_summary", get_project_summary),
        ("get_run_summary", get_run_summary),
        ("get_system_metrics_for_run", get_system_metrics_for_run),
        ("get_system_logs", get_system_logs),
        ("get_snapshot", get_snapshot),
        ("get_logs", get_logs),
        ("get_settings", get_settings),
        ("get_project_files", get_project_files),
        ("delete_run", delete_run),
        ("rename_run", rename_run),
        ("force_sync", force_sync),
    )
    for endpoint_name, handler in endpoints:
        server.api(fn=handler, name=endpoint_name)

    server.write_token = write_token
    return server
trackio/sqlite_storage.py ADDED
@@ -0,0 +1,1920 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json as json_mod
2
+ import os
3
+ import shutil
4
+ import sqlite3
5
+ import time
6
+ from collections.abc import Iterator
7
+ from contextlib import contextmanager
8
+ from datetime import datetime, timezone
9
+ from pathlib import Path
10
+ from threading import Lock
11
+
12
+ try:
13
+ import fcntl
14
+ except ImportError:
15
+ fcntl = None
16
+
17
+ try:
18
+ import msvcrt as _msvcrt
19
+ except ImportError:
20
+ _msvcrt = None
21
+
22
+ import huggingface_hub as hf
23
+ import orjson
24
+ import pandas as pd
25
+
26
+ from trackio.commit_scheduler import CommitScheduler
27
+ from trackio.dummy_commit_scheduler import DummyCommitScheduler
28
+ from trackio.utils import (
29
+ MEDIA_DIR,
30
+ TRACKIO_DIR,
31
+ deserialize_values,
32
+ get_color_palette,
33
+ serialize_values,
34
+ )
35
+
36
# File extension appended to every per-project SQLite database filename.
DB_EXT = ".db"

# Lower-case journal-mode names accepted from the TRACKIO_SQLITE_JOURNAL_MODE
# environment variable; any other value is ignored by _configure_sqlite_pragmas.
_JOURNAL_MODE_WHITELIST = frozenset(
    {"wal", "delete", "truncate", "persist", "memory", "off"}
)
41
+
42
+
43
def _configure_sqlite_pragmas(conn: sqlite3.Connection) -> None:
    """Apply Trackio's journal-mode and tuning PRAGMAs to ``conn``.

    The journal mode is picked in this order: a whitelisted value from the
    ``TRACKIO_SQLITE_JOURNAL_MODE`` environment variable, else ``DELETE`` when
    the ``SYSTEM`` environment variable equals ``"spaces"``, else ``WAL``.
    """
    requested = os.environ.get("TRACKIO_SQLITE_JOURNAL_MODE", "").strip().lower()
    if requested in _JOURNAL_MODE_WHITELIST:
        mode = requested.upper()
    else:
        mode = "DELETE" if os.environ.get("SYSTEM") == "spaces" else "WAL"

    # `mode` is either a whitelisted keyword or a literal from above, so it is
    # safe to interpolate into the PRAGMA text.
    statements = (
        f"PRAGMA journal_mode = {mode}",
        "PRAGMA synchronous = NORMAL",
        "PRAGMA temp_store = MEMORY",
        "PRAGMA cache_size = -20000",
    )
    for statement in statements:
        conn.execute(statement)
55
+
56
+
57
class ProcessLock:
    """A file-based lock that works across processes using fcntl (Unix) or msvcrt (Windows).

    Intended to be used as a context manager. When neither ``fcntl`` nor
    ``msvcrt`` could be imported, entering the context is a no-op.

    Args:
        lockfile_path: Path of the lock file; parent directories are created
            on first acquisition.
        max_retries: Number of non-blocking acquisition attempts before giving
            up (values below 1 are treated as 1).
        retry_interval: Seconds to sleep between two attempts.
    """

    def __init__(
        self,
        lockfile_path: Path,
        *,
        max_retries: int = 100,
        retry_interval: float = 0.1,
    ):
        self.lockfile_path = lockfile_path
        self.max_retries = max_retries
        self.retry_interval = retry_interval
        # Open handle of the lock file while the lock is held, otherwise None.
        self.lockfile = None

    def _close_lockfile(self) -> None:
        """Close the lock file handle (if any) and forget it."""
        handle, self.lockfile = self.lockfile, None
        if handle is not None:
            handle.close()

    def __enter__(self):
        """Acquire the lock, retrying with a fixed delay.

        Raises:
            IOError: If the lock could not be acquired within the retry budget.
        """
        if fcntl is None and _msvcrt is None:
            return self
        self.lockfile_path.parent.mkdir(parents=True, exist_ok=True)
        self.lockfile = open(self.lockfile_path, "w")

        attempts = max(1, self.max_retries)
        for attempt in range(attempts):
            try:
                if fcntl is not None:
                    fcntl.flock(self.lockfile.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
                else:
                    _msvcrt.locking(self.lockfile.fileno(), _msvcrt.LK_NBLCK, 1)
                return self
            except (IOError, OSError) as exc:
                if attempt < attempts - 1:
                    time.sleep(self.retry_interval)
                    continue
                # Giving up: do not leak the file handle opened above.
                self._close_lockfile()
                waited = attempts * self.retry_interval
                raise IOError(
                    f"Could not acquire database lock after {waited:g} seconds"
                ) from exc

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Release the lock (best effort) and always close the lock file."""
        if self.lockfile is None:
            return
        try:
            if fcntl is not None:
                fcntl.flock(self.lockfile.fileno(), fcntl.LOCK_UN)
            elif _msvcrt is not None:
                _msvcrt.locking(self.lockfile.fileno(), _msvcrt.LK_UNLCK, 1)
        except (IOError, OSError):
            # Unlock failures are deliberately ignored; the handle is still
            # closed in the `finally` clause below.
            pass
        finally:
            self._close_lockfile()
94
+
95
+
96
+ class SQLiteStorage:
97
+ _dataset_import_attempted = False
98
+ _current_scheduler: CommitScheduler | DummyCommitScheduler | None = None
99
+ _scheduler_lock = Lock()
100
+
101
@staticmethod
@contextmanager
def _get_connection(
    db_path: Path,
    *,
    timeout: float = 30.0,
    configure_pragmas: bool = True,
    row_factory=sqlite3.Row,
) -> Iterator[sqlite3.Connection]:
    """Yield a SQLite connection to ``db_path`` that is closed on exit.

    Args:
        db_path: Database file to open.
        timeout: Passed to ``sqlite3.connect`` as its ``timeout``.
        configure_pragmas: When true, run ``_configure_sqlite_pragmas`` on the
            new connection before yielding it.
        row_factory: Assigned to ``connection.row_factory`` unless ``None``.

    Yields:
        The open connection, wrapped in its own transaction context
        (``with connection``) for the duration of the caller's block.
    """
    connection = sqlite3.connect(str(db_path), timeout=timeout)
    try:
        if configure_pragmas:
            _configure_sqlite_pragmas(connection)
        if row_factory is not None:
            connection.row_factory = row_factory
        # The connection's own context manager handles the transaction; the
        # `finally` below is what actually closes the connection.
        with connection:
            yield connection
    finally:
        connection.close()
120
+
121
@staticmethod
def _get_process_lock(project: str) -> ProcessLock:
    """Return the cross-process lock guarding ``project``'s database file.

    The lock file sits next to the database and shares its sanitized stem
    (``<stem>.lock``). Deriving it from the database path, rather than from
    the raw project name, means that every project name mapping to the same
    database file also maps to the same lock, and that characters such as
    path separators in ``project`` cannot place the lock file outside
    ``TRACKIO_DIR``.
    """
    db_path = SQLiteStorage.get_project_db_path(project)
    return ProcessLock(db_path.with_suffix(".lock"))
125
+
126
@staticmethod
def get_project_db_filename(project: str) -> str:
    """Return the database filename for ``project``.

    Only alphanumeric characters, ``-`` and ``_`` of the project name are
    kept; if nothing remains the stem ``default`` is used. ``DB_EXT`` is
    appended to the result.
    """
    allowed_punctuation = ("-", "_")
    kept_chars = [ch for ch in project if ch.isalnum() or ch in allowed_punctuation]
    stem = "".join(kept_chars).rstrip() or "default"
    return f"{stem}{DB_EXT}"
135
+
136
@staticmethod
def get_project_db_path(project: str) -> Path:
    """Return the full path of ``project``'s database file inside ``TRACKIO_DIR``."""
    return TRACKIO_DIR / SQLiteStorage.get_project_db_filename(project)
141
+
142
@staticmethod
def init_db(project: str) -> Path:
    """
    Create (if missing) every table and index of a project's SQLite database.

    Calls ``_ensure_hub_loaded`` first, makes sure the parent directory
    exists, then runs all schema statements while holding the project's
    process lock. The ``log_id`` / ``space_id`` columns are added to the
    ``metrics`` and ``system_metrics`` tables afterwards, together with their
    partial indexes.

    Returns the database path.
    """
    # Executed in this order; each statement is idempotent (IF NOT EXISTS).
    schema_statements = (
        """
        CREATE TABLE IF NOT EXISTS metrics (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            timestamp TEXT NOT NULL,
            run_name TEXT NOT NULL,
            step INTEGER NOT NULL,
            metrics TEXT NOT NULL
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS configs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            run_name TEXT NOT NULL,
            config TEXT NOT NULL,
            created_at TEXT NOT NULL,
            UNIQUE(run_name)
        )
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_metrics_run_step
        ON metrics(run_name, step)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_configs_run_name
        ON configs(run_name)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_metrics_run_timestamp
        ON metrics(run_name, timestamp)
        """,
        """
        CREATE TABLE IF NOT EXISTS system_metrics (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            timestamp TEXT NOT NULL,
            run_name TEXT NOT NULL,
            metrics TEXT NOT NULL
        )
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_system_metrics_run_timestamp
        ON system_metrics(run_name, timestamp)
        """,
        """
        CREATE TABLE IF NOT EXISTS project_metadata (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS pending_uploads (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            space_id TEXT NOT NULL,
            run_name TEXT,
            step INTEGER,
            file_path TEXT NOT NULL,
            relative_path TEXT,
            created_at TEXT NOT NULL
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS alerts (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            timestamp TEXT NOT NULL,
            run_name TEXT NOT NULL,
            title TEXT NOT NULL,
            text TEXT,
            level TEXT NOT NULL DEFAULT 'warn',
            step INTEGER,
            alert_id TEXT
        )
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_alerts_run
        ON alerts(run_name)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_alerts_timestamp
        ON alerts(timestamp)
        """,
        """
        CREATE UNIQUE INDEX IF NOT EXISTS idx_alerts_alert_id
        ON alerts(alert_id) WHERE alert_id IS NOT NULL
        """,
    )
    migrated_tables = ("metrics", "system_metrics")
    added_columns = ("log_id TEXT", "space_id TEXT")

    SQLiteStorage._ensure_hub_loaded()
    db_path = SQLiteStorage.get_project_db_path(project)
    db_path.parent.mkdir(parents=True, exist_ok=True)
    with SQLiteStorage._get_process_lock(project):
        with SQLiteStorage._get_connection(db_path, row_factory=None) as conn:
            cursor = conn.cursor()
            for statement in schema_statements:
                cursor.execute(statement)

            for table in migrated_tables:
                for column_def in added_columns:
                    try:
                        cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column_def}")
                    except sqlite3.OperationalError:
                        # Ignored on purpose (e.g. the column already exists).
                        pass
                # These indexes need both columns, so they come after the
                # ALTER TABLE attempts for this table.
                cursor.execute(
                    f"""CREATE UNIQUE INDEX IF NOT EXISTS idx_{table}_log_id
                    ON {table}(log_id) WHERE log_id IS NOT NULL"""
                )
                cursor.execute(
                    f"""CREATE INDEX IF NOT EXISTS idx_{table}_pending
                    ON {table}(space_id) WHERE space_id IS NOT NULL"""
                )

            conn.commit()
    return db_path
284
+
285
@staticmethod
def _flatten_json_column(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Expand the JSON documents stored in ``df[col]`` into separate columns.

    Each cell of ``col`` is parsed with ``orjson.loads`` and passed through
    ``deserialize_values``. The resulting keys become columns appended after
    the remaining columns of ``df``; a key that collides with an existing
    column is dropped in favour of the existing column. An empty ``df`` is
    returned unchanged.
    """
    if df.empty:
        return df
    decoded_rows = [deserialize_values(orjson.loads(raw)) for raw in df[col]]
    unpacked = pd.DataFrame(decoded_rows, index=df.index)
    remaining = df.drop(columns=[col])
    is_duplicate = unpacked.columns.isin(remaining.columns)
    fresh = unpacked.loc[:, ~is_duplicate]
    return pd.concat([remaining, fresh], axis=1)
299
+
300
@staticmethod
def _read_table(db_path: Path, table: str) -> pd.DataFrame:
    """Load every row of ``table`` from ``db_path`` into a DataFrame.

    The connection is opened with a 5 second timeout, without PRAGMA
    configuration and without a row factory. Any exception (for example a
    missing table) results in an empty DataFrame instead of being raised.
    """
    try:
        connection_ctx = SQLiteStorage._get_connection(
            db_path, timeout=5.0, configure_pragmas=False, row_factory=None
        )
        with connection_ctx as conn:
            frame = pd.read_sql(f"SELECT * FROM {table}", conn)
        return frame
    except Exception:
        return pd.DataFrame()
309
+
310
@staticmethod
def _flatten_and_write_parquet(
    db_path: Path, table: str, json_col: str, parquet_path: Path
) -> None:
    """Write ``table`` of ``db_path`` to ``parquet_path`` with ``json_col`` flattened.

    Nothing is written when the parquet file already exists and is at least
    as new as the database file (by modification time), or when the table is
    empty or cannot be read.
    """
    if parquet_path.exists():
        parquet_is_current = (
            db_path.stat().st_mtime <= parquet_path.stat().st_mtime
        )
        if parquet_is_current:
            return

    table_df = SQLiteStorage._read_table(db_path, table)
    if table_df.empty:
        return

    flattened = SQLiteStorage._flatten_json_column(table_df, json_col)
    flattened.to_parquet(
        parquet_path,
        write_page_index=True,
        use_content_defined_chunking=True,
    )
328
+
329
@staticmethod
def export_to_parquet():
    """
    Export every project database in ``TRACKIO_DIR`` to Parquet files.

    For a database ``<name>.db`` the ``metrics`` table goes to
    ``<name>.parquet``, ``system_metrics`` to ``<name>_system.parquet`` and
    ``configs`` to ``<name>_configs.parquet``. Returns without doing anything
    while ``_dataset_import_attempted`` is false or ``TRACKIO_DIR`` is missing.
    """
    if not SQLiteStorage._dataset_import_attempted or not TRACKIO_DIR.exists():
        return

    db_names = [name for name in os.listdir(TRACKIO_DIR) if name.endswith(DB_EXT)]
    for db_name in db_names:
        db_path = TRACKIO_DIR / db_name
        stem = db_path.stem
        # (table, JSON column, target parquet file), exported in this order.
        exports = (
            ("metrics", "metrics", db_path.with_suffix(".parquet")),
            ("system_metrics", "metrics", TRACKIO_DIR / (stem + "_system.parquet")),
            ("configs", "config", TRACKIO_DIR / (stem + "_configs.parquet")),
        )
        for table, json_col, target in exports:
            SQLiteStorage._flatten_and_write_parquet(db_path, table, json_col, target)
360
+
361
@staticmethod
def export_for_static_space(
    project: str, output_dir: Path, db_path_override: Path | None = None
) -> None:
    """
    Export one project's data as Parquet + JSON files for a static Space.

    Writes ``metrics.parquet``, ``aux/system_metrics.parquet`` and
    ``aux/configs.parquet`` (each only when its table has rows), plus
    ``runs.json`` and ``settings.json``, into ``output_dir``.

    Args:
        project: The project name.
        output_dir: Directory to write the exported files to (created if needed).
        db_path_override: If provided, read from this DB file instead of the
            default local project path.

    Raises:
        FileNotFoundError: If the database file does not exist.
    """
    db_path = db_path_override or SQLiteStorage.get_project_db_path(project)
    if not db_path.exists():
        raise FileNotFoundError(f"No database found for project '{project}'")

    output_dir.mkdir(parents=True, exist_ok=True)
    aux_dir = output_dir / "aux"
    aux_dir.mkdir(parents=True, exist_ok=True)

    # (table, JSON column, destination file), exported in this order.
    table_exports = (
        ("metrics", "metrics", output_dir / "metrics.parquet"),
        ("system_metrics", "metrics", aux_dir / "system_metrics.parquet"),
        ("configs", "config", aux_dir / "configs.parquet"),
    )
    for table, json_col, destination in table_exports:
        table_df = SQLiteStorage._read_table(db_path, table)
        if table_df.empty:
            continue
        flat = SQLiteStorage._flatten_json_column(table_df.copy(), json_col)
        flat.to_parquet(destination)

    # Per-run summary; an OperationalError (e.g. missing table) yields [].
    try:
        with SQLiteStorage._get_connection(db_path) as conn:
            cursor = conn.cursor()
            cursor.execute(
                """SELECT run_name, MAX(step) as last_step, COUNT(*) as log_count
                FROM metrics GROUP BY run_name ORDER BY MIN(timestamp) ASC"""
            )
            runs_meta = []
            for row in cursor.fetchall():
                runs_meta.append(
                    {
                        "name": row["run_name"],
                        "last_step": row["last_step"],
                        "log_count": row["log_count"],
                    }
                )
    except sqlite3.OperationalError:
        runs_meta = []
    with open(output_dir / "runs.json", "w") as f:
        json_mod.dump(runs_meta, f)

    raw_plot_order = os.environ.get("TRACKIO_PLOT_ORDER", "")
    trimmed_items = (piece.strip() for piece in raw_plot_order.split(","))
    settings = {
        "color_palette": get_color_palette(),
        "plot_order": [piece for piece in trimmed_items if piece],
    }
    with open(output_dir / "settings.json", "w") as f:
        json_mod.dump(settings, f)
429
+
430
+ @staticmethod
431
+ def _cleanup_wal_sidecars(db_path: Path) -> None:
432
+ """Remove leftover -wal/-shm files for a DB basename (prevents disk I/O errors)."""
433
+ for suffix in ("-wal", "-shm"):
434
+ sidecar = Path(str(db_path) + suffix)
435
+ try:
436
+ if sidecar.exists():
437
+ sidecar.unlink()
438
+ except Exception:
439
+ pass
440
+
441
@staticmethod
def import_from_parquet():
    """
    Rebuild project databases from the Parquet files found in ``TRACKIO_DIR``.

    ``<name>.parquet`` replaces the ``metrics`` table of ``<name>.db``,
    ``<name>_system.parquet`` replaces ``system_metrics`` and
    ``<name>_configs.parquet`` replaces ``configs``. The two auxiliary files
    are skipped when the project had no main parquet file and its database
    does not exist. Parquet files in flattened form (no JSON column) are
    re-packed: every non-structural column goes back into the JSON column.
    """
    if not TRACKIO_DIR.exists():
        return

    def _load_frame(parquet_path, json_col, structural_cols):
        """Read a parquet file, re-packing flattened columns into ``json_col``."""
        df = pd.read_parquet(parquet_path)
        if json_col in df.columns:
            return df
        present = [c for c in structural_cols if c in df.columns]
        payload = df.drop(columns=present)
        # .copy() so the new column is added to an independent frame rather
        # than to a slice of `df`.
        packed = df[present].copy()
        records = orjson.loads(payload.to_json(orient="records"))
        packed[json_col] = [orjson.dumps(serialize_values(row)) for row in records]
        return packed

    def _replace_table(db_path, table, df):
        """Overwrite ``table`` in ``db_path`` with the contents of ``df``."""
        with SQLiteStorage._get_connection(
            db_path, configure_pragmas=False, row_factory=None
        ) as conn:
            df.to_sql(table, conn, if_exists="replace", index=False)
            conn.commit()

    system_suffix = "_system.parquet"
    configs_suffix = "_configs.parquet"

    all_paths = os.listdir(TRACKIO_DIR)
    parquet_names = [
        f
        for f in all_paths
        if f.endswith(".parquet")
        and not f.endswith((system_suffix, configs_suffix))
    ]
    imported_projects = {Path(name).stem for name in parquet_names}
    for pq_name in parquet_names:
        parquet_path = TRACKIO_DIR / pq_name
        db_path = parquet_path.with_suffix(DB_EXT)

        SQLiteStorage._cleanup_wal_sidecars(db_path)

        df = _load_frame(
            parquet_path,
            "metrics",
            ("id", "timestamp", "run_name", "step", "log_id", "space_id"),
        )
        _replace_table(db_path, "metrics", df)

    # (file suffix, target table, JSON column, structural columns)
    auxiliary_imports = (
        (system_suffix, "system_metrics", "metrics", ("id", "timestamp", "run_name")),
        (configs_suffix, "configs", "config", ("id", "run_name", "created_at")),
    )
    for suffix, table, json_col, structural_cols in auxiliary_imports:
        for pq_name in all_paths:
            if not pq_name.endswith(suffix):
                continue
            # Strip only the trailing suffix; str.replace would also rewrite
            # an occurrence in the middle of the name.
            db_path = TRACKIO_DIR / (pq_name.removesuffix(suffix) + DB_EXT)
            project_name = db_path.stem
            if project_name not in imported_projects and not db_path.exists():
                continue

            df = _load_frame(TRACKIO_DIR / pq_name, json_col, structural_cols)
            _replace_table(db_path, table, df)
543
+
544
@staticmethod
def get_scheduler():
    """
    Return the process-wide commit scheduler, creating it on first use.

    A ``CommitScheduler`` targeting the dataset named by ``TRACKIO_DATASET_ID``
    is created when both ``TRACKIO_DATASET_ID`` and ``SPACE_REPO_NAME`` are
    set in the environment; otherwise a ``DummyCommitScheduler`` is used. The
    instance is cached on the class under ``_scheduler_lock``.
    """
    with SQLiteStorage._scheduler_lock:
        existing = SQLiteStorage._current_scheduler
        if existing is not None:
            return existing

        hf_token = os.environ.get("HF_TOKEN")
        dataset_id = os.environ.get("TRACKIO_DATASET_ID")
        space_repo_name = os.environ.get("SPACE_REPO_NAME")
        if dataset_id is None or space_repo_name is None:
            created = DummyCommitScheduler()
        else:
            created = CommitScheduler(
                repo_id=dataset_id,
                repo_type="dataset",
                folder_path=TRACKIO_DIR,
                private=True,
                allow_patterns=[
                    "*.parquet",
                    "*_system.parquet",
                    "*_configs.parquet",
                    "media/**/*",
                ],
                squash_history=True,
                token=hf_token,
                on_before_commit=SQLiteStorage.export_to_parquet,
            )
        SQLiteStorage._current_scheduler = created
        return created
576
+
577
@staticmethod
def log(project: str, run: str, metrics: dict, step: int | None = None):
    """
    Insert a single metrics row for ``run`` into the project's database.

    The database is initialised first (``init_db``) and the insert happens
    under the project's cross-process lock. When ``step`` is ``None`` the row
    gets step 0 for a run without rows, otherwise the run's highest step + 1.

    This method is not used in the latest versions of Trackio (replaced by bulk_log) but
    is kept for backwards compatibility for users who are connecting to a newer version of
    a Trackio Spaces dashboard with an older version of Trackio installed locally.
    """
    db_path = SQLiteStorage.init_db(project)
    with SQLiteStorage._get_process_lock(project):
        with SQLiteStorage._get_connection(db_path) as conn:
            cursor = conn.cursor()
            cursor.execute(
                """
                SELECT MAX(step)
                FROM metrics
                WHERE run_name = ?
                """,
                (run,),
            )
            last_step = cursor.fetchone()[0]

            if step is not None:
                current_step = step
            elif last_step is None:
                current_step = 0
            else:
                current_step = last_step + 1

            row = (
                datetime.now(timezone.utc).isoformat(),
                run,
                current_step,
                orjson.dumps(serialize_values(metrics)),
            )
            cursor.execute(
                """
                INSERT INTO metrics
                (timestamp, run_name, step, metrics)
                VALUES (?, ?, ?, ?)
                """,
                row,
            )
            conn.commit()
621
+
622
@staticmethod
def bulk_log(
    project: str,
    run: str,
    metrics_list: list[dict],
    steps: list[int] | None = None,
    timestamps: list[str] | None = None,
    config: dict | None = None,
    log_ids: list[str] | None = None,
    space_id: str | None = None,
):
    """
    Insert many metrics rows for ``run`` in one transaction.

    The database is initialised first (``init_db``) and all writes happen
    under the project's cross-process lock. Rows are inserted with
    ``INSERT OR IGNORE``. A truthy ``config`` replaces the run's row in the
    ``configs`` table.

    Step handling: ``steps=None`` numbers the rows ``0..len-1``; ``None``
    entries inside ``steps`` are filled with consecutive values starting
    after the run's current highest step (or at 0 for a run without rows).

    Raises:
        ValueError: If ``metrics_list``, ``steps`` and ``timestamps`` differ
            in length.
    """
    if not metrics_list:
        return

    if timestamps is None:
        timestamps = [datetime.now(timezone.utc).isoformat()] * len(metrics_list)

    db_path = SQLiteStorage.init_db(project)
    with SQLiteStorage._get_process_lock(project):
        with SQLiteStorage._get_connection(db_path) as conn:
            cursor = conn.cursor()

            if steps is None:
                steps = list(range(len(metrics_list)))
            elif any(s is None for s in steps):
                cursor.execute(
                    "SELECT MAX(step) FROM metrics WHERE run_name = ?", (run,)
                )
                highest = cursor.fetchone()[0]
                next_auto_step = 0 if highest is None else highest + 1
                filled_steps = []
                for given in steps:
                    if given is not None:
                        filled_steps.append(given)
                        continue
                    filled_steps.append(next_auto_step)
                    next_auto_step += 1
                steps = filled_steps

            if not (len(metrics_list) == len(steps) == len(timestamps)):
                raise ValueError(
                    "metrics_list, steps, and timestamps must have the same length"
                )

            # log_ids is indexed (not zipped) so a too-short list still fails.
            rows = [
                (
                    timestamps[i],
                    run,
                    steps[i],
                    orjson.dumps(serialize_values(entry)),
                    log_ids[i] if log_ids else None,
                    space_id,
                )
                for i, entry in enumerate(metrics_list)
            ]

            cursor.executemany(
                """
                INSERT OR IGNORE INTO metrics
                (timestamp, run_name, step, metrics, log_id, space_id)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                rows,
            )

            if config:
                config_row = (
                    run,
                    orjson.dumps(serialize_values(config)),
                    datetime.now(timezone.utc).isoformat(),
                )
                cursor.execute(
                    """
                    INSERT OR REPLACE INTO configs
                    (run_name, config, created_at)
                    VALUES (?, ?, ?)
                    """,
                    config_row,
                )

            conn.commit()
712
+
713
@staticmethod
def bulk_log_system(
    project: str,
    run: str,
    metrics_list: list[dict],
    timestamps: list[str] | None = None,
    log_ids: list[str] | None = None,
    space_id: str | None = None,
):
    """
    Insert many system-metrics rows (GPU, etc.) for ``run``.

    System metrics carry no step number; rows are keyed by timestamp only.
    Missing ``timestamps`` default to the current UTC time for every row.
    Rows are inserted with ``INSERT OR IGNORE`` under the project's
    cross-process lock.

    Raises:
        ValueError: If ``metrics_list`` and ``timestamps`` differ in length.
    """
    if not metrics_list:
        return

    if timestamps is None:
        timestamps = [datetime.now(timezone.utc).isoformat()] * len(metrics_list)

    if len(metrics_list) != len(timestamps):
        raise ValueError("metrics_list and timestamps must have the same length")

    db_path = SQLiteStorage.init_db(project)
    with SQLiteStorage._get_process_lock(project):
        with SQLiteStorage._get_connection(db_path) as conn:
            cursor = conn.cursor()
            # log_ids is indexed (not zipped) so a too-short list still fails.
            rows = [
                (
                    timestamps[i],
                    run,
                    orjson.dumps(serialize_values(entry)),
                    log_ids[i] if log_ids else None,
                    space_id,
                )
                for i, entry in enumerate(metrics_list)
            ]

            cursor.executemany(
                """
                INSERT OR IGNORE INTO system_metrics
                (timestamp, run_name, metrics, log_id, space_id)
                VALUES (?, ?, ?, ?, ?)
                """,
                rows,
            )
            conn.commit()
761
+
762
@staticmethod
def bulk_alert(
    project: str,
    run: str,
    titles: list[str],
    texts: list[str | None],
    levels: list[str],
    steps: list[int | None],
    timestamps: list[str] | None = None,
    alert_ids: list[str] | None = None,
):
    """
    Insert many alerts for ``run`` in one transaction.

    One row is written per entry of ``titles``; ``texts``, ``levels``,
    ``steps`` (and ``timestamps`` / ``alert_ids`` when given) are read at the
    same index. Missing ``timestamps`` default to the current UTC time. Rows
    are inserted with ``INSERT OR IGNORE`` under the project's cross-process
    lock. Does nothing when ``titles`` is empty.
    """
    if not titles:
        return

    if timestamps is None:
        timestamps = [datetime.now(timezone.utc).isoformat()] * len(titles)

    db_path = SQLiteStorage.init_db(project)
    with SQLiteStorage._get_process_lock(project):
        with SQLiteStorage._get_connection(db_path) as conn:
            cursor = conn.cursor()
            # Parallel lists are indexed (not zipped) so a too-short list
            # still raises instead of silently dropping alerts.
            rows = [
                (
                    timestamps[i],
                    run,
                    title,
                    texts[i],
                    levels[i],
                    steps[i],
                    alert_ids[i] if alert_ids else None,
                )
                for i, title in enumerate(titles)
            ]

            cursor.executemany(
                """
                INSERT OR IGNORE INTO alerts
                (timestamp, run_name, title, text, level, step, alert_id)
                VALUES (?, ?, ?, ?, ?, ?, ?)
                """,
                rows,
            )
            conn.commit()
807
+
808
@staticmethod
def get_alerts(
    project: str,
    run_name: str | None = None,
    level: str | None = None,
    since: str | None = None,
) -> list[dict]:
    """
    Return a project's alerts, newest first.

    Args:
        project: The project name.
        run_name: If given, only alerts of this run.
        level: If given, only alerts with exactly this level.
        since: If given, only alerts whose timestamp compares greater than
            this value.

    Returns:
        A list of dicts with the keys ``timestamp``, ``run``, ``title``,
        ``text``, ``level`` and ``step``. Empty when the database file or the
        ``alerts`` table does not exist.
    """
    db_path = SQLiteStorage.get_project_db_path(project)
    if not db_path.exists():
        return []

    # Optional filters; only those with a value end up in the WHERE clause.
    candidate_filters = (
        ("run_name = ?", run_name),
        ("level = ?", level),
        ("timestamp > ?", since),
    )
    active_filters = [
        (clause, value) for clause, value in candidate_filters if value is not None
    ]
    query = "SELECT timestamp, run_name, title, text, level, step FROM alerts"
    if active_filters:
        query += " WHERE " + " AND ".join(clause for clause, _ in active_filters)
    query += " ORDER BY timestamp DESC"
    params = [value for _, value in active_filters]

    # Output key -> source column.
    field_map = (
        ("timestamp", "timestamp"),
        ("run", "run_name"),
        ("title", "title"),
        ("text", "text"),
        ("level", "level"),
        ("step", "step"),
    )

    with SQLiteStorage._get_connection(db_path) as conn:
        cursor = conn.cursor()
        try:
            cursor.execute(query, params)
            return [
                {key: row[column] for key, column in field_map}
                for row in cursor.fetchall()
            ]
        except sqlite3.OperationalError as e:
            if "no such table: alerts" in str(e):
                return []
            raise
857
+
858
@staticmethod
def get_alert_count(project: str) -> int:
    """Return the number of alerts stored for ``project``.

    Returns 0 when the database file does not exist or the query raises
    ``sqlite3.OperationalError``.
    """
    db_path = SQLiteStorage.get_project_db_path(project)
    if not db_path.exists():
        return 0

    with SQLiteStorage._get_connection(db_path) as conn:
        try:
            (total,) = conn.cursor().execute("SELECT COUNT(*) FROM alerts").fetchone()
        except sqlite3.OperationalError:
            return 0
        return total
871
+
872
@staticmethod
def get_system_logs(project: str, run: str) -> list[dict]:
    """Return the system metrics of ``run`` ordered by timestamp.

    Each entry is the deserialized metrics dict of one row with the row's
    timestamp added under the ``"timestamp"`` key (there are no steps).
    Returns an empty list when the database file or the ``system_metrics``
    table does not exist.
    """
    db_path = SQLiteStorage.get_project_db_path(project)
    if not db_path.exists():
        return []

    with SQLiteStorage._get_connection(db_path) as conn:
        cursor = conn.cursor()
        try:
            cursor.execute(
                """
                SELECT timestamp, metrics
                FROM system_metrics
                WHERE run_name = ?
                ORDER BY timestamp
                """,
                (run,),
            )
            entries = []
            for row in cursor.fetchall():
                entry = deserialize_values(orjson.loads(row["metrics"]))
                entry["timestamp"] = row["timestamp"]
                entries.append(entry)
            return entries
        except sqlite3.OperationalError as e:
            if "no such table: system_metrics" in str(e):
                return []
            raise
904
+
905
@staticmethod
def get_all_system_metrics_for_run(project: str, run: str) -> list[str]:
    """Return the system metric names of a run.

    Delegates to ``_get_metric_names`` on the ``system_metrics`` table with
    the ``timestamp`` key excluded.
    """
    excluded_keys = {"timestamp"}
    metric_names = SQLiteStorage._get_metric_names(
        project, run, "system_metrics", exclude_keys=excluded_keys
    )
    return metric_names
911
+
912
@staticmethod
def has_system_metrics(project: str) -> bool:
    """Check if a project has any system metrics logged.

    Returns:
        ``True`` when the ``system_metrics`` table holds at least one row;
        ``False`` when it is empty, when the database file does not exist, or
        when the query raises ``sqlite3.OperationalError`` (for example
        because the table is missing).
    """
    db_path = SQLiteStorage.get_project_db_path(project)
    if not db_path.exists():
        return False

    with SQLiteStorage._get_connection(db_path) as conn:
        cursor = conn.cursor()
        try:
            # EXISTS stops at the first row. The previous
            # "SELECT COUNT(*) ... LIMIT 1" still counted every row, because
            # LIMIT only applies to the single aggregate result row.
            cursor.execute("SELECT EXISTS(SELECT 1 FROM system_metrics)")
            return bool(cursor.fetchone()[0])
        except sqlite3.OperationalError:
            return False
927
+
928
@staticmethod
def get_log_count(project: str, run: str) -> int:
    """Return how many metrics rows exist for ``run`` in ``project``.

    ``_ensure_hub_loaded`` is called first. Returns 0 when the database file
    or the ``metrics`` table does not exist; any other
    ``sqlite3.OperationalError`` is re-raised.
    """
    SQLiteStorage._ensure_hub_loaded()
    db_path = SQLiteStorage.get_project_db_path(project)
    if not db_path.exists():
        return 0
    try:
        with SQLiteStorage._get_connection(db_path) as conn:
            result = conn.cursor().execute(
                "SELECT COUNT(*) FROM metrics WHERE run_name = ?",
                (run,),
            )
            return result.fetchone()[0]
    except sqlite3.OperationalError as e:
        if "no such table: metrics" not in str(e):
            raise
        return 0
946
+
947
+ @staticmethod
948
def get_last_step(project: str, run: str) -> int | None:
    """Return the largest ``step`` stored for ``run``, or ``None``.

    ``None`` is returned when the project database file is missing, when the
    ``metrics`` table does not exist, or when the run has no rows (``MAX``
    yields NULL). Other ``sqlite3.OperationalError`` instances propagate.
    """
    database = SQLiteStorage.get_project_db_path(project)
    if not database.exists():
        return None
    try:
        with SQLiteStorage._get_connection(database) as conn:
            cur = conn.cursor()
            cur.execute(
                "SELECT MAX(step) FROM metrics WHERE run_name = ?",
                (run,),
            )
            record = cur.fetchone()
            if not record:
                return None
            # A NULL aggregate comes back as None, which is what we return.
            return record[0]
    except sqlite3.OperationalError as exc:
        if "no such table: metrics" not in str(exc):
            raise
        return None
965
+
966
+ @staticmethod
967
def get_logs(project: str, run: str, max_points: int | None = None) -> list[dict]:
    """Return the logged metrics of ``run``, ordered by timestamp.

    Every entry is the decoded ``metrics`` payload of one row plus the row's
    ``"timestamp"`` and ``"step"`` values. When ``max_points`` is given and
    more rows exist, rows are sampled at evenly spaced positions and the last
    row is always kept, so up to ``max_points + 1`` entries can be returned.

    Returns an empty list when the project database file or the ``metrics``
    table is missing; other ``sqlite3.OperationalError`` instances propagate.
    """
    database = SQLiteStorage.get_project_db_path(project)
    if not database.exists():
        return []

    def _decode(record) -> dict:
        entry = deserialize_values(orjson.loads(record["metrics"]))
        entry["timestamp"] = record["timestamp"]
        entry["step"] = record["step"]
        return entry

    try:
        with SQLiteStorage._get_connection(database) as conn:
            cur = conn.cursor()
            cur.execute(
                """
                SELECT timestamp, step, metrics
                FROM metrics
                WHERE run_name = ?
                ORDER BY timestamp
                """,
                (run,),
            )

            records = cur.fetchall()
            total = len(records)
            if max_points is not None and total > max_points:
                # NOTE(review): max_points == 0 divides by zero here when
                # rows exist; behaviour kept as-is.
                stride = total / max_points
                keep = {int(k * stride) for k in range(max_points)}
                keep.add(total - 1)
                records = [records[k] for k in sorted(keep)]

            return [_decode(record) for record in records]
    except sqlite3.OperationalError as exc:
        if "no such table: metrics" not in str(exc):
            raise
        return []
1005
+
1006
+ @staticmethod
1007
def load_from_dataset():
    """Pull previously stored data into the local trackio directory.

    If ``TRACKIO_BUCKET_ID`` is set, the bucket is downloaded (at most once,
    errors ignored) and nothing else is done. Otherwise, when both
    ``TRACKIO_DATASET_ID`` and ``SPACE_REPO_NAME`` are set, missing
    ``.parquet`` files and ``media/`` assets are downloaded from the dataset
    repo under the scheduler lock, and parquet files are imported if anything
    new was fetched. In the non-bucket path the "import attempted" flag is
    set at the end regardless of which variables were present.
    """
    bucket = os.environ.get("TRACKIO_BUCKET_ID")
    if bucket is not None:
        if SQLiteStorage._dataset_import_attempted:
            return
        from trackio.bucket_storage import download_bucket_to_trackio_dir

        try:
            download_bucket_to_trackio_dir(bucket)
        except Exception:
            # Best effort: a failed bucket download must not block start-up.
            pass
        SQLiteStorage._dataset_import_attempted = True
        return

    dataset_id = os.environ.get("TRACKIO_DATASET_ID")
    space_repo_name = os.environ.get("SPACE_REPO_NAME")
    if dataset_id is not None and space_repo_name is not None:
        api = hf.HfApi()
        fetched_any = False
        if not TRACKIO_DIR.exists():
            TRACKIO_DIR.mkdir(parents=True, exist_ok=True)
        with SQLiteStorage.get_scheduler().lock:
            try:
                for name in api.list_repo_files(dataset_id, repo_type="dataset"):
                    # Only parquet exports and media assets are of interest,
                    # and files already present locally are left alone.
                    wanted = name.endswith(".parquet") or name.startswith("media/")
                    if wanted and not (TRACKIO_DIR / name).exists():
                        hf.hf_hub_download(
                            dataset_id, name, repo_type="dataset", local_dir=TRACKIO_DIR
                        )
                        fetched_any = True
            except (hf.errors.EntryNotFoundError, hf.errors.RepositoryNotFoundError):
                pass
            if fetched_any:
                SQLiteStorage.import_from_parquet()
    SQLiteStorage._dataset_import_attempted = True
1046
+
1047
+ @staticmethod
1048
def _ensure_hub_loaded():
    """Run ``load_from_dataset`` unless an import was already attempted."""
    if SQLiteStorage._dataset_import_attempted:
        return
    SQLiteStorage.load_from_dataset()
1051
+
1052
+ @staticmethod
1053
def get_projects() -> list[str]:
    """
    Return the sorted project names found in the trackio directory.

    A project is any file in ``TRACKIO_DIR`` whose name ends with ``DB_EXT``;
    the project name is the file's stem. Returns an empty list when the
    directory does not exist.
    """
    SQLiteStorage._ensure_hub_loaded()

    if not TRACKIO_DIR.exists():
        return []

    names = {path.stem for path in TRACKIO_DIR.glob(f"*{DB_EXT}")}
    return sorted(names)
1067
+
1068
+ @staticmethod
1069
def get_runs(project: str) -> list[str]:
    """Return the run names of ``project``, earliest first.

    Runs are ordered by the smallest timestamp among their ``metrics`` rows.
    Returns an empty list when the project database file or the ``metrics``
    table is missing; other ``sqlite3.OperationalError`` instances propagate.
    """
    SQLiteStorage._ensure_hub_loaded()
    database = SQLiteStorage.get_project_db_path(project)
    if not database.exists():
        return []

    try:
        with SQLiteStorage._get_connection(database) as conn:
            cur = conn.cursor()
            cur.execute(
                """
                SELECT run_name
                FROM metrics
                GROUP BY run_name
                ORDER BY MIN(timestamp) ASC
                """,
            )
            names = []
            for record in cur.fetchall():
                names.append(record[0])
            return names
    except sqlite3.OperationalError as exc:
        if "no such table: metrics" not in str(exc):
            raise
        return []
1092
+
1093
+ @staticmethod
1094
def get_max_steps_for_runs(project: str) -> dict[str, int]:
    """Map every run name in ``project`` to its largest logged step.

    Returns an empty dict when the project database file or the ``metrics``
    table is missing; other ``sqlite3.OperationalError`` instances propagate.
    """
    database = SQLiteStorage.get_project_db_path(project)
    if not database.exists():
        return {}

    try:
        with SQLiteStorage._get_connection(database) as conn:
            cur = conn.cursor()
            cur.execute(
                """
                SELECT run_name, MAX(step) as max_step
                FROM metrics
                GROUP BY run_name
                """
            )
            return {
                record["run_name"]: record["max_step"] for record in cur.fetchall()
            }
    except sqlite3.OperationalError as exc:
        if "no such table: metrics" not in str(exc):
            raise
        return {}
1120
+
1121
+ @staticmethod
1122
def get_max_step_for_run(project: str, run: str) -> int | None:
    """Return the largest step logged for ``run``, or ``None`` if there is none.

    ``None`` is also returned when the project database file or the
    ``metrics`` table is missing; other ``sqlite3.OperationalError``
    instances propagate.
    """
    database = SQLiteStorage.get_project_db_path(project)
    if not database.exists():
        return None

    try:
        with SQLiteStorage._get_connection(database) as conn:
            cur = conn.cursor()
            cur.execute("SELECT MAX(step) FROM metrics WHERE run_name = ?", (run,))
            # The aggregate always yields one row; its value is NULL/None
            # when the run has no rows.
            return cur.fetchone()[0]
    except sqlite3.OperationalError as exc:
        if "no such table: metrics" not in str(exc):
            raise
        return None
1140
+
1141
+ @staticmethod
1142
def get_run_config(project: str, run: str) -> dict | None:
    """Return the decoded configuration stored for ``run``.

    Returns ``None`` when the project database file is missing, when the
    ``configs`` table does not exist, or when no config row matches the run.
    Other ``sqlite3.OperationalError`` instances propagate.
    """
    database = SQLiteStorage.get_project_db_path(project)
    if not database.exists():
        return None

    with SQLiteStorage._get_connection(database) as conn:
        cur = conn.cursor()
        try:
            cur.execute(
                """
                SELECT config FROM configs WHERE run_name = ?
                """,
                (run,),
            )
            record = cur.fetchone()
            if not record:
                return None
            return deserialize_values(orjson.loads(record["config"]))
        except sqlite3.OperationalError as exc:
            if "no such table: configs" not in str(exc):
                raise
            return None
1167
+
1168
+ @staticmethod
1169
def delete_run(project: str, run: str) -> bool:
    """Delete every stored row of ``run`` from the project database.

    Rows are removed from ``metrics`` and ``configs``, and from
    ``system_metrics`` and ``alerts`` when those deletes succeed (an
    ``sqlite3.OperationalError`` on either optional table is ignored).

    Returns:
        ``True`` after the deletes were committed; ``False`` when the
        project database file does not exist or any other ``sqlite3.Error``
        occurs.
    """
    database = SQLiteStorage.get_project_db_path(project)
    if not database.exists():
        return False

    required = (
        "DELETE FROM metrics WHERE run_name = ?",
        "DELETE FROM configs WHERE run_name = ?",
    )
    optional = (
        "DELETE FROM system_metrics WHERE run_name = ?",
        "DELETE FROM alerts WHERE run_name = ?",
    )

    with SQLiteStorage._get_process_lock(project):
        with SQLiteStorage._get_connection(database) as conn:
            cur = conn.cursor()
            try:
                for statement in required:
                    cur.execute(statement, (run,))
                for statement in optional:
                    try:
                        cur.execute(statement, (run,))
                    except sqlite3.OperationalError:
                        pass
                conn.commit()
            except sqlite3.Error:
                return False
            return True
1195
+
1196
+ @staticmethod
1197
+ def _update_media_paths(obj, old_prefix, new_prefix):
1198
+ """Update media file paths in nested data structures."""
1199
+ if isinstance(obj, dict):
1200
+ if obj.get("_type") in [
1201
+ "trackio.image",
1202
+ "trackio.video",
1203
+ "trackio.audio",
1204
+ ]:
1205
+ old_path = obj.get("file_path", "")
1206
+ if isinstance(old_path, str):
1207
+ normalized_path = old_path.replace("\\", "/")
1208
+ if normalized_path.startswith(old_prefix):
1209
+ new_path = normalized_path.replace(old_prefix, new_prefix, 1)
1210
+ return {**obj, "file_path": new_path}
1211
+ return {
1212
+ key: SQLiteStorage._update_media_paths(value, old_prefix, new_prefix)
1213
+ for key, value in obj.items()
1214
+ }
1215
+ elif isinstance(obj, list):
1216
+ return [
1217
+ SQLiteStorage._update_media_paths(item, old_prefix, new_prefix)
1218
+ for item in obj
1219
+ ]
1220
+ return obj
1221
+
1222
+ @staticmethod
1223
def _rewrite_metrics_rows(metrics_rows, new_run_name, old_prefix, new_prefix):
    """Build insert-ready tuples from metrics rows with media paths rewritten.

    For each input row the ``metrics`` payload is decoded, its media paths
    are moved from ``old_prefix`` to ``new_prefix``, and the payload is
    serialized again. The result is a list of
    ``(timestamp, new_run_name, step, serialized_metrics)`` tuples in the
    same order as ``metrics_rows``.
    """

    def _reencode(record):
        decoded = deserialize_values(orjson.loads(record["metrics"]))
        relocated = SQLiteStorage._update_media_paths(
            decoded, old_prefix, new_prefix
        )
        return orjson.dumps(serialize_values(relocated))

    return [
        (record["timestamp"], new_run_name, record["step"], _reencode(record))
        for record in metrics_rows
    ]
1241
+
1242
+ @staticmethod
1243
+ def _move_media_dir(source: Path, target: Path):
1244
+ """Move a media directory from source to target."""
1245
+ if source.exists():
1246
+ target.parent.mkdir(parents=True, exist_ok=True)
1247
+ if target.exists():
1248
+ shutil.rmtree(target)
1249
+ shutil.move(str(source), str(target))
1250
+
1251
+ @staticmethod
1252
def rename_run(project: str, old_name: str, new_name: str) -> None:
    """Rename a run within the same project.

    The run's ``metrics`` rows are updated in place (run name plus media
    paths rewritten from ``{project}/{old_name}/`` to
    ``{project}/{new_name}/``), so every other column of those rows, such as
    ``log_id`` and ``space_id``, keeps its value. ``configs``,
    ``system_metrics`` and ``alerts`` rows are re-pointed to the new name,
    and the run's media directory is moved after the commit.

    Args:
        project: Project that contains the run.
        old_name: Current run name.
        new_name: Desired run name; surrounding whitespace is stripped.

    Raises:
        ValueError: If the new name is empty, the project database does not
            exist, the old run doesn't exist, or a run with the new name
            already exists.
        RuntimeError: If the database operation fails.
    """
    if not new_name or not new_name.strip():
        raise ValueError("New run name cannot be empty")

    new_name = new_name.strip()

    db_path = SQLiteStorage.get_project_db_path(project)
    if not db_path.exists():
        raise ValueError(f"Project '{project}' does not exist")

    with SQLiteStorage._get_process_lock(project):
        with SQLiteStorage._get_connection(db_path) as conn:
            cursor = conn.cursor()

            cursor.execute(
                "SELECT COUNT(*) FROM metrics WHERE run_name = ?", (old_name,)
            )
            if cursor.fetchone()[0] == 0:
                raise ValueError(
                    f"Run '{old_name}' does not exist in project '{project}'"
                )

            cursor.execute(
                "SELECT COUNT(*) FROM metrics WHERE run_name = ?", (new_name,)
            )
            if cursor.fetchone()[0] > 0:
                raise ValueError(
                    f"A run named '{new_name}' already exists in project '{project}'"
                )

            try:
                cursor.execute(
                    "SELECT id, timestamp, step, metrics FROM metrics WHERE run_name = ?",
                    (old_name,),
                )
                metrics_rows = cursor.fetchall()

                old_prefix = f"{project}/{old_name}/"
                new_prefix = f"{project}/{new_name}/"

                # One (timestamp, run_name, step, payload) tuple per input
                # row, in the same order as metrics_rows.
                updated_rows = SQLiteStorage._rewrite_metrics_rows(
                    metrics_rows, new_name, old_prefix, new_prefix
                )

                # Update in place instead of DELETE + INSERT: re-inserting
                # only four columns silently dropped log_id and space_id,
                # which the pending-sync queries rely on.
                cursor.executemany(
                    "UPDATE metrics SET run_name = ?, metrics = ? WHERE id = ?",
                    [
                        (new_name, rewritten[3], original["id"])
                        for original, rewritten in zip(metrics_rows, updated_rows)
                    ],
                )

                cursor.execute(
                    "UPDATE configs SET run_name = ? WHERE run_name = ?",
                    (new_name, old_name),
                )

                # system_metrics and alerts may not exist in this database;
                # a failure there must not abort the rename.
                try:
                    cursor.execute(
                        "UPDATE system_metrics SET run_name = ? WHERE run_name = ?",
                        (new_name, old_name),
                    )
                except sqlite3.OperationalError:
                    pass

                try:
                    cursor.execute(
                        "UPDATE alerts SET run_name = ? WHERE run_name = ?",
                        (new_name, old_name),
                    )
                except sqlite3.OperationalError:
                    pass

                conn.commit()

                SQLiteStorage._move_media_dir(
                    MEDIA_DIR / project / old_name,
                    MEDIA_DIR / project / new_name,
                )
            except sqlite3.Error as e:
                raise RuntimeError(
                    f"Database error while renaming run '{old_name}' to '{new_name}': {e}"
                ) from e
1342
+
1343
+ @staticmethod
1344
def move_run(project: str, run: str, new_project: str) -> bool:
    """Move a run from one project to another.

    The run's metrics (with media paths rewritten to the new project
    prefix), config, system metrics and alerts are copied into the target
    project's database and committed there. The media directory is then
    moved and the rows are deleted from the source database.

    Args:
        project: Project that currently holds the run.
        run: Name of the run to move.
        new_project: Destination project; its database is initialised if
            needed.

    Returns:
        ``True`` when the run was moved. ``False`` when ``new_project`` is
        the same as ``project``, when the source database does not exist,
        or when the run has no metrics, config or system metrics in the
        source project.
    """
    # Moving a run onto its own project would insert copies into the same
    # database and then delete every row for the run (copies included),
    # destroying it. It would also acquire the same process lock twice.
    if new_project == project:
        return False

    source_db_path = SQLiteStorage.get_project_db_path(project)
    if not source_db_path.exists():
        return False

    target_db_path = SQLiteStorage.init_db(new_project)

    with SQLiteStorage._get_process_lock(project):
        with SQLiteStorage._get_process_lock(new_project):
            with SQLiteStorage._get_connection(source_db_path) as source_conn:
                source_cursor = source_conn.cursor()

                source_cursor.execute(
                    "SELECT timestamp, step, metrics FROM metrics WHERE run_name = ?",
                    (run,),
                )
                metrics_rows = source_cursor.fetchall()

                source_cursor.execute(
                    "SELECT config, created_at FROM configs WHERE run_name = ?",
                    (run,),
                )
                config_row = source_cursor.fetchone()

                # system_metrics and alerts are optional tables: treat a
                # failing query as "no rows".
                try:
                    source_cursor.execute(
                        "SELECT timestamp, metrics FROM system_metrics WHERE run_name = ?",
                        (run,),
                    )
                    system_metrics_rows = source_cursor.fetchall()
                except sqlite3.OperationalError:
                    system_metrics_rows = []

                try:
                    source_cursor.execute(
                        "SELECT timestamp, title, text, level, step, alert_id FROM alerts WHERE run_name = ?",
                        (run,),
                    )
                    alert_rows = source_cursor.fetchall()
                except sqlite3.OperationalError:
                    alert_rows = []

                if not metrics_rows and not config_row and not system_metrics_rows:
                    return False

                with SQLiteStorage._get_connection(target_db_path) as target_conn:
                    target_cursor = target_conn.cursor()

                    old_prefix = f"{project}/{run}/"
                    new_prefix = f"{new_project}/{run}/"
                    updated_rows = SQLiteStorage._rewrite_metrics_rows(
                        metrics_rows, run, old_prefix, new_prefix
                    )

                    target_cursor.executemany(
                        "INSERT INTO metrics (timestamp, run_name, step, metrics) VALUES (?, ?, ?, ?)",
                        updated_rows,
                    )

                    if config_row:
                        target_cursor.execute(
                            """
                            INSERT OR REPLACE INTO configs (run_name, config, created_at)
                            VALUES (?, ?, ?)
                            """,
                            (run, config_row["config"], config_row["created_at"]),
                        )

                    for row in system_metrics_rows:
                        try:
                            target_cursor.execute(
                                """
                                INSERT INTO system_metrics (timestamp, run_name, metrics)
                                VALUES (?, ?, ?)
                                """,
                                (row["timestamp"], run, row["metrics"]),
                            )
                        except sqlite3.OperationalError:
                            pass

                    for row in alert_rows:
                        try:
                            target_cursor.execute(
                                """
                                INSERT OR IGNORE INTO alerts (timestamp, run_name, title, text, level, step, alert_id)
                                VALUES (?, ?, ?, ?, ?, ?, ?)
                                """,
                                (
                                    row["timestamp"],
                                    run,
                                    row["title"],
                                    row["text"],
                                    row["level"],
                                    row["step"],
                                    row["alert_id"],
                                ),
                            )
                        except sqlite3.OperationalError:
                            pass

                    target_conn.commit()

                # The copy is committed in the target; only now move the
                # media and remove the run from the source.
                SQLiteStorage._move_media_dir(
                    MEDIA_DIR / project / run,
                    MEDIA_DIR / new_project / run,
                )

                source_cursor.execute(
                    "DELETE FROM metrics WHERE run_name = ?", (run,)
                )
                source_cursor.execute(
                    "DELETE FROM configs WHERE run_name = ?", (run,)
                )
                try:
                    source_cursor.execute(
                        "DELETE FROM system_metrics WHERE run_name = ?", (run,)
                    )
                except sqlite3.OperationalError:
                    pass
                try:
                    source_cursor.execute(
                        "DELETE FROM alerts WHERE run_name = ?", (run,)
                    )
                except sqlite3.OperationalError:
                    pass
                source_conn.commit()

    return True
1473
+
1474
+ @staticmethod
1475
def get_all_run_configs(project: str) -> dict[str, dict]:
    """Map every run name in ``project`` to its decoded configuration.

    Returns an empty dict when the project database file or the ``configs``
    table is missing; other ``sqlite3.OperationalError`` instances propagate.
    """
    database = SQLiteStorage.get_project_db_path(project)
    if not database.exists():
        return {}

    with SQLiteStorage._get_connection(database) as conn:
        cur = conn.cursor()
        try:
            cur.execute(
                """
                SELECT run_name, config FROM configs
                """
            )
            return {
                record["run_name"]: deserialize_values(orjson.loads(record["config"]))
                for record in cur.fetchall()
            }
        except sqlite3.OperationalError as exc:
            if "no such table: configs" not in str(exc):
                raise
            return {}
1499
+
1500
+ @staticmethod
1501
def get_metric_values(
    project: str,
    run: str,
    metric_name: str,
    step: int | None = None,
    around_step: int | None = None,
    at_time: str | None = None,
    window: int | float | None = None,
) -> list[dict]:
    """Get values for a specific metric in a project/run with optional filtering.

    Filtering modes (the first one that applies wins):
    - step: return the rows at exactly this step
    - around_step + window: return rows where step is in
      [around_step - window, around_step + window] (window truncated to int)
    - at_time + window: return rows within ±window seconds of the ISO
      timestamp; fractional windows are honoured
    - No filters: return all rows

    Returns:
        A list of ``{"timestamp", "step", "value"}`` dicts ordered by
        timestamp, one per row whose metrics contain ``metric_name``. Empty
        when the project database file does not exist.
    """
    db_path = SQLiteStorage.get_project_db_path(project)
    if not db_path.exists():
        return []

    with SQLiteStorage._get_connection(db_path) as conn:
        cursor = conn.cursor()
        query = "SELECT timestamp, step, metrics FROM metrics WHERE run_name = ?"
        params: list = [run]

        if step is not None:
            query += " AND step = ?"
            params.append(step)
        elif around_step is not None and window is not None:
            query += " AND step >= ? AND step <= ?"
            params.extend([around_step - int(window), around_step + int(window)])
        elif at_time is not None and window is not None:
            # Compare parsed instants, not raw text: datetime() yields
            # "YYYY-MM-DD HH:MM:SS", which does not order correctly against
            # ISO strings using a "T" separator. julianday() normalises both
            # sides and keeps sub-second precision, so the window is no
            # longer truncated to whole seconds.
            query += (
                " AND julianday(timestamp) >= julianday(?, '-' || ? || ' seconds')"
                " AND julianday(timestamp) <= julianday(?, '+' || ? || ' seconds')"
            )
            params.extend([at_time, float(window), at_time, float(window)])

        query += " ORDER BY timestamp"
        cursor.execute(query, params)

        results = []
        for row in cursor.fetchall():
            metrics = deserialize_values(orjson.loads(row["metrics"]))
            if metric_name in metrics:
                results.append(
                    {
                        "timestamp": row["timestamp"],
                        "step": row["step"],
                        "value": metrics[metric_name],
                    }
                )
        return results
1557
+
1558
+ @staticmethod
1559
def get_snapshot(
    project: str,
    run: str,
    step: int | None = None,
    around_step: int | None = None,
    at_time: str | None = None,
    window: int | float | None = None,
) -> dict[str, list[dict]]:
    """Get all metrics at/around a point in time or step.

    Filtering modes (the first one that applies wins):
    - step: rows at exactly this step
    - around_step + window: rows where step is in
      [around_step - window, around_step + window] (window truncated to int)
    - at_time + window: rows within ±window seconds of the ISO timestamp;
      fractional windows are honoured
    - No filters: all rows of the run

    Returns:
        A dict mapping metric names to lists of ``{timestamp, step, value}``
        ordered by timestamp. Empty when the project database file does not
        exist.
    """
    db_path = SQLiteStorage.get_project_db_path(project)
    if not db_path.exists():
        return {}

    with SQLiteStorage._get_connection(db_path) as conn:
        cursor = conn.cursor()
        query = "SELECT timestamp, step, metrics FROM metrics WHERE run_name = ?"
        params: list = [run]

        if step is not None:
            query += " AND step = ?"
            params.append(step)
        elif around_step is not None and window is not None:
            query += " AND step >= ? AND step <= ?"
            params.extend([around_step - int(window), around_step + int(window)])
        elif at_time is not None and window is not None:
            # Compare parsed instants, not raw text: datetime() yields
            # "YYYY-MM-DD HH:MM:SS", which does not order correctly against
            # ISO strings using a "T" separator. julianday() normalises both
            # sides and keeps sub-second precision, so the window is no
            # longer truncated to whole seconds.
            query += (
                " AND julianday(timestamp) >= julianday(?, '-' || ? || ' seconds')"
                " AND julianday(timestamp) <= julianday(?, '+' || ? || ' seconds')"
            )
            params.extend([at_time, float(window), at_time, float(window)])

        query += " ORDER BY timestamp"
        cursor.execute(query, params)

        result: dict[str, list[dict]] = {}
        for row in cursor.fetchall():
            metrics = deserialize_values(orjson.loads(row["metrics"]))
            for key, value in metrics.items():
                result.setdefault(key, []).append(
                    {
                        "timestamp": row["timestamp"],
                        "step": row["step"],
                        "value": value,
                    }
                )
        return result
1611
+
1612
+ @staticmethod
1613
def get_all_metrics_for_run(project: str, run: str) -> list[str]:
    """Return the metric names logged for ``run`` in ``project``.

    Delegates to ``_get_metric_names`` on the ``metrics`` table, leaving out
    the ``"timestamp"`` and ``"step"`` keys.
    """
    ignored = {"timestamp", "step"}
    return SQLiteStorage._get_metric_names(
        project, run, "metrics", exclude_keys=ignored
    )
1618
+
1619
+ @staticmethod
1620
def _get_metric_names(
    project: str, run: str, table: str, exclude_keys: set[str]
) -> list[str]:
    """Return the sorted, de-duplicated payload keys stored for ``run``.

    Every ``metrics`` payload of ``run`` in ``table`` is decoded and its keys
    are collected, skipping those listed in ``exclude_keys``. ``table`` is
    interpolated into the SQL text, so it must be a trusted table name.

    Returns an empty list when the project database file or ``table`` is
    missing; other ``sqlite3.OperationalError`` instances propagate.
    """
    database = SQLiteStorage.get_project_db_path(project)
    if not database.exists():
        return []

    with SQLiteStorage._get_connection(database) as conn:
        cur = conn.cursor()
        try:
            cur.execute(
                f"""
                SELECT metrics
                FROM {table}
                WHERE run_name = ?
                ORDER BY timestamp
                """,
                (run,),
            )
            names = set()
            for record in cur.fetchall():
                payload = deserialize_values(orjson.loads(record["metrics"]))
                names.update(key for key in payload.keys() if key not in exclude_keys)
            return sorted(names)
        except sqlite3.OperationalError as exc:
            if f"no such table: {table}" not in str(exc):
                raise
            return []
1653
+
1654
+ @staticmethod
1655
def set_project_metadata(project: str, key: str, value: str) -> None:
    """Store ``value`` under ``key`` in the project's ``project_metadata`` table.

    The project database is initialised first; an existing entry for ``key``
    is replaced. The write happens under the project's process lock.
    """
    database = SQLiteStorage.init_db(project)
    statement = "INSERT OR REPLACE INTO project_metadata (key, value) VALUES (?, ?)"
    with SQLiteStorage._get_process_lock(project):
        with SQLiteStorage._get_connection(database) as conn:
            conn.execute(statement, (key, value))
            conn.commit()
1664
+
1665
+ @staticmethod
1666
def get_project_metadata(project: str, key: str) -> str | None:
    """Return the ``project_metadata`` value stored under ``key``.

    Returns ``None`` when the project database file is missing, when no row
    matches ``key``, or when the query raises ``sqlite3.OperationalError``.
    """
    database = SQLiteStorage.get_project_db_path(project)
    if not database.exists():
        return None
    with SQLiteStorage._get_connection(database) as conn:
        cur = conn.cursor()
        try:
            cur.execute("SELECT value FROM project_metadata WHERE key = ?", (key,))
            record = cur.fetchone()
        except sqlite3.OperationalError:
            return None
        if not record:
            return None
        return record[0]
1680
+
1681
+ @staticmethod
1682
def get_space_id(project: str) -> str | None:
    """Return the project's ``"space_id"`` metadata value, or ``None``."""
    metadata_key = "space_id"
    return SQLiteStorage.get_project_metadata(project, metadata_key)
1684
+
1685
+ @staticmethod
1686
def has_pending_data(project: str) -> bool:
    """Report whether the project has anything queued.

    Checks, in order, for a ``metrics`` row with a non-NULL ``space_id``, a
    ``system_metrics`` row with a non-NULL ``space_id``, and any
    ``pending_uploads`` row. A probe that raises
    ``sqlite3.OperationalError`` is treated as "nothing found". Returns
    ``False`` when the project database file does not exist.
    """
    database = SQLiteStorage.get_project_db_path(project)
    if not database.exists():
        return False

    probes = (
        "SELECT EXISTS(SELECT 1 FROM metrics WHERE space_id IS NOT NULL LIMIT 1)",
        "SELECT EXISTS(SELECT 1 FROM system_metrics WHERE space_id IS NOT NULL LIMIT 1)",
        "SELECT EXISTS(SELECT 1 FROM pending_uploads LIMIT 1)",
    )
    with SQLiteStorage._get_connection(database) as conn:
        cur = conn.cursor()
        for probe in probes:
            try:
                cur.execute(probe)
                if cur.fetchone()[0]:
                    return True
            except sqlite3.OperationalError:
                pass
        return False
1715
+
1716
+ @staticmethod
1717
def get_pending_logs(project: str) -> dict | None:
    """Return the pending ``metrics`` rows of ``project`` via ``_get_pending``.

    Entries additionally carry the row's ``step`` and a ``"config"`` key.
    """
    step_fields = ["step"]
    return SQLiteStorage._get_pending(
        project, "metrics", extra_fields=step_fields, include_config=True
    )
1721
+
1722
+ @staticmethod
1723
def clear_pending_logs(project: str, metric_ids: list[int]) -> None:
    """Clear the pending marker on the given ``metrics`` row ids."""
    table = "metrics"
    SQLiteStorage._clear_pending(project, table, metric_ids)
1725
+
1726
+ @staticmethod
1727
def get_pending_system_logs(project: str) -> dict | None:
    """Return the pending ``system_metrics`` rows of ``project`` via ``_get_pending``."""
    table = "system_metrics"
    return SQLiteStorage._get_pending(project, table)
1729
+
1730
+ @staticmethod
1731
def _get_pending(
    project: str,
    table: str,
    extra_fields: list[str] | None = None,
    include_config: bool = False,
) -> dict | None:
    """Collect the rows of ``table`` whose ``space_id`` is not NULL.

    Args:
        project: Project whose database is read.
        table: Table name, interpolated into the SQL text (trusted input).
        extra_fields: Additional column names to select and copy into each
            entry.
        include_config: When true, each entry gets a ``"config"`` key set to
            ``None``.

    Returns:
        ``None`` when the database file is missing, the query raises
        ``sqlite3.OperationalError``, or no row matches. Otherwise a dict
        with ``"logs"`` (one entry per row), ``"ids"`` (the row ids in the
        same order) and ``"space_id"`` (taken from the first row).
    """
    database = SQLiteStorage.get_project_db_path(project)
    if not database.exists():
        return None

    fields = extra_fields or []
    extra_cols = ", ".join(extra_fields) + ", " if extra_fields else ""

    def _entry(record) -> dict:
        item = {
            "project": project,
            "run": record["run_name"],
            "metrics": deserialize_values(orjson.loads(record["metrics"])),
            "timestamp": record["timestamp"],
            "log_id": record["log_id"],
        }
        for name in fields:
            item[name] = record[name]
        if include_config:
            item["config"] = None
        return item

    with SQLiteStorage._get_connection(database) as conn:
        cur = conn.cursor()
        try:
            cur.execute(
                f"""SELECT id, timestamp, run_name, {extra_cols}metrics, log_id, space_id
                FROM {table} WHERE space_id IS NOT NULL"""
            )
        except sqlite3.OperationalError:
            return None
        records = cur.fetchall()
        if not records:
            return None
        return {
            "logs": [_entry(record) for record in records],
            "ids": [record["id"] for record in records],
            "space_id": records[0]["space_id"],
        }
1771
+
1772
+ @staticmethod
1773
def clear_pending_system_logs(project: str, metric_ids: list[int]) -> None:
    """Clear the pending marker on the given ``system_metrics`` row ids."""
    table = "system_metrics"
    SQLiteStorage._clear_pending(project, table, metric_ids)
1775
+
1776
+ @staticmethod
1777
def _clear_pending(project: str, table: str, ids: list[int]) -> None:
    """Set ``space_id`` to NULL for the given row ids of ``table``.

    Does nothing when ``ids`` is empty or the project database file does not
    exist. ``table`` is interpolated into the SQL text, so it must be a
    trusted table name.

    Args:
        project: Project whose database is updated.
        table: Name of the table holding the rows.
        ids: Primary-key ids of the rows to clear.
    """
    if not ids:
        return
    db_path = SQLiteStorage.get_project_db_path(project)
    if not db_path.exists():
        return

    # SQLite caps the number of bound parameters per statement (999 on older
    # builds). A large backlog of ids would fail with "too many SQL
    # variables", so update in batches inside a single transaction.
    batch_size = 500
    with SQLiteStorage._get_process_lock(project):
        with SQLiteStorage._get_connection(db_path) as conn:
            for start in range(0, len(ids), batch_size):
                batch = list(ids[start : start + batch_size])
                placeholders = ",".join("?" * len(batch))
                conn.execute(
                    f"UPDATE {table} SET space_id = NULL WHERE id IN ({placeholders})",
                    batch,
                )
            conn.commit()
1791
+
1792
+ @staticmethod
1793
def get_pending_uploads(project: str) -> dict | None:
    """Return every row of the project's ``pending_uploads`` table.

    Returns:
        ``None`` when the database file is missing, the query raises
        ``sqlite3.OperationalError``, or the table is empty. Otherwise a
        dict with ``"uploads"`` (one entry per row), ``"ids"`` (the row ids
        in the same order) and ``"space_id"`` (taken from the first row).
    """
    database = SQLiteStorage.get_project_db_path(project)
    if not database.exists():
        return None
    with SQLiteStorage._get_connection(database) as conn:
        cur = conn.cursor()
        try:
            cur.execute(
                """SELECT id, space_id, run_name, step, file_path, relative_path
                FROM pending_uploads"""
            )
        except sqlite3.OperationalError:
            return None
        records = cur.fetchall()
        if not records:
            return None
        uploads = [
            {
                "project": project,
                "run": record["run_name"],
                "step": record["step"],
                "file_path": record["file_path"],
                "relative_path": record["relative_path"],
            }
            for record in records
        ]
        row_ids = [record["id"] for record in records]
        return {"uploads": uploads, "ids": row_ids, "space_id": records[0]["space_id"]}
1823
+
1824
+ @staticmethod
1825
def clear_pending_uploads(project: str, upload_ids: list[int]) -> None:
    """Delete the given rows from the project's ``pending_uploads`` table.

    Does nothing when ``upload_ids`` is empty or the project database file
    does not exist.

    Args:
        project: Project whose database is updated.
        upload_ids: Primary-key ids of the ``pending_uploads`` rows to
            remove.
    """
    if not upload_ids:
        return
    db_path = SQLiteStorage.get_project_db_path(project)
    if not db_path.exists():
        return

    # SQLite caps the number of bound parameters per statement (999 on older
    # builds). A large backlog of ids would fail with "too many SQL
    # variables", so delete in batches inside a single transaction.
    batch_size = 500
    with SQLiteStorage._get_process_lock(project):
        with SQLiteStorage._get_connection(db_path) as conn:
            for start in range(0, len(upload_ids), batch_size):
                batch = list(upload_ids[start : start + batch_size])
                placeholders = ",".join("?" * len(batch))
                conn.execute(
                    f"DELETE FROM pending_uploads WHERE id IN ({placeholders})",
                    batch,
                )
            conn.commit()
1839
+
1840
+ @staticmethod
1841
def add_pending_upload(
    project: str,
    space_id: str,
    run_name: str | None,
    step: int | None,
    file_path: str,
    relative_path: str | None,
) -> None:
    """Insert one row into the project's ``pending_uploads`` table.

    The project database is initialised first. ``created_at`` is set to the
    current UTC time in ISO format, and the insert is committed under the
    project's process lock.
    """
    database = SQLiteStorage.init_db(project)
    statement = """INSERT INTO pending_uploads
                    (space_id, run_name, step, file_path, relative_path, created_at)
                    VALUES (?, ?, ?, ?, ?, ?)"""
    with SQLiteStorage._get_process_lock(project):
        with SQLiteStorage._get_connection(database) as conn:
            created_at = datetime.now(timezone.utc).isoformat()
            values = (space_id, run_name, step, file_path, relative_path, created_at)
            conn.execute(statement, values)
            conn.commit()
1866
+
1867
+ @staticmethod
1868
def get_all_logs_for_sync(project: str) -> list[dict]:
    """Return every ``metrics`` row of ``project`` via ``_get_all_for_sync``.

    Rows are ordered by run name, then step. Each entry carries the row's
    ``step`` and a ``"config"`` key.
    """
    options = {
        "order_by": "run_name, step",
        "extra_fields": ["step"],
        "include_config": True,
    }
    return SQLiteStorage._get_all_for_sync(project, "metrics", **options)
1876
+
1877
+ @staticmethod
1878
def get_all_system_logs_for_sync(project: str) -> list[dict]:
    """Return every ``system_metrics`` row of ``project`` via ``_get_all_for_sync``.

    Rows are ordered by run name, then timestamp.
    """
    ordering = "run_name, timestamp"
    return SQLiteStorage._get_all_for_sync(
        project, "system_metrics", order_by=ordering
    )
1882
+
1883
+ @staticmethod
1884
def _get_all_for_sync(
    project: str,
    table: str,
    order_by: str,
    extra_fields: list[str] | None = None,
    include_config: bool = False,
) -> list[dict]:
    """Return every row of ``table`` as a list of entry dicts.

    Args:
        project: Project whose database is read.
        table: Table name, interpolated into the SQL text (trusted input).
        order_by: ``ORDER BY`` expression, also interpolated into the SQL.
        extra_fields: Additional column names to select and copy into each
            entry.
        include_config: When true, each entry gets a ``"config"`` key set to
            ``None``.

    Returns:
        One dict per row with ``project``, ``run``, decoded ``metrics``,
        ``timestamp`` and ``log_id`` (plus the extras). Empty when the
        database file is missing or the query raises
        ``sqlite3.OperationalError``.
    """
    database = SQLiteStorage.get_project_db_path(project)
    if not database.exists():
        return []

    fields = extra_fields or []
    extra_cols = ", ".join(extra_fields) + ", " if extra_fields else ""

    def _entry(record) -> dict:
        item = {
            "project": project,
            "run": record["run_name"],
            "metrics": deserialize_values(orjson.loads(record["metrics"])),
            "timestamp": record["timestamp"],
            "log_id": record["log_id"],
        }
        for name in fields:
            item[name] = record[name]
        if include_config:
            item["config"] = None
        return item

    with SQLiteStorage._get_connection(database) as conn:
        cur = conn.cursor()
        try:
            cur.execute(
                f"""SELECT timestamp, run_name, {extra_cols}metrics, log_id
                FROM {table} ORDER BY {order_by}"""
            )
        except sqlite3.OperationalError:
            return []
        return [_entry(record) for record in cur.fetchall()]
trackio/table.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Any, Literal
3
+
4
+ from pandas import DataFrame
5
+
6
+ from trackio.media.media import TrackioMedia
7
+ from trackio.utils import MEDIA_DIR
8
+
9
+
10
+ class Table:
11
+ """
12
+ Initializes a Table object.
13
+
14
+ Tables can be used to log tabular data including images, numbers, and text.
15
+
16
+ Args:
17
+ columns (`list[str]`, *optional*):
18
+ Names of the columns in the table. Optional if `data` is provided. Not
19
+ expected if `dataframe` is provided. Currently ignored.
20
+ data (`list[list[Any]]`, *optional*):
21
+ 2D row-oriented array of values. Each value can be a number, a string
22
+ (treated as Markdown and truncated if too long), or a `Trackio.Image` or
23
+ list of `Trackio.Image` objects.
24
+ dataframe (`pandas.DataFrame`, *optional*):
25
+ DataFrame used to create the table. When set, `data` and `columns`
26
+ arguments are ignored.
27
+ rows (`list[list[Any]]`, *optional*):
28
+ Currently ignored.
29
+ optional (`bool` or `list[bool]`, *optional*, defaults to `True`):
30
+ Currently ignored.
31
+ allow_mixed_types (`bool`, *optional*, defaults to `False`):
32
+ Currently ignored.
33
+ log_mode: (`Literal["IMMUTABLE", "MUTABLE", "INCREMENTAL"]` or `None`, *optional*, defaults to `"IMMUTABLE"`):
34
+ Currently ignored.
35
+ """
36
+
37
+ TYPE = "trackio.table"
38
+
39
+ def __init__(
40
+ self,
41
+ columns: list[str] | None = None,
42
+ data: list[list[Any]] | None = None,
43
+ dataframe: DataFrame | None = None,
44
+ rows: list[list[Any]] | None = None,
45
+ optional: bool | list[bool] = True,
46
+ allow_mixed_types: bool = False,
47
+ log_mode: Literal["IMMUTABLE", "MUTABLE", "INCREMENTAL"] | None = "IMMUTABLE",
48
+ ):
49
+ # TODO: implement support for columns, dtype, optional, allow_mixed_types, and log_mode.
50
+ # for now (like `rows`) they are included for API compat but don't do anything.
51
+ if dataframe is None:
52
+ self.data = DataFrame(data) if data is not None else DataFrame()
53
+ else:
54
+ self.data = dataframe
55
+
56
+ def _has_media_objects(self, dataframe: DataFrame) -> bool:
57
+ """Check if dataframe contains any TrackioMedia objects or lists of TrackioMedia objects."""
58
+ for col in dataframe.columns:
59
+ if dataframe[col].apply(lambda x: isinstance(x, TrackioMedia)).any():
60
+ return True
61
+ if (
62
+ dataframe[col]
63
+ .apply(
64
+ lambda x: (
65
+ isinstance(x, list)
66
+ and len(x) > 0
67
+ and isinstance(x[0], TrackioMedia)
68
+ )
69
+ )
70
+ .any()
71
+ ):
72
+ return True
73
+ return False
74
+
75
+ def _process_data(self, project: str, run: str, step: int = 0):
76
+ """Convert dataframe to dict format, processing any TrackioMedia objects if present."""
77
+ df = self.data
78
+ if not self._has_media_objects(df):
79
+ return df.to_dict(orient="records")
80
+
81
+ processed_df = df.copy()
82
+ for col in processed_df.columns:
83
+ for idx in processed_df.index:
84
+ value = processed_df.at[idx, col]
85
+ if isinstance(value, TrackioMedia):
86
+ value._save(project, run, step)
87
+ processed_df.at[idx, col] = value._to_dict()
88
+ if (
89
+ isinstance(value, list)
90
+ and len(value) > 0
91
+ and isinstance(value[0], TrackioMedia)
92
+ ):
93
+ [v._save(project, run, step) for v in value]
94
+ processed_df.at[idx, col] = [v._to_dict() for v in value]
95
+
96
+ return processed_df.to_dict(orient="records")
97
+
98
+ @staticmethod
99
+ def to_display_format(table_data: list[dict]) -> list[dict]:
100
+ """
101
+ Converts stored table data to display format for UI rendering.
102
+
103
+ Note:
104
+ This does not use the `self.data` attribute, but instead uses the
105
+ `table_data` parameter, which is what the UI receives.
106
+
107
+ Args:
108
+ table_data (`list[dict]`):
109
+ List of dictionaries representing table rows (from stored `_value`).
110
+
111
+ Returns:
112
+ `list[dict]`: Table data with images converted to markdown syntax and long
113
+ text truncated.
114
+ """
115
+ truncate_length = int(os.getenv("TRACKIO_TABLE_TRUNCATE_LENGTH", "250"))
116
+
117
+ def convert_image_to_markdown(image_data: dict) -> str:
118
+ relative_path = image_data.get("file_path", "")
119
+ caption = image_data.get("caption", "")
120
+ absolute_path = MEDIA_DIR / relative_path
121
+ return f'<img src="/gradio_api/file={absolute_path}" alt="{caption}" />'
122
+
123
+ processed_data = []
124
+ for row in table_data:
125
+ processed_row = {}
126
+ for key, value in row.items():
127
+ if isinstance(value, dict) and value.get("_type") == "trackio.image":
128
+ processed_row[key] = convert_image_to_markdown(value)
129
+ elif (
130
+ isinstance(value, list)
131
+ and len(value) > 0
132
+ and isinstance(value[0], dict)
133
+ and value[0].get("_type") == "trackio.image"
134
+ ):
135
+ # This assumes that if the first item is an image, all items are images. Ok for now since we don't support mixed types in a single cell.
136
+ processed_row[key] = (
137
+ '<div style="display: flex; gap: 10px;">'
138
+ + "".join([convert_image_to_markdown(item) for item in value])
139
+ + "</div>"
140
+ )
141
+ elif isinstance(value, str) and len(value) > truncate_length:
142
+ truncated = value[:truncate_length]
143
+ full_text = value.replace("<", "&lt;").replace(">", "&gt;")
144
+ processed_row[key] = (
145
+ f'<details style="display: inline;">'
146
+ f'<summary style="display: inline; cursor: pointer;">{truncated}…<span><em>(truncated, click to expand)</em></span></summary>'
147
+ f'<div style="margin-top: 10px; padding: 10px; background: #f5f5f5; border-radius: 4px; max-height: 400px; overflow: auto;">'
148
+ f'<pre style="white-space: pre-wrap; word-wrap: break-word; margin: 0;">{full_text}</pre>'
149
+ f"</div>"
150
+ f"</details>"
151
+ )
152
+ else:
153
+ processed_row[key] = value
154
+ processed_data.append(processed_row)
155
+ return processed_data
156
+
157
+ def _to_dict(self, project: str, run: str, step: int = 0):
158
+ """
159
+ Converts the table to a dictionary representation.
160
+
161
+ Args:
162
+ project (`str`):
163
+ Project name for saving media files.
164
+ run (`str`):
165
+ Run name for saving media files.
166
+ step (`int`, *optional*, defaults to `0`):
167
+ Step number for saving media files.
168
+ """
169
+ data = self._process_data(project, run, step)
170
+ return {
171
+ "_type": self.TYPE,
172
+ "_value": data,
173
+ }
trackio/typehints.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, TypedDict
2
+
3
+ from gradio import FileData
4
+
5
+
6
+ class LogEntry(TypedDict, total=False):
7
+ project: str
8
+ run: str
9
+ metrics: dict[str, Any]
10
+ step: int | None
11
+ config: dict[str, Any] | None
12
+ log_id: str | None
13
+
14
+
15
+ class SystemLogEntry(TypedDict, total=False):
16
+ project: str
17
+ run: str
18
+ metrics: dict[str, Any]
19
+ timestamp: str
20
+ log_id: str | None
21
+
22
+
23
+ class AlertEntry(TypedDict, total=False):
24
+ project: str
25
+ run: str
26
+ title: str
27
+ text: str | None
28
+ level: str
29
+ step: int | None
30
+ timestamp: str
31
+ alert_id: str | None
32
+
33
+
34
class UploadEntry(TypedDict):
    """Payload describing one uploaded file; all keys are required."""

    project: str
    # These three may be None.
    run: str | None
    step: int | None
    relative_path: str | None
    # The uploaded file itself, as a gradio `FileData`.
    uploaded_file: FileData
trackio/utils.py ADDED
@@ -0,0 +1,927 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import os
3
+ import re
4
+ import secrets
5
+ import time
6
+ import warnings
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
+ from typing import TYPE_CHECKING
10
+ from urllib.parse import urlencode
11
+
12
+ import huggingface_hub
13
+ import numpy as np
14
+ import pandas as pd
15
+ from huggingface_hub.constants import HF_HOME
16
+
17
+ if TYPE_CHECKING:
18
+ from trackio.commit_scheduler import CommitScheduler
19
+ from trackio.dummy_commit_scheduler import DummyCommitScheduler
20
+
21
RESERVED_KEYS = ["project", "run", "timestamp", "step", "time", "metrics"]

TRACKIO_LOGO_DIR = Path(__file__).parent / "assets"


def get_logo_urls() -> dict[str, str]:
    """
    Return the light and dark logo URLs.

    Each URL comes from `TRACKIO_LOGO_LIGHT_URL` / `TRACKIO_LOGO_DARK_URL` when
    set, otherwise it points at the bundled asset under `TRACKIO_LOGO_DIR`.
    """
    default_light = (
        f"/gradio_api/file={TRACKIO_LOGO_DIR}/trackio_logo_type_light_transparent.png"
    )
    default_dark = (
        f"/gradio_api/file={TRACKIO_LOGO_DIR}/trackio_logo_type_dark_transparent.png"
    )
    return {
        "light": os.environ.get("TRACKIO_LOGO_LIGHT_URL", default_light),
        "dark": os.environ.get("TRACKIO_LOGO_DARK_URL", default_dark),
    }
37
+
38
+
39
def order_metrics_by_plot_preference(metrics: list[str]) -> tuple[list[str], dict]:
    """
    Order metrics based on TRACKIO_PLOT_ORDER environment variable and group them.

    `TRACKIO_PLOT_ORDER` is a comma-separated list of patterns. A pattern is
    either an exact metric name (`train/loss`) or a group wildcard (`train/*`).
    Metrics without a `/` belong to the implicit `charts` group. Earlier
    patterns sort first; anything unmatched sorts last, alphabetically.

    Args:
        metrics: List of metric names to order and group

    Returns:
        Tuple of (ordered_group_names, grouped_metrics_dict), where each group
        maps to `{"direct_metrics": [...], "subgroups": {name: [...]}}`.
    """
    plot_order_env = os.environ.get("TRACKIO_PLOT_ORDER", "")
    plot_order = [item.strip() for item in plot_order_env.split(",") if item.strip()]
    unmatched = len(plot_order)

    def group_of(name: str) -> str:
        return name.split("/")[0] if "/" in name else "charts"

    def first_pattern_for_group(group_name: str) -> int:
        # Index of the first pattern that targets `group_name`, else `unmatched`.
        return next(
            (
                i
                for i, pattern in enumerate(plot_order)
                if group_of(pattern) == group_name
            ),
            unmatched,
        )

    def get_metric_priority(metric: str) -> tuple[float, float, str]:
        if not plot_order:
            return (float("inf"), float("inf"), metric)

        group_priority = first_pattern_for_group(group_of(metric))

        within_group_priority = unmatched
        for i, pattern in enumerate(plot_order):
            if pattern == metric:
                within_group_priority = i
                break
            if (
                pattern.endswith("/*")
                and within_group_priority == unmatched
                and metric.startswith(pattern[:-2] + "/")
            ):
                # Wildcard matches rank after every exact match; keep scanning
                # in case an exact pattern for this metric follows.
                within_group_priority = i + len(plot_order)

        return (group_priority, within_group_priority, metric)

    result: dict[str, dict] = {}
    for metric in metrics:
        parts = metric.split("/")
        group = result.setdefault(
            group_of(metric), {"direct_metrics": [], "subgroups": {}}
        )
        if len(parts) <= 2:
            group["direct_metrics"].append(metric)
        else:
            group["subgroups"].setdefault(parts[1], []).append(metric)

    for group_data in result.values():
        group_data["direct_metrics"].sort(key=get_metric_priority)
        for subgroup_metrics in group_data["subgroups"].values():
            subgroup_metrics.sort(key=get_metric_priority)

    # Drop "charts" only when it is completely empty. Checking direct metrics
    # alone discarded metrics such as "charts/a/b" that live in its subgroups.
    charts = result.get("charts")
    if (
        charts is not None
        and not charts["direct_metrics"]
        and not charts["subgroups"]
    ):
        del result["charts"]

    def get_group_priority(group_name: str) -> tuple[float, str]:
        if not plot_order:
            return (float("inf"), group_name)
        return (first_pattern_for_group(group_name), group_name)

    ordered_groups = sorted(result.keys(), key=get_group_priority)

    return ordered_groups, result
124
+
125
+
126
def persistent_storage_enabled() -> bool:
    """Return True when the `PERSISTANT_STORAGE_ENABLED` env var is exactly "true"."""
    # The misspelling ("PERSISTANT") is the real name of the environment variable.
    flag = os.environ.get("PERSISTANT_STORAGE_ENABLED")
    return flag == "true"
130
+
131
+
132
def _get_trackio_dir() -> Path:
    """
    Resolve the directory where Trackio keeps its data.

    Precedence: `/data/trackio` when persistent storage is enabled, then a
    non-empty `TRACKIO_DIR` environment variable, then `<HF_HOME>/trackio`.
    """
    if persistent_storage_enabled():
        return Path("/data/trackio")
    override = os.environ.get("TRACKIO_DIR")
    return Path(override) if override else Path(HF_HOME) / "trackio"


# Resolved once, at import time.
TRACKIO_DIR = _get_trackio_dir()
MEDIA_DIR = TRACKIO_DIR / "media"
142
+
143
+
144
def get_or_create_project_hash(project: str) -> str:
    """
    Return the hash stored for `project`, creating and storing one if missing.

    The value lives in `<TRACKIO_DIR>/<project>.hash`; a new value is generated
    with `secrets.token_urlsafe(8)`.
    """
    hash_file = TRACKIO_DIR / f"{project}.hash"
    if not hash_file.exists():
        fresh_hash = secrets.token_urlsafe(8)
        TRACKIO_DIR.mkdir(parents=True, exist_ok=True)
        hash_file.write_text(fresh_hash)
        return fresh_hash
    return hash_file.read_text().strip()
152
+
153
+
154
+ def generate_readable_name(used_names: list[str], space_id: str | None = None) -> str:
155
+ """
156
+ Generates a random, readable name like "dainty-sunset-0".
157
+ If space_id is provided, generates username-timestamp format instead.
158
+ """
159
+ if space_id is not None:
160
+ username = _get_default_namespace()
161
+ timestamp = int(time.time())
162
+ return f"{username}-{timestamp}"
163
+ adjectives = [
164
+ "dainty",
165
+ "brave",
166
+ "calm",
167
+ "eager",
168
+ "fancy",
169
+ "gentle",
170
+ "happy",
171
+ "jolly",
172
+ "kind",
173
+ "lively",
174
+ "merry",
175
+ "nice",
176
+ "proud",
177
+ "quick",
178
+ "hugging",
179
+ "silly",
180
+ "tidy",
181
+ "witty",
182
+ "zealous",
183
+ "bright",
184
+ "shy",
185
+ "bold",
186
+ "clever",
187
+ "daring",
188
+ "elegant",
189
+ "faithful",
190
+ "graceful",
191
+ "honest",
192
+ "inventive",
193
+ "jovial",
194
+ "keen",
195
+ "lucky",
196
+ "modest",
197
+ "noble",
198
+ "optimistic",
199
+ "patient",
200
+ "quirky",
201
+ "resourceful",
202
+ "sincere",
203
+ "thoughtful",
204
+ "upbeat",
205
+ "valiant",
206
+ "warm",
207
+ "youthful",
208
+ "zesty",
209
+ "adventurous",
210
+ "breezy",
211
+ "cheerful",
212
+ "delightful",
213
+ "energetic",
214
+ "fearless",
215
+ "glad",
216
+ "hopeful",
217
+ "imaginative",
218
+ "joyful",
219
+ "kindly",
220
+ "luminous",
221
+ "mysterious",
222
+ "neat",
223
+ "outgoing",
224
+ "playful",
225
+ "radiant",
226
+ "spirited",
227
+ "tranquil",
228
+ "unique",
229
+ "vivid",
230
+ "wise",
231
+ "zany",
232
+ "artful",
233
+ "bubbly",
234
+ "charming",
235
+ "dazzling",
236
+ "earnest",
237
+ "festive",
238
+ "gentlemanly",
239
+ "hearty",
240
+ "intrepid",
241
+ "jubilant",
242
+ "knightly",
243
+ "lively",
244
+ "magnetic",
245
+ "nimble",
246
+ "orderly",
247
+ "peaceful",
248
+ "quick-witted",
249
+ "robust",
250
+ "sturdy",
251
+ "trusty",
252
+ "upstanding",
253
+ "vibrant",
254
+ "whimsical",
255
+ ]
256
+ nouns = [
257
+ "sunset",
258
+ "forest",
259
+ "river",
260
+ "mountain",
261
+ "breeze",
262
+ "meadow",
263
+ "ocean",
264
+ "valley",
265
+ "sky",
266
+ "field",
267
+ "cloud",
268
+ "star",
269
+ "rain",
270
+ "leaf",
271
+ "stone",
272
+ "flower",
273
+ "bird",
274
+ "tree",
275
+ "wave",
276
+ "trail",
277
+ "island",
278
+ "desert",
279
+ "hill",
280
+ "lake",
281
+ "pond",
282
+ "grove",
283
+ "canyon",
284
+ "reef",
285
+ "bay",
286
+ "peak",
287
+ "glade",
288
+ "marsh",
289
+ "cliff",
290
+ "dune",
291
+ "spring",
292
+ "brook",
293
+ "cave",
294
+ "plain",
295
+ "ridge",
296
+ "wood",
297
+ "blossom",
298
+ "petal",
299
+ "root",
300
+ "branch",
301
+ "seed",
302
+ "acorn",
303
+ "pine",
304
+ "willow",
305
+ "cedar",
306
+ "elm",
307
+ "falcon",
308
+ "eagle",
309
+ "sparrow",
310
+ "robin",
311
+ "owl",
312
+ "finch",
313
+ "heron",
314
+ "crane",
315
+ "duck",
316
+ "swan",
317
+ "fox",
318
+ "wolf",
319
+ "bear",
320
+ "deer",
321
+ "moose",
322
+ "otter",
323
+ "beaver",
324
+ "lynx",
325
+ "hare",
326
+ "badger",
327
+ "butterfly",
328
+ "bee",
329
+ "ant",
330
+ "beetle",
331
+ "dragonfly",
332
+ "firefly",
333
+ "ladybug",
334
+ "moth",
335
+ "spider",
336
+ "worm",
337
+ "coral",
338
+ "kelp",
339
+ "shell",
340
+ "pebble",
341
+ "face",
342
+ "boulder",
343
+ "cobble",
344
+ "sand",
345
+ "wavelet",
346
+ "tide",
347
+ "current",
348
+ "mist",
349
+ ]
350
+ number = 0
351
+ name = f"{adjectives[0]}-{nouns[0]}-{number}"
352
+ while name in used_names:
353
+ number += 1
354
+ adjective = adjectives[number % len(adjectives)]
355
+ noun = nouns[number % len(nouns)]
356
+ name = f"{adjective}-{noun}-{number}"
357
+ return name
358
+
359
+
360
def is_in_notebook():
    """
    Detect if code is running in a notebook environment (Jupyter, Colab, etc.).

    Returns False when IPython is not importable or no IPython shell is active.
    """
    notebook_shells = [
        "ZMQInteractiveShell",  # Jupyter notebook/lab
        "Shell",  # IPython terminal
    ]
    try:
        from IPython import get_ipython

        shell = get_ipython()
        if shell is not None:
            if shell.__class__.__name__ in notebook_shells:
                return True
            return "google.colab" in str(shell)
    except ImportError:
        pass
    return False
375
+
376
+
377
def block_main_thread_until_keyboard_interrupt():
    """
    Sleep in 0.1 s slices until a KeyboardInterrupt or OSError is raised.

    The exception is swallowed and a closing message is printed.
    """
    poll_interval = 0.1
    try:
        while True:
            time.sleep(poll_interval)
    except (KeyboardInterrupt, OSError):
        print("Keyboard interruption in main thread... closing dashboard.")
383
+
384
+
385
def simplify_column_names(columns: list[str]) -> dict[str, str]:
    """
    Simplifies column names to first 10 alphanumeric or "/" characters with unique suffixes.

    A name with no such characters becomes `col_<n>`, where `n` is the number of
    simplified names assigned so far. Collisions are resolved by appending
    `_1`, `_2`, ... to the simplified name.

    Args:
        columns: List of original column names

    Returns:
        Dictionary mapping original column names to simplified names
    """
    mapping: dict[str, str] = {}
    taken: set[str] = set()

    for original in columns:
        cleaned = re.sub(r"[^a-zA-Z0-9/]", "", original)
        stem = cleaned[:10] if cleaned else f"col_{len(taken)}"

        candidate = stem
        attempt = 1
        while candidate in taken:
            candidate = f"{stem}_{attempt}"
            attempt += 1

        taken.add(candidate)
        mapping[original] = candidate

    return mapping
412
+
413
+
414
def print_dashboard_instructions(project: str) -> None:
    """
    Prints instructions for viewing the Trackio dashboard.

    Args:
        project: The name of the project to show dashboard for.
    """
    # ANSI escape sequences used to highlight the CLI command.
    orange, bold, reset = "\033[38;5;208m", "\033[1m", "\033[0m"

    instructions = (
        "* View dashboard by running in your terminal:",
        f'{bold}{orange}trackio show --project "{project}"{reset}',
        f'* or by running in Python: trackio.show(project="{project}")',
    )
    for line in instructions:
        print(line)
428
+
429
+
430
+ def preprocess_space_and_dataset_ids(
431
+ space_id: str | None,
432
+ dataset_id: str | None,
433
+ bucket_id: str | None = None,
434
+ ) -> tuple[str | None, str | None, str | None]:
435
+ """
436
+ Preprocesses the Space and Bucket names to ensure they are valid
437
+ "username/name" format. When space_id is provided and bucket_id is not
438
+ explicitly set, auto-generates a bucket_id.
439
+ """
440
+ if space_id is not None and "/" not in space_id:
441
+ username = _get_default_namespace()
442
+ space_id = f"{username}/{space_id}"
443
+ if dataset_id is not None:
444
+ warnings.warn(
445
+ "`dataset_id` is deprecated. Use `bucket_id` instead.",
446
+ DeprecationWarning,
447
+ stacklevel=3,
448
+ )
449
+ if dataset_id is not None and "/" not in dataset_id:
450
+ username = _get_default_namespace()
451
+ dataset_id = f"{username}/{dataset_id}"
452
+ if bucket_id is not None and "/" not in bucket_id:
453
+ username = _get_default_namespace()
454
+ bucket_id = f"{username}/{bucket_id}"
455
+ if space_id is not None and dataset_id is None and bucket_id is None:
456
+ bucket_id = f"{space_id}-bucket"
457
+ return space_id, dataset_id, bucket_id
458
+
459
+
460
def fibo():
    """Endless Fibonacci generator used for backoff delays: 1, 1, 2, 3, 5, 8, ..."""
    current = upcoming = 1
    while True:
        yield current
        current, upcoming = upcoming, current + upcoming
466
+
467
+
468
def format_timestamp(timestamp_str):
    """
    Convert ISO timestamp to human-readable format like '3 minutes ago'.

    Timestamps without timezone information are read as UTC, and a trailing
    "Z" is accepted. Returns "Unknown" for empty/NA input or anything that
    cannot be parsed, and "Just now" for anything under a minute old.
    """
    if not timestamp_str or pd.isna(timestamp_str):
        return "Unknown"

    def ago(count: int, unit: str) -> str:
        plural = "s" if count != 1 else ""
        return f"{count} {unit}{plural} ago"

    try:
        moment = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
        if moment.tzinfo is None:
            moment = moment.replace(tzinfo=timezone.utc)

        elapsed = int((datetime.now(timezone.utc) - moment).total_seconds())
        if elapsed < 60:
            return "Just now"
        if elapsed < 3600:
            return ago(elapsed // 60, "minute")
        if elapsed < 86400:
            return ago(elapsed // 3600, "hour")
        return ago(elapsed // 86400, "day")
    except Exception:
        # Deliberately broad: any malformed input is reported as "Unknown".
        return "Unknown"
495
+
496
+
497
# Fallback palette, used when TRACKIO_COLOR_PALETTE is unset or empty.
DEFAULT_COLOR_PALETTE = [
    "#A8769B",
    "#E89957",
    "#3B82F6",
    "#10B981",
    "#EF4444",
    "#8B5CF6",
    "#14B8A6",
    "#F59E0B",
    "#EC4899",
    "#06B6D4",
]


def get_color_palette() -> list[str]:
    """
    Return the palette from `TRACKIO_COLOR_PALETTE`, or the default one.

    The environment variable is a comma-separated list of colors; whitespace
    around each entry is stripped.
    """
    configured = os.environ.get("TRACKIO_COLOR_PALETTE")
    if not configured:
        return DEFAULT_COLOR_PALETTE
    return [entry.strip() for entry in configured.split(",")]
517
+
518
+
519
+ def get_color_mapping(
520
+ runs: list[str], smoothing: bool, color_palette: list[str] | None = None
521
+ ) -> dict[str, str]:
522
+ """Generate color mapping for runs, with transparency for original data when smoothing is enabled."""
523
+ if color_palette is None:
524
+ color_palette = get_color_palette()
525
+
526
+ color_map = {}
527
+
528
+ for i, run in enumerate(runs):
529
+ base_color = color_palette[i % len(color_palette)]
530
+
531
+ if smoothing:
532
+ color_map[run] = base_color + "4D"
533
+ color_map[f"{run}_smoothed"] = base_color
534
+ else:
535
+ color_map[run] = base_color
536
+
537
+ return color_map
538
+
539
+
540
+ def downsample(
541
+ df: pd.DataFrame,
542
+ x: str,
543
+ y: str,
544
+ color: str | None,
545
+ x_lim: tuple[float | None, float | None] | None = None,
546
+ ) -> tuple[pd.DataFrame, tuple[float, float] | None]:
547
+ """
548
+ Downsample the dataframe to reduce the number of points plotted.
549
+ Also updates the x-axis limits to the data min/max if either of the x-axis limits are None.
550
+
551
+ Args:
552
+ df: The dataframe to downsample.
553
+ x: The column name to use for the x-axis.
554
+ y: The column name to use for the y-axis.
555
+ color: The column name to use for the color.
556
+ x_lim: The x-axis limits to use.
557
+
558
+ Returns:
559
+ A tuple containing the downsampled dataframe and the updated x-axis limits.
560
+ """
561
+ if df.empty:
562
+ if x_lim is not None:
563
+ x_lim = (x_lim[0] or 0, x_lim[1] or 0)
564
+ return df, x_lim
565
+
566
+ columns_to_keep = [x, y]
567
+ if color is not None and color in df.columns:
568
+ columns_to_keep.append(color)
569
+ df = df[columns_to_keep].copy()
570
+
571
+ data_x_min = df[x].min()
572
+ data_x_max = df[x].max()
573
+
574
+ if x_lim is not None:
575
+ x_min, x_max = x_lim
576
+ if x_min is None:
577
+ x_min = data_x_min
578
+ if x_max is None:
579
+ x_max = data_x_max
580
+ updated_x_lim = (x_min, x_max)
581
+ else:
582
+ updated_x_lim = None
583
+
584
+ n_bins = 100
585
+
586
+ if color is not None and color in df.columns:
587
+ groups = df.groupby(color)
588
+ else:
589
+ groups = [(None, df)]
590
+
591
+ downsampled_indices = []
592
+
593
+ for _, group_df in groups:
594
+ if group_df.empty:
595
+ continue
596
+
597
+ group_df = group_df.sort_values(x)
598
+
599
+ if updated_x_lim is not None:
600
+ x_min, x_max = updated_x_lim
601
+ before_point = group_df[group_df[x] < x_min].tail(1)
602
+ after_point = group_df[group_df[x] > x_max].head(1)
603
+ group_df = group_df[(group_df[x] >= x_min) & (group_df[x] <= x_max)]
604
+ else:
605
+ before_point = after_point = None
606
+ x_min = group_df[x].min()
607
+ x_max = group_df[x].max()
608
+
609
+ if before_point is not None and not before_point.empty:
610
+ downsampled_indices.extend(before_point.index.tolist())
611
+ if after_point is not None and not after_point.empty:
612
+ downsampled_indices.extend(after_point.index.tolist())
613
+
614
+ if group_df.empty:
615
+ continue
616
+
617
+ if x_min == x_max:
618
+ min_y_idx = group_df[y].idxmin()
619
+ max_y_idx = group_df[y].idxmax()
620
+ if min_y_idx != max_y_idx:
621
+ downsampled_indices.extend([min_y_idx, max_y_idx])
622
+ else:
623
+ downsampled_indices.append(min_y_idx)
624
+ continue
625
+
626
+ if len(group_df) < 500:
627
+ downsampled_indices.extend(group_df.index.tolist())
628
+ continue
629
+
630
+ bins = np.linspace(x_min, x_max, n_bins + 1)
631
+ group_df["bin"] = pd.cut(
632
+ group_df[x], bins=bins, labels=False, include_lowest=True
633
+ )
634
+
635
+ for bin_idx in group_df["bin"].dropna().unique():
636
+ bin_data = group_df[group_df["bin"] == bin_idx]
637
+ if bin_data.empty:
638
+ continue
639
+
640
+ min_y_idx = bin_data[y].idxmin()
641
+ max_y_idx = bin_data[y].idxmax()
642
+
643
+ downsampled_indices.append(min_y_idx)
644
+ if min_y_idx != max_y_idx:
645
+ downsampled_indices.append(max_y_idx)
646
+
647
+ unique_indices = list(set(downsampled_indices))
648
+
649
+ downsampled_df = df.loc[unique_indices].copy()
650
+
651
+ if color is not None:
652
+ downsampled_df = (
653
+ downsampled_df.groupby(color, sort=False)[downsampled_df.columns]
654
+ .apply(lambda group: group.sort_values(x))
655
+ .reset_index(drop=True)
656
+ )
657
+ else:
658
+ downsampled_df = downsampled_df.sort_values(x).reset_index(drop=True)
659
+
660
+ downsampled_df = downsampled_df.drop(columns=["bin"], errors="ignore")
661
+
662
+ return downsampled_df, updated_x_lim
663
+
664
+
665
def sort_metrics_by_prefix(metrics: list[str]) -> list[str]:
    """
    Sort metrics by grouping prefixes together for dropdown/list display.
    Metrics without prefixes come first, then grouped by prefix.

    Args:
        metrics: List of metric names

    Returns:
        List of metric names sorted by prefix

    Example:
        Input: ["train/loss", "loss", "train/acc", "val/loss"]
        Output: ["loss", "train/acc", "train/loss", "val/loss"]
    """
    grouped = group_metrics_by_prefix(metrics)

    # Unprefixed metrics (the "charts" group) lead, then prefixes alphabetically.
    ordered = list(grouped.get("charts", []))
    for prefix in sorted(grouped):
        if prefix == "charts":
            continue
        ordered.extend(grouped[prefix])
    return ordered
691
+
692
+
693
def group_metrics_by_prefix(metrics: list[str]) -> dict[str, list[str]]:
    """
    Group metrics by their prefix. Metrics without prefix go to 'charts' group.

    The prefix is the text before the first "/". Metrics inside every group are
    sorted alphabetically; "charts" (if any) is the first key, followed by the
    prefixes in alphabetical order.

    Args:
        metrics: List of metric names

    Returns:
        Dictionary with prefix names as keys and lists of metrics as values

    Example:
        Input: ["loss", "accuracy", "train/loss", "train/acc", "val/loss"]
        Output: {
            "charts": ["accuracy", "loss"],
            "train": ["train/acc", "train/loss"],
            "val": ["val/loss"]
        }
    """
    unprefixed = sorted(name for name in metrics if "/" not in name)

    by_prefix: dict[str, list[str]] = {}
    for name in metrics:
        if "/" in name:
            by_prefix.setdefault(name.split("/")[0], []).append(name)

    grouped: dict[str, list[str]] = {}
    if unprefixed:
        grouped["charts"] = unprefixed
    for prefix in sorted(by_prefix):
        grouped[prefix] = sorted(by_prefix[prefix])
    return grouped
740
+
741
+
742
+ def get_sync_status(scheduler: "CommitScheduler | DummyCommitScheduler") -> int | None:
743
+ """Get the sync status from the CommitScheduler in an integer number of minutes, or None if not synced yet."""
744
+ if getattr(
745
+ scheduler, "last_push_time", None
746
+ ): # DummyCommitScheduler doesn't have last_push_time
747
+ time_diff = time.time() - scheduler.last_push_time
748
+ return int(time_diff / 60)
749
+ else:
750
+ return None
751
+
752
+
753
def generate_share_url(
    project: str,
    metrics: str,
    selected_runs: list = None,
    hide_headers: bool = False,
) -> str:
    """
    Generate the shareable Space URL based on current settings.

    Returns an empty string when the `SPACE_HOST` environment variable is not
    set. Empty settings are left out of the query string; `hide_headers` adds
    `accordion`, `sidebar` and `navbar` set to "hidden".
    """
    space_host = os.environ.get("SPACE_HOST", "")
    if not space_host:
        return ""

    candidates = [
        ("project", project if project else None),
        ("metrics", metrics if metrics and metrics.strip() else None),
        ("runs", ",".join(selected_runs) if selected_runs else None),
    ]
    if hide_headers:
        candidates += [
            ("accordion", "hidden"),
            ("sidebar", "hidden"),
            ("navbar", "hidden"),
        ]

    params: dict[str, str] = {
        key: value for key, value in candidates if value is not None
    }
    return f"https://{space_host}?{urlencode(params)}"
782
+
783
+
784
def generate_embed_code(
    project: str,
    metrics: str,
    selected_runs: list = None,
    hide_headers: bool = False,
) -> str:
    """
    Generate the embed iframe code based on current settings.

    Wraps the share URL in a fixed-size (1600x500) iframe; returns an empty
    string when no share URL is available.
    """
    share_url = generate_share_url(project, metrics, selected_runs, hide_headers)
    if share_url:
        return f'<iframe src="{share_url}" style="width:1600px; height:500px; border:0;"></iframe>'
    return ""
796
+
797
+
798
def serialize_values(metrics):
    """
    Serialize infinity and NaN values in metrics dict to make it JSON-compliant.
    Only handles top-level float values; non-dict input is returned unchanged.

    Converts:
    - float('inf') -> "Infinity"
    - float('-inf') -> "-Infinity"
    - float('nan') -> "NaN"

    Finite NumPy floats that are not Python `float` instances are converted to
    plain `float`; every other value is passed through as is.

    Example:
        {"loss": float('inf'), "accuracy": 0.95} -> {"loss": "Infinity", "accuracy": 0.95}
    """
    if not isinstance(metrics, dict):
        return metrics

    def encode(value):
        is_python_float = isinstance(value, float)
        if not (is_python_float or isinstance(value, np.floating)):
            return value
        number = float(value)
        if math.isinf(number):
            return "Infinity" if number > 0 else "-Infinity"
        if math.isnan(number):
            return "NaN"
        # Python floats (np.float64 is one) pass through untouched; other
        # NumPy float types are narrowed to a built-in float.
        return value if is_python_float else number

    return {key: encode(value) for key, value in metrics.items()}
834
+
835
+
836
def deserialize_values(metrics):
    """
    Turn the string markers for non-finite floats back into float values.

    Only top-level values are inspected. Non-dict inputs are returned as-is.

    Replacements:
    - "Infinity"  -> float('inf')
    - "-Infinity" -> float('-inf')
    - "NaN"       -> float('nan')

    Example:
        {"loss": "Infinity", "accuracy": 0.95} -> {"loss": float('inf'), "accuracy": 0.95}
    """
    if not isinstance(metrics, dict):
        return metrics

    markers = ("Infinity", "-Infinity", "NaN")
    restored = {}
    for name, stored in metrics.items():
        # First marker the value compares equal to, or None when it matches none.
        hit = next((marker for marker in markers if stored == marker), None)
        # float() parses each of the three marker spellings directly.
        restored[name] = stored if hit is None else float(hit)
    return restored
863
+
864
+
865
+ def get_full_url(
866
+ base_url: str, project: str | None, write_token: str, footer: bool = True
867
+ ) -> str:
868
+ params = []
869
+ if project:
870
+ params.append(f"project={project}")
871
+ params.append(f"write_token={write_token}")
872
+ if not footer:
873
+ params.append("footer=false")
874
+ return base_url + "?" + "&".join(params)
875
+
876
+
877
def embed_url_in_notebook(url: str) -> None:
    """Display ``url`` inside an iframe using IPython's display machinery.

    This is best-effort: when an ``ImportError`` is raised (for example
    because IPython is not installed), the function silently does nothing.
    """
    try:
        from IPython.display import HTML, display

        iframe_markup = f'<div><iframe src="{url}" width="100%" height="1000px" allow="autoplay; camera; microphone; clipboard-read; clipboard-write;" frameborder="0" allowfullscreen></iframe></div>'
        display(HTML(iframe_markup))
    except ImportError:
        # IPython is unavailable; there is nothing to embed into.
        return
887
+
888
+
889
def to_json_safe(obj):
    """Recursively convert ``obj`` into plain JSON-friendly Python structures.

    Conversion rules, applied in order:
    - ``None``, ``str``, ``int``, ``float`` and ``bool`` are returned unchanged.
    - NumPy scalars are unwrapped with ``.item()``.
    - Dicts get string keys and recursively converted values.
    - Lists, tuples and sets become lists of converted elements.
    - Objects with a callable ``to_dict`` are converted from its result.
    - Objects with a ``__dict__`` become a dict of their attributes whose
      names do not start with an underscore.
    - Anything else falls back to ``str(obj)``.
    """
    if obj is None or isinstance(obj, (str, int, float, bool)):
        return obj
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, dict):
        converted = {}
        for key, val in obj.items():
            converted[str(key)] = to_json_safe(val)
        return converted
    if isinstance(obj, (list, tuple, set)):
        return list(map(to_json_safe, obj))

    exporter = getattr(obj, "to_dict", None)
    if callable(exporter):
        return to_json_safe(exporter())

    if hasattr(obj, "__dict__"):
        public_attrs = {}
        for attr, val in vars(obj).items():
            if attr.startswith("_"):
                continue
            public_attrs[str(attr)] = to_json_safe(val)
        return public_attrs

    return str(obj)
907
+
908
+
909
+ def get_space() -> str | None:
910
+ """
911
+ Get the space ID ("user/space") if Trackio is running in a Space, or None if not.
912
+ """
913
+ return os.environ.get("SPACE_ID")
914
+
915
+
916
+ def ordered_subset(items: list[str], subset: list[str] | None) -> list[str]:
917
+ subset_set = set(subset or [])
918
+ return [item for item in items if item in subset_set]
919
+
920
+
921
def _get_default_namespace() -> str:
    """Return the default namespace: the ``"name"`` field of the whoami response.

    The lookup uses the token returned by ``huggingface_hub.get_token()`` and
    passes ``cache=True`` to ``huggingface_hub.whoami`` so that repeated calls
    avoid hitting /whoami-v2 again.
    """
    account_info = huggingface_hub.whoami(
        token=huggingface_hub.get_token(), cache=True
    )
    return account_info["name"]