NavyDevilDoc committed on
Commit
9e01cfe
·
verified ·
1 Parent(s): ac7456f

Update src/tracker.py

Browse files

fixing user permission issues

Files changed (1) hide show
  1. src/tracker.py +267 -261
src/tracker.py CHANGED
@@ -1,262 +1,268 @@
1
- import streamlit as st
2
- import streamlit_authenticator as stauth
3
- import yaml
4
- from yaml.loader import SafeLoader
5
- import json
6
- import os
7
- import uuid
8
- from datetime import datetime
9
- import pytz
10
- from huggingface_hub import HfApi, hf_hub_download, snapshot_download, CommitScheduler
11
- from pathlib import Path
12
- import bcrypt
13
-
14
- # --- CONFIGURATION ---
15
- DATASET_REPO_ID = "NavyDevilDoc/navy-ai-logs"
16
- LOG_FILE = "usage_log.json"
17
- CONFIG_FILE = "config.yaml"
18
- CHROMA_ROOT = "chroma_db"
19
- HF_TOKEN = os.getenv("HF_TOKEN")
20
- INVITE_CODE = os.getenv("INVITE_CODE", "CHANGE_ME_IN_SETTINGS") # Security Fix
21
- TIMEZONE = pytz.timezone("US/Eastern")
22
-
23
- # --- DATA PERSISTENCE SETUP (The Fix) ---
24
- # Create a local directory that the Scheduler will watch
25
- LOCAL_DATA_DIR = Path("data_persistence")
26
- LOCAL_DATA_DIR.mkdir(exist_ok=True)
27
-
28
- # Initialize the Scheduler
29
- # This runs in the background and pushes changes every 1 minute
30
- scheduler = CommitScheduler(
31
- repo_id=DATASET_REPO_ID,
32
- repo_type="dataset",
33
- folder_path=LOCAL_DATA_DIR,
34
- path_in_repo=".", # Sync to root of dataset
35
- every=1, # Sync every 1 minute
36
- token=HF_TOKEN
37
- )
38
-
39
- # --- PATH HELPERS ---
40
- def get_config_path():
41
- """Returns the path to the LOCAL config file in the persistence folder."""
42
- return LOCAL_DATA_DIR / CONFIG_FILE
43
-
44
- def get_log_path():
45
- """Returns the path to the LOCAL log file in the persistence folder."""
46
- return LOCAL_DATA_DIR / LOG_FILE
47
-
48
- # --- GENERIC FILE SYNC (Cached!) ---
49
- @st.cache_data(ttl=60) # Only check cloud every 60 seconds
50
- def download_config_if_needed():
51
- """Downloads config from HF only if cache is stale."""
52
- if not HF_TOKEN: return
53
- try:
54
- hf_hub_download(
55
- repo_id=DATASET_REPO_ID,
56
- filename=CONFIG_FILE,
57
- repo_type="dataset",
58
- local_dir=LOCAL_DATA_DIR, # Download directly to watched folder
59
- token=HF_TOKEN,
60
- force_download=True
61
- )
62
- print("✅ Config refreshed from cloud.")
63
- except Exception as e:
64
- print(f"⚠️ Cloud pull failed for config: {e}")
65
-
66
- # We don't cache logs because we need to write to them frequently
67
- def ensure_log_exists():
68
- if not (LOCAL_DATA_DIR / LOG_FILE).exists():
69
- try:
70
- hf_hub_download(
71
- repo_id=DATASET_REPO_ID,
72
- filename=LOG_FILE,
73
- repo_type="dataset",
74
- local_dir=LOCAL_DATA_DIR,
75
- token=HF_TOKEN
76
- )
77
- except:
78
- # Create empty log if it doesn't exist on cloud yet
79
- with open(LOCAL_DATA_DIR / LOG_FILE, "w") as f:
80
- json.dump({}, f)
81
-
82
- # --- USER DB SYNC (For ChromaDB) ---
83
- def download_user_db(username):
84
- """Restores ONLY the specific user's Knowledge Base."""
85
- if not HF_TOKEN: return
86
-
87
- target_dir = os.path.dirname(os.path.abspath(__file__))
88
- user_db_path = f"{CHROMA_ROOT}/{username}"
89
-
90
- try:
91
- # We don't use the scheduler for ChromaDB yet (too large)
92
- # We stick to snapshot_download for now
93
- print(f"📥 Syncing Knowledge Base for {username}...")
94
- snapshot_download(
95
- repo_id=DATASET_REPO_ID,
96
- repo_type="dataset",
97
- allow_patterns=[f"{user_db_path}/*"],
98
- local_dir=target_dir,
99
- token=HF_TOKEN
100
- )
101
- print("✅ User Knowledge Base Restored.")
102
- except Exception as e:
103
- print(f"⚠️ New user or sync error: {e}")
104
-
105
- def upload_user_db(username):
106
- """Backs up ONLY the specific user's Knowledge Base."""
107
- if not HF_TOKEN: return
108
-
109
- target_dir = os.path.dirname(os.path.abspath(__file__))
110
- user_db_rel_path = os.path.join(CHROMA_ROOT, username)
111
- user_db_abs_path = os.path.join(target_dir, user_db_rel_path)
112
-
113
- if not os.path.exists(user_db_abs_path):
114
- return
115
-
116
- try:
117
- api = HfApi(token=HF_TOKEN)
118
- api.upload_folder(
119
- folder_path=user_db_abs_path,
120
- path_in_repo=user_db_rel_path,
121
- repo_id=DATASET_REPO_ID,
122
- repo_type="dataset",
123
- commit_message=f"KB Update ({username}): {datetime.now(TIMEZONE)}"
124
- )
125
- print(f"✅ Knowledge Base Saved for {username}.")
126
- except Exception as e:
127
- print(f"⚠️ DB sync failed: {e}")
128
-
129
- # --- AUTHENTICATION ---
130
- def check_login():
131
- # 1. Cached Download
132
- download_config_if_needed()
133
-
134
- try:
135
- config_path = get_config_path()
136
- if not config_path.exists():
137
- st.error(f"🚨 CRITICAL: Config not found at {config_path}")
138
- return False
139
-
140
- with open(config_path) as file:
141
- config = yaml.load(file, Loader=SafeLoader)
142
- except Exception as e:
143
- st.error(f"🚨 Config Error: {e}")
144
- return False
145
-
146
- authenticator = stauth.Authenticate(
147
- config['credentials'],
148
- config['cookie']['name'],
149
- config['cookie']['key'],
150
- config['cookie']['expiry_days']
151
- )
152
-
153
- authenticator.login(location='main')
154
-
155
- if st.session_state["authentication_status"]:
156
- username = st.session_state["username"]
157
- try:
158
- user_data = config['credentials']['usernames'].get(username, {})
159
- user_roles = user_data.get('roles', [])
160
- except Exception as e:
161
- user_roles = []
162
-
163
- st.session_state.roles = user_roles
164
- st.session_state.username = username
165
- st.session_state.name = st.session_state.get("name")
166
- st.session_state.authenticator = authenticator
167
- return True
168
-
169
- elif st.session_state["authentication_status"] is False:
170
- st.error('Username/password is incorrect')
171
- return False
172
- elif st.session_state["authentication_status"] is None:
173
- st.warning('Please enter your username and password')
174
- return False
175
-
176
- # --- REGISTRATION ---
177
- def register_user(new_email, new_username, new_name, new_password, invite_code):
178
- if invite_code != INVITE_CODE:
179
- return False, "Invalid Invite Code."
180
-
181
- # Ensure we have the latest config before writing
182
- download_config_if_needed()
183
- config_path = get_config_path()
184
-
185
- # Lock the file for reading/writing
186
- # (The Scheduler handles the cloud sync, but we need to handle local consistency)
187
- with scheduler.lock:
188
- with open(config_path) as file:
189
- config = yaml.load(file, Loader=SafeLoader)
190
-
191
- if new_username in config['credentials']['usernames']:
192
- return False, "Username already exists."
193
-
194
- hashed_bytes = bcrypt.hashpw(new_password.encode('utf-8'), bcrypt.gensalt())
195
- hashed_pwd = hashed_bytes.decode('utf-8')
196
-
197
- new_user_entry = {
198
- "email": new_email,
199
- "name": new_name,
200
- "password": hashed_pwd,
201
- "roles": ["user"]
202
- }
203
-
204
- config['credentials']['usernames'][new_username] = new_user_entry
205
-
206
- with open(config_path, 'w') as file:
207
- yaml.dump(config, file, default_flow_style=False)
208
-
209
- return True, "Account created! Please log in."
210
-
211
- # --- LOGGING ---
212
- def log_usage(model_name, input_tokens, output_tokens):
213
- ensure_log_exists()
214
- log_path = get_log_path()
215
-
216
- username = st.session_state.get("username", "anonymous")
217
- now_est = datetime.now(TIMEZONE)
218
- today = now_est.strftime("%Y-%m-%d")
219
-
220
- # Scheduler Lock guarantees atomic writes locally
221
- with scheduler.lock:
222
- data = {}
223
- if log_path.exists():
224
- with open(log_path, "r") as f:
225
- try:
226
- data = json.load(f)
227
- except:
228
- data = {}
229
-
230
- if today not in data:
231
- data[today] = {"total_tokens": 0, "users": {}}
232
-
233
- if username not in data[today]["users"]:
234
- data[today]["users"][username] = {"input": 0, "output": 0, "calls": 0}
235
-
236
- data[today]["total_tokens"] += (input_tokens + output_tokens)
237
- data[today]["users"][username]["input"] += input_tokens
238
- data[today]["users"][username]["output"] += output_tokens
239
- data[today]["users"][username]["calls"] += 1
240
-
241
- with open(log_path, "w") as f:
242
- json.dump(data, f, indent=2)
243
-
244
- # No need to call upload_file() manually!
245
- # The Scheduler detects the file change and uploads it automatically.
246
-
247
- def get_daily_stats():
248
- ensure_log_exists()
249
- log_path = get_log_path()
250
-
251
- now_est = datetime.now(TIMEZONE)
252
- today = now_est.strftime("%Y-%m-%d")
253
-
254
- if log_path.exists():
255
- with open(log_path, "r") as f:
256
- try:
257
- data = json.load(f)
258
- if today in data:
259
- return data[today]
260
- except:
261
- pass
 
 
 
 
 
 
262
  return {"total_tokens": 0, "users": {}}
 
1
import hmac
import json
import os
import stat  # permission handling for synced ChromaDB files
import uuid
from datetime import datetime
from pathlib import Path

import bcrypt
import pytz
import streamlit as st
import streamlit_authenticator as stauth
import yaml
from huggingface_hub import HfApi, hf_hub_download, snapshot_download, CommitScheduler
from yaml.loader import SafeLoader
14
+
15
# --- CONFIGURATION ---
DATASET_REPO_ID = "NavyDevilDoc/navy-ai-logs"  # HF dataset repo used for all persistence
LOG_FILE = "usage_log.json"                    # per-day token usage log (JSON object)
CONFIG_FILE = "config.yaml"                    # streamlit-authenticator credentials/config
CHROMA_ROOT = "chroma_db"                      # root folder holding per-user ChromaDB stores
HF_TOKEN = os.getenv("HF_TOKEN")               # may be None; sync helpers no-op without it
INVITE_CODE = os.getenv("INVITE_CODE", "CHANGE_ME_IN_SETTINGS")
TIMEZONE = pytz.timezone("US/Eastern")

# --- DATA PERSISTENCE SETUP ---
# Local folder watched by the CommitScheduler; anything written here is
# pushed to the dataset repo automatically in the background.
LOCAL_DATA_DIR = Path("data_persistence")
LOCAL_DATA_DIR.mkdir(exist_ok=True)

# Background scheduler: commits LOCAL_DATA_DIR to the dataset repo root
# every minute. Its .lock is also reused below to make local
# read-modify-write cycles on config/log files atomic across sessions.
scheduler = CommitScheduler(
    repo_id=DATASET_REPO_ID,
    repo_type="dataset",
    folder_path=LOCAL_DATA_DIR,
    path_in_repo=".",
    every=1,  # minutes (per the previous revision's "sync every 1 minute" note)
    token=HF_TOKEN
)
36
+
37
+ # --- PATH HELPERS ---
38
def get_config_path():
    """Path of the local config.yaml inside the scheduler-watched folder."""
    return LOCAL_DATA_DIR.joinpath(CONFIG_FILE)
40
+
41
def get_log_path():
    """Path of the local usage log inside the scheduler-watched folder."""
    return LOCAL_DATA_DIR.joinpath(LOG_FILE)
43
+
44
+ # --- GENERIC FILE SYNC ---
45
@st.cache_data(ttl=60)
def download_config_if_needed():
    """Pull the latest config.yaml from the dataset repo, at most once a minute.

    The st.cache_data(ttl=60) decorator is what throttles the cloud hit.
    No-op when HF_TOKEN is unset; failures are printed, never raised.
    """
    if not HF_TOKEN:
        return
    try:
        hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename=CONFIG_FILE,
            repo_type="dataset",
            local_dir=LOCAL_DATA_DIR,  # download straight into the watched folder
            token=HF_TOKEN,
            force_download=True,
        )
    except Exception as err:
        print(f"⚠️ Cloud pull failed for config: {err}")
    else:
        print("✅ Config refreshed from cloud.")
60
+
61
def ensure_log_exists():
    """Make sure a local usage log exists, seeding it from the cloud if possible.

    No-op when the log is already present locally. Otherwise attempt to
    download the existing log from the dataset repo; if that fails (new
    deployment, no network, missing token) create an empty JSON object so
    callers can always open the file.
    """
    log_path = LOCAL_DATA_DIR / LOG_FILE
    if log_path.exists():
        return
    try:
        hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename=LOG_FILE,
            repo_type="dataset",
            local_dir=LOCAL_DATA_DIR,
            token=HF_TOKEN,
        )
    except Exception as err:  # narrowed from bare `except:` so Ctrl-C still works
        # Log not on the cloud yet (or download failed): start fresh, loudly.
        print(f"ℹ️ Seeding empty usage log ({err})")
        with open(log_path, "w") as f:
            json.dump({}, f)
74
+
75
+ # --- USER DB SYNC (THE FIX IS HERE) ---
76
def download_user_db(username):
    """Restore a single user's Knowledge Base from the dataset repo.

    After downloading, owner-write permission is force-added to the user's
    ChromaDB folder and everything inside it, which resolves SQLite's
    "readonly database" (error 1032) on freshly synced files. Does nothing
    when HF_TOKEN is unset; any error is printed and swallowed.
    """
    if not HF_TOKEN:
        return

    target_dir = os.path.dirname(os.path.abspath(__file__))
    user_db_path = f"{CHROMA_ROOT}/{username}"

    def _make_writable(path, traversable):
        # Add owner-write (plus owner-execute for directories, so they stay
        # traversable) on top of whatever mode the download left behind.
        mode = path.stat().st_mode
        extra = stat.S_IWUSR | stat.S_IXUSR if traversable else stat.S_IWUSR
        path.chmod(mode | extra)

    try:
        print(f"📥 Syncing Knowledge Base for {username}...")
        snapshot_download(
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
            allow_patterns=[f"{user_db_path}/*"],
            local_dir=target_dir,
            token=HF_TOKEN,
        )

        # --- PERMISSION FIX ---
        user_folder = Path(target_dir) / CHROMA_ROOT / username
        if user_folder.exists():
            _make_writable(user_folder, traversable=True)
            for entry in user_folder.rglob('*'):
                _make_writable(entry, traversable=entry.is_dir())

        print("✅ User Knowledge Base Restored & Unlocked.")

    except Exception as e:
        print(f"⚠️ New user or sync error: {e}")
118
+
119
def upload_user_db(username):
    """Back up a single user's Knowledge Base to the dataset repo.

    Silently returns when HF_TOKEN is unset or the user has no local DB
    folder; upload errors are printed rather than raised.
    """
    if not HF_TOKEN:
        return

    script_dir = os.path.dirname(os.path.abspath(__file__))
    rel_path = os.path.join(CHROMA_ROOT, username)
    abs_path = os.path.join(script_dir, rel_path)

    if not os.path.exists(abs_path):
        return

    try:
        HfApi(token=HF_TOKEN).upload_folder(
            folder_path=abs_path,
            path_in_repo=rel_path,
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
            commit_message=f"KB Update ({username}): {datetime.now(TIMEZONE)}",
        )
        print(f"✅ Knowledge Base Saved for {username}.")
    except Exception as e:
        print(f"⚠️ DB sync failed: {e}")
142
+
143
+ # --- AUTHENTICATION ---
144
def check_login():
    """Render the login widget and populate session state on success.

    Returns True when the user is authenticated, False otherwise (bad
    credentials, no input yet, or unreadable config). On success, stores
    roles/username/name/authenticator in st.session_state for later pages.
    """
    # Refresh config from the cloud first (cached, at most once/minute).
    download_config_if_needed()

    try:
        config_path = get_config_path()
        if not config_path.exists():
            st.error(f"🚨 CRITICAL: Config not found at {config_path}")
            return False

        with open(config_path) as file:
            config = yaml.load(file, Loader=SafeLoader)
    except Exception as e:
        st.error(f"🚨 Config Error: {e}")
        return False

    authenticator = stauth.Authenticate(
        config['credentials'],
        config['cookie']['name'],
        config['cookie']['key'],
        config['cookie']['expiry_days']
    )

    # Renders the login form; outcome lands in st.session_state
    # ("authentication_status" is True / False / None).
    authenticator.login(location='main')

    if st.session_state["authentication_status"]:
        username = st.session_state["username"]
        try:
            # Missing user entry or malformed config simply yields no roles.
            user_data = config['credentials']['usernames'].get(username, {})
            user_roles = user_data.get('roles', [])
        except Exception as e:
            user_roles = []

        st.session_state.roles = user_roles
        st.session_state.username = username
        st.session_state.name = st.session_state.get("name")
        # Keep the authenticator around so other pages can call logout().
        st.session_state.authenticator = authenticator
        return True

    elif st.session_state["authentication_status"] is False:
        st.error('Username/password is incorrect')
        return False
    elif st.session_state["authentication_status"] is None:
        st.warning('Please enter your username and password')
        return False
188
+
189
+ # --- REGISTRATION ---
190
def register_user(new_email, new_username, new_name, new_password, invite_code):
    """Create a new account in config.yaml, guarded by the invite code.

    Returns a (success, message) tuple. The scheduler lock makes the local
    read-modify-write of the config file atomic across sessions; the
    CommitScheduler then pushes the change to the cloud in the background.
    """
    # Constant-time comparison so the invite code can't be probed
    # character-by-character via response timing.
    supplied = (invite_code or "").encode("utf-8")
    if not hmac.compare_digest(supplied, INVITE_CODE.encode("utf-8")):
        return False, "Invalid Invite Code."

    # Ensure we have the latest config before writing (cached pull).
    download_config_if_needed()
    config_path = get_config_path()
    if not config_path.exists():
        # Config never synced down: fail cleanly instead of raising
        # FileNotFoundError out of the lock below.
        return False, "Server configuration unavailable. Please try again."

    with scheduler.lock:
        with open(config_path) as file:
            config = yaml.load(file, Loader=SafeLoader)

        if new_username in config['credentials']['usernames']:
            return False, "Username already exists."

        # Store only the bcrypt hash, never the plaintext password.
        hashed_pwd = bcrypt.hashpw(
            new_password.encode('utf-8'), bcrypt.gensalt()
        ).decode('utf-8')

        config['credentials']['usernames'][new_username] = {
            "email": new_email,
            "name": new_name,
            "password": hashed_pwd,
            "roles": ["user"],  # new accounts never start with admin rights
        }

        with open(config_path, 'w') as file:
            yaml.dump(config, file, default_flow_style=False)

    return True, "Account created! Please log in."
220
+
221
+ # --- LOGGING ---
222
def log_usage(model_name, input_tokens, output_tokens):
    """Record one model call in the local usage log.

    Counts are bucketed by US/Eastern date and by username (from session
    state, "anonymous" if absent). The read-modify-write happens inside the
    scheduler lock so concurrent sessions can't interleave; the
    CommitScheduler uploads the changed file automatically.

    Note: model_name is accepted for interface stability but is not yet
    persisted in the log schema.
    """
    ensure_log_exists()
    log_path = get_log_path()

    username = st.session_state.get("username", "anonymous")
    today = datetime.now(TIMEZONE).strftime("%Y-%m-%d")

    with scheduler.lock:
        data = {}
        if log_path.exists():
            with open(log_path, "r") as f:
                try:
                    data = json.load(f)
                except ValueError:  # corrupt/partial JSON: start a fresh log
                    data = {}

        day = data.setdefault(today, {"total_tokens": 0, "users": {}})
        user = day["users"].setdefault(
            username, {"input": 0, "output": 0, "calls": 0}
        )

        day["total_tokens"] += input_tokens + output_tokens
        user["input"] += input_tokens
        user["output"] += output_tokens
        user["calls"] += 1

        with open(log_path, "w") as f:
            json.dump(data, f, indent=2)

    # No manual upload needed: the scheduler detects the file change.
252
+
253
def get_daily_stats():
    """Return today's usage bucket, or an empty one if nothing is logged yet.

    Shape: {"total_tokens": int, "users": {username: {"input", "output",
    "calls"}}} — matching what log_usage writes.
    """
    ensure_log_exists()
    log_path = get_log_path()

    today = datetime.now(TIMEZONE).strftime("%Y-%m-%d")

    if log_path.exists():
        with open(log_path, "r") as f:
            try:
                data = json.load(f)
                if today in data:
                    return data[today]
            except (ValueError, TypeError):
                # Narrowed from bare `except:`: ValueError covers bad JSON,
                # TypeError covers a log whose top level isn't a mapping.
                pass
    return {"total_tokens": 0, "users": {}}