HIRO12121212 committed on
Commit
4fcd409
·
verified ·
1 Parent(s): ed1728c

Update token_cleaner.py

Browse files
Files changed (1) hide show
  1. token_cleaner.py +60 -130
token_cleaner.py CHANGED
@@ -25,86 +25,44 @@ logger = logging.getLogger(__name__)
25
 
26
  class TokenCleaner:
27
  def __init__(self, hf_token, dataset_name="HIRO12121212/videoinutoken"):
28
- """
29
- Initialize Token Cleaner
30
-
31
- Args:
32
- hf_token: HuggingFace API token
33
- dataset_name: Dataset name containing videoinu tokens
34
- """
35
  self.hf_token = hf_token
36
  self.dataset_name = dataset_name
37
  self.hf_api = HfApi()
38
  logger.info(f"Token Cleaner initialized for dataset: {dataset_name}")
39
 
40
  def decode_jwt_token(self, token):
41
- """
42
- Decode JWT token to extract expiration time
43
-
44
- Args:
45
- token: JWT token string
46
-
47
- Returns:
48
- dict: Decoded token data or None if invalid
49
- """
50
  try:
51
- # JWT format: header.payload.signature
52
  parts = token.split('.')
53
  if len(parts) != 3:
54
  logger.warning("Invalid JWT format (not 3 parts)")
55
  return None
56
-
57
- # Decode payload (second part)
58
  payload = parts[1]
59
- # Add padding if needed (JWT base64 doesn't use padding)
60
  padding = 4 - (len(payload) % 4)
61
  if padding != 4:
62
  payload += '=' * padding
63
-
64
- # Decode base64
65
  decoded_bytes = base64.urlsafe_b64decode(payload)
66
  decoded_json = json.loads(decoded_bytes)
67
  return decoded_json
68
-
69
  except Exception as e:
70
  logger.error(f"Error decoding JWT token: {str(e)}")
71
  return None
72
 
73
  def is_token_expired(self, token_data):
74
- """
75
- Check if token is expired based on 'exp' field
76
-
77
- Args:
78
- token_data: Decoded JWT data
79
-
80
- Returns:
81
- bool: True if expired, False otherwise
82
- """
83
  if not token_data or 'exp' not in token_data:
84
- return True # Consider invalid tokens as expired
85
-
86
  exp_timestamp = token_data['exp']
87
  current_timestamp = int(time.time())
88
  is_expired = current_timestamp >= exp_timestamp
89
-
90
  if is_expired:
91
  exp_date = datetime.fromtimestamp(exp_timestamp)
92
  logger.info(f"Token expired on {exp_date} (email: {token_data.get('email', 'unknown')})")
93
-
94
  return is_expired
95
 
96
  def fetch_all_tokens(self):
97
- """
98
- Fetch all tokens from HuggingFace dataset
99
-
100
- Returns:
101
- list: Array of token data
102
- """
103
  try:
104
  download_url = f"https://huggingface.co/datasets/{self.dataset_name}/resolve/main/videoinu_tokens.json"
105
  headers = {"Authorization": f"Bearer {self.hf_token}"}
106
  response = requests.get(download_url, headers=headers, timeout=10)
107
-
108
  if response.status_code == 200:
109
  tokens_array = json.loads(response.text)
110
  logger.info(f"✅ Fetched {len(tokens_array)} tokens from dataset")
@@ -112,27 +70,16 @@ class TokenCleaner:
112
  else:
113
  logger.error(f"Failed to fetch tokens: {response.status_code}")
114
  return []
115
-
116
  except Exception as e:
117
  logger.error(f"Error fetching tokens: {str(e)}")
118
  return []
119
 
120
  def update_dataset(self, tokens_array):
121
- """
122
- Update dataset with cleaned tokens
123
-
124
- Args:
125
- tokens_array: Array of valid tokens
126
-
127
- Returns:
128
- bool: True if successful
129
- """
130
  try:
131
  import tempfile
132
  with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
133
  json.dump(tokens_array, f, indent=2)
134
  temp_path = f.name
135
-
136
  self.hf_api.upload_file(
137
  path_or_fileobj=temp_path,
138
  path_in_repo="videoinu_tokens.json",
@@ -140,36 +87,22 @@ class TokenCleaner:
140
  repo_type="dataset",
141
  token=self.hf_token
142
  )
143
-
144
  os.unlink(temp_path)
145
  logger.info(f"✅ Updated dataset with {len(tokens_array)} tokens")
146
  return True
147
-
148
  except Exception as e:
149
  logger.error(f"Error updating dataset: {str(e)}")
150
  return False
151
 
152
  def clean_expired_tokens(self):
153
- """
154
- Main cleaning function - removes expired tokens from dataset
155
-
156
- Returns:
157
- dict: Statistics about cleaning operation
158
- """
159
  logger.info("=" * 80)
160
  logger.info("Starting token cleaning cycle...")
161
  logger.info("=" * 80)
162
 
163
- # Fetch all tokens
164
  all_tokens = self.fetch_all_tokens()
165
  if not all_tokens:
166
  logger.warning("No tokens found in dataset")
167
- return {
168
- "total": 0,
169
- "valid": 0,
170
- "expired": 0,
171
- "invalid": 0
172
- }
173
 
174
  total_count = len(all_tokens)
175
  valid_tokens = []
@@ -178,20 +111,17 @@ class TokenCleaner:
178
 
179
  logger.info(f"Checking {total_count} tokens...")
180
 
181
- # Check each token
182
  for i, token_entry in enumerate(all_tokens):
183
  token = token_entry.get('token', '')
184
  email = token_entry.get('email', 'unknown')
185
  credits = token_entry.get('credits_remaining', 0)
186
 
187
- # Decode token
188
  decoded = self.decode_jwt_token(token)
189
  if decoded is None:
190
  logger.warning(f"[{i+1}/{total_count}] Invalid token format: {email}")
191
  invalid_count += 1
192
  continue
193
 
194
- # Check expiration
195
  if self.is_token_expired(decoded):
196
  logger.info(f"[{i+1}/{total_count}] 🗑️ Removing expired token: {email} ({credits} credits)")
197
  expired_count += 1
@@ -201,12 +131,10 @@ class TokenCleaner:
201
  logger.info(f"[{i+1}/{total_count}] ✅ Valid token: {email} (expires: {exp_date}, {credits} credits)")
202
  valid_tokens.append(token_entry)
203
 
204
- # Update dataset if any tokens were removed
205
  if expired_count > 0 or invalid_count > 0:
206
  logger.info(f"\n🧹 Cleaning dataset...")
207
  logger.info(f" Removing {expired_count} expired tokens")
208
  logger.info(f" Removing {invalid_count} invalid tokens")
209
-
210
  if self.update_dataset(valid_tokens):
211
  logger.info(f"✅ Dataset cleaned successfully!")
212
  else:
@@ -218,7 +146,8 @@ class TokenCleaner:
218
  "total": total_count,
219
  "valid": len(valid_tokens),
220
  "expired": expired_count,
221
- "invalid": invalid_count
 
222
  }
223
 
224
  logger.info("=" * 80)
@@ -232,12 +161,6 @@ class TokenCleaner:
232
  return stats
233
 
234
  def run_continuous(self, interval_minutes=60):
235
- """
236
- Run token cleaner continuously
237
-
238
- Args:
239
- interval_minutes: Minutes between cleaning cycles (default: 60 = 1 hour)
240
- """
241
  logger.info(f"🚀 Token Cleaner started - checking every {interval_minutes} minutes")
242
  while True:
243
  try:
@@ -254,61 +177,68 @@ class TokenCleaner:
254
  logger.info(f"Retrying in 1 minute...")
255
  time.sleep(60)
256
 
257
- # Gradio interface functions
258
- def clean_now(cleaner):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  stats = cleaner.clean_expired_tokens()
260
  summary = (
261
- f"Cleaning Summary:\n"
262
- f"Total tokens checked: {stats['total']}\n"
263
- f"Valid tokens: {stats['valid']}\n"
264
- f"Expired tokens removed: {stats['expired']}\n"
265
- f"Invalid tokens removed: {stats['invalid']}"
266
  )
267
  return summary
268
 
269
- if __name__ == "__main__":
270
- # Get HF token from environment
271
- HF_TOKEN = os.getenv("HF_TOKEN")
272
- if not HF_TOKEN:
273
- logger.error("HF_TOKEN environment variable not set!")
274
- exit(1)
 
275
 
276
- # Create cleaner instance
277
- cleaner = TokenCleaner(
278
- hf_token=HF_TOKEN,
279
- dataset_name="HIRO12121212/videoinutoken"
280
- )
281
 
282
- # Start background cleaner thread
283
- threading.Thread(target=cleaner.run_continuous, args=(60,), daemon=True).start()
 
 
284
 
285
- # Gradio interface
286
- with gr.Blocks(title="Videoinu Token Cleaner") as demo:
287
- gr.Markdown("""
288
- # Videoinu Token Cleaner
289
-
290
- This app cleans expired tokens from the Hugging Face dataset.
291
-
292
- - Background cleaning runs every 60 minutes.
293
- - Click the button below to trigger a manual clean.
294
-
295
- Logs are available in the console/space logs.
296
- """)
297
-
298
- output = gr.Textbox(label="Cleaning Results")
299
- clean_button = gr.Button("Clean Now")
300
-
301
- clean_button.click(
302
- fn=clean_now,
303
- inputs=[],
304
- outputs=[output],
305
- api_name="clean_now"
306
- )
307
 
308
- # Launch Gradio app
309
- # Use port 7860 for Hugging Face Spaces
310
- demo.launch(
311
- server_name="0.0.0.0",
312
- server_port=7860,
313
- show_error=True
314
- )
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  class TokenCleaner:
27
  def __init__(self, hf_token, dataset_name="HIRO12121212/videoinutoken"):
 
 
 
 
 
 
 
28
  self.hf_token = hf_token
29
  self.dataset_name = dataset_name
30
  self.hf_api = HfApi()
31
  logger.info(f"Token Cleaner initialized for dataset: {dataset_name}")
32
 
33
  def decode_jwt_token(self, token):
 
 
 
 
 
 
 
 
 
34
  try:
 
35
  parts = token.split('.')
36
  if len(parts) != 3:
37
  logger.warning("Invalid JWT format (not 3 parts)")
38
  return None
 
 
39
  payload = parts[1]
 
40
  padding = 4 - (len(payload) % 4)
41
  if padding != 4:
42
  payload += '=' * padding
 
 
43
  decoded_bytes = base64.urlsafe_b64decode(payload)
44
  decoded_json = json.loads(decoded_bytes)
45
  return decoded_json
 
46
  except Exception as e:
47
  logger.error(f"Error decoding JWT token: {str(e)}")
48
  return None
49
 
50
  def is_token_expired(self, token_data):
 
 
 
 
 
 
 
 
 
51
  if not token_data or 'exp' not in token_data:
52
+ return True
 
53
  exp_timestamp = token_data['exp']
54
  current_timestamp = int(time.time())
55
  is_expired = current_timestamp >= exp_timestamp
 
56
  if is_expired:
57
  exp_date = datetime.fromtimestamp(exp_timestamp)
58
  logger.info(f"Token expired on {exp_date} (email: {token_data.get('email', 'unknown')})")
 
59
  return is_expired
60
 
61
  def fetch_all_tokens(self):
 
 
 
 
 
 
62
  try:
63
  download_url = f"https://huggingface.co/datasets/{self.dataset_name}/resolve/main/videoinu_tokens.json"
64
  headers = {"Authorization": f"Bearer {self.hf_token}"}
65
  response = requests.get(download_url, headers=headers, timeout=10)
 
66
  if response.status_code == 200:
67
  tokens_array = json.loads(response.text)
68
  logger.info(f"✅ Fetched {len(tokens_array)} tokens from dataset")
 
70
  else:
71
  logger.error(f"Failed to fetch tokens: {response.status_code}")
72
  return []
 
73
  except Exception as e:
74
  logger.error(f"Error fetching tokens: {str(e)}")
75
  return []
76
 
77
  def update_dataset(self, tokens_array):
 
 
 
 
 
 
 
 
 
78
  try:
79
  import tempfile
80
  with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
81
  json.dump(tokens_array, f, indent=2)
82
  temp_path = f.name
 
83
  self.hf_api.upload_file(
84
  path_or_fileobj=temp_path,
85
  path_in_repo="videoinu_tokens.json",
 
87
  repo_type="dataset",
88
  token=self.hf_token
89
  )
 
90
  os.unlink(temp_path)
91
  logger.info(f"✅ Updated dataset with {len(tokens_array)} tokens")
92
  return True
 
93
  except Exception as e:
94
  logger.error(f"Error updating dataset: {str(e)}")
95
  return False
96
 
97
  def clean_expired_tokens(self):
 
 
 
 
 
 
98
  logger.info("=" * 80)
99
  logger.info("Starting token cleaning cycle...")
100
  logger.info("=" * 80)
101
 
 
102
  all_tokens = self.fetch_all_tokens()
103
  if not all_tokens:
104
  logger.warning("No tokens found in dataset")
105
+ return {"total": 0, "valid": 0, "expired": 0, "invalid": 0}
 
 
 
 
 
106
 
107
  total_count = len(all_tokens)
108
  valid_tokens = []
 
111
 
112
  logger.info(f"Checking {total_count} tokens...")
113
 
 
114
  for i, token_entry in enumerate(all_tokens):
115
  token = token_entry.get('token', '')
116
  email = token_entry.get('email', 'unknown')
117
  credits = token_entry.get('credits_remaining', 0)
118
 
 
119
  decoded = self.decode_jwt_token(token)
120
  if decoded is None:
121
  logger.warning(f"[{i+1}/{total_count}] Invalid token format: {email}")
122
  invalid_count += 1
123
  continue
124
 
 
125
  if self.is_token_expired(decoded):
126
  logger.info(f"[{i+1}/{total_count}] 🗑️ Removing expired token: {email} ({credits} credits)")
127
  expired_count += 1
 
131
  logger.info(f"[{i+1}/{total_count}] ✅ Valid token: {email} (expires: {exp_date}, {credits} credits)")
132
  valid_tokens.append(token_entry)
133
 
 
134
  if expired_count > 0 or invalid_count > 0:
135
  logger.info(f"\n🧹 Cleaning dataset...")
136
  logger.info(f" Removing {expired_count} expired tokens")
137
  logger.info(f" Removing {invalid_count} invalid tokens")
 
138
  if self.update_dataset(valid_tokens):
139
  logger.info(f"✅ Dataset cleaned successfully!")
140
  else:
 
146
  "total": total_count,
147
  "valid": len(valid_tokens),
148
  "expired": expired_count,
149
+ "invalid": invalid_count,
150
+ "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
151
  }
152
 
153
  logger.info("=" * 80)
 
161
  return stats
162
 
163
  def run_continuous(self, interval_minutes=60):
 
 
 
 
 
 
164
  logger.info(f"🚀 Token Cleaner started - checking every {interval_minutes} minutes")
165
  while True:
166
  try:
 
177
  logger.info(f"Retrying in 1 minute...")
178
  time.sleep(60)
179
 
180
+ # Global cleaner instance
181
+ HF_TOKEN = os.getenv("HF_TOKEN")
182
+ if not HF_TOKEN:
183
+ logger.error("HF_TOKEN environment variable not set!")
184
+ exit(1)
185
+
186
+ cleaner = TokenCleaner(
187
+ hf_token=HF_TOKEN,
188
+ dataset_name="HIRO12121212/videoinutoken"
189
+ )
190
+
191
+ # Start background thread
192
+ threading.Thread(target=cleaner.run_continuous, args=(60,), daemon=True).start()
193
+
194
+ # Gradio functions (no arguments needed)
195
+ def clean_now():
196
  stats = cleaner.clean_expired_tokens()
197
  summary = (
198
+ f"🧹 Cleaning completed at {stats['timestamp']}\n\n"
199
+ f"Total tokens checked: **{stats['total']}**\n"
200
+ f"Valid tokens remaining: **{stats['valid']}**\n"
201
+ f"Expired tokens removed: **{stats['expired']}**\n"
202
+ f"Invalid tokens removed: **{stats['invalid']}**"
203
  )
204
  return summary
205
 
206
+ def get_status():
207
+ return "Background cleaner is running (checks every 60 minutes). Use 'Clean Now' for manual cleaning."
208
+
209
+ # Gradio interface
210
+ with gr.Blocks(title="Videoinu Token Cleaner") as demo:
211
+ gr.Markdown("""
212
+ # 🧹 Videoinu Token Cleaner Dashboard
213
 
214
+ This Space automatically cleans expired tokens from the dataset every hour.
 
 
 
 
215
 
216
+ - Background cleaning is always running.
217
+ - Click **Clean Now** to trigger an immediate clean.
218
+ - Results appear here + full logs in Space logs.
219
+ """)
220
 
221
+ status = gr.Textbox(label="Status", value="Background cleaner active", interactive=False)
222
+ output = gr.Markdown(label="Last Cleaning Result")
223
+
224
+ with gr.Row():
225
+ clean_button = gr.Button("Clean Now", variant="primary", scale=1)
226
+ refresh_button = gr.Button("Refresh Status", scale=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
 
228
+ clean_button.click(
229
+ fn=clean_now,
230
+ outputs=output,
231
+ api_name="clean_now"
232
+ )
233
+
234
+ refresh_button.click(
235
+ fn=get_status,
236
+ outputs=status
237
+ )
238
+
239
+ # Launch the app
240
+ demo.launch(
241
+ server_name="0.0.0.0",
242
+ server_port=7860,
243
+ show_error=True
244
+ )