Beracles committed on
Commit
b475f1d
·
1 Parent(s): 9ee80d0

优化日志加载过程,使用进度条显示加载状态,并注释掉不必要的打印信息

Browse files
Files changed (1) hide show
  1. logging_helper.py +6 -5
logging_helper.py CHANGED
@@ -8,6 +8,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
8
  from utils import beijing, md5, json_to_str
9
  from huggingface_hub import HfApi
10
  import pandas as pd
 
11
  from datetime import datetime, date, timedelta
12
  from zoneinfo import ZoneInfo
13
 
@@ -201,22 +202,22 @@ class LoggingHelper:
201
  from_timestamp = from_timestamp or start_timestamp
202
  to_timestamp = to_timestamp or end_timestamp
203
  total_files_loaded = 0
204
- for remotepath, timestamp in self.timestamps.items():
205
  if timestamp < from_timestamp or timestamp > to_timestamp:
206
  continue
207
  localpath = "/".join([self.local_dir, remotepath])
208
- print(f"[load_logs] Loading file {localpath}")
209
  # 检查该文件是否存在
210
  if not os.path.exists(localpath):
211
- print(f"[load_logs] File not found: {localpath}")
212
  continue
213
  try:
214
  # 检查文件是否为空
215
  if os.path.getsize(localpath) == 0:
216
- print(f"[load_logs] Skipping empty file: {remotepath}")
217
  continue
218
  if remotepath in self.buffer:
219
- print(f"[load_logs] File already loaded: {remotepath}")
220
  continue
221
  # 加载JSON数据到Dataset
222
  dataset = ds.Dataset.from_json(localpath)
 
8
  from utils import beijing, md5, json_to_str
9
  from huggingface_hub import HfApi
10
  import pandas as pd
11
+ from tqdm import tqdm
12
  from datetime import datetime, date, timedelta
13
  from zoneinfo import ZoneInfo
14
 
 
202
  from_timestamp = from_timestamp or start_timestamp
203
  to_timestamp = to_timestamp or end_timestamp
204
  total_files_loaded = 0
205
+ for remotepath, timestamp in tqdm(self.timestamps.items()):
206
  if timestamp < from_timestamp or timestamp > to_timestamp:
207
  continue
208
  localpath = "/".join([self.local_dir, remotepath])
209
+ # print(f"[load_logs] Loading file {localpath}")
210
  # 检查该文件是否存在
211
  if not os.path.exists(localpath):
212
+ # print(f"[load_logs] File not found: {localpath}")
213
  continue
214
  try:
215
  # 检查文件是否为空
216
  if os.path.getsize(localpath) == 0:
217
+ # print(f"[load_logs] Skipping empty file: {remotepath}")
218
  continue
219
  if remotepath in self.buffer:
220
+ # print(f"[load_logs] File already loaded: {remotepath}")
221
  continue
222
  # 加载JSON数据到Dataset
223
  dataset = ds.Dataset.from_json(localpath)