Spaces:
Running
Running
优化日志加载过程,使用进度条显示加载状态,并注释掉不必要的打印信息
Browse files
- logging_helper.py +6 -5
logging_helper.py
CHANGED
|
@@ -8,6 +8,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
|
|
| 8 |
from utils import beijing, md5, json_to_str
|
| 9 |
from huggingface_hub import HfApi
|
| 10 |
import pandas as pd
|
|
|
|
| 11 |
from datetime import datetime, date, timedelta
|
| 12 |
from zoneinfo import ZoneInfo
|
| 13 |
|
|
@@ -201,22 +202,22 @@ class LoggingHelper:
|
|
| 201 |
from_timestamp = from_timestamp or start_timestamp
|
| 202 |
to_timestamp = to_timestamp or end_timestamp
|
| 203 |
total_files_loaded = 0
|
| 204 |
-
for remotepath, timestamp in self.timestamps.items():
|
| 205 |
if timestamp < from_timestamp or timestamp > to_timestamp:
|
| 206 |
continue
|
| 207 |
localpath = "/".join([self.local_dir, remotepath])
|
| 208 |
-
print(f"[load_logs] Loading file {localpath}")
|
| 209 |
# 检查该文件是否存在
|
| 210 |
if not os.path.exists(localpath):
|
| 211 |
-
print(f"[load_logs] File not found: {localpath}")
|
| 212 |
continue
|
| 213 |
try:
|
| 214 |
# 检查文件是否为空
|
| 215 |
if os.path.getsize(localpath) == 0:
|
| 216 |
-
print(f"[load_logs] Skipping empty file: {remotepath}")
|
| 217 |
continue
|
| 218 |
if remotepath in self.buffer:
|
| 219 |
-
print(f"[load_logs] File already loaded: {remotepath}")
|
| 220 |
continue
|
| 221 |
# 加载JSON数据到Dataset
|
| 222 |
dataset = ds.Dataset.from_json(localpath)
|
|
|
|
| 8 |
from utils import beijing, md5, json_to_str
|
| 9 |
from huggingface_hub import HfApi
|
| 10 |
import pandas as pd
|
| 11 |
+
from tqdm import tqdm
|
| 12 |
from datetime import datetime, date, timedelta
|
| 13 |
from zoneinfo import ZoneInfo
|
| 14 |
|
|
|
|
| 202 |
from_timestamp = from_timestamp or start_timestamp
|
| 203 |
to_timestamp = to_timestamp or end_timestamp
|
| 204 |
total_files_loaded = 0
|
| 205 |
+
for remotepath, timestamp in tqdm(self.timestamps.items()):
|
| 206 |
if timestamp < from_timestamp or timestamp > to_timestamp:
|
| 207 |
continue
|
| 208 |
localpath = "/".join([self.local_dir, remotepath])
|
| 209 |
+
# print(f"[load_logs] Loading file {localpath}")
|
| 210 |
# 检查该文件是否存在
|
| 211 |
if not os.path.exists(localpath):
|
| 212 |
+
# print(f"[load_logs] File not found: {localpath}")
|
| 213 |
continue
|
| 214 |
try:
|
| 215 |
# 检查文件是否为空
|
| 216 |
if os.path.getsize(localpath) == 0:
|
| 217 |
+
# print(f"[load_logs] Skipping empty file: {remotepath}")
|
| 218 |
continue
|
| 219 |
if remotepath in self.buffer:
|
| 220 |
+
# print(f"[load_logs] File already loaded: {remotepath}")
|
| 221 |
continue
|
| 222 |
# 加载JSON数据到Dataset
|
| 223 |
dataset = ds.Dataset.from_json(localpath)
|