chenchaoyun committed on
Commit ·
9fd1066
1
Parent(s): 57a33d4
fix
Browse files- api_routes.py +25 -0
api_routes.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import asyncio
|
| 2 |
import base64
|
| 3 |
import functools
|
|
|
|
| 4 |
import hashlib
|
| 5 |
import inspect
|
| 6 |
import io
|
|
@@ -3614,6 +3615,28 @@ def _flatten_chinese_celeb_dataset_dir(target_dir: str) -> bool:
|
|
| 3614 |
return True
|
| 3615 |
|
| 3616 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3617 |
def extract_chinese_celeb_dataset_sync() -> Dict[str, Any]:
|
| 3618 |
"""
|
| 3619 |
同步执行 chinese_celeb_dataset 解压操作,供启动流程或其他同步场景复用。
|
|
@@ -3634,6 +3657,7 @@ def extract_chinese_celeb_dataset_sync() -> Dict[str, Any]:
|
|
| 3634 |
|
| 3635 |
extract_result = _extract_tar_archive(archive_path, target_dir)
|
| 3636 |
flattened = _flatten_chinese_celeb_dataset_dir(target_dir)
|
|
|
|
| 3637 |
|
| 3638 |
return {
|
| 3639 |
"success": True,
|
|
@@ -3644,6 +3668,7 @@ def extract_chinese_celeb_dataset_sync() -> Dict[str, Any]:
|
|
| 3644 |
"stdout": extract_result.get("stdout"),
|
| 3645 |
"stderr": extract_result.get("stderr"),
|
| 3646 |
"normalized": flattened,
|
|
|
|
| 3647 |
}
|
| 3648 |
|
| 3649 |
|
|
|
|
| 1 |
import asyncio
|
| 2 |
import base64
|
| 3 |
import functools
|
| 4 |
+
import glob
|
| 5 |
import hashlib
|
| 6 |
import inspect
|
| 7 |
import io
|
|
|
|
| 3615 |
return True
|
| 3616 |
|
| 3617 |
|
| 3618 |
+
def _cleanup_chinese_celeb_hidden_files(target_dir: str) -> int:
    """
    Remove macOS AppleDouble resource-fork entries (``._*``) from *target_dir*.

    Only direct children of ``target_dir`` are examined; nested directories
    are not searched. Cleanup is best-effort: an entry that has already
    vanished is skipped, and any other OS error is logged as a warning
    without aborting the remaining deletions.

    Args:
        target_dir: Directory to scan (typically the extraction target).

    Returns:
        Number of entries that were actually removed.
    """
    # Escape the directory part so glob metacharacters in the path
    # ("[", "]", "*", "?") are matched literally instead of as wildcards.
    pattern = os.path.join(glob.escape(target_dir), "._*")
    removed = 0
    for hidden_path in glob.glob(pattern):
        try:
            # rmtree refuses to operate on symlinks, so a "._*" symlink that
            # points at a directory must be unlinked like a regular file.
            if os.path.isdir(hidden_path) and not os.path.islink(hidden_path):
                # Let failures propagate to the handlers below so they are
                # logged instead of being silently counted as successes.
                shutil.rmtree(hidden_path)
            else:
                os.remove(hidden_path)
        except FileNotFoundError:
            # Entry disappeared between listing and deletion; nothing to do.
            continue
        except OSError as exc:
            logger.warning("清理隐藏文件失败: %s (%s)", hidden_path, exc)
        else:
            # Count only entries whose removal completed without error.
            removed += 1
    if removed:
        logger.info("已清理 chinese_celeb_dataset 隐藏文件 %d 个 (pattern=%s)", removed, pattern)
    return removed
|
| 3638 |
+
|
| 3639 |
+
|
| 3640 |
def extract_chinese_celeb_dataset_sync() -> Dict[str, Any]:
|
| 3641 |
"""
|
| 3642 |
同步执行 chinese_celeb_dataset 解压操作,供启动流程或其他同步场景复用。
|
|
|
|
| 3657 |
|
| 3658 |
extract_result = _extract_tar_archive(archive_path, target_dir)
|
| 3659 |
flattened = _flatten_chinese_celeb_dataset_dir(target_dir)
|
| 3660 |
+
hidden_removed = _cleanup_chinese_celeb_hidden_files(target_dir)
|
| 3661 |
|
| 3662 |
return {
|
| 3663 |
"success": True,
|
|
|
|
| 3668 |
"stdout": extract_result.get("stdout"),
|
| 3669 |
"stderr": extract_result.get("stderr"),
|
| 3670 |
"normalized": flattened,
|
| 3671 |
+
"hidden_removed": hidden_removed,
|
| 3672 |
}
|
| 3673 |
|
| 3674 |
|