chenchaoyun committed on
Commit ·
172ad88
1
Parent(s): bd33028
Isolate DeepFace and anime model execution
Browse files
- Dockerfile +1 -0
- api_routes.py +104 -22
- config.py +3 -0
Dockerfile
CHANGED
|
@@ -7,6 +7,7 @@ ENV TZ=Asia/Shanghai \
|
|
| 7 |
IMAGES_DIR=/opt/data/images \
|
| 8 |
MODELS_PATH=/opt/data/models \
|
| 9 |
DEEPFACE_HOME=/opt/data/models \
|
|
|
|
| 10 |
FAISS_INDEX_DIR=/opt/data/faiss \
|
| 11 |
CELEBRITY_SOURCE_DIR=/opt/data/chinese_celeb_dataset
|
| 12 |
|
|
|
|
| 7 |
IMAGES_DIR=/opt/data/images \
|
| 8 |
MODELS_PATH=/opt/data/models \
|
| 9 |
DEEPFACE_HOME=/opt/data/models \
|
| 10 |
+
TF_USE_LEGACY_KERAS=1 \
|
| 11 |
FAISS_INDEX_DIR=/opt/data/faiss \
|
| 12 |
CELEBRITY_SOURCE_DIR=/opt/data/chinese_celeb_dataset
|
| 13 |
|
api_routes.py
CHANGED
|
@@ -16,6 +16,8 @@ from concurrent.futures import ThreadPoolExecutor
|
|
| 16 |
from datetime import datetime
|
| 17 |
from typing import Any, Dict, List, Optional, Tuple
|
| 18 |
|
|
|
|
|
|
|
| 19 |
import cv2
|
| 20 |
import numpy as np
|
| 21 |
from fastapi import APIRouter, File, UploadFile, HTTPException, Query, Request, \
|
|
@@ -254,8 +256,12 @@ if CLIP_AVAILABLE:
|
|
| 254 |
logger.error(f"CLIP function import failed: {e}")
|
| 255 |
CLIP_AVAILABLE = False
|
| 256 |
|
| 257 |
-
# 创建线程池执行器用于异步处理CPU密集型任务
|
|
|
|
|
|
|
| 258 |
executor = ThreadPoolExecutor(max_workers=4)
|
|
|
|
|
|
|
| 259 |
|
| 260 |
|
| 261 |
def _log_stage_duration(stage: str, start_time: float, extra: str | None = None) -> float:
|
|
@@ -282,6 +288,18 @@ async def process_cpu_intensive_task(func, *args, **kwargs):
|
|
| 282 |
return await loop.run_in_executor(executor, lambda: func(*args, **kwargs))
|
| 283 |
|
| 284 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
def _keep_cpu_busy(duration: float, inner_loops: int = 5000) -> Dict[str, Any]:
|
| 286 |
"""
|
| 287 |
在给定时间内执行纯CPU计算,用于防止服务器进入空闲态。
|
|
@@ -356,6 +374,21 @@ def _reset_deepface_model_cache(model_name: str = "VGG-Face") -> None:
|
|
| 356 |
logger.info(f"已清除DeepFace缓存模型: {model_name}")
|
| 357 |
|
| 358 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
def _recover_deepface_model(model_name: str = "VGG-Face") -> None:
|
| 360 |
"""组合清理动作,尽量恢复DeepFace模型可用状态。"""
|
| 361 |
cleared = _clear_keras_session()
|
|
@@ -901,7 +934,7 @@ async def _refresh_celebrity_cache(sample_image_path: str,
|
|
| 901 |
lock = _ensure_deepface_lock()
|
| 902 |
async with lock:
|
| 903 |
try:
|
| 904 |
-
await
|
| 905 |
deepface_module.find,
|
| 906 |
img_path=sample_image_path,
|
| 907 |
db_path=db_path,
|
|
@@ -913,12 +946,12 @@ async def _refresh_celebrity_cache(sample_image_path: str,
|
|
| 913 |
refresh_database=True,
|
| 914 |
)
|
| 915 |
except (AttributeError, RuntimeError) as attr_exc:
|
| 916 |
-
if
|
| 917 |
logger.warning(
|
| 918 |
-
f"刷新明星向量缓存遇到
|
| 919 |
_recover_deepface_model()
|
| 920 |
try:
|
| 921 |
-
await
|
| 922 |
deepface_module.find,
|
| 923 |
img_path=sample_image_path,
|
| 924 |
db_path=db_path,
|
|
@@ -938,7 +971,28 @@ async def _refresh_celebrity_cache(sample_image_path: str,
|
|
| 938 |
f"刷新明星向量缓存遇到模型状态异常,尝试恢复后重试: {exc}")
|
| 939 |
_recover_deepface_model()
|
| 940 |
try:
|
| 941 |
-
await
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 942 |
deepface_module.find,
|
| 943 |
img_path=sample_image_path,
|
| 944 |
db_path=db_path,
|
|
@@ -951,8 +1005,6 @@ async def _refresh_celebrity_cache(sample_image_path: str,
|
|
| 951 |
)
|
| 952 |
except Exception as retry_exc:
|
| 953 |
logger.warning(f"恢复后重新刷新明星缓存仍失败: {retry_exc}")
|
| 954 |
-
except Exception as e:
|
| 955 |
-
logger.warning(f"Refresh celebrity cache failed: {e}")
|
| 956 |
|
| 957 |
|
| 958 |
async def _log_progress(task_name: str,
|
|
@@ -2976,7 +3028,7 @@ async def anime_stylize_photo(
|
|
| 2976 |
# 使用AnimeStylizer对图像进行动漫风格化
|
| 2977 |
logger.info(f"Starting to stylize image with anime style, style: {style_description}...")
|
| 2978 |
try:
|
| 2979 |
-
stylized_image = await
|
| 2980 |
logger.info("Anime stylization processing completed")
|
| 2981 |
except Exception as e:
|
| 2982 |
logger.error(f"Anime stylization processing failed: {e}")
|
|
@@ -4549,16 +4601,16 @@ async def match_celebrity_face(
|
|
| 4549 |
lock = _ensure_deepface_lock()
|
| 4550 |
async with lock:
|
| 4551 |
try:
|
| 4552 |
-
find_result = await
|
| 4553 |
deepface_module.find,
|
| 4554 |
**_build_find_kwargs(refresh=False),
|
| 4555 |
)
|
| 4556 |
except (AttributeError, RuntimeError) as attr_err:
|
| 4557 |
-
if
|
| 4558 |
logger.warning(
|
| 4559 |
-
f"DeepFace find encountered
|
| 4560 |
_recover_deepface_model()
|
| 4561 |
-
find_result = await
|
| 4562 |
deepface_module.find,
|
| 4563 |
**_build_find_kwargs(refresh=True),
|
| 4564 |
)
|
|
@@ -4573,7 +4625,17 @@ async def match_celebrity_face(
|
|
| 4573 |
logger.warning(
|
| 4574 |
f"DeepFace find failed without refresh: {ve}, 尝试清理模型后刷新缓存。")
|
| 4575 |
_recover_deepface_model()
|
| 4576 |
-
find_result = await
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4577 |
deepface_module.find,
|
| 4578 |
**_build_find_kwargs(refresh=True),
|
| 4579 |
)
|
|
@@ -4907,7 +4969,7 @@ async def face_similarity_verification(
|
|
| 4907 |
async with lock:
|
| 4908 |
try:
|
| 4909 |
# 使用ArcFace模型进行人脸比对
|
| 4910 |
-
verification_result = await
|
| 4911 |
deepface_module.verify,
|
| 4912 |
img1_path=original_path1,
|
| 4913 |
img2_path=original_path2,
|
|
@@ -4918,12 +4980,12 @@ async def face_similarity_verification(
|
|
| 4918 |
logger.info(
|
| 4919 |
f"DeepFace verification completed result:{json.dumps(verification_result, ensure_ascii=False)}")
|
| 4920 |
except (AttributeError, RuntimeError) as attr_err:
|
| 4921 |
-
if
|
| 4922 |
logger.warning(
|
| 4923 |
-
f"DeepFace verification 遇到
|
| 4924 |
_recover_deepface_model()
|
| 4925 |
try:
|
| 4926 |
-
verification_result = await
|
| 4927 |
deepface_module.verify,
|
| 4928 |
img1_path=original_path1,
|
| 4929 |
img2_path=original_path2,
|
|
@@ -4945,7 +5007,7 @@ async def face_similarity_verification(
|
|
| 4945 |
f"DeepFace verification 遇到模型状态异常,尝试恢复后重试: {ve}")
|
| 4946 |
_recover_deepface_model()
|
| 4947 |
try:
|
| 4948 |
-
verification_result = await
|
| 4949 |
deepface_module.verify,
|
| 4950 |
img1_path=original_path1,
|
| 4951 |
img2_path=original_path2,
|
|
@@ -4961,9 +5023,29 @@ async def face_similarity_verification(
|
|
| 4961 |
raise HTTPException(status_code=500,
|
| 4962 |
detail=f"人脸比对失败: {str(retry_error)}") from retry_error
|
| 4963 |
except Exception as e:
|
| 4964 |
-
|
| 4965 |
-
|
| 4966 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4967 |
|
| 4968 |
# 提取比对结果
|
| 4969 |
verified = verification_result["verified"]
|
|
|
|
| 16 |
from datetime import datetime
|
| 17 |
from typing import Any, Dict, List, Optional, Tuple
|
| 18 |
|
| 19 |
+
os.environ.setdefault("TF_USE_LEGACY_KERAS", "1")
|
| 20 |
+
|
| 21 |
import cv2
|
| 22 |
import numpy as np
|
| 23 |
from fastapi import APIRouter, File, UploadFile, HTTPException, Query, Request, \
|
|
|
|
| 256 |
logger.error(f"CLIP function import failed: {e}")
|
| 257 |
CLIP_AVAILABLE = False
|
| 258 |
|
| 259 |
+
# 创建线程池执行器用于异步处理CPU密集型任务。
|
| 260 |
+
# TensorFlow/Keras 模型对线程上下文敏感,DeepFace 必须固定到单独线程,
|
| 261 |
+
# 避免被 ModelScope/Anime Style 的模型加载污染运行状态。
|
| 262 |
executor = ThreadPoolExecutor(max_workers=4)
|
| 263 |
+
deepface_executor = ThreadPoolExecutor(max_workers=1)
|
| 264 |
+
anime_style_executor = ThreadPoolExecutor(max_workers=1)
|
| 265 |
|
| 266 |
|
| 267 |
def _log_stage_duration(stage: str, start_time: float, extra: str | None = None) -> float:
|
|
|
|
| 288 |
return await loop.run_in_executor(executor, lambda: func(*args, **kwargs))
|
| 289 |
|
| 290 |
|
| 291 |
+
async def process_deepface_task(func, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` on the dedicated DeepFace executor.

    The call is submitted to ``deepface_executor`` rather than the general
    purpose pool, with the intent of not reusing Keras models across threads.

    Args:
        func: Blocking callable to execute (for example a DeepFace entry point).
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.

    Raises:
        Exception: Anything raised by ``func`` is re-raised to the awaiting caller.
    """
    # A coroutine always executes under a running loop; get_running_loop() is
    # the preferred accessor there and never creates a loop implicitly.
    loop = asyncio.get_running_loop()
    # run_in_executor() forwards positional arguments only, so the call is
    # wrapped in a closure to carry the keyword arguments too.
    return await loop.run_in_executor(deepface_executor, lambda: func(*args, **kwargs))
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
async def process_anime_style_task(func, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` on the dedicated Anime Style executor.

    The call is submitted to ``anime_style_executor`` rather than the general
    purpose pool, with the intent of isolating the ModelScope pipeline.

    Args:
        func: Blocking callable to execute (for example a stylizer method).
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.

    Raises:
        Exception: Anything raised by ``func`` is re-raised to the awaiting caller.
    """
    # A coroutine always executes under a running loop; get_running_loop() is
    # the preferred accessor there and never creates a loop implicitly.
    loop = asyncio.get_running_loop()
    # run_in_executor() forwards positional arguments only, so the call is
    # wrapped in a closure to carry the keyword arguments too.
    return await loop.run_in_executor(anime_style_executor, lambda: func(*args, **kwargs))
|
| 301 |
+
|
| 302 |
+
|
| 303 |
def _keep_cpu_busy(duration: float, inner_loops: int = 5000) -> Dict[str, Any]:
|
| 304 |
"""
|
| 305 |
在给定时间内执行纯CPU计算,用于防止服务器进入空闲态。
|
|
|
|
| 374 |
logger.info(f"已清除DeepFace缓存模型: {model_name}")
|
| 375 |
|
| 376 |
|
| 377 |
+
def _is_deepface_model_state_error(exc: BaseException) -> bool:
|
| 378 |
+
"""识别 Keras/DeepFace 模型状态污染类异常,触发清理并重试。"""
|
| 379 |
+
message = str(exc)
|
| 380 |
+
error_markers = (
|
| 381 |
+
"numpy",
|
| 382 |
+
"SymbolicTensor",
|
| 383 |
+
"EagerTensor",
|
| 384 |
+
"Attempting to capture",
|
| 385 |
+
"without building a function",
|
| 386 |
+
"conv2d",
|
| 387 |
+
"Graph execution error",
|
| 388 |
+
)
|
| 389 |
+
return any(marker in message for marker in error_markers)
|
| 390 |
+
|
| 391 |
+
|
| 392 |
def _recover_deepface_model(model_name: str = "VGG-Face") -> None:
|
| 393 |
"""组合清理动作,尽量恢复DeepFace模型可用状态。"""
|
| 394 |
cleared = _clear_keras_session()
|
|
|
|
| 934 |
lock = _ensure_deepface_lock()
|
| 935 |
async with lock:
|
| 936 |
try:
|
| 937 |
+
await process_deepface_task(
|
| 938 |
deepface_module.find,
|
| 939 |
img_path=sample_image_path,
|
| 940 |
db_path=db_path,
|
|
|
|
| 946 |
refresh_database=True,
|
| 947 |
)
|
| 948 |
except (AttributeError, RuntimeError) as attr_exc:
|
| 949 |
+
if _is_deepface_model_state_error(attr_exc):
|
| 950 |
logger.warning(
|
| 951 |
+
f"刷新明星向量缓存遇到 DeepFace 模型状态异常,尝试恢复后重试: {attr_exc}")
|
| 952 |
_recover_deepface_model()
|
| 953 |
try:
|
| 954 |
+
await process_deepface_task(
|
| 955 |
deepface_module.find,
|
| 956 |
img_path=sample_image_path,
|
| 957 |
db_path=db_path,
|
|
|
|
| 971 |
f"刷新明星向量缓存遇到模型状态异常,尝试恢复后重试: {exc}")
|
| 972 |
_recover_deepface_model()
|
| 973 |
try:
|
| 974 |
+
await process_deepface_task(
|
| 975 |
+
deepface_module.find,
|
| 976 |
+
img_path=sample_image_path,
|
| 977 |
+
db_path=db_path,
|
| 978 |
+
model_name="VGG-Face",
|
| 979 |
+
detector_backend="yolov11n",
|
| 980 |
+
distance_metric="cosine",
|
| 981 |
+
enforce_detection=True,
|
| 982 |
+
silent=True,
|
| 983 |
+
refresh_database=True,
|
| 984 |
+
)
|
| 985 |
+
except Exception as retry_exc:
|
| 986 |
+
logger.warning(f"恢复后重新刷新明星缓存仍失败: {retry_exc}")
|
| 987 |
+
except Exception as exc:
|
| 988 |
+
if not _is_deepface_model_state_error(exc):
|
| 989 |
+
logger.warning(f"Refresh celebrity cache failed: {exc}")
|
| 990 |
+
return
|
| 991 |
+
logger.warning(
|
| 992 |
+
f"刷新明星向量缓存遇到 DeepFace 底层状态异常,尝试恢复后重试: {exc}")
|
| 993 |
+
_recover_deepface_model()
|
| 994 |
+
try:
|
| 995 |
+
await process_deepface_task(
|
| 996 |
deepface_module.find,
|
| 997 |
img_path=sample_image_path,
|
| 998 |
db_path=db_path,
|
|
|
|
| 1005 |
)
|
| 1006 |
except Exception as retry_exc:
|
| 1007 |
logger.warning(f"恢复后重新刷新明星缓存仍失败: {retry_exc}")
|
|
|
|
|
|
|
| 1008 |
|
| 1009 |
|
| 1010 |
async def _log_progress(task_name: str,
|
|
|
|
| 3028 |
# 使用AnimeStylizer对图像进行动漫风格化
|
| 3029 |
logger.info(f"Starting to stylize image with anime style, style: {style_description}...")
|
| 3030 |
try:
|
| 3031 |
+
stylized_image = await process_anime_style_task(anime_stylizer.stylize_image, image, style_type)
|
| 3032 |
logger.info("Anime stylization processing completed")
|
| 3033 |
except Exception as e:
|
| 3034 |
logger.error(f"Anime stylization processing failed: {e}")
|
|
|
|
| 4601 |
lock = _ensure_deepface_lock()
|
| 4602 |
async with lock:
|
| 4603 |
try:
|
| 4604 |
+
find_result = await process_deepface_task(
|
| 4605 |
deepface_module.find,
|
| 4606 |
**_build_find_kwargs(refresh=False),
|
| 4607 |
)
|
| 4608 |
except (AttributeError, RuntimeError) as attr_err:
|
| 4609 |
+
if _is_deepface_model_state_error(attr_err):
|
| 4610 |
logger.warning(
|
| 4611 |
+
f"DeepFace find encountered model state error, 尝试清理模型后刷新缓存: {attr_err}")
|
| 4612 |
_recover_deepface_model()
|
| 4613 |
+
find_result = await process_deepface_task(
|
| 4614 |
deepface_module.find,
|
| 4615 |
**_build_find_kwargs(refresh=True),
|
| 4616 |
)
|
|
|
|
| 4625 |
logger.warning(
|
| 4626 |
f"DeepFace find failed without refresh: {ve}, 尝试清理模型后刷新缓存。")
|
| 4627 |
_recover_deepface_model()
|
| 4628 |
+
find_result = await process_deepface_task(
|
| 4629 |
+
deepface_module.find,
|
| 4630 |
+
**_build_find_kwargs(refresh=True),
|
| 4631 |
+
)
|
| 4632 |
+
except Exception as exc:
|
| 4633 |
+
if not _is_deepface_model_state_error(exc):
|
| 4634 |
+
raise
|
| 4635 |
+
logger.warning(
|
| 4636 |
+
f"DeepFace find 遇到底层模型状态异常,尝试清理模型后刷新缓存: {exc}")
|
| 4637 |
+
_recover_deepface_model()
|
| 4638 |
+
find_result = await process_deepface_task(
|
| 4639 |
deepface_module.find,
|
| 4640 |
**_build_find_kwargs(refresh=True),
|
| 4641 |
)
|
|
|
|
| 4969 |
async with lock:
|
| 4970 |
try:
|
| 4971 |
# 使用ArcFace模型进行人脸比对
|
| 4972 |
+
verification_result = await process_deepface_task(
|
| 4973 |
deepface_module.verify,
|
| 4974 |
img1_path=original_path1,
|
| 4975 |
img2_path=original_path2,
|
|
|
|
| 4980 |
logger.info(
|
| 4981 |
f"DeepFace verification completed result:{json.dumps(verification_result, ensure_ascii=False)}")
|
| 4982 |
except (AttributeError, RuntimeError) as attr_err:
|
| 4983 |
+
if _is_deepface_model_state_error(attr_err):
|
| 4984 |
logger.warning(
|
| 4985 |
+
f"DeepFace verification 遇到模型状态异常,尝试恢复后重试: {attr_err}")
|
| 4986 |
_recover_deepface_model()
|
| 4987 |
try:
|
| 4988 |
+
verification_result = await process_deepface_task(
|
| 4989 |
deepface_module.verify,
|
| 4990 |
img1_path=original_path1,
|
| 4991 |
img2_path=original_path2,
|
|
|
|
| 5007 |
f"DeepFace verification 遇到模型状态异常,尝试恢复后重试: {ve}")
|
| 5008 |
_recover_deepface_model()
|
| 5009 |
try:
|
| 5010 |
+
verification_result = await process_deepface_task(
|
| 5011 |
deepface_module.verify,
|
| 5012 |
img1_path=original_path1,
|
| 5013 |
img2_path=original_path2,
|
|
|
|
| 5023 |
raise HTTPException(status_code=500,
|
| 5024 |
detail=f"人脸比对失败: {str(retry_error)}") from retry_error
|
| 5025 |
except Exception as e:
|
| 5026 |
+
if not _is_deepface_model_state_error(e):
|
| 5027 |
+
logger.error(f"DeepFace verification failed: {e}")
|
| 5028 |
+
raise HTTPException(status_code=500,
|
| 5029 |
+
detail=f"人脸比对失败: {str(e)}") from e
|
| 5030 |
+
logger.warning(
|
| 5031 |
+
f"DeepFace verification 遇到底层模型状态异常,尝试恢复后重试: {e}")
|
| 5032 |
+
_recover_deepface_model()
|
| 5033 |
+
try:
|
| 5034 |
+
verification_result = await process_deepface_task(
|
| 5035 |
+
deepface_module.verify,
|
| 5036 |
+
img1_path=original_path1,
|
| 5037 |
+
img2_path=original_path2,
|
| 5038 |
+
model_name="VGG-Face",
|
| 5039 |
+
detector_backend="yolov11n",
|
| 5040 |
+
distance_metric="cosine"
|
| 5041 |
+
)
|
| 5042 |
+
logger.info(
|
| 5043 |
+
f"DeepFace verification completed after recovery: {json.dumps(verification_result, ensure_ascii=False)}")
|
| 5044 |
+
except Exception as retry_error:
|
| 5045 |
+
logger.error(
|
| 5046 |
+
f"DeepFace verification failed after recovery attempt: {retry_error}")
|
| 5047 |
+
raise HTTPException(status_code=500,
|
| 5048 |
+
detail=f"人脸比对失败: {str(retry_error)}") from retry_error
|
| 5049 |
|
| 5050 |
# 提取比对结果
|
| 5051 |
verified = verification_result["verified"]
|
config.py
CHANGED
|
@@ -1,6 +1,9 @@
|
|
| 1 |
import logging
|
| 2 |
import os
|
| 3 |
|
|
|
|
|
|
|
|
|
|
| 4 |
# 解决OpenMP库冲突问题
|
| 5 |
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
|
| 6 |
# 设置CPU线程数为CPU核心数,提高CPU利用率
|
|
|
|
| 1 |
import logging
|
| 2 |
import os
|
| 3 |
|
| 4 |
+
# DeepFace 仍依赖 tf-keras 行为;必须在任何 TensorFlow/Keras/DeepFace import 前设置。
|
| 5 |
+
os.environ.setdefault("TF_USE_LEGACY_KERAS", "1")
|
| 6 |
+
|
| 7 |
# 解决OpenMP库冲突问题
|
| 8 |
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
|
| 9 |
# 设置CPU线程数为CPU核心数,提高CPU利用率
|