Spaces:
Runtime error
Runtime error
sjiangtao2024 committed on
Commit ·
c79e000
1
Parent(s): 6bff6a1
Add media cache retention cleanup
Browse files- README.md +4 -0
- app/services/grok/utils/cache.py +46 -8
- app/services/grok/utils/cache_scheduler.py +65 -0
- config.defaults.toml +4 -0
- deploy/hf-space/.env.example +2 -0
- deploy/hf-space/README.md +4 -0
- main.py +6 -0
README.md
CHANGED
|
@@ -51,6 +51,8 @@ With this model:
|
|
| 51 |
- `GROK2API_PROXY_ASSET_URL`
|
| 52 |
- `GROK2API_CF_CLEARANCE`
|
| 53 |
- `GROK2API_CF_COOKIES`
|
|
|
|
|
|
|
| 54 |
- `LOG_LEVEL`
|
| 55 |
- `SERVER_WORKERS`
|
| 56 |
- `DATA_DIR`
|
|
@@ -114,3 +116,5 @@ Without those secrets, the Space can build, but the backend protection will eith
|
|
| 114 |
- The start script preserves existing `data/config.toml` keys and only overlays values supplied through environment variables.
|
| 115 |
- `/health` stays public so Cloudflare Worker cron can warm the Space.
|
| 116 |
- The backend API remains protected because `app.api_key` / `app.function_key` / `app.app_key` are written from secrets at startup.
|
|
|
|
|
|
|
|
|
| 51 |
- `GROK2API_PROXY_ASSET_URL`
|
| 52 |
- `GROK2API_CF_CLEARANCE`
|
| 53 |
- `GROK2API_CF_COOKIES`
|
| 54 |
+
- `SERVER_STORAGE_TYPE`
|
| 55 |
+
- `SERVER_STORAGE_URL`
|
| 56 |
- `LOG_LEVEL`
|
| 57 |
- `SERVER_WORKERS`
|
| 58 |
- `DATA_DIR`
|
|
|
|
| 116 |
- The start script preserves existing `data/config.toml` keys and only overlays values supplied through environment variables.
|
| 117 |
- `/health` stays public so Cloudflare Worker cron can warm the Space.
|
| 118 |
- The backend API remains protected because `app.api_key` / `app.function_key` / `app.app_key` are written from secrets at startup.
|
| 119 |
+
- For durable token/config storage on HF Space, prefer `SERVER_STORAGE_TYPE=pgsql` with `SERVER_STORAGE_URL`.
|
| 120 |
+
- Media cache now defaults to `cache.retention_days = 7` and the app cleans expired image/video files every 24 hours.
|
app/services/grok/utils/cache.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
| 2 |
Local cache utilities.
|
| 3 |
"""
|
| 4 |
|
|
|
|
| 5 |
from typing import Any, Dict
|
| 6 |
|
| 7 |
from app.core.storage import DATA_DIR
|
|
@@ -26,15 +27,21 @@ class CacheService:
|
|
| 26 |
def _allowed_exts(self, media_type: str):
|
| 27 |
return IMAGE_EXTS if media_type == "image" else VIDEO_EXTS
|
| 28 |
|
| 29 |
-
def
|
| 30 |
cache_dir = self._cache_dir(media_type)
|
| 31 |
if not cache_dir.exists():
|
| 32 |
-
return
|
| 33 |
-
|
| 34 |
allowed = self._allowed_exts(media_type)
|
| 35 |
-
|
| 36 |
f for f in cache_dir.glob("*") if f.is_file() and f.suffix.lower() in allowed
|
| 37 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
total_size = sum(f.stat().st_size for f in files)
|
| 39 |
return {"count": len(files), "size_mb": round(total_size / 1024 / 1024, 2)}
|
| 40 |
|
|
@@ -45,10 +52,7 @@ class CacheService:
|
|
| 45 |
if not cache_dir.exists():
|
| 46 |
return {"total": 0, "page": page, "page_size": page_size, "items": []}
|
| 47 |
|
| 48 |
-
|
| 49 |
-
files = [
|
| 50 |
-
f for f in cache_dir.glob("*") if f.is_file() and f.suffix.lower() in allowed
|
| 51 |
-
]
|
| 52 |
|
| 53 |
items = []
|
| 54 |
for f in files:
|
|
@@ -106,5 +110,39 @@ class CacheService:
|
|
| 106 |
|
| 107 |
return {"count": count, "size_mb": round(total_size / 1024 / 1024, 2)}
|
| 108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
__all__ = ["CacheService"]
|
|
|
|
| 2 |
Local cache utilities.
|
| 3 |
"""
|
| 4 |
|
| 5 |
+
import time
|
| 6 |
from typing import Any, Dict
|
| 7 |
|
| 8 |
from app.core.storage import DATA_DIR
|
|
|
|
| 27 |
def _allowed_exts(self, media_type: str):
|
| 28 |
return IMAGE_EXTS if media_type == "image" else VIDEO_EXTS
|
| 29 |
|
| 30 |
+
def _media_files(self, media_type: str):
|
| 31 |
cache_dir = self._cache_dir(media_type)
|
| 32 |
if not cache_dir.exists():
|
| 33 |
+
return []
|
|
|
|
| 34 |
allowed = self._allowed_exts(media_type)
|
| 35 |
+
return [
|
| 36 |
f for f in cache_dir.glob("*") if f.is_file() and f.suffix.lower() in allowed
|
| 37 |
]
|
| 38 |
+
|
| 39 |
+
def get_stats(self, media_type: str = "image") -> Dict[str, Any]:
    """Return aggregate stats for the media cache of *media_type*.

    Returns:
        dict with ``count`` (number of cached files) and ``size_mb``
        (total size in megabytes, rounded to 2 decimals).

    The explicit cache-dir existence check was removed: ``_media_files``
    already returns ``[]`` for a missing directory, and an empty list
    yields the same ``{"count": 0, "size_mb": 0.0}`` result.
    """
    files = self._media_files(media_type)
    total_bytes = sum(f.stat().st_size for f in files)
    return {"count": len(files), "size_mb": round(total_bytes / 1024 / 1024, 2)}
|
| 47 |
|
|
|
|
| 52 |
if not cache_dir.exists():
|
| 53 |
return {"total": 0, "page": page, "page_size": page_size, "items": []}
|
| 54 |
|
| 55 |
+
files = self._media_files(media_type)
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
items = []
|
| 58 |
for f in files:
|
|
|
|
| 110 |
|
| 111 |
return {"count": count, "size_mb": round(total_size / 1024 / 1024, 2)}
|
| 112 |
|
| 113 |
+
def cleanup_expired_files(
|
| 114 |
+
self, retention_days: int, now: float | None = None
|
| 115 |
+
) -> Dict[str, Any]:
|
| 116 |
+
if retention_days <= 0:
|
| 117 |
+
return {"deleted": 0, "deleted_bytes": 0, "deleted_by_type": {"image": 0, "video": 0}}
|
| 118 |
+
|
| 119 |
+
current_time = now if now is not None else time.time()
|
| 120 |
+
expire_before = current_time - retention_days * 24 * 60 * 60
|
| 121 |
+
deleted = 0
|
| 122 |
+
deleted_bytes = 0
|
| 123 |
+
deleted_by_type = {"image": 0, "video": 0}
|
| 124 |
+
|
| 125 |
+
for media_type in ("image", "video"):
|
| 126 |
+
for path in self._media_files(media_type):
|
| 127 |
+
try:
|
| 128 |
+
stat = path.stat()
|
| 129 |
+
except Exception:
|
| 130 |
+
continue
|
| 131 |
+
if stat.st_mtime > expire_before:
|
| 132 |
+
continue
|
| 133 |
+
try:
|
| 134 |
+
path.unlink()
|
| 135 |
+
deleted += 1
|
| 136 |
+
deleted_bytes += stat.st_size
|
| 137 |
+
deleted_by_type[media_type] += 1
|
| 138 |
+
except Exception:
|
| 139 |
+
continue
|
| 140 |
+
|
| 141 |
+
return {
|
| 142 |
+
"deleted": deleted,
|
| 143 |
+
"deleted_bytes": deleted_bytes,
|
| 144 |
+
"deleted_by_type": deleted_by_type,
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
|
| 148 |
__all__ = ["CacheService"]
|
app/services/grok/utils/cache_scheduler.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Background scheduler for cache retention cleanup."""
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
|
| 5 |
+
from app.core.config import get_config
|
| 6 |
+
from app.core.logger import logger
|
| 7 |
+
from app.services.grok.utils.cache import CacheService
|
| 8 |
+
from app.services.grok.utils.locks import _file_lock
|
| 9 |
+
|
| 10 |
+
_task: asyncio.Task | None = None
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
async def cleanup_once() -> dict:
    """Run a single media-cache retention sweep.

    Reads ``cache.retention_days`` from config; a value <= 0 disables the
    sweep and returns an all-zero summary without touching the lock.

    Returns:
        The summary dict from ``CacheService.cleanup_expired_files``.
    """
    retention_days = int(get_config("cache.retention_days", 0) or 0)
    if retention_days <= 0:
        return {"deleted": 0, "deleted_bytes": 0, "deleted_by_type": {"image": 0, "video": 0}}

    async with _file_lock("cache_cleanup", timeout=5):
        service = CacheService()
        # cleanup_expired_files performs blocking stat/unlink filesystem
        # work; run it in a worker thread so a large cache does not stall
        # the event loop while the cleanup lock is held.
        result = await asyncio.to_thread(
            service.cleanup_expired_files, retention_days=retention_days
        )

    if result["deleted"] > 0:
        logger.info(
            "Cache retention cleanup removed "
            f"{result['deleted']} files "
            f"({result['deleted_bytes'] / 1024 / 1024:.2f}MB)"
        )
    else:
        logger.debug("Cache retention cleanup found no expired media files")
    return result
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
async def _scheduler_loop():
    """Run ``cleanup_once`` forever, sleeping between sweeps.

    A failed sweep is logged and the loop keeps running. The interval is
    re-read from config each cycle so changes take effect without restart.
    """
    logger.info("cache retention scheduler started")
    while True:
        try:
            await cleanup_once()
        except Exception as exc:
            logger.warning(f"Cache retention cleanup failed: {exc}")

        hours = int(get_config("cache.cleanup_interval_hours", 24) or 24)
        # Clamp to at least one hour so a bad config can't busy-loop.
        await asyncio.sleep(max(1, hours) * 60 * 60)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def start():
    """Start the background retention task once; no-op if already running
    or if ``cache.retention_days`` disables retention cleanup."""
    global _task
    if _task is not None:
        return
    if int(get_config("cache.retention_days", 0) or 0) <= 0:
        logger.info("cache retention scheduler disabled")
        return
    # asyncio.get_event_loop() is deprecated for this use since Python 3.10
    # and fails when no loop is set on the thread. start() is invoked from
    # the FastAPI lifespan, so a loop is guaranteed to be running here.
    _task = asyncio.get_running_loop().create_task(_scheduler_loop())
    logger.info("cache retention background task started")
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def stop():
    """Cancel the background retention task, if one is running."""
    global _task
    if _task is None:
        return
    # Cancel and drop the reference; the task's CancelledError is absorbed
    # by the event loop on shutdown.
    _task.cancel()
    _task = None
    logger.info("cache retention background task stopped")
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
__all__ = ["cleanup_once", "start", "stop"]
|
config.defaults.toml
CHANGED
|
@@ -97,6 +97,10 @@ reload_interval_sec = 30
|
|
| 97 |
enable_auto_clean = true
|
| 98 |
# 缓存大小上限(MB)
|
| 99 |
limit_mb = 512
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
# ==================== 对话配置 ====================
|
| 102 |
[chat]
|
|
|
|
| 97 |
enable_auto_clean = true
|
| 98 |
# 缓存大小上限(MB)
|
| 99 |
limit_mb = 512
|
| 100 |
+
# 媒体缓存保留天数(<=0 表示禁用按天清理)
|
| 101 |
+
retention_days = 7
|
| 102 |
+
# 按天清理的执行间隔(小时)
|
| 103 |
+
cleanup_interval_hours = 24
|
| 104 |
|
| 105 |
# ==================== 对话配置 ====================
|
| 106 |
[chat]
|
deploy/hf-space/.env.example
CHANGED
|
@@ -9,6 +9,8 @@ GROK2API_PROXY_BASE_URL=
|
|
| 9 |
GROK2API_PROXY_ASSET_URL=
|
| 10 |
GROK2API_CF_CLEARANCE=
|
| 11 |
GROK2API_CF_COOKIES=
|
|
|
|
|
|
|
| 12 |
|
| 13 |
LOG_LEVEL=INFO
|
| 14 |
SERVER_HOST=0.0.0.0
|
|
|
|
| 9 |
GROK2API_PROXY_ASSET_URL=
|
| 10 |
GROK2API_CF_CLEARANCE=
|
| 11 |
GROK2API_CF_COOKIES=
|
| 12 |
+
SERVER_STORAGE_TYPE=pgsql
|
| 13 |
+
SERVER_STORAGE_URL=
|
| 14 |
|
| 15 |
LOG_LEVEL=INFO
|
| 16 |
SERVER_HOST=0.0.0.0
|
deploy/hf-space/README.md
CHANGED
|
@@ -51,6 +51,8 @@ With this model:
|
|
| 51 |
- `GROK2API_PROXY_ASSET_URL`
|
| 52 |
- `GROK2API_CF_CLEARANCE`
|
| 53 |
- `GROK2API_CF_COOKIES`
|
|
|
|
|
|
|
| 54 |
- `LOG_LEVEL`
|
| 55 |
- `SERVER_WORKERS`
|
| 56 |
- `DATA_DIR`
|
|
@@ -114,3 +116,5 @@ Without those secrets, the Space can build, but the backend protection will eith
|
|
| 114 |
- The start script preserves existing `data/config.toml` keys and only overlays values supplied through environment variables.
|
| 115 |
- `/health` stays public so Cloudflare Worker cron can warm the Space.
|
| 116 |
- The backend API remains protected because `app.api_key` / `app.function_key` / `app.app_key` are written from secrets at startup.
|
|
|
|
|
|
|
|
|
| 51 |
- `GROK2API_PROXY_ASSET_URL`
|
| 52 |
- `GROK2API_CF_CLEARANCE`
|
| 53 |
- `GROK2API_CF_COOKIES`
|
| 54 |
+
- `SERVER_STORAGE_TYPE`
|
| 55 |
+
- `SERVER_STORAGE_URL`
|
| 56 |
- `LOG_LEVEL`
|
| 57 |
- `SERVER_WORKERS`
|
| 58 |
- `DATA_DIR`
|
|
|
|
| 116 |
- The start script preserves existing `data/config.toml` keys and only overlays values supplied through environment variables.
|
| 117 |
- `/health` stays public so Cloudflare Worker cron can warm the Space.
|
| 118 |
- The backend API remains protected because `app.api_key` / `app.function_key` / `app.app_key` are written from secrets at startup.
|
| 119 |
+
- For durable token/config storage on HF Space, prefer `SERVER_STORAGE_TYPE=pgsql` with `SERVER_STORAGE_URL`.
|
| 120 |
+
- Media cache now defaults to `cache.retention_days = 7` and the app cleans expired image/video files every 24 hours.
|
main.py
CHANGED
|
@@ -93,6 +93,9 @@ async def lifespan(app: FastAPI):
|
|
| 93 |
from app.services.cf_refresh import start as cf_refresh_start
|
| 94 |
cf_refresh_start()
|
| 95 |
|
|
|
|
|
|
|
|
|
|
| 96 |
logger.info("Application startup complete.")
|
| 97 |
yield
|
| 98 |
|
|
@@ -102,6 +105,9 @@ async def lifespan(app: FastAPI):
|
|
| 102 |
from app.services.cf_refresh import stop as cf_refresh_stop
|
| 103 |
cf_refresh_stop()
|
| 104 |
|
|
|
|
|
|
|
|
|
|
| 105 |
from app.core.storage import StorageFactory
|
| 106 |
|
| 107 |
if StorageFactory._instance:
|
|
|
|
| 93 |
from app.services.cf_refresh import start as cf_refresh_start
|
| 94 |
cf_refresh_start()
|
| 95 |
|
| 96 |
+
from app.services.grok.utils.cache_scheduler import start as cache_cleanup_start
|
| 97 |
+
cache_cleanup_start()
|
| 98 |
+
|
| 99 |
logger.info("Application startup complete.")
|
| 100 |
yield
|
| 101 |
|
|
|
|
| 105 |
from app.services.cf_refresh import stop as cf_refresh_stop
|
| 106 |
cf_refresh_stop()
|
| 107 |
|
| 108 |
+
from app.services.grok.utils.cache_scheduler import stop as cache_cleanup_stop
|
| 109 |
+
cache_cleanup_stop()
|
| 110 |
+
|
| 111 |
from app.core.storage import StorageFactory
|
| 112 |
|
| 113 |
if StorageFactory._instance:
|