sjiangtao2024 commited on
Commit
c79e000
·
1 Parent(s): 6bff6a1

Add media cache retention cleanup

Browse files
README.md CHANGED
@@ -51,6 +51,8 @@ With this model:
51
  - `GROK2API_PROXY_ASSET_URL`
52
  - `GROK2API_CF_CLEARANCE`
53
  - `GROK2API_CF_COOKIES`
 
 
54
  - `LOG_LEVEL`
55
  - `SERVER_WORKERS`
56
  - `DATA_DIR`
@@ -114,3 +116,5 @@ Without those secrets, the Space can build, but the backend protection will eith
114
  - The start script preserves existing `data/config.toml` keys and only overlays values supplied through environment variables.
115
  - `/health` stays public so Cloudflare Worker cron can warm the Space.
116
  - The backend API remains protected because `app.api_key` / `app.function_key` / `app.app_key` are written from secrets at startup.
 
 
 
51
  - `GROK2API_PROXY_ASSET_URL`
52
  - `GROK2API_CF_CLEARANCE`
53
  - `GROK2API_CF_COOKIES`
54
+ - `SERVER_STORAGE_TYPE`
55
+ - `SERVER_STORAGE_URL`
56
  - `LOG_LEVEL`
57
  - `SERVER_WORKERS`
58
  - `DATA_DIR`
 
116
  - The start script preserves existing `data/config.toml` keys and only overlays values supplied through environment variables.
117
  - `/health` stays public so Cloudflare Worker cron can warm the Space.
118
  - The backend API remains protected because `app.api_key` / `app.function_key` / `app.app_key` are written from secrets at startup.
119
+ - For durable token/config storage on HF Space, prefer `SERVER_STORAGE_TYPE=pgsql` with `SERVER_STORAGE_URL`.
120
+ - Media cache now defaults to `cache.retention_days = 7` and the app cleans expired image/video files every 24 hours.
app/services/grok/utils/cache.py CHANGED
@@ -2,6 +2,7 @@
2
  Local cache utilities.
3
  """
4
 
 
5
  from typing import Any, Dict
6
 
7
  from app.core.storage import DATA_DIR
@@ -26,15 +27,21 @@ class CacheService:
26
  def _allowed_exts(self, media_type: str):
27
  return IMAGE_EXTS if media_type == "image" else VIDEO_EXTS
28
 
29
- def get_stats(self, media_type: str = "image") -> Dict[str, Any]:
30
  cache_dir = self._cache_dir(media_type)
31
  if not cache_dir.exists():
32
- return {"count": 0, "size_mb": 0.0}
33
-
34
  allowed = self._allowed_exts(media_type)
35
- files = [
36
  f for f in cache_dir.glob("*") if f.is_file() and f.suffix.lower() in allowed
37
  ]
 
 
 
 
 
 
 
38
  total_size = sum(f.stat().st_size for f in files)
39
  return {"count": len(files), "size_mb": round(total_size / 1024 / 1024, 2)}
40
 
@@ -45,10 +52,7 @@ class CacheService:
45
  if not cache_dir.exists():
46
  return {"total": 0, "page": page, "page_size": page_size, "items": []}
47
 
48
- allowed = self._allowed_exts(media_type)
49
- files = [
50
- f for f in cache_dir.glob("*") if f.is_file() and f.suffix.lower() in allowed
51
- ]
52
 
53
  items = []
54
  for f in files:
@@ -106,5 +110,39 @@ class CacheService:
106
 
107
  return {"count": count, "size_mb": round(total_size / 1024 / 1024, 2)}
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  __all__ = ["CacheService"]
 
2
  Local cache utilities.
3
  """
4
 
5
+ import time
6
  from typing import Any, Dict
7
 
8
  from app.core.storage import DATA_DIR
 
27
def _allowed_exts(self, media_type: str):
    """Return the extension whitelist for *media_type*.

    "image" maps to IMAGE_EXTS; anything else is treated as video.
    """
    return VIDEO_EXTS if media_type != "image" else IMAGE_EXTS
29
 
30
def _media_files(self, media_type: str):
    """List cached media files of *media_type* with an allowed extension.

    Returns an empty list when the cache directory does not exist yet,
    so callers never need their own existence check.
    """
    directory = self._cache_dir(media_type)
    if not directory.exists():
        return []
    extensions = self._allowed_exts(media_type)
    matches = []
    for entry in directory.glob("*"):
        if entry.is_file() and entry.suffix.lower() in extensions:
            matches.append(entry)
    return matches
38
+
39
def get_stats(self, media_type: str = "image") -> Dict[str, Any]:
    """Return cache statistics for *media_type*.

    Args:
        media_type: "image" or "video"; selects which cache directory
            is inspected.

    Returns:
        {"count": <number of cached files>, "size_mb": <total size,
        rounded to 2 decimals>}.

    Note: the explicit cache-directory existence check was removed —
    _media_files() already returns [] for a missing directory, and an
    empty list naturally produces {"count": 0, "size_mb": 0.0}.
    """
    files = self._media_files(media_type)
    total_size = sum(f.stat().st_size for f in files)
    return {"count": len(files), "size_mb": round(total_size / 1024 / 1024, 2)}
47
 
 
52
  if not cache_dir.exists():
53
  return {"total": 0, "page": page, "page_size": page_size, "items": []}
54
 
55
+ files = self._media_files(media_type)
 
 
 
56
 
57
  items = []
58
  for f in files:
 
110
 
111
  return {"count": count, "size_mb": round(total_size / 1024 / 1024, 2)}
112
 
113
def cleanup_expired_files(
    self, retention_days: int, now: float | None = None
) -> Dict[str, Any]:
    """Delete cached image/video files whose mtime is older than the retention window.

    Args:
        retention_days: files last modified more than this many days ago
            are deleted; values <= 0 disable cleanup entirely.
        now: reference timestamp in seconds since the epoch; defaults to
            time.time(). Injectable for deterministic testing.

    Returns:
        {"deleted": total files removed,
         "deleted_bytes": total bytes reclaimed,
         "deleted_by_type": {"image": n, "video": n}}.
    """
    if retention_days <= 0:
        return {"deleted": 0, "deleted_bytes": 0, "deleted_by_type": {"image": 0, "video": 0}}

    current_time = now if now is not None else time.time()
    expire_before = current_time - retention_days * 24 * 60 * 60
    deleted = 0
    deleted_bytes = 0
    deleted_by_type = {"image": 0, "video": 0}

    for media_type in ("image", "video"):
        for path in self._media_files(media_type):
            # Best-effort sweep: a file may vanish between listing and
            # stat/unlink. Only swallow filesystem errors (OSError), not
            # arbitrary exceptions — a broad `except Exception` would hide
            # real bugs.
            try:
                stat = path.stat()
            except OSError:
                continue
            if stat.st_mtime > expire_before:
                continue
            try:
                path.unlink()
            except OSError:
                continue
            # Counters updated only after a confirmed unlink; these plain
            # increments cannot raise, so they live outside the try.
            deleted += 1
            deleted_bytes += stat.st_size
            deleted_by_type[media_type] += 1

    return {
        "deleted": deleted,
        "deleted_bytes": deleted_bytes,
        "deleted_by_type": deleted_by_type,
    }
146
+
147
 
148
  __all__ = ["CacheService"]
app/services/grok/utils/cache_scheduler.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Background scheduler for cache retention cleanup."""
2
+
3
+ import asyncio
4
+
5
+ from app.core.config import get_config
6
+ from app.core.logger import logger
7
+ from app.services.grok.utils.cache import CacheService
8
+ from app.services.grok.utils.locks import _file_lock
9
+
10
+ _task: asyncio.Task | None = None
11
+
12
+
13
async def cleanup_once() -> dict:
    """Run a single retention sweep and return its summary dict."""
    days = int(get_config("cache.retention_days", 0) or 0)
    if days <= 0:
        # Retention disabled by config: report an empty result without I/O.
        return {"deleted": 0, "deleted_bytes": 0, "deleted_by_type": {"image": 0, "video": 0}}

    # Serialize sweeps across workers via the shared file lock.
    async with _file_lock("cache_cleanup", timeout=5):
        result = CacheService().cleanup_expired_files(retention_days=days)

    if result["deleted"] > 0:
        mb = result["deleted_bytes"] / 1024 / 1024
        logger.info(
            f"Cache retention cleanup removed {result['deleted']} files ({mb:.2f}MB)"
        )
    else:
        logger.debug("Cache retention cleanup found no expired media files")
    return result
31
+
32
+
33
async def _scheduler_loop():
    """Run cleanup passes forever, sleeping the configured interval between them."""
    logger.info("cache retention scheduler started")
    while True:
        try:
            await cleanup_once()
        except Exception as exc:
            # One failed sweep must not kill the scheduler loop.
            logger.warning(f"Cache retention cleanup failed: {exc}")

        # Re-read the interval each cycle so config changes take effect
        # without a restart; clamp to at least one hour.
        hours = int(get_config("cache.cleanup_interval_hours", 24) or 24)
        await asyncio.sleep(max(1, hours) * 60 * 60)
44
+
45
+
46
def start():
    """Start the background retention task.

    Idempotent: does nothing if a task is already running, and logs a
    no-op when `cache.retention_days` disables retention cleanup.
    """
    global _task
    if _task is not None:
        return
    if int(get_config("cache.retention_days", 0) or 0) <= 0:
        logger.info("cache retention scheduler disabled")
        return
    # asyncio.get_event_loop() is deprecated inside a running loop (3.10+)
    # and no longer creates one in 3.12. start() is called from the FastAPI
    # lifespan, where a loop is guaranteed to be running, so bind to it
    # explicitly.
    _task = asyncio.get_running_loop().create_task(_scheduler_loop())
    logger.info("cache retention background task started")
55
+
56
+
57
def stop():
    """Cancel and clear the background retention task, if one exists."""
    global _task
    task = _task
    if task is None:
        return
    task.cancel()
    _task = None
    logger.info("cache retention background task stopped")
63
+
64
+
65
+ __all__ = ["cleanup_once", "start", "stop"]
config.defaults.toml CHANGED
@@ -97,6 +97,10 @@ reload_interval_sec = 30
97
  enable_auto_clean = true
98
  # 缓存大小上限(MB)
99
  limit_mb = 512
 
 
 
 
100
 
101
  # ==================== 对话配置 ====================
102
  [chat]
 
97
  enable_auto_clean = true
98
  # 缓存大小上限(MB)
99
  limit_mb = 512
100
+ # 媒体缓存保留天数(<=0 表示禁用按天清理)
101
+ retention_days = 7
102
+ # 按天清理的执行间隔(小时)
103
+ cleanup_interval_hours = 24
104
 
105
  # ==================== 对话配置 ====================
106
  [chat]
deploy/hf-space/.env.example CHANGED
@@ -9,6 +9,8 @@ GROK2API_PROXY_BASE_URL=
9
  GROK2API_PROXY_ASSET_URL=
10
  GROK2API_CF_CLEARANCE=
11
  GROK2API_CF_COOKIES=
 
 
12
 
13
  LOG_LEVEL=INFO
14
  SERVER_HOST=0.0.0.0
 
9
  GROK2API_PROXY_ASSET_URL=
10
  GROK2API_CF_CLEARANCE=
11
  GROK2API_CF_COOKIES=
12
+ SERVER_STORAGE_TYPE=pgsql
13
+ SERVER_STORAGE_URL=
14
 
15
  LOG_LEVEL=INFO
16
  SERVER_HOST=0.0.0.0
deploy/hf-space/README.md CHANGED
@@ -51,6 +51,8 @@ With this model:
51
  - `GROK2API_PROXY_ASSET_URL`
52
  - `GROK2API_CF_CLEARANCE`
53
  - `GROK2API_CF_COOKIES`
 
 
54
  - `LOG_LEVEL`
55
  - `SERVER_WORKERS`
56
  - `DATA_DIR`
@@ -114,3 +116,5 @@ Without those secrets, the Space can build, but the backend protection will eith
114
  - The start script preserves existing `data/config.toml` keys and only overlays values supplied through environment variables.
115
  - `/health` stays public so Cloudflare Worker cron can warm the Space.
116
  - The backend API remains protected because `app.api_key` / `app.function_key` / `app.app_key` are written from secrets at startup.
 
 
 
51
  - `GROK2API_PROXY_ASSET_URL`
52
  - `GROK2API_CF_CLEARANCE`
53
  - `GROK2API_CF_COOKIES`
54
+ - `SERVER_STORAGE_TYPE`
55
+ - `SERVER_STORAGE_URL`
56
  - `LOG_LEVEL`
57
  - `SERVER_WORKERS`
58
  - `DATA_DIR`
 
116
  - The start script preserves existing `data/config.toml` keys and only overlays values supplied through environment variables.
117
  - `/health` stays public so Cloudflare Worker cron can warm the Space.
118
  - The backend API remains protected because `app.api_key` / `app.function_key` / `app.app_key` are written from secrets at startup.
119
+ - For durable token/config storage on HF Space, prefer `SERVER_STORAGE_TYPE=pgsql` with `SERVER_STORAGE_URL`.
120
+ - Media cache now defaults to `cache.retention_days = 7` and the app cleans expired image/video files every 24 hours.
main.py CHANGED
@@ -93,6 +93,9 @@ async def lifespan(app: FastAPI):
93
  from app.services.cf_refresh import start as cf_refresh_start
94
  cf_refresh_start()
95
 
 
 
 
96
  logger.info("Application startup complete.")
97
  yield
98
 
@@ -102,6 +105,9 @@ async def lifespan(app: FastAPI):
102
  from app.services.cf_refresh import stop as cf_refresh_stop
103
  cf_refresh_stop()
104
 
 
 
 
105
  from app.core.storage import StorageFactory
106
 
107
  if StorageFactory._instance:
 
93
  from app.services.cf_refresh import start as cf_refresh_start
94
  cf_refresh_start()
95
 
96
+ from app.services.grok.utils.cache_scheduler import start as cache_cleanup_start
97
+ cache_cleanup_start()
98
+
99
  logger.info("Application startup complete.")
100
  yield
101
 
 
105
  from app.services.cf_refresh import stop as cf_refresh_stop
106
  cf_refresh_stop()
107
 
108
+ from app.services.grok.utils.cache_scheduler import stop as cache_cleanup_stop
109
+ cache_cleanup_stop()
110
+
111
  from app.core.storage import StorageFactory
112
 
113
  if StorageFactory._instance: