Spaces:

superxu520
/

G_AI

Paused

App Files Community

superxu520 committed on Feb 26

Commit

d4a9626

1 Parent(s): 4647ed2

fix: 修复速率限制器死锁风险、临时文件清理兼容性和图片目录初始化问题

Browse files

Files changed (2) hide show

app/server/middleware.py +36 -16
app/server/rate_limiter.py +18 -36

app/server/middleware.py CHANGED Viewed

@@ -16,13 +16,23 @@ from ..utils import g_config
 # Persistent directory for storing generated images
 # Support environment variable override for Docker/HF deployments
-IMAGE_STORE_DIR = Path(os.getenv("GEMINI_IMAGE_STORE_PATH", tempfile.gettempdir())) / "ai_generated_images"
-IMAGE_STORE_DIR.mkdir(parents=True, exist_ok=True)
 def get_image_store_dir() -> Path:
-    """Returns a persistent directory for storing images."""
-    return IMAGE_STORE_DIR
 def get_image_token(filename: str) -> str:
@@ -47,24 +57,30 @@ def verify_image_token(filename: str, token: str | None) -> bool:
 def cleanup_expired_images(retention_days: int) -> int:
-    """Delete images in IMAGE_STORE_DIR older than retention_days."""
     if retention_days <= 0:
         return 0
     now = time.time()
     retention_seconds = retention_days * 24 * 60 * 60
     cutoff = now - retention_seconds
     count = 0
-    for file_path in IMAGE_STORE_DIR.iterdir():
-        if not file_path.is_file():
-            continue
-        try:
-            if file_path.stat().st_mtime < cutoff:
-                file_path.unlink()
-                count += 1
-        except Exception as e:
-            logger.warning(f"Failed to delete expired image {file_path}: {e}")
     if count > 0:
         logger.info(f"Cleaned up {count} expired images.")
@@ -93,8 +109,12 @@ async def get_temp_dir():
     try:
         yield Path(temp_dir.name)
     finally:
-        # Run cleanup in thread pool to avoid blocking
-        await asyncio.get_event_loop().run_in_executor(None, temp_dir.cleanup)
 def verify_api_key(

 # Persistent directory for storing generated images
 # Support environment variable override for Docker/HF deployments
+# Directory will be created on first access to avoid import-time failures
+_IMAGE_STORE_DIR: Path | None = None
 def get_image_store_dir() -> Path:
+    """Returns a persistent directory for storing images. Creates it if it doesn't exist."""
+    global _IMAGE_STORE_DIR
+    if _IMAGE_STORE_DIR is None:
+        _IMAGE_STORE_DIR = Path(os.getenv("GEMINI_IMAGE_STORE_PATH", tempfile.gettempdir())) / "ai_generated_images"
+        try:
+            _IMAGE_STORE_DIR.mkdir(parents=True, exist_ok=True)
+        except OSError as e:
+            logger.error(f"Failed to create image store directory at {_IMAGE_STORE_DIR}: {e}")
+            logger.warning("Falling back to system temp directory")
+            _IMAGE_STORE_DIR = Path(tempfile.gettempdir()) / "ai_generated_images"
+            _IMAGE_STORE_DIR.mkdir(parents=True, exist_ok=True)
+    return _IMAGE_STORE_DIR
 def get_image_token(filename: str) -> str:
 def cleanup_expired_images(retention_days: int) -> int:
+    """Delete images in image store directory older than retention_days."""
     if retention_days <= 0:
         return 0
+    image_store_dir = get_image_store_dir()
     now = time.time()
     retention_seconds = retention_days * 24 * 60 * 60
     cutoff = now - retention_seconds
     count = 0
+    try:
+        for file_path in image_store_dir.iterdir():
+            if not file_path.is_file():
+                continue
+            try:
+                if file_path.stat().st_mtime < cutoff:
+                    file_path.unlink()
+                    count += 1
+            except Exception as e:
+                logger.warning(f"Failed to delete expired image {file_path}: {e}")
+    except FileNotFoundError:
+        logger.debug(f"Image store directory does not exist yet: {image_store_dir}")
+    except Exception as e:
+        logger.warning(f"Failed to cleanup expired images: {e}")
     if count > 0:
         logger.info(f"Cleaned up {count} expired images.")
     try:
         yield Path(temp_dir.name)
     finally:
+        # Run cleanup in thread pool to avoid blocking (Python 3.9+)
+        try:
+            await asyncio.to_thread(temp_dir.cleanup)
+        except AttributeError:
+            # Fallback for Python < 3.9
+            await asyncio.get_running_loop().run_in_executor(None, temp_dir.cleanup)
 def verify_api_key(

app/server/rate_limiter.py CHANGED Viewed

@@ -5,7 +5,6 @@ Protects Gemini API from being overwhelmed by too many concurrent requests.
 import asyncio
 import time
-from collections import defaultdict
 from typing import Callable
 from fastapi import HTTPException, Request, status
@@ -14,7 +13,7 @@ from loguru import logger
 class RateLimiter:
     """
-    Token bucket rate limiter for concurrent requests.
     Limits the number of simultaneous requests being processed.
     When limit is exceeded, new requests are queued or rejected.
@@ -30,51 +29,34 @@ class RateLimiter:
         """
         self.max_concurrent = max_concurrent
         self.queue_timeout = queue_timeout
-        self._current_count = 0
         self._lock = asyncio.Lock()
-        self._waiters = 0
     async def acquire(self) -> None:
         """
         Acquire permission to process a request.
         Blocks until a slot is available or timeout.
         """
-        start_time = time.monotonic()
-        async with self._lock:
-            self._waiters += 1
-            try:
-                while self._current_count >= self.max_concurrent:
-                    # Check timeout
-                    elapsed = time.monotonic() - start_time
-                    if elapsed >= self.queue_timeout:
-                        raise HTTPException(
-                            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
-                            detail="Server is busy. Please try again later.",
-                        )
-                    # Wait for a slot to become available
-                    self._lock.release()
-                    try:
-                        await asyncio.sleep(0.1)  # Small delay to avoid busy waiting
-                    finally:
-                        await self._lock.acquire()
-                    # Re-check timeout after sleep
-                    if time.monotonic() - start_time >= self.queue_timeout:
-                        raise HTTPException(
-                            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
-                            detail="Server is busy. Please try again later.",
-                        )
-                self._current_count += 1
-            finally:
-                self._waiters -= 1
     async def release(self) -> None:
         """Release a slot after request processing completes."""
         async with self._lock:
-            self._current_count -= 1
 # Global rate limiter instance

 import asyncio
 import time
 from typing import Callable
 from fastapi import HTTPException, Request, status
 class RateLimiter:
     """
+    Semaphore-based rate limiter for concurrent requests.
     Limits the number of simultaneous requests being processed.
     When limit is exceeded, new requests are queued or rejected.
         """
         self.max_concurrent = max_concurrent
         self.queue_timeout = queue_timeout
+        self._semaphore = asyncio.Semaphore(max_concurrent)
+        self._acquired_count = 0
         self._lock = asyncio.Lock()
     async def acquire(self) -> None:
         """
         Acquire permission to process a request.
         Blocks until a slot is available or timeout.
         """
+        try:
+            # Use asyncio.wait_for to implement timeout
+            await asyncio.wait_for(self._semaphore.acquire(), timeout=self.queue_timeout)
+            async with self._lock:
+                self._acquired_count += 1
+                logger.debug(f"Rate limiter: acquired slot ({self._acquired_count}/{self.max_concurrent})")
+        except asyncio.TimeoutError:
+            logger.warning(f"Rate limiter: request queued for {self.queue_timeout}s, rejecting")
+            raise HTTPException(
+                status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+                detail="Server is busy. Please try again later.",
+            )
     async def release(self) -> None:
         """Release a slot after request processing completes."""
         async with self._lock:
+            self._acquired_count -= 1
+            logger.debug(f"Rate limiter: released slot ({self._acquired_count}/{self.max_concurrent})")
+        self._semaphore.release()
 # Global rate limiter instance