Spaces:
Sleeping
Sleeping
Factor Studios
committed on
Update http_storage.py
Browse files - http_storage.py +23 -7
http_storage.py
CHANGED
|
@@ -53,12 +53,14 @@ class HTTPGPUStorage:
|
|
| 53 |
# Configure HTTP session with connection pooling and retries
|
| 54 |
self.http_session = requests.Session()
|
| 55 |
|
| 56 |
-
# Configure retry strategy
|
|
|
|
| 57 |
retry_strategy = Retry(
|
| 58 |
-
total=3,
|
| 59 |
status_forcelist=[429, 500, 502, 503, 504],
|
| 60 |
-
allowed_methods=["HEAD", "GET", "OPTIONS", "POST", "PUT", "DELETE"],
|
| 61 |
-
backoff_factor=
|
|
|
|
| 62 |
)
|
| 63 |
|
| 64 |
adapter = HTTPAdapter(
|
|
@@ -87,10 +89,18 @@ class HTTPGPUStorage:
|
|
| 87 |
def _create_session(self):
|
| 88 |
"""Create HTTP session with the server"""
|
| 89 |
try:
|
|
|
|
| 90 |
response = self.http_session.post(
|
| 91 |
f"{self.api_base}/sessions",
|
| 92 |
-
json={
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
)
|
| 95 |
response.raise_for_status()
|
| 96 |
|
|
@@ -113,9 +123,15 @@ class HTTPGPUStorage:
|
|
| 113 |
return False
|
| 114 |
|
| 115 |
def _make_request(self, method: str, endpoint: str, **kwargs) -> Optional[Dict[str, Any]]:
|
| 116 |
-
"""Make HTTP request with error handling and
|
| 117 |
if self._closing:
|
| 118 |
return {"status": "error", "message": "HTTP client is closing"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
url = f"{self.api_base}{endpoint}"
|
| 121 |
timeout = kwargs.pop('timeout', 30) # Allow timeout override, default 30s
|
|
|
|
| 53 |
# Configure HTTP session with connection pooling and retries
|
| 54 |
self.http_session = requests.Session()
|
| 55 |
|
| 56 |
+
# Configure retry strategy with exponential backoff
|
| 57 |
+
# Match server-side configuration
|
| 58 |
retry_strategy = Retry(
|
| 59 |
+
total=3, # Match FastAPI default retry limit
|
| 60 |
status_forcelist=[429, 500, 502, 503, 504],
|
| 61 |
+
allowed_methods=["HEAD", "GET", "OPTIONS", "POST", "PUT", "DELETE"],
|
| 62 |
+
backoff_factor=0.5, # Faster initial retries since server timeout is 30s
|
| 63 |
+
respect_retry_after_header=True
|
| 64 |
)
|
| 65 |
|
| 66 |
adapter = HTTPAdapter(
|
|
|
|
| 89 |
def _create_session(self):
|
| 90 |
"""Create HTTP session with the server"""
|
| 91 |
try:
|
| 92 |
+
# Match server session timeout
|
| 93 |
response = self.http_session.post(
|
| 94 |
f"{self.api_base}/sessions",
|
| 95 |
+
json={
|
| 96 |
+
"client_id": "virtual_gpu_client",
|
| 97 |
+
"resource_limits": {
|
| 98 |
+
"max_vram_gb": 40, # Match A100 VRAM size
|
| 99 |
+
"max_models": 5,
|
| 100 |
+
"max_batch_size": 32
|
| 101 |
+
}
|
| 102 |
+
},
|
| 103 |
+
timeout=30 # Match server connection_timeout
|
| 104 |
)
|
| 105 |
response.raise_for_status()
|
| 106 |
|
|
|
|
| 123 |
return False
|
| 124 |
|
| 125 |
def _make_request(self, method: str, endpoint: str, **kwargs) -> Optional[Dict[str, Any]]:
|
| 126 |
+
"""Make HTTP request with error handling, retries and recovery"""
|
| 127 |
if self._closing:
|
| 128 |
return {"status": "error", "message": "HTTP client is closing"}
|
| 129 |
+
|
| 130 |
+
# Reset connection if too many errors
|
| 131 |
+
if self.error_count > 10:
|
| 132 |
+
logging.warning("Too many errors, attempting to reset connection...")
|
| 133 |
+
self.error_count = 0
|
| 134 |
+
self._create_session()
|
| 135 |
|
| 136 |
url = f"{self.api_base}{endpoint}"
|
| 137 |
timeout = kwargs.pop('timeout', 30) # Allow timeout override, default 30s
|