Factor Studios committed on
Commit
c12c99a
·
verified ·
1 Parent(s): 6b96572

Update http_storage.py

Browse files
Files changed (1) hide show
  1. http_storage.py +23 -7
http_storage.py CHANGED
@@ -53,12 +53,14 @@ class HTTPGPUStorage:
53
  # Configure HTTP session with connection pooling and retries
54
  self.http_session = requests.Session()
55
 
56
- # Configure retry strategy
 
57
  retry_strategy = Retry(
58
- total=3,
59
  status_forcelist=[429, 500, 502, 503, 504],
60
- allowed_methods=["HEAD", "GET", "OPTIONS", "POST", "PUT", "DELETE"], # Updated parameter name
61
- backoff_factor=1
 
62
  )
63
 
64
  adapter = HTTPAdapter(
@@ -87,10 +89,18 @@ class HTTPGPUStorage:
87
  def _create_session(self):
88
  """Create HTTP session with the server"""
89
  try:
 
90
  response = self.http_session.post(
91
  f"{self.api_base}/sessions",
92
- json={"client_id": "virtual_gpu_client"},
93
- timeout=30
 
 
 
 
 
 
 
94
  )
95
  response.raise_for_status()
96
 
@@ -113,9 +123,15 @@ class HTTPGPUStorage:
113
  return False
114
 
115
  def _make_request(self, method: str, endpoint: str, **kwargs) -> Optional[Dict[str, Any]]:
116
- """Make HTTP request with error handling and retries"""
117
  if self._closing:
118
  return {"status": "error", "message": "HTTP client is closing"}
 
 
 
 
 
 
119
 
120
  url = f"{self.api_base}{endpoint}"
121
  timeout = kwargs.pop('timeout', 30) # Allow timeout override, default 30s
 
53
  # Configure HTTP session with connection pooling and retries
54
  self.http_session = requests.Session()
55
 
56
+ # Configure retry strategy with exponential backoff
57
+ # Match server-side configuration
58
  retry_strategy = Retry(
59
+ total=3, # Match FastAPI default retry limit
60
  status_forcelist=[429, 500, 502, 503, 504],
61
+ allowed_methods=["HEAD", "GET", "OPTIONS", "POST", "PUT", "DELETE"],
62
+ backoff_factor=0.5, # Faster initial retries since server timeout is 30s
63
+ respect_retry_after_header=True
64
  )
65
 
66
  adapter = HTTPAdapter(
 
89
  def _create_session(self):
90
  """Create HTTP session with the server"""
91
  try:
92
+ # Match server session timeout
93
  response = self.http_session.post(
94
  f"{self.api_base}/sessions",
95
+ json={
96
+ "client_id": "virtual_gpu_client",
97
+ "resource_limits": {
98
+ "max_vram_gb": 40, # Match A100 VRAM size
99
+ "max_models": 5,
100
+ "max_batch_size": 32
101
+ }
102
+ },
103
+ timeout=30 # Match server connection_timeout
104
  )
105
  response.raise_for_status()
106
 
 
123
  return False
124
 
125
  def _make_request(self, method: str, endpoint: str, **kwargs) -> Optional[Dict[str, Any]]:
126
+ """Make HTTP request with error handling, retries and recovery"""
127
  if self._closing:
128
  return {"status": "error", "message": "HTTP client is closing"}
129
+
130
+ # Reset connection if too many errors
131
+ if self.error_count > 10:
132
+ logging.warning("Too many errors, attempting to reset connection...")
133
+ self.error_count = 0
134
+ self._create_session()
135
 
136
  url = f"{self.api_base}{endpoint}"
137
  timeout = kwargs.pop('timeout', 30) # Allow timeout override, default 30s