Spaces:
Sleeping
Sleeping
Factor Studios
committed on
Update test_ai_integration.py
Browse files- test_ai_integration.py +34 -10
test_ai_integration.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
| 2 |
Test AI integration with WebSocket-based storage and zero CPU memory usage.
|
| 3 |
All operations are performed through WebSocket storage with direct tensor core access.
|
| 4 |
"""
|
|
|
|
| 5 |
from gpu_arch import Chip
|
| 6 |
from ai import AIAccelerator
|
| 7 |
from virtual_vram import VirtualVRAM
|
|
@@ -10,9 +11,16 @@ import numpy as np
|
|
| 10 |
from websocket_storage import WebSocketGPUStorage
|
| 11 |
import time
|
| 12 |
import os
|
|
|
|
| 13 |
import contextlib
|
| 14 |
-
import resource
|
| 15 |
import atexit
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# Increase system file descriptor limit
|
| 18 |
def increase_file_limit():
|
|
@@ -23,16 +31,34 @@ def increase_file_limit():
|
|
| 23 |
except Exception as e:
|
| 24 |
print(f"Warning: Could not increase file descriptor limit: {e}")
|
| 25 |
|
| 26 |
-
# WebSocket connection manager
|
| 27 |
@contextlib.contextmanager
|
| 28 |
-
def websocket_manager():
|
| 29 |
-
storage =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
try:
|
| 31 |
-
if not storage.wait_for_connection():
|
| 32 |
-
raise RuntimeError("Could not connect to GPU storage server")
|
| 33 |
yield storage
|
| 34 |
finally:
|
| 35 |
-
storage
|
|
|
|
| 36 |
|
| 37 |
# Cleanup handler
|
| 38 |
def cleanup_resources():
|
|
@@ -288,6 +314,4 @@ def test_ai_integration():
|
|
| 288 |
print(f"Matrix operations test failed: {e}")
|
| 289 |
return
|
| 290 |
|
| 291 |
-
print("\n--- All AI Integration Tests Completed ---")
|
| 292 |
-
|
| 293 |
-
|
|
|
|
| 2 |
Test AI integration with WebSocket-based storage and zero CPU memory usage.
|
| 3 |
All operations are performed through WebSocket storage with direct tensor core access.
|
| 4 |
"""
|
| 5 |
+
import asyncio
|
| 6 |
from gpu_arch import Chip
|
| 7 |
from ai import AIAccelerator
|
| 8 |
from virtual_vram import VirtualVRAM
|
|
|
|
| 11 |
from websocket_storage import WebSocketGPUStorage
|
| 12 |
import time
|
| 13 |
import os
|
| 14 |
+
import platform
|
| 15 |
import contextlib
|
|
|
|
| 16 |
import atexit
|
| 17 |
+
import logging
|
| 18 |
+
|
| 19 |
+
# Configure logging
|
| 20 |
+
logging.basicConfig(
|
| 21 |
+
level=logging.INFO,
|
| 22 |
+
format='%(asctime)s - %(levelname)s - %(message)s'
|
| 23 |
+
)
|
| 24 |
|
| 25 |
# Increase system file descriptor limit
|
| 26 |
def increase_file_limit():
|
|
|
|
| 31 |
except Exception as e:
|
| 32 |
print(f"Warning: Could not increase file descriptor limit: {e}")
|
| 33 |
|
| 34 |
+
# WebSocket connection manager with retry
|
| 35 |
@contextlib.contextmanager
|
| 36 |
+
def websocket_manager(max_retries=3, retry_delay=2):
|
| 37 |
+
storage = None
|
| 38 |
+
for attempt in range(max_retries):
|
| 39 |
+
try:
|
| 40 |
+
storage = WebSocketGPUStorage()
|
| 41 |
+
if storage.wait_for_connection(timeout=10.0):
|
| 42 |
+
logging.info("Successfully connected to GPU storage server")
|
| 43 |
+
break
|
| 44 |
+
else:
|
| 45 |
+
logging.warning(f"Connection attempt {attempt + 1} failed, retrying...")
|
| 46 |
+
if storage:
|
| 47 |
+
storage.close()
|
| 48 |
+
time.sleep(retry_delay)
|
| 49 |
+
except Exception as e:
|
| 50 |
+
logging.error(f"Connection attempt {attempt + 1} failed with error: {e}")
|
| 51 |
+
if storage:
|
| 52 |
+
storage.close()
|
| 53 |
+
if attempt == max_retries - 1:
|
| 54 |
+
raise RuntimeError(f"Could not connect to GPU storage server after {max_retries} attempts")
|
| 55 |
+
time.sleep(retry_delay)
|
| 56 |
+
|
| 57 |
try:
|
|
|
|
|
|
|
| 58 |
yield storage
|
| 59 |
finally:
|
| 60 |
+
if storage:
|
| 61 |
+
storage.close() # Ensure connection is closed
|
| 62 |
|
| 63 |
# Cleanup handler
|
| 64 |
def cleanup_resources():
|
|
|
|
| 314 |
print(f"Matrix operations test failed: {e}")
|
| 315 |
return
|
| 316 |
|
| 317 |
+
print("\n--- All AI Integration Tests Completed ---")
|
|
|
|
|
|