Spaces:
Sleeping
Sleeping
Factor Studios
committed on
Update test_ai_integration.py
Browse files- test_ai_integration.py +29 -2
test_ai_integration.py
CHANGED
|
@@ -78,7 +78,21 @@ def test_ai_integration():
|
|
| 78 |
'ai_accelerators': [],
|
| 79 |
'model_id': None,
|
| 80 |
'vram': None,
|
| 81 |
-
'storage': None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
}
|
| 83 |
|
| 84 |
# Increase file descriptor limit
|
|
@@ -95,16 +109,29 @@ def test_ai_integration():
|
|
| 95 |
try:
|
| 96 |
# Use WebSocket connection manager for proper resource handling
|
| 97 |
with websocket_manager() as storage:
|
|
|
|
|
|
|
| 98 |
# Initialize virtual GPU stack with unlimited WebSocket storage
|
| 99 |
chip_for_loading = Chip(chip_id=0, vram_size_gb=None) # Unlimited storage
|
|
|
|
| 100 |
|
| 101 |
# Initialize VRAM with WebSocket storage
|
| 102 |
vram = VirtualVRAM()
|
| 103 |
vram.storage = storage # Share WebSocket connection
|
|
|
|
| 104 |
|
| 105 |
-
# Set up AI accelerator
|
| 106 |
ai_accelerator_for_loading = chip_for_loading.ai_accelerator
|
| 107 |
ai_accelerator_for_loading.vram = vram # Use WebSocket-backed VRAM
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
# Load BLIP-2 Large model directly to WebSocket storage
|
| 110 |
from transformers import AutoModelForCausalLM, AutoProcessor
|
|
|
|
| 78 |
'ai_accelerators': [],
|
| 79 |
'model_id': None,
|
| 80 |
'vram': None,
|
| 81 |
+
'storage': None,
|
| 82 |
+
'model_config': None,
|
| 83 |
+
'tensor_registry': {},
|
| 84 |
+
'initialized': False
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
# Initialize global tensor registry
|
| 88 |
+
global_tensor_registry = {
|
| 89 |
+
'model_tensors': {},
|
| 90 |
+
'runtime_tensors': {},
|
| 91 |
+
'placeholder_tensors': {},
|
| 92 |
+
'stats': {
|
| 93 |
+
'total_vram_used': 0,
|
| 94 |
+
'active_tensors': 0
|
| 95 |
+
}
|
| 96 |
}
|
| 97 |
|
| 98 |
# Increase file descriptor limit
|
|
|
|
| 109 |
try:
|
| 110 |
# Use WebSocket connection manager for proper resource handling
|
| 111 |
with websocket_manager() as storage:
|
| 112 |
+
components['storage'] = storage # Save storage reference
|
| 113 |
+
|
| 114 |
# Initialize virtual GPU stack with unlimited WebSocket storage
|
| 115 |
chip_for_loading = Chip(chip_id=0, vram_size_gb=None) # Unlimited storage
|
| 116 |
+
components['chips'].append(chip_for_loading)
|
| 117 |
|
| 118 |
# Initialize VRAM with WebSocket storage
|
| 119 |
vram = VirtualVRAM()
|
| 120 |
vram.storage = storage # Share WebSocket connection
|
| 121 |
+
components['vram'] = vram
|
| 122 |
|
| 123 |
+
# Set up AI accelerator with proper initialization
|
| 124 |
ai_accelerator_for_loading = chip_for_loading.ai_accelerator
|
| 125 |
ai_accelerator_for_loading.vram = vram # Use WebSocket-backed VRAM
|
| 126 |
+
ai_accelerator_for_loading.initialize_tensor_cores() # Ensure tensor cores are ready
|
| 127 |
+
components['ai_accelerators'].append(ai_accelerator_for_loading)
|
| 128 |
+
|
| 129 |
+
# Initialize model registry in WebSocket storage
|
| 130 |
+
storage.store_state("model_registry", "state", {
|
| 131 |
+
"initialized": True,
|
| 132 |
+
"max_vram": None, # Unlimited
|
| 133 |
+
"active_models": {}
|
| 134 |
+
})
|
| 135 |
|
| 136 |
# Load BLIP-2 Large model directly to WebSocket storage
|
| 137 |
from transformers import AutoModelForCausalLM, AutoProcessor
|