Spaces:
Sleeping
Sleeping
Factor Studios
committed on
Upload test_ai_integration.py
Browse files- test_ai_integration.py +46 -15
test_ai_integration.py
CHANGED
|
@@ -184,15 +184,28 @@ def test_ai_integration():
|
|
| 184 |
model_size = sum(p.numel() * p.element_size() for p in model.parameters())
|
| 185 |
print(f"Model size: {model_size / (1024**3):.2f} GB")
|
| 186 |
|
| 187 |
-
#
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
assert ai_accelerator_for_loading.has_model(model_id), "Model not found in WebSocket storage after loading."
|
| 197 |
|
| 198 |
# Store model parameters in components dict
|
|
@@ -371,13 +384,31 @@ def test_ai_integration():
|
|
| 371 |
# Load image section from WebSocket storage
|
| 372 |
tensor_id = f"input_image/{img_name}"
|
| 373 |
|
| 374 |
-
#
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 381 |
|
| 382 |
elapsed = time.time() - start_time
|
| 383 |
|
|
|
|
| 184 |
model_size = sum(p.numel() * p.element_size() for p in model.parameters())
|
| 185 |
print(f"Model size: {model_size / (1024**3):.2f} GB")
|
| 186 |
|
| 187 |
+
# Upload model weights directly to WebSocket storage
|
| 188 |
+
print("Uploading model weights to WebSocket storage...")
|
| 189 |
+
for name, param in model.state_dict().items():
|
| 190 |
+
# Convert tensor to numpy and upload
|
| 191 |
+
weight_data = param.cpu().numpy()
|
| 192 |
+
storage.store_tensor(f"model_weights/{model_id}/{name}", weight_data)
|
| 193 |
+
|
| 194 |
+
# Store minimal model info without serializing the config
|
| 195 |
+
storage.store_state(f"models/{model_id}", "info", {
|
| 196 |
+
"name": model_id,
|
| 197 |
+
"size_bytes": model_size,
|
| 198 |
+
"num_parameters": sum(p.numel() for p in model.parameters()),
|
| 199 |
+
"weight_keys": list(model.state_dict().keys())
|
| 200 |
+
})
|
| 201 |
|
| 202 |
+
# Set model reference without serializing the full model
|
| 203 |
+
ai_accelerator_for_loading.model_refs[model_id] = {
|
| 204 |
+
"weight_prefix": f"model_weights/{model_id}",
|
| 205 |
+
"size": model_size
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
print(f"Model weights uploaded successfully to WebSocket storage")
|
| 209 |
assert ai_accelerator_for_loading.has_model(model_id), "Model not found in WebSocket storage after loading."
|
| 210 |
|
| 211 |
# Store model parameters in components dict
|
|
|
|
| 384 |
# Load image section from WebSocket storage
|
| 385 |
tensor_id = f"input_image/{img_name}"
|
| 386 |
|
| 387 |
+
# Load weights from WebSocket storage and run inference
|
| 388 |
+
try:
|
| 389 |
+
# Get model info
|
| 390 |
+
model_info = accelerator.storage.load_state(f"models/{model_id}", "info")
|
| 391 |
+
weight_prefix = f"model_weights/{model_id}"
|
| 392 |
+
|
| 393 |
+
# Load input tensor
|
| 394 |
+
input_tensor = accelerator.storage.load_tensor(tensor_id)
|
| 395 |
+
|
| 396 |
+
# Run inference with direct weight access
|
| 397 |
+
result = accelerator.inference_with_ws_weights(
|
| 398 |
+
model_id=model_id,
|
| 399 |
+
input_tensor=input_tensor,
|
| 400 |
+
weight_prefix=weight_prefix
|
| 401 |
+
)
|
| 402 |
+
|
| 403 |
+
# Store result in WebSocket storage
|
| 404 |
+
if result is not None:
|
| 405 |
+
storage.store_tensor(f"results/chip_{i}/{img_name}", result)
|
| 406 |
+
results.append(result)
|
| 407 |
+
else:
|
| 408 |
+
logging.error(f"Inference returned None for chip {i}")
|
| 409 |
+
except Exception as e:
|
| 410 |
+
logging.error(f"Inference failed on chip {i}: {str(e)}")
|
| 411 |
+
raise
|
| 412 |
|
| 413 |
elapsed = time.time() - start_time
|
| 414 |
|