Spaces:

factorstudios
/

INTAI

Sleeping

App Files Files Community

Factor Studios committed on Aug 12, 2025

Commit

373ab21

verified ·

1 Parent(s): 5e36ec1

Update test_ai_integration.py

Browse files

Files changed (1) hide show

test_ai_integration.py +63 -13

test_ai_integration.py CHANGED Viewed

@@ -111,19 +111,69 @@ def test_ai_integration():
         model_id = "microsoft/florence-2-large"
         print(f"Loading model {model_id} directly to WebSocket storage...")
-        # Load model and processor directly to WebSocket storage
-        model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
-        processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
-        # Store model in WebSocket storage without CPU intermediary
-        ai_accelerator_for_loading.load_model(model_id, model, processor)
-        print(f"Model '{model_id}' loaded successfully to WebSocket storage.")
-        assert ai_accelerator_for_loading.has_model(model_id), "Model not found in WebSocket storage after loading."
-        # Clear any CPU-side model data
-        model = None
-        import gc
-        gc.collect()
     except Exception as e:
         print(f"Model loading test failed: {e}")

         model_id = "microsoft/florence-2-large"
         print(f"Loading model {model_id} directly to WebSocket storage...")
+        try:
+            # Load model and processor with proper error handling
+            model = AutoModelForCausalLM.from_pretrained(
+                model_id,
+                trust_remote_code=True,
+                device_map="auto",  # Allow automatic device mapping
+                torch_dtype="auto"   # Use appropriate dtype
+            )
+            processor = AutoProcessor.from_pretrained(
+                model_id,
+                trust_remote_code=True
+            )
+            # Calculate model size for proper VRAM allocation
+            model_size = sum(p.numel() * p.element_size() for p in model.parameters())
+            print(f"Model size: {model_size / (1024**3):.2f} GB")
+            # Store model in WebSocket storage with size information
+            ai_accelerator_for_loading.load_model(
+                model_id=model_id,
+                model=model,
+                processor=processor,
+                model_config={
+                    "size_bytes": model_size,
+                    "unlimited_vram": True,
+                    "allow_resize": True
+                }
+            )
+            print(f"Model '{model_id}' loaded successfully to WebSocket storage.")
+            assert ai_accelerator_for_loading.has_model(model_id), "Model not found in WebSocket storage after loading."
+            # Store model parameters in components dict
+            components['model_id'] = model_id
+            components['model_size'] = model_size
+            # Clear any CPU-side model data
+            model = None
+            processor = None
+            import gc
+            gc.collect()
+        except Exception as e:
+            print(f"Detailed model loading error: {str(e)}")
+            print("Falling back to zero-copy tensor mode...")
+            # Try loading with zero-copy tensor mode
+            try:
+                ai_accelerator_for_loading.load_model(
+                    model_id=model_id,
+                    model=None,  # Use zero-copy mode
+                    processor=None,
+                    model_config={
+                        "zero_copy": True,
+                        "unlimited_vram": True,
+                        "allow_resize": True
+                    }
+                )
+                components['model_id'] = model_id
+                print("Successfully loaded model in zero-copy mode")
+            except Exception as e2:
+                print(f"Zero-copy fallback also failed: {str(e2)}")
+                raise
     except Exception as e:
         print(f"Model loading test failed: {e}")