Factor Studios committed on
Commit
1980145
·
verified ·
1 Parent(s): a7e21c0

Upload 207 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
.gitattributes CHANGED
@@ -33,3 +33,157 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ sample_task/0001.png filter=lfs diff=lfs merge=lfs -text
37
+ sample_task/0002.png filter=lfs diff=lfs merge=lfs -text
38
+ sample_task/0003.png filter=lfs diff=lfs merge=lfs -text
39
+ sample_task/0004.png filter=lfs diff=lfs merge=lfs -text
40
+ sample_task/0005.png filter=lfs diff=lfs merge=lfs -text
41
+ sample_task/0006.png filter=lfs diff=lfs merge=lfs -text
42
+ sample_task/0007.png filter=lfs diff=lfs merge=lfs -text
43
+ sample_task/0008.png filter=lfs diff=lfs merge=lfs -text
44
+ sample_task/0009.png filter=lfs diff=lfs merge=lfs -text
45
+ sample_task/0010.png filter=lfs diff=lfs merge=lfs -text
46
+ sample_task/0011.png filter=lfs diff=lfs merge=lfs -text
47
+ sample_task/0012.png filter=lfs diff=lfs merge=lfs -text
48
+ sample_task/0013.png filter=lfs diff=lfs merge=lfs -text
49
+ sample_task/0014.png filter=lfs diff=lfs merge=lfs -text
50
+ sample_task/0015.png filter=lfs diff=lfs merge=lfs -text
51
+ sample_task/0016.png filter=lfs diff=lfs merge=lfs -text
52
+ sample_task/0017.png filter=lfs diff=lfs merge=lfs -text
53
+ sample_task/0018.png filter=lfs diff=lfs merge=lfs -text
54
+ sample_task/0019.png filter=lfs diff=lfs merge=lfs -text
55
+ sample_task/0020.png filter=lfs diff=lfs merge=lfs -text
56
+ sample_task/0021.png filter=lfs diff=lfs merge=lfs -text
57
+ sample_task/0022.png filter=lfs diff=lfs merge=lfs -text
58
+ sample_task/0023.png filter=lfs diff=lfs merge=lfs -text
59
+ sample_task/0024.png filter=lfs diff=lfs merge=lfs -text
60
+ sample_task/0025.png filter=lfs diff=lfs merge=lfs -text
61
+ sample_task/0026.png filter=lfs diff=lfs merge=lfs -text
62
+ sample_task/0027.png filter=lfs diff=lfs merge=lfs -text
63
+ sample_task/0028.png filter=lfs diff=lfs merge=lfs -text
64
+ sample_task/0029.png filter=lfs diff=lfs merge=lfs -text
65
+ sample_task/0030.png filter=lfs diff=lfs merge=lfs -text
66
+ sample_task/0031.png filter=lfs diff=lfs merge=lfs -text
67
+ sample_task/0032.png filter=lfs diff=lfs merge=lfs -text
68
+ sample_task/0033.png filter=lfs diff=lfs merge=lfs -text
69
+ sample_task/0034.png filter=lfs diff=lfs merge=lfs -text
70
+ sample_task/0035.png filter=lfs diff=lfs merge=lfs -text
71
+ sample_task/0036.png filter=lfs diff=lfs merge=lfs -text
72
+ sample_task/0037.png filter=lfs diff=lfs merge=lfs -text
73
+ sample_task/0038.png filter=lfs diff=lfs merge=lfs -text
74
+ sample_task/0039.png filter=lfs diff=lfs merge=lfs -text
75
+ sample_task/0043.png filter=lfs diff=lfs merge=lfs -text
76
+ sample_task/0044.png filter=lfs diff=lfs merge=lfs -text
77
+ sample_task/0045.png filter=lfs diff=lfs merge=lfs -text
78
+ sample_task/0046.png filter=lfs diff=lfs merge=lfs -text
79
+ sample_task/0047.png filter=lfs diff=lfs merge=lfs -text
80
+ sample_task/0048.png filter=lfs diff=lfs merge=lfs -text
81
+ sample_task/0049.png filter=lfs diff=lfs merge=lfs -text
82
+ sample_task/0050.png filter=lfs diff=lfs merge=lfs -text
83
+ sample_task/0051.png filter=lfs diff=lfs merge=lfs -text
84
+ sample_task/0052.png filter=lfs diff=lfs merge=lfs -text
85
+ sample_task/0053.png filter=lfs diff=lfs merge=lfs -text
86
+ sample_task/0054.png filter=lfs diff=lfs merge=lfs -text
87
+ sample_task/0055.png filter=lfs diff=lfs merge=lfs -text
88
+ sample_task/0056.png filter=lfs diff=lfs merge=lfs -text
89
+ sample_task/0057.png filter=lfs diff=lfs merge=lfs -text
90
+ sample_task/0059.png filter=lfs diff=lfs merge=lfs -text
91
+ sample_task/0060.png filter=lfs diff=lfs merge=lfs -text
92
+ sample_task/0061.png filter=lfs diff=lfs merge=lfs -text
93
+ sample_task/0062.png filter=lfs diff=lfs merge=lfs -text
94
+ sample_task/0063.png filter=lfs diff=lfs merge=lfs -text
95
+ sample_task/0064.png filter=lfs diff=lfs merge=lfs -text
96
+ sample_task/0065.png filter=lfs diff=lfs merge=lfs -text
97
+ sample_task/0066.png filter=lfs diff=lfs merge=lfs -text
98
+ sample_task/0067.png filter=lfs diff=lfs merge=lfs -text
99
+ sample_task/0068.png filter=lfs diff=lfs merge=lfs -text
100
+ sample_task/0069.png filter=lfs diff=lfs merge=lfs -text
101
+ sample_task/0070.png filter=lfs diff=lfs merge=lfs -text
102
+ sample_task/0072.png filter=lfs diff=lfs merge=lfs -text
103
+ sample_task/0073.png filter=lfs diff=lfs merge=lfs -text
104
+ sample_task/0074.png filter=lfs diff=lfs merge=lfs -text
105
+ sample_task/0075.png filter=lfs diff=lfs merge=lfs -text
106
+ sample_task/0076.png filter=lfs diff=lfs merge=lfs -text
107
+ sample_task/0077.png filter=lfs diff=lfs merge=lfs -text
108
+ sample_task/0078.png filter=lfs diff=lfs merge=lfs -text
109
+ sample_task/0079.png filter=lfs diff=lfs merge=lfs -text
110
+ sample_task/0080.png filter=lfs diff=lfs merge=lfs -text
111
+ sample_task/0081.png filter=lfs diff=lfs merge=lfs -text
112
+ sample_task/0082.png filter=lfs diff=lfs merge=lfs -text
113
+ sample_task/0083.png filter=lfs diff=lfs merge=lfs -text
114
+ sample_task/0084.png filter=lfs diff=lfs merge=lfs -text
115
+ sample_task/0085.png filter=lfs diff=lfs merge=lfs -text
116
+ sample_task/0086.png filter=lfs diff=lfs merge=lfs -text
117
+ sample_task/0087.png filter=lfs diff=lfs merge=lfs -text
118
+ sample_task/0088.png filter=lfs diff=lfs merge=lfs -text
119
+ sample_task/0089.png filter=lfs diff=lfs merge=lfs -text
120
+ sample_task/0090.png filter=lfs diff=lfs merge=lfs -text
121
+ sample_task/0091.png filter=lfs diff=lfs merge=lfs -text
122
+ sample_task/0092.png filter=lfs diff=lfs merge=lfs -text
123
+ sample_task/0093.png filter=lfs diff=lfs merge=lfs -text
124
+ sample_task/0094.png filter=lfs diff=lfs merge=lfs -text
125
+ sample_task/0095.png filter=lfs diff=lfs merge=lfs -text
126
+ sample_task/0096.png filter=lfs diff=lfs merge=lfs -text
127
+ sample_task/0097.png filter=lfs diff=lfs merge=lfs -text
128
+ sample_task/0098.png filter=lfs diff=lfs merge=lfs -text
129
+ sample_task/0099.png filter=lfs diff=lfs merge=lfs -text
130
+ sample_task/0100.png filter=lfs diff=lfs merge=lfs -text
131
+ sample_task/0101.png filter=lfs diff=lfs merge=lfs -text
132
+ sample_task/0102.png filter=lfs diff=lfs merge=lfs -text
133
+ sample_task/0103.png filter=lfs diff=lfs merge=lfs -text
134
+ sample_task/0104.png filter=lfs diff=lfs merge=lfs -text
135
+ sample_task/0105.png filter=lfs diff=lfs merge=lfs -text
136
+ sample_task/0107.png filter=lfs diff=lfs merge=lfs -text
137
+ sample_task/0108.png filter=lfs diff=lfs merge=lfs -text
138
+ sample_task/0109.png filter=lfs diff=lfs merge=lfs -text
139
+ sample_task/0110.png filter=lfs diff=lfs merge=lfs -text
140
+ sample_task/0111.png filter=lfs diff=lfs merge=lfs -text
141
+ sample_task/0112.png filter=lfs diff=lfs merge=lfs -text
142
+ sample_task/0113.png filter=lfs diff=lfs merge=lfs -text
143
+ sample_task/0114.png filter=lfs diff=lfs merge=lfs -text
144
+ sample_task/0115.png filter=lfs diff=lfs merge=lfs -text
145
+ sample_task/0116.png filter=lfs diff=lfs merge=lfs -text
146
+ sample_task/0117.png filter=lfs diff=lfs merge=lfs -text
147
+ sample_task/0118.png filter=lfs diff=lfs merge=lfs -text
148
+ sample_task/0119.png filter=lfs diff=lfs merge=lfs -text
149
+ sample_task/0120.png filter=lfs diff=lfs merge=lfs -text
150
+ sample_task/0121.png filter=lfs diff=lfs merge=lfs -text
151
+ sample_task/0122.png filter=lfs diff=lfs merge=lfs -text
152
+ sample_task/0123.png filter=lfs diff=lfs merge=lfs -text
153
+ sample_task/0124.png filter=lfs diff=lfs merge=lfs -text
154
+ sample_task/0125.png filter=lfs diff=lfs merge=lfs -text
155
+ sample_task/0126.png filter=lfs diff=lfs merge=lfs -text
156
+ sample_task/0127.png filter=lfs diff=lfs merge=lfs -text
157
+ sample_task/0128.png filter=lfs diff=lfs merge=lfs -text
158
+ sample_task/0129.png filter=lfs diff=lfs merge=lfs -text
159
+ sample_task/0130.png filter=lfs diff=lfs merge=lfs -text
160
+ sample_task/0131.png filter=lfs diff=lfs merge=lfs -text
161
+ sample_task/0132.png filter=lfs diff=lfs merge=lfs -text
162
+ sample_task/0133.png filter=lfs diff=lfs merge=lfs -text
163
+ sample_task/0134.png filter=lfs diff=lfs merge=lfs -text
164
+ sample_task/0135.png filter=lfs diff=lfs merge=lfs -text
165
+ sample_task/0136.png filter=lfs diff=lfs merge=lfs -text
166
+ sample_task/0137.png filter=lfs diff=lfs merge=lfs -text
167
+ sample_task/0138.png filter=lfs diff=lfs merge=lfs -text
168
+ sample_task/0139.png filter=lfs diff=lfs merge=lfs -text
169
+ sample_task/0140.png filter=lfs diff=lfs merge=lfs -text
170
+ sample_task/0141.png filter=lfs diff=lfs merge=lfs -text
171
+ sample_task/0142.png filter=lfs diff=lfs merge=lfs -text
172
+ sample_task/0143.png filter=lfs diff=lfs merge=lfs -text
173
+ sample_task/0144.png filter=lfs diff=lfs merge=lfs -text
174
+ sample_task/0145.png filter=lfs diff=lfs merge=lfs -text
175
+ sample_task/0146.png filter=lfs diff=lfs merge=lfs -text
176
+ sample_task/0147.png filter=lfs diff=lfs merge=lfs -text
177
+ sample_task/0148.png filter=lfs diff=lfs merge=lfs -text
178
+ sample_task/0149.png filter=lfs diff=lfs merge=lfs -text
179
+ sample_task/0150.png filter=lfs diff=lfs merge=lfs -text
180
+ sample_task/0151.png filter=lfs diff=lfs merge=lfs -text
181
+ sample_task/0152.png filter=lfs diff=lfs merge=lfs -text
182
+ sample_task/0153.png filter=lfs diff=lfs merge=lfs -text
183
+ sample_task/0154.png filter=lfs diff=lfs merge=lfs -text
184
+ sample_task/0155.png filter=lfs diff=lfs merge=lfs -text
185
+ sample_task/0156.png filter=lfs diff=lfs merge=lfs -text
186
+ sample_task/0157.png filter=lfs diff=lfs merge=lfs -text
187
+ sample_task/0158.png filter=lfs diff=lfs merge=lfs -text
188
+ sample_task/0159.png filter=lfs diff=lfs merge=lfs -text
189
+ sample_task/0160.png filter=lfs diff=lfs merge=lfs -text
ai_http.py CHANGED
@@ -1,5 +1,6 @@
1
  import numpy as np
2
  import time
 
3
  from typing import Dict, Any, Optional, Tuple, Union, List
4
  from enum import Enum
5
  from tensor_core import TensorCoreArray
@@ -96,27 +97,56 @@ class AIAccelerator:
96
  return False
97
  return model_id in self.model_registry and self.storage.is_model_loaded(model_id)
98
 
99
- def load_model(self, model_id: str, model: Dict[str, Any], processor: Any = None) -> bool:
100
- """Load a model into the accelerator"""
 
 
 
 
 
 
 
 
101
  try:
102
  if not self.storage:
103
  raise RuntimeError("No storage available")
104
 
105
- # Store model in local storage
106
- if not self.storage.load_model(model_id, model_data=model):
107
- raise RuntimeError("Failed to store model in local storage")
 
 
 
 
 
108
 
109
- # Update model registry and resource monitor
 
 
 
 
 
 
 
110
  self.model_registry[model_id] = {
111
- 'config': model,
 
 
112
  'loaded_at': time.time(),
113
  'processor': processor
114
  }
115
 
 
 
 
 
 
 
 
 
 
116
  # Update resource monitoring
117
  self.resource_monitor['loaded_models'].add(model_id)
118
-
119
- # Update storage monitoring if available
120
  if hasattr(self.storage, 'resource_monitor'):
121
  self.storage.resource_monitor['loaded_models'].add(model_id)
122
 
@@ -126,16 +156,16 @@ class AIAccelerator:
126
  print(f"Error loading model {model_id}: {str(e)}")
127
  return False
128
 
129
- # Model registries
130
- self.model_registry: Dict[str, Any] = {}
131
- self.tokenizer_registry: Dict[str, Any] = {}
132
- self.model_configs: Dict[str, Any] = {} # Store model architectures
133
- self.model_loaded = False
134
 
135
- # Batch processing configuration
136
- self.max_batch_size = 64
137
- self.min_batch_size = 4
138
- self.dynamic_batching = True # Enable automatic batch size adjustment
139
 
140
  def _serialize_model_config(self, config: Any) -> dict:
141
  """Convert model config to a serializable format."""
@@ -511,22 +541,56 @@ class AIAccelerator:
511
  return False
512
 
513
  def inference(self, model_id: str, input_tensor_id: str) -> Optional[np.ndarray]:
514
- """Run inference using HTTP storage"""
515
  try:
516
- # Load input tensor
517
  input_data = self.storage.load_tensor(input_tensor_id)
518
  if input_data is None:
519
  print(f"Could not load input tensor {input_tensor_id}")
520
  return None
521
 
522
- # Run inference via HTTP API
523
- result = self.storage.start_inference(model_id, input_data)
 
524
 
525
- if result and result.get('output') is not None:
526
- return result['output']
527
- else:
528
- print(f"Inference failed for model {model_id}")
 
 
 
 
 
529
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
530
 
531
  except Exception as e:
532
  print(f"Error during inference: {str(e)}")
 
1
  import numpy as np
2
  import time
3
+ import torch
4
  from typing import Dict, Any, Optional, Tuple, Union, List
5
  from enum import Enum
6
  from tensor_core import TensorCoreArray
 
97
  return False
98
  return model_id in self.model_registry and self.storage.is_model_loaded(model_id)
99
 
100
+ def load_model(self, model_id: str, model: Dict[str, Any],
101
+ processor: Any = None, model_config: Dict[str, Any] = None) -> bool:
102
+ """Load a model into the virtual GPU accelerator
103
+
104
+ Args:
105
+ model_id: Unique identifier for the model
106
+ model: Model dictionary containing layer weights and architecture
107
+ processor: Optional preprocessing/postprocessing functions
108
+ model_config: Optional model configuration
109
+ """
110
  try:
111
  if not self.storage:
112
  raise RuntimeError("No storage available")
113
 
114
+ # Extract and store model weights in virtual VRAM
115
+ weights = {}
116
+ for layer_name, layer_data in model.get("layers", {}).items():
117
+ # Store weights and biases in virtual VRAM
118
+ weight_id = f"{model_id}/{layer_name}/weight"
119
+ if not self.storage.store_tensor(weight_id, layer_data["weight"]):
120
+ raise RuntimeError(f"Failed to store weights for layer {layer_name}")
121
+ weights[layer_name] = {"weight": weight_id}
122
 
123
+ # Store bias if present
124
+ if "bias" in layer_data:
125
+ bias_id = f"{model_id}/{layer_name}/bias"
126
+ if not self.storage.store_tensor(bias_id, layer_data["bias"]):
127
+ raise RuntimeError(f"Failed to store bias for layer {layer_name}")
128
+ weights[layer_name]["bias"] = bias_id
129
+
130
+ # Update model registry with weight references and config
131
  self.model_registry[model_id] = {
132
+ 'weights': weights,
133
+ 'config': model_config or {},
134
+ 'architecture': model.get("architecture", {}),
135
  'loaded_at': time.time(),
136
  'processor': processor
137
  }
138
 
139
+ # Pre-allocate VRAM if using size limits
140
+ if hasattr(self.vram, 'pre_allocate_vram'):
141
+ total_size = sum(
142
+ np.prod(layer["weight"].shape) * 4 # Assuming float32
143
+ for layer in model.get("layers", {}).values()
144
+ )
145
+ if not self.vram.pre_allocate_vram(total_size):
146
+ raise RuntimeError("Insufficient VRAM for model weights")
147
+
148
  # Update resource monitoring
149
  self.resource_monitor['loaded_models'].add(model_id)
 
 
150
  if hasattr(self.storage, 'resource_monitor'):
151
  self.storage.resource_monitor['loaded_models'].add(model_id)
152
 
 
156
  print(f"Error loading model {model_id}: {str(e)}")
157
  return False
158
 
159
+ # # Model registries
160
+ # self.model_registry: Dict[str, Any] = {}
161
+ # self.tokenizer_registry: Dict[str, Any] = {}
162
+ # self.model_configs: Dict[str, Any] = {} # Store model architectures
163
+ # self.model_loaded = False
164
 
165
+ # # Batch processing configuration
166
+ # self.max_batch_size = 64
167
+ # self.min_batch_size = 4
168
+ # self.dynamic_batching = True # Enable automatic batch size adjustment
169
 
170
  def _serialize_model_config(self, config: Any) -> dict:
171
  """Convert model config to a serializable format."""
 
541
  return False
542
 
543
  def inference(self, model_id: str, input_tensor_id: str) -> Optional[np.ndarray]:
544
+ """Run PyTorch model inference using virtual GPU acceleration"""
545
  try:
546
+ # Load input tensor from storage
547
  input_data = self.storage.load_tensor(input_tensor_id)
548
  if input_data is None:
549
  print(f"Could not load input tensor {input_tensor_id}")
550
  return None
551
 
552
+ # Convert to PyTorch tensor and move to vGPU
553
+ from torch_vgpu import to_vgpu
554
+ input_tensor = to_vgpu(torch.from_numpy(input_data), vram=self.vram)
555
 
556
+ # Get model from registry
557
+ if not self.has_model(model_id):
558
+ print(f"Model {model_id} not loaded")
559
+ return None
560
+
561
+ model_info = self.model_registry[model_id]
562
+ model = model_info.get("model")
563
+ if not isinstance(model, torch.nn.Module):
564
+ print(f"Invalid model type for {model_id}")
565
  return None
566
+
567
+ # Move model to vGPU device
568
+ model = model.to(input_tensor.device)
569
+ model.eval()
570
+
571
+ # Run inference
572
+ with torch.no_grad():
573
+ # Apply any preprocessing from model config
574
+ if "preprocess" in model_info:
575
+ input_tensor = model_info["preprocess"](input_tensor)
576
+
577
+ # Forward pass through model on vGPU
578
+ output = model(input_tensor)
579
+
580
+ # Apply any postprocessing from model config
581
+ if "postprocess" in model_info:
582
+ output = model_info["postprocess"](output)
583
+
584
+ # Convert output to numpy and store in VRAM
585
+ output_np = output.cpu().numpy()
586
+ output_id = f"{model_id}_output_{time.time()}"
587
+ self.storage.store_tensor(output_id, output_np)
588
+
589
+ # Track compute statistics
590
+ self.total_compute_time += time.time()
591
+ self.operations_performed += 1
592
+
593
+ return output_np
594
 
595
  except Exception as e:
596
  print(f"Error during inference: {str(e)}")
http_storage.py CHANGED
@@ -46,10 +46,34 @@ class LocalStorage:
46
  for path in [self.vram_path, self.models_path, self.cache_path, self.state_path]:
47
  path.mkdir(parents=True, exist_ok=True)
48
 
 
49
  self.lock = threading.Lock()
50
  self._closing = False
51
  self._connected = True
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  # Initialize monitoring
54
  self.resource_monitor = {
55
  'vram_used': 0,
@@ -115,31 +139,25 @@ class LocalStorage:
115
  The actual initialization happens in __new__ and _init_singleton"""
116
  pass
117
 
118
- def _create_session(self):
119
- """Initialize local storage session"""
120
  try:
121
- # Create status file to track session
122
- status_path = self.base_path / "session_status.json"
123
- status_data = {
124
- "session_id": self.session_id,
125
- "created_at": time.time(),
126
- "resource_limits": {
127
- "max_vram_gb": 40, # A100 size
128
- "max_models": 5,
129
- "max_batch_size": 32
130
- }
131
- }
132
-
133
- with open(status_path, 'w') as f:
134
- json.dump(status_data, f, indent=2)
135
 
136
- logging.info(f"Local storage session created: {self.session_id}")
137
  return True
138
 
139
  except Exception as e:
140
- logging.error(f"Failed to create HTTP session: {e}")
141
- self.error_count += 1
142
- self.last_error_time = time.time()
143
  return False
144
 
145
  def _check_storage(self) -> Dict[str, Any]:
@@ -359,15 +377,9 @@ class LocalStorage:
359
 
360
  def ping(self) -> bool:
361
  """Check if local storage is accessible"""
362
- try:
363
- # Check if all storage directories exist and are accessible
364
- for path in [self.vram_path, self.models_path, self.cache_path, self.state_path]:
365
- if not path.exists() or not os.access(str(path), os.R_OK | os.W_OK):
366
- return False
367
- return True
368
- except Exception as e:
369
- logging.error(f"Storage check failed: {e}")
370
  return False
 
371
  # Compatibility aliases for existing code
372
  HTTPGPUStorage = LocalStorage
373
  WebSocketGPUStorage = LocalStorage
 
46
  for path in [self.vram_path, self.models_path, self.cache_path, self.state_path]:
47
  path.mkdir(parents=True, exist_ok=True)
48
 
49
+ # Basic state management
50
  self.lock = threading.Lock()
51
  self._closing = False
52
  self._connected = True
53
 
54
+ # Resource monitoring
55
+ self.resource_monitor = {
56
+ 'vram_used': 0,
57
+ 'active_tensors': 0,
58
+ 'loaded_models': set(),
59
+ 'last_updated': time.time()
60
+ }
61
+
62
+ # Storage statistics
63
+ self.stats = {
64
+ 'total_size': 0,
65
+ 'available_size': float('inf'),
66
+ 'model_count': 0,
67
+ 'tensor_count': 0
68
+ }
69
+
70
+ # Initialize registries
71
+ self.model_registry = {}
72
+ self.tensor_registry = {}
73
+
74
+ self.initialized = True
75
+ self._connected = True
76
+
77
  # Initialize monitoring
78
  self.resource_monitor = {
79
  'vram_used': 0,
 
139
  The actual initialization happens in __new__ and _init_singleton"""
140
  pass
141
 
142
+ def _check_storage_ready(self) -> bool:
143
+ """Check if local storage is ready for use"""
144
  try:
145
+ # Verify all required directories exist and are accessible
146
+ for path in [self.vram_path, self.models_path, self.cache_path, self.state_path]:
147
+ if not path.exists() or not os.access(str(path), os.R_OK | os.W_OK):
148
+ return False
149
+
150
+ # Update storage statistics
151
+ self.stats.update({
152
+ 'total_size': sum(f.stat().st_size for f in self.base_path.rglob('*') if f.is_file()),
153
+ 'model_count': len(list(self.models_path.glob('*'))),
154
+ 'tensor_count': len(list(self.vram_path.glob('*.npy')))
155
+ })
 
 
 
156
 
 
157
  return True
158
 
159
  except Exception as e:
160
+ logging.error(f"Storage check failed: {e}")
 
 
161
  return False
162
 
163
  def _check_storage(self) -> Dict[str, Any]:
 
377
 
378
  def ping(self) -> bool:
379
  """Check if local storage is accessible"""
380
+ if self._closing:
 
 
 
 
 
 
 
381
  return False
382
+ return self._check_storage_ready()
383
  # Compatibility aliases for existing code
384
  HTTPGPUStorage = LocalStorage
385
  WebSocketGPUStorage = LocalStorage
model_inference_flow.txt ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model Inference Flow on Virtual GPU
2
+ ================================
3
+
4
+ 1. Storage and VRAM Setup
5
+ -------------------------
6
+ [HTTPGPUStorage]
7
+ │ ╲
8
+ │ ╲ Zero-Copy
9
+ │ ╲ Memory Mapping
10
+ ▼ ▼
11
+ [Local Storage]──>[Virtual VRAM]
12
+ (Memory Pages) (Page Tables)
13
+ │ │
14
+ └──────────────┐ │
15
+ ▼ ▼
16
+ [vGPU Device]
17
+
18
+
19
+ 2. Model Loading and Device Movement
20
+ ----------------------------------
21
+ [Florence-2-Large] ---load---> [PyTorch Model]
22
+ │ │
23
+ │ ▼
24
+ │ [to_vgpu() conversion]
25
+ │ │
26
+ └─────────────────┐ │
27
+ ▼ ▼
28
+ [Model on vGPU Device]
29
+
30
+
31
+ 3. Input Processing and Inference
32
+ --------------------------------
33
+ [Input Text] -----> [Tokenizer] -----> [Tensor]
34
+
35
+
36
+ [to_vgpu() conversion]
37
+
38
+
39
+ [Tensor on vGPU]
40
+
41
+
42
+ 4. Model Inference Flow
43
+ ----------------------
44
+ [Model Forward Pass]
45
+
46
+
47
+ [vGPU Computation]
48
+
49
+
50
+ [PyTorch Output Tensor]
51
+
52
+
53
+ [Last Hidden State]
54
+ (Shape: [batch_size, seq_length, hidden_size])
55
+
56
+ Data Flow and Memory Management:
57
+ -----------------------------
58
+ 1. Storage Layer:
59
+ - HTTPGPUStorage ──> Local Storage (Memory Pages)
60
+ - Local Storage ──> Virtual VRAM (Zero-Copy)
61
+ - Virtual VRAM manages page tables pointing to local storage
62
+
63
+ 2. Memory Architecture:
64
+ - Local Storage: Physical memory pages
65
+ - Virtual VRAM: Page tables and memory mappings
66
+ - Zero-copy between Local Storage and VRAM
67
+ - Direct memory access for GPU operations
68
+
69
+ 3. Processing Flow:
70
+ - Model Layer: HF Model ──> PyTorch ──> vGPU
71
+ - Input Layer: Text ──> Tokens ──> Tensor ──> vGPU
72
+ - Output Layer: vGPU ──> PyTorch Tensor ──> Results
73
+
74
+ Key Components:
75
+ --------------
76
+ - HTTP Storage: HTTPGPUStorage (Network interface)
77
+ - Local Store: Memory pages (Physical storage)
78
+ - Virtual VRAM: Page tables (Memory management)
79
+ - Device: vGPU (Computation)
80
+ - Model: Florence-2-Large (transformer)
81
+ - Framework: PyTorch (ML operations)
82
+ - Interface: to_vgpu() (Zero-copy transfer)
83
+
84
+ Memory Management Details:
85
+ ------------------------
86
+ 1. Local Storage:
87
+ - Manages physical memory pages
88
+ - Direct mapping to virtual VRAM
89
+ - Zero-copy access for GPU ops
90
+
91
+ 2. Virtual VRAM:
92
+ - Page table management
93
+ - Memory mapping to local storage
94
+ - No physical copying of data
95
+ - Direct GPU access to memory
sample_task/0001.png ADDED

Git LFS Details

  • SHA256: 1f8b4514201fbce5e290b67267cea72866fb772cf811b19657bdd6791105630b
  • Pointer size: 131 Bytes
  • Size of remote file: 716 kB
sample_task/0002.png ADDED

Git LFS Details

  • SHA256: 1f320f8cb10de8ef31407115f7b83fe402516d6afe5bb9d9f4a19570db8db7f7
  • Pointer size: 131 Bytes
  • Size of remote file: 728 kB
sample_task/0003.png ADDED

Git LFS Details

  • SHA256: 2750284d112a8d21c49c4728689ebf0fc95bad6eb797988750bfb363a9e6c20e
  • Pointer size: 131 Bytes
  • Size of remote file: 736 kB
sample_task/0004.png ADDED

Git LFS Details

  • SHA256: 5dbe182fcde6076692d5463cb797209ef3f0a67e7a8573f5692226e099b036c1
  • Pointer size: 131 Bytes
  • Size of remote file: 746 kB
sample_task/0005.png ADDED

Git LFS Details

  • SHA256: bc88018fd76c6df064b49c7f4fdd192f97d6d2c39f6061ff08882c8a0bbba902
  • Pointer size: 131 Bytes
  • Size of remote file: 159 kB
sample_task/0006.png ADDED

Git LFS Details

  • SHA256: 90e8b8f358cca977d080447728d464d9568ebad49c630c0158f5a8b4c7ac518e
  • Pointer size: 131 Bytes
  • Size of remote file: 210 kB
sample_task/0007.png ADDED

Git LFS Details

  • SHA256: 04ec3ab3412fb54e294fb81b439226d5030c49a11cdfbf5216d4d5d5f80fb751
  • Pointer size: 131 Bytes
  • Size of remote file: 249 kB
sample_task/0008.png ADDED

Git LFS Details

  • SHA256: 8bfd8df699b2da0476e4a97f69d21e6c986c3aefa5634e5e77e262dc16f35080
  • Pointer size: 131 Bytes
  • Size of remote file: 100 kB
sample_task/0009.png ADDED

Git LFS Details

  • SHA256: 0f621bf8a22395a5361642fe1b0cc314904055f8a6d92ba8fe0367900d60f533
  • Pointer size: 131 Bytes
  • Size of remote file: 222 kB
sample_task/0010.png ADDED

Git LFS Details

  • SHA256: 7e4a3337e1b5b99dec71e83370af1acdf035b66713e900462ee39c27a0054b89
  • Pointer size: 131 Bytes
  • Size of remote file: 248 kB
sample_task/0011.png ADDED

Git LFS Details

  • SHA256: a896dc9d9bcfc64bbf508b0fa944751af9ca6b76dc5447275b9d5e27404dbedf
  • Pointer size: 131 Bytes
  • Size of remote file: 748 kB
sample_task/0012.png ADDED

Git LFS Details

  • SHA256: 2b845d65032bb45b22d8fd15c3b41799453b258cc4e41623f401fec60cedd12b
  • Pointer size: 131 Bytes
  • Size of remote file: 749 kB
sample_task/0013.png ADDED

Git LFS Details

  • SHA256: a9c3100e756b01995d94a7593de338ad098e7d4f572d1293867bb7ec9a22ceff
  • Pointer size: 131 Bytes
  • Size of remote file: 747 kB
sample_task/0014.png ADDED

Git LFS Details

  • SHA256: 7837f766bc54931f9775054511d779fefce243f02cf6bacd31dc27c6d36a0287
  • Pointer size: 131 Bytes
  • Size of remote file: 748 kB
sample_task/0015.png ADDED

Git LFS Details

  • SHA256: 37c73b74052b8eb986edba1253cdf340751418222fd351a9d2adeca5f08ba7d1
  • Pointer size: 131 Bytes
  • Size of remote file: 751 kB
sample_task/0016.png ADDED

Git LFS Details

  • SHA256: 059c91a4065017692ca31493ea01793a5e5cdd052add3574a427577b8219fcf1
  • Pointer size: 131 Bytes
  • Size of remote file: 754 kB
sample_task/0017.png ADDED

Git LFS Details

  • SHA256: a74ad65bb7e29ea96f0a12383b66e9a334c4184fddd90781bca778bdd637a1b7
  • Pointer size: 131 Bytes
  • Size of remote file: 766 kB
sample_task/0018.png ADDED

Git LFS Details

  • SHA256: a95d6bebcd6140b363a80d93a1a6697c23d85085a503a7b3ad7126c7cac65b7e
  • Pointer size: 131 Bytes
  • Size of remote file: 762 kB
sample_task/0019.png ADDED

Git LFS Details

  • SHA256: 0453cf0dbc95ec76d9f5ffa966d14bf349e624e5bf01c7667bf0aff86928f0c3
  • Pointer size: 131 Bytes
  • Size of remote file: 764 kB
sample_task/0020.png ADDED

Git LFS Details

  • SHA256: 8249c7c010a09f40ad7adf26f937c2a0f528fabac4948c9b54f9220f03a07d0d
  • Pointer size: 131 Bytes
  • Size of remote file: 762 kB
sample_task/0021.png ADDED

Git LFS Details

  • SHA256: 634c0529147ca5d3bae789d7354940e1cfdfc88067cacf9a05e64f719712376b
  • Pointer size: 131 Bytes
  • Size of remote file: 805 kB
sample_task/0022.png ADDED

Git LFS Details

  • SHA256: 1534e5af5b059a57abd6bd70795417a221b7e9578e0e3a29126c1c4b5b389846
  • Pointer size: 131 Bytes
  • Size of remote file: 799 kB
sample_task/0023.png ADDED

Git LFS Details

  • SHA256: 42b1e629a99963585139a35da734571c58d281304540c48d464cf8e4a796c2fa
  • Pointer size: 131 Bytes
  • Size of remote file: 777 kB
sample_task/0024.png ADDED

Git LFS Details

  • SHA256: 032dd2344a820a57ed04cd73f06ab7726f3c88325ea2cf38f67a5aec3f37669d
  • Pointer size: 131 Bytes
  • Size of remote file: 789 kB
sample_task/0025.png ADDED

Git LFS Details

  • SHA256: 1b66a504c1d500c67a1689eb136dcd3223830724f553fbc7278de3ec47a62897
  • Pointer size: 131 Bytes
  • Size of remote file: 803 kB
sample_task/0026.png ADDED

Git LFS Details

  • SHA256: f1de97565fd94540322add34923da6ee77f432a7350a92e845657001027335f1
  • Pointer size: 131 Bytes
  • Size of remote file: 776 kB
sample_task/0027.png ADDED

Git LFS Details

  • SHA256: ed777a5a7e8d570a3dc924df1253e1231c11ecd213cc7d9e47b5f0c8b0379ab7
  • Pointer size: 131 Bytes
  • Size of remote file: 775 kB
sample_task/0028.png ADDED

Git LFS Details

  • SHA256: 4dba3a0e7b3ea00e122a24da799633b32838968063bcd59eea931e42286480cb
  • Pointer size: 131 Bytes
  • Size of remote file: 227 kB
sample_task/0029.png ADDED

Git LFS Details

  • SHA256: 3c5f084999f98c28875612b7dbf8017321987ca2355ed4e6fd436d52c5958cbe
  • Pointer size: 131 Bytes
  • Size of remote file: 164 kB
sample_task/0030.png ADDED

Git LFS Details

  • SHA256: bdd85a27f46f3adbd30c9713940b173b2a2f8eb1463fca87975027be167b75c6
  • Pointer size: 131 Bytes
  • Size of remote file: 166 kB
sample_task/0031.png ADDED

Git LFS Details

  • SHA256: 3d98f90238f875da5956c2d6a07604f2e8e13845c6bb8405a84e06f5919a224c
  • Pointer size: 131 Bytes
  • Size of remote file: 273 kB
sample_task/0032.png ADDED

Git LFS Details

  • SHA256: c4b06225cba33b5da5cce343586b940a7415ea29aadaaf91f0601e6111fdd728
  • Pointer size: 131 Bytes
  • Size of remote file: 373 kB
sample_task/0033.png ADDED

Git LFS Details

  • SHA256: c555f78158186207144f684cb3ff49278950b76cd991a9b650928606aa626d2d
  • Pointer size: 131 Bytes
  • Size of remote file: 385 kB
sample_task/0034.png ADDED

Git LFS Details

  • SHA256: 3968b4835068d411e565c2036ebdc38446b5a3345de43bda3d5a946d2ffb4454
  • Pointer size: 131 Bytes
  • Size of remote file: 302 kB
sample_task/0035.png ADDED

Git LFS Details

  • SHA256: dff53c207c45970aa224c8c6cec80064944a0fc535e525cf8eac99312fbef6e4
  • Pointer size: 131 Bytes
  • Size of remote file: 339 kB
sample_task/0036.png ADDED

Git LFS Details

  • SHA256: ee560c52d1d6673f2098f0b365df7b04933056e836d76c35ef0114e60c53ad01
  • Pointer size: 131 Bytes
  • Size of remote file: 286 kB
sample_task/0037.png ADDED

Git LFS Details

  • SHA256: 82802e79f9808b6d5ce986b4945d2b7b618c719872e24fae6d8125c5a02c8663
  • Pointer size: 131 Bytes
  • Size of remote file: 897 kB
sample_task/0038.png ADDED

Git LFS Details

  • SHA256: 6dba4b74111f62429ca9b3b028b787ee00e31689f581c78b282d8951d3896ae7
  • Pointer size: 131 Bytes
  • Size of remote file: 948 kB
sample_task/0039.png ADDED

Git LFS Details

  • SHA256: 6620c26c22c225404fd02b8b863165a8d5c88be3693ea488eac8a9154c478334
  • Pointer size: 131 Bytes
  • Size of remote file: 976 kB
sample_task/0040.png ADDED
sample_task/0041.png ADDED
sample_task/0042.png ADDED
sample_task/0043.png ADDED

Git LFS Details

  • SHA256: 36680f49207c8cf9819896f486c22f5f0701ca646cb38d4d9949a362b034dad7
  • Pointer size: 131 Bytes
  • Size of remote file: 747 kB
sample_task/0044.png ADDED

Git LFS Details

  • SHA256: 5ef5c99eacc7fbdd522f3ce037cd6f55422ae366781ab31a00ac7ec622b1b813
  • Pointer size: 131 Bytes
  • Size of remote file: 745 kB
sample_task/0045.png ADDED

Git LFS Details

  • SHA256: 970d6681ae59e6035257930e37bf59d636368a46d13af199b8eea6cf5532231b
  • Pointer size: 131 Bytes
  • Size of remote file: 203 kB
sample_task/0046.png ADDED

Git LFS Details

  • SHA256: e6f7971719d794c9fcc8d6e06c0593b2b7d6929544de34df0aa2a3d82a8fa085
  • Pointer size: 131 Bytes
  • Size of remote file: 192 kB