Spaces:
Running
Running
[debug] zeroGPU
Browse files- app.py +31 -3
- rosetta/baseline/__pycache__/multi_stage.cpython-310.pyc +0 -0
- rosetta/baseline/__pycache__/multi_stage.cpython-312.pyc +0 -0
- rosetta/baseline/__pycache__/two_stage_rosetta.cpython-310.pyc +0 -0
- rosetta/baseline/__pycache__/two_stage_rosetta.cpython-312.pyc +0 -0
- rosetta/model/__pycache__/__init__.cpython-310.pyc +0 -0
- rosetta/model/__pycache__/aggregator.cpython-310.pyc +0 -0
- rosetta/model/__pycache__/aggregator.cpython-312.pyc +0 -0
- rosetta/model/__pycache__/aligner.cpython-310.pyc +0 -0
- rosetta/model/__pycache__/aligner.cpython-312.pyc +0 -0
- rosetta/model/__pycache__/all_in_one_projector.cpython-310.pyc +0 -0
- rosetta/model/__pycache__/all_in_one_projector.cpython-312.pyc +0 -0
- rosetta/model/__pycache__/cache.cpython-310.pyc +0 -0
- rosetta/model/__pycache__/oracle.cpython-310.pyc +0 -0
- rosetta/model/__pycache__/oracle.cpython-312.pyc +0 -0
- rosetta/model/__pycache__/projector.cpython-310.pyc +0 -0
- rosetta/model/__pycache__/projector.cpython-312.pyc +0 -0
- rosetta/model/__pycache__/sampling.cpython-310.pyc +0 -0
- rosetta/model/__pycache__/sampling.cpython-312.pyc +0 -0
- rosetta/model/__pycache__/wrapper.cpython-310.pyc +0 -0
- rosetta/model/__pycache__/wrapper.cpython-312.pyc +0 -0
- rosetta/train/__pycache__/__init__.cpython-310.pyc +0 -0
- rosetta/train/__pycache__/__init__.cpython-312.pyc +0 -0
- rosetta/train/__pycache__/dataset_adapters.cpython-310.pyc +0 -0
- rosetta/train/__pycache__/dataset_adapters.cpython-312.pyc +0 -0
- rosetta/train/__pycache__/model_utils.cpython-310.pyc +0 -0
- rosetta/train/__pycache__/model_utils.cpython-312.pyc +0 -0
- rosetta/train/__pycache__/trainer.cpython-310.pyc +0 -0
- rosetta/train/__pycache__/trainer.cpython-312.pyc +0 -0
- rosetta/utils/__pycache__/evaluate.cpython-310.pyc +0 -0
- rosetta/utils/__pycache__/evaluate.cpython-312.pyc +0 -0
- rosetta/utils/__pycache__/multi_stage.cpython-310.pyc +0 -0
- rosetta/utils/__pycache__/multi_stage.cpython-312.pyc +0 -0
- rosetta/utils/__pycache__/registry.cpython-310.pyc +0 -0
- rosetta/utils/__pycache__/registry.cpython-312.pyc +0 -0
app.py
CHANGED
|
@@ -57,7 +57,11 @@ class ModelManager:
|
|
| 57 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 58 |
else:
|
| 59 |
self.device = torch.device(device)
|
|
|
|
|
|
|
| 60 |
print(f"Using device: {self.device}")
|
|
|
|
|
|
|
| 61 |
if ZEROGPU_AVAILABLE:
|
| 62 |
print("ZeroGPU detected: Models will be loaded to CUDA (decorator handles allocation)")
|
| 63 |
|
|
@@ -104,8 +108,10 @@ class ModelManager:
|
|
| 104 |
self.single_model, self.single_tokenizer = load_hf_model(
|
| 105 |
self.single_model_name, self.device
|
| 106 |
)
|
|
|
|
|
|
|
| 107 |
set_default_chat_template(self.single_tokenizer, self.single_model_name)
|
| 108 |
-
print("[Single] ✓ Model loaded")
|
| 109 |
|
| 110 |
def _load_t2t_model(self):
|
| 111 |
"""Load two-stage model."""
|
|
@@ -121,7 +127,10 @@ class ModelManager:
|
|
| 121 |
device=str(self.device),
|
| 122 |
background_prompt=self.t2t_background_prompt
|
| 123 |
)
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
def _load_c2c_model(self):
|
| 127 |
"""Load Rosetta (C2C) model."""
|
|
@@ -178,7 +187,9 @@ class ModelManager:
|
|
| 178 |
self.c2c_model, self.c2c_tokenizer = load_rosetta_model(
|
| 179 |
model_config, eval_config, self.device
|
| 180 |
)
|
| 181 |
-
|
|
|
|
|
|
|
| 182 |
|
| 183 |
def _load_all_models(self):
|
| 184 |
"""Load all models sequentially."""
|
|
@@ -221,6 +232,11 @@ class ModelManager:
|
|
| 221 |
def generate_single(self, user_input: str) -> Generator[str, None, None]:
|
| 222 |
"""Generate response from single model with streaming."""
|
| 223 |
# @spaces.GPU decorator handles GPU allocation automatically
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
messages = [{"role": "system", "content": ""}, {"role": "user", "content": user_input}]
|
| 225 |
text = self.single_tokenizer.apply_chat_template(
|
| 226 |
messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
|
|
@@ -256,6 +272,13 @@ class ModelManager:
|
|
| 256 |
def generate_t2t(self, user_input: str) -> Generator[tuple[str, str], None, None]:
|
| 257 |
"""Generate response from T2T model with streaming (returns context, answer)."""
|
| 258 |
# @spaces.GPU decorator handles GPU allocation automatically
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
|
| 260 |
# Stage 1: Context generation
|
| 261 |
context_streamer = TextIteratorStreamer(
|
|
@@ -342,6 +365,11 @@ class ModelManager:
|
|
| 342 |
def generate_c2c(self, user_input: str) -> Generator[str, None, None]:
|
| 343 |
"""Generate response from C2C model with streaming."""
|
| 344 |
# @spaces.GPU decorator handles GPU allocation automatically
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
messages = [{"role": "system", "content": ""}, {"role": "user", "content": user_input}]
|
| 346 |
text = self.c2c_tokenizer.apply_chat_template(
|
| 347 |
messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
|
|
|
|
| 57 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 58 |
else:
|
| 59 |
self.device = torch.device(device)
|
| 60 |
+
|
| 61 |
+
# Debug information
|
| 62 |
print(f"Using device: {self.device}")
|
| 63 |
+
print(f"CUDA available: {torch.cuda.is_available()}")
|
| 64 |
+
print(f"CUDA device count: {torch.cuda.device_count() if torch.cuda.is_available() else 0}")
|
| 65 |
if ZEROGPU_AVAILABLE:
|
| 66 |
print("ZeroGPU detected: Models will be loaded to CUDA (decorator handles allocation)")
|
| 67 |
|
|
|
|
| 108 |
self.single_model, self.single_tokenizer = load_hf_model(
|
| 109 |
self.single_model_name, self.device
|
| 110 |
)
|
| 111 |
+
# Explicitly move model to device (required for ZeroGPU)
|
| 112 |
+
self.single_model = self.single_model.to(self.device)
|
| 113 |
set_default_chat_template(self.single_tokenizer, self.single_model_name)
|
| 114 |
+
print(f"[Single] ✓ Model loaded on {self.single_model.device}")
|
| 115 |
|
| 116 |
def _load_t2t_model(self):
|
| 117 |
"""Load two-stage model."""
|
|
|
|
| 127 |
device=str(self.device),
|
| 128 |
background_prompt=self.t2t_background_prompt
|
| 129 |
)
|
| 130 |
+
# Explicitly move models to device (required for ZeroGPU)
|
| 131 |
+
self.t2t_model.context_model = self.t2t_model.context_model.to(self.device)
|
| 132 |
+
self.t2t_model.answer_model = self.t2t_model.answer_model.to(self.device)
|
| 133 |
+
print(f"[T2T] ✓ Models loaded on {self.t2t_model.context_model.device} and {self.t2t_model.answer_model.device}")
|
| 134 |
|
| 135 |
def _load_c2c_model(self):
|
| 136 |
"""Load Rosetta (C2C) model."""
|
|
|
|
| 187 |
self.c2c_model, self.c2c_tokenizer = load_rosetta_model(
|
| 188 |
model_config, eval_config, self.device
|
| 189 |
)
|
| 190 |
+
# Explicitly move model to device (required for ZeroGPU)
|
| 191 |
+
self.c2c_model = self.c2c_model.to(self.device)
|
| 192 |
+
print(f"[C2C] ✓ Model loaded on {self.c2c_model.device}")
|
| 193 |
|
| 194 |
def _load_all_models(self):
|
| 195 |
"""Load all models sequentially."""
|
|
|
|
| 232 |
def generate_single(self, user_input: str) -> Generator[str, None, None]:
|
| 233 |
"""Generate response from single model with streaming."""
|
| 234 |
# @spaces.GPU decorator handles GPU allocation automatically
|
| 235 |
+
# Ensure model is on correct device (ZeroGPU may move it)
|
| 236 |
+
if self.single_model.device != self.device:
|
| 237 |
+
print(f"[Single] Moving model from {self.single_model.device} to {self.device}")
|
| 238 |
+
self.single_model = self.single_model.to(self.device)
|
| 239 |
+
|
| 240 |
messages = [{"role": "system", "content": ""}, {"role": "user", "content": user_input}]
|
| 241 |
text = self.single_tokenizer.apply_chat_template(
|
| 242 |
messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
|
|
|
|
| 272 |
def generate_t2t(self, user_input: str) -> Generator[tuple[str, str], None, None]:
|
| 273 |
"""Generate response from T2T model with streaming (returns context, answer)."""
|
| 274 |
# @spaces.GPU decorator handles GPU allocation automatically
|
| 275 |
+
# Ensure models are on correct device (ZeroGPU may move them)
|
| 276 |
+
if self.t2t_model.context_model.device != self.device:
|
| 277 |
+
print(f"[T2T] Moving context model from {self.t2t_model.context_model.device} to {self.device}")
|
| 278 |
+
self.t2t_model.context_model = self.t2t_model.context_model.to(self.device)
|
| 279 |
+
if self.t2t_model.answer_model.device != self.device:
|
| 280 |
+
print(f"[T2T] Moving answer model from {self.t2t_model.answer_model.device} to {self.device}")
|
| 281 |
+
self.t2t_model.answer_model = self.t2t_model.answer_model.to(self.device)
|
| 282 |
|
| 283 |
# Stage 1: Context generation
|
| 284 |
context_streamer = TextIteratorStreamer(
|
|
|
|
| 365 |
def generate_c2c(self, user_input: str) -> Generator[str, None, None]:
|
| 366 |
"""Generate response from C2C model with streaming."""
|
| 367 |
# @spaces.GPU decorator handles GPU allocation automatically
|
| 368 |
+
# Ensure model is on correct device (ZeroGPU may move it)
|
| 369 |
+
if self.c2c_model.device != self.device:
|
| 370 |
+
print(f"[C2C] Moving model from {self.c2c_model.device} to {self.device}")
|
| 371 |
+
self.c2c_model = self.c2c_model.to(self.device)
|
| 372 |
+
|
| 373 |
messages = [{"role": "system", "content": ""}, {"role": "user", "content": user_input}]
|
| 374 |
text = self.c2c_tokenizer.apply_chat_template(
|
| 375 |
messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
|
rosetta/baseline/__pycache__/multi_stage.cpython-310.pyc
DELETED
|
Binary file (24.3 kB)
|
|
|
rosetta/baseline/__pycache__/multi_stage.cpython-312.pyc
DELETED
|
Binary file (34.7 kB)
|
|
|
rosetta/baseline/__pycache__/two_stage_rosetta.cpython-310.pyc
DELETED
|
Binary file (9.34 kB)
|
|
|
rosetta/baseline/__pycache__/two_stage_rosetta.cpython-312.pyc
DELETED
|
Binary file (13.2 kB)
|
|
|
rosetta/model/__pycache__/__init__.cpython-310.pyc
DELETED
|
Binary file (419 Bytes)
|
|
|
rosetta/model/__pycache__/aggregator.cpython-310.pyc
DELETED
|
Binary file (5.83 kB)
|
|
|
rosetta/model/__pycache__/aggregator.cpython-312.pyc
DELETED
|
Binary file (8.53 kB)
|
|
|
rosetta/model/__pycache__/aligner.cpython-310.pyc
DELETED
|
Binary file (16.3 kB)
|
|
|
rosetta/model/__pycache__/aligner.cpython-312.pyc
DELETED
|
Binary file (24.2 kB)
|
|
|
rosetta/model/__pycache__/all_in_one_projector.cpython-310.pyc
DELETED
|
Binary file (27.8 kB)
|
|
|
rosetta/model/__pycache__/all_in_one_projector.cpython-312.pyc
DELETED
|
Binary file (53.5 kB)
|
|
|
rosetta/model/__pycache__/cache.cpython-310.pyc
DELETED
|
Binary file (980 Bytes)
|
|
|
rosetta/model/__pycache__/oracle.cpython-310.pyc
DELETED
|
Binary file (13.9 kB)
|
|
|
rosetta/model/__pycache__/oracle.cpython-312.pyc
DELETED
|
Binary file (23.5 kB)
|
|
|
rosetta/model/__pycache__/projector.cpython-310.pyc
DELETED
|
Binary file (30.9 kB)
|
|
|
rosetta/model/__pycache__/projector.cpython-312.pyc
DELETED
|
Binary file (56 kB)
|
|
|
rosetta/model/__pycache__/sampling.cpython-310.pyc
DELETED
|
Binary file (2.09 kB)
|
|
|
rosetta/model/__pycache__/sampling.cpython-312.pyc
DELETED
|
Binary file (3.49 kB)
|
|
|
rosetta/model/__pycache__/wrapper.cpython-310.pyc
DELETED
|
Binary file (18.6 kB)
|
|
|
rosetta/model/__pycache__/wrapper.cpython-312.pyc
DELETED
|
Binary file (33.8 kB)
|
|
|
rosetta/train/__pycache__/__init__.cpython-310.pyc
DELETED
|
Binary file (505 Bytes)
|
|
|
rosetta/train/__pycache__/__init__.cpython-312.pyc
DELETED
|
Binary file (616 Bytes)
|
|
|
rosetta/train/__pycache__/dataset_adapters.cpython-310.pyc
DELETED
|
Binary file (50 kB)
|
|
|
rosetta/train/__pycache__/dataset_adapters.cpython-312.pyc
DELETED
|
Binary file (73.3 kB)
|
|
|
rosetta/train/__pycache__/model_utils.cpython-310.pyc
DELETED
|
Binary file (3.99 kB)
|
|
|
rosetta/train/__pycache__/model_utils.cpython-312.pyc
DELETED
|
Binary file (5.58 kB)
|
|
|
rosetta/train/__pycache__/trainer.cpython-310.pyc
DELETED
|
Binary file (2.56 kB)
|
|
|
rosetta/train/__pycache__/trainer.cpython-312.pyc
DELETED
|
Binary file (3.74 kB)
|
|
|
rosetta/utils/__pycache__/evaluate.cpython-310.pyc
DELETED
|
Binary file (15.8 kB)
|
|
|
rosetta/utils/__pycache__/evaluate.cpython-312.pyc
DELETED
|
Binary file (24 kB)
|
|
|
rosetta/utils/__pycache__/multi_stage.cpython-310.pyc
DELETED
|
Binary file (5.22 kB)
|
|
|
rosetta/utils/__pycache__/multi_stage.cpython-312.pyc
DELETED
|
Binary file (7.49 kB)
|
|
|
rosetta/utils/__pycache__/registry.cpython-310.pyc
DELETED
|
Binary file (7.43 kB)
|
|
|
rosetta/utils/__pycache__/registry.cpython-312.pyc
DELETED
|
Binary file (9.98 kB)
|
|
|