Update soprano/backends/lmdeploy.py
soprano/backends/lmdeploy.py CHANGED
@@ -11,9 +11,11 @@ class LMDeployModel(BaseModel):
         assert device == 'cuda', "lmdeploy only supports cuda devices, consider changing device or using a different backend instead."
         cache_size_ratio = cache_size_mb * 1024**2 / torch.cuda.get_device_properties('cuda').total_memory
         backend_config = TurbomindEngineConfig(cache_max_entry_count=cache_size_ratio)
+        print("Loaded config.")
         self.pipeline = pipeline('ekwek/Soprano-80M',
                                  log_level='ERROR',
                                  backend_config=backend_config)
+        print("Loaded pipeline.")
 
     def infer(self,
               prompts,
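
For context, here is a minimal standalone sketch of the constructor this hunk patches. Only the lines shown in the diff come from the repo; the class skeleton, the `device` and `cache_size_mb` parameters, and their default values are assumptions added for illustration.

# Sketch of the patched constructor, assuming `device` and `cache_size_mb`
# are passed in by the caller (the diff only shows this fragment).
import torch
from lmdeploy import pipeline, TurbomindEngineConfig

class LMDeployModel:
    def __init__(self, device='cuda', cache_size_mb=2048):  # hypothetical default
        assert device == 'cuda', "lmdeploy only supports cuda devices, consider changing device or using a different backend instead."
        # cache_max_entry_count is a fraction of GPU memory, so the fixed MB
        # budget is normalized by the device's total memory.
        total = torch.cuda.get_device_properties('cuda').total_memory
        cache_size_ratio = cache_size_mb * 1024**2 / total
        backend_config = TurbomindEngineConfig(cache_max_entry_count=cache_size_ratio)
        print("Loaded config.")
        self.pipeline = pipeline('ekwek/Soprano-80M',
                                 log_level='ERROR',
                                 backend_config=backend_config)
        print("Loaded pipeline.")

The two added print calls only mark progress during startup; the ratio conversion is the interesting part, since TurbomindEngineConfig expects a fraction of GPU memory rather than an absolute size for its KV-cache budget.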