Commit 6485751
Parent(s): 9fb6014

Fixing LLM init v6

main/routes.py (CHANGED, +4 -0)
@@ -263,12 +263,15 @@ async def initialize_model(request: InitializeRequest):
     logger.info(f"Using model path: {model_path}")
 
     # Load the model
+    print("Loading model")
     llm_instance = LLM.load(
         model=model_path,
         distribute=None if request.precision or request.quantize else "auto"
     )
+    print("Done loading model")
 
     # If manual distribution is needed
+    print("Distributing model")
     if request.precision or request.quantize:
         llm_instance.distribute(
             accelerator="cuda" if request.mode == "gpu" else "cpu",
@@ -276,6 +279,7 @@ async def initialize_model(request: InitializeRequest):
             precision=request.precision,
             quantize=request.quantize
         )
+    print("Done distributing model")
 
     logger.info(
         f"Model initialized successfully with config:\n"