Spaces:
Paused
Paused
Commit
·
1cf1484
1
Parent(s):
6485751
Fixing LLM init v7
Browse files
- main/routes.py +4 -4
main/routes.py
CHANGED
|
@@ -263,15 +263,15 @@ async def initialize_model(request: InitializeRequest):
|
|
| 263 |
logger.info(f"Using model path: {model_path}")
|
| 264 |
|
| 265 |
# Load the model
|
| 266 |
-
|
| 267 |
llm_instance = LLM.load(
|
| 268 |
model=model_path,
|
| 269 |
distribute=None if request.precision or request.quantize else "auto"
|
| 270 |
)
|
| 271 |
-
|
| 272 |
|
| 273 |
# If manual distribution is needed
|
| 274 |
-
|
| 275 |
if request.precision or request.quantize:
|
| 276 |
llm_instance.distribute(
|
| 277 |
accelerator="cuda" if request.mode == "gpu" else "cpu",
|
|
@@ -279,7 +279,7 @@ async def initialize_model(request: InitializeRequest):
|
|
| 279 |
precision=request.precision,
|
| 280 |
quantize=request.quantize
|
| 281 |
)
|
| 282 |
-
|
| 283 |
|
| 284 |
logger.info(
|
| 285 |
f"Model initialized successfully with config:\n"
|
|
|
|
| 263 |
logger.info(f"Using model path: {model_path}")
|
| 264 |
|
| 265 |
# Load the model
|
| 266 |
+
logger.info("Loading model")
|
| 267 |
llm_instance = LLM.load(
|
| 268 |
model=model_path,
|
| 269 |
distribute=None if request.precision or request.quantize else "auto"
|
| 270 |
)
|
| 271 |
+
logger.info("Done loading model")
|
| 272 |
|
| 273 |
# If manual distribution is needed
|
| 274 |
+
logger.info("Distributing model")
|
| 275 |
if request.precision or request.quantize:
|
| 276 |
llm_instance.distribute(
|
| 277 |
accelerator="cuda" if request.mode == "gpu" else "cpu",
|
|
|
|
| 279 |
precision=request.precision,
|
| 280 |
quantize=request.quantize
|
| 281 |
)
|
| 282 |
+
logger.info("Done distributing model")
|
| 283 |
|
| 284 |
logger.info(
|
| 285 |
f"Model initialized successfully with config:\n"
|