girish00 committed on
Commit
f204dba
·
verified ·
1 Parent(s): f2d1187

update endpoint helper files

Browse files
Files changed (1) hide show
  1. infer_local.py +44 -26
infer_local.py CHANGED
@@ -286,13 +286,19 @@ def main():
286
  parser.add_argument("--base-model", type=str, default="Qwen/Qwen2.5-Coder-0.5B-Instruct")
287
  parser.add_argument("--prompt", type=str, required=True)
288
  parser.add_argument("--max-new-tokens", type=int, default=320)
289
- parser.add_argument("--temperature", type=float, default=0.25)
290
- parser.add_argument("--top-p", type=float, default=0.9)
291
- parser.add_argument("--do-sample", action="store_true")
292
- args = parser.parse_args()
293
-
294
- if not os.path.exists(args.model_path):
295
- raise FileNotFoundError(
 
 
 
 
 
 
296
  f"Model path not found: {args.model_path}. Train first using run_pipeline.py."
297
  )
298
 
@@ -301,20 +307,28 @@ def main():
301
  full_model_weights_present = has_full_model_weights(args.model_path)
302
 
303
  if os.path.exists(adapter_config_path) and adapter_weights_present:
304
- peft_config = PeftConfig.from_pretrained(args.model_path)
305
- base_model_name = peft_config.base_model_name_or_path or args.base_model
306
- tokenizer = AutoTokenizer.from_pretrained(base_model_name)
307
- base_model = AutoModelForCausalLM.from_pretrained(
308
- base_model_name,
309
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
310
- )
311
- model = PeftModel.from_pretrained(base_model, args.model_path)
312
- elif full_model_weights_present and not os.path.exists(adapter_config_path):
313
- tokenizer = AutoTokenizer.from_pretrained(args.model_path)
314
- model = AutoModelForCausalLM.from_pretrained(
315
- args.model_path,
316
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
317
- )
 
 
 
 
 
 
 
 
318
  else:
319
  # Graceful fallback when local model folder has config/tokenizer but no weight files.
320
  fallback_base = args.base_model
@@ -343,11 +357,15 @@ def main():
343
  ),
344
  file=sys.stderr,
345
  )
346
- tokenizer = AutoTokenizer.from_pretrained(fallback_base)
347
- model = AutoModelForCausalLM.from_pretrained(
348
- fallback_base,
349
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
350
- )
 
 
 
 
351
  if tokenizer.pad_token is None:
352
  tokenizer.pad_token = tokenizer.eos_token
353
  model.eval()
 
286
  parser.add_argument("--base-model", type=str, default="Qwen/Qwen2.5-Coder-0.5B-Instruct")
287
  parser.add_argument("--prompt", type=str, required=True)
288
  parser.add_argument("--max-new-tokens", type=int, default=320)
289
+ parser.add_argument("--temperature", type=float, default=0.25)
290
+ parser.add_argument("--top-p", type=float, default=0.9)
291
+ parser.add_argument("--do-sample", action="store_true")
292
+ parser.add_argument(
293
+ "--allow-downloads",
294
+ action="store_true",
295
+ help="Allow Transformers to download missing model files from Hugging Face.",
296
+ )
297
+ args = parser.parse_args()
298
+ local_files_only = not args.allow_downloads
299
+
300
+ if not os.path.exists(args.model_path):
301
+ raise FileNotFoundError(
302
  f"Model path not found: {args.model_path}. Train first using run_pipeline.py."
303
  )
304
 
 
307
  full_model_weights_present = has_full_model_weights(args.model_path)
308
 
309
  if os.path.exists(adapter_config_path) and adapter_weights_present:
310
+ peft_config = PeftConfig.from_pretrained(args.model_path)
311
+ base_model_name = peft_config.base_model_name_or_path or args.base_model
312
+ tokenizer = AutoTokenizer.from_pretrained(
313
+ base_model_name,
314
+ local_files_only=local_files_only,
315
+ )
316
+ base_model = AutoModelForCausalLM.from_pretrained(
317
+ base_model_name,
318
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
319
+ local_files_only=local_files_only,
320
+ )
321
+ model = PeftModel.from_pretrained(base_model, args.model_path)
322
+ elif full_model_weights_present and not os.path.exists(adapter_config_path):
323
+ tokenizer = AutoTokenizer.from_pretrained(
324
+ args.model_path,
325
+ local_files_only=local_files_only,
326
+ )
327
+ model = AutoModelForCausalLM.from_pretrained(
328
+ args.model_path,
329
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
330
+ local_files_only=local_files_only,
331
+ )
332
  else:
333
  # Graceful fallback when local model folder has config/tokenizer but no weight files.
334
  fallback_base = args.base_model
 
357
  ),
358
  file=sys.stderr,
359
  )
360
+ tokenizer = AutoTokenizer.from_pretrained(
361
+ fallback_base,
362
+ local_files_only=local_files_only,
363
+ )
364
+ model = AutoModelForCausalLM.from_pretrained(
365
+ fallback_base,
366
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
367
+ local_files_only=local_files_only,
368
+ )
369
  if tokenizer.pad_token is None:
370
  tokenizer.pad_token = tokenizer.eos_token
371
  model.eval()