jdesiree committed on
Commit
d03dc7e
·
verified ·
1 Parent(s): 0c423c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -19
app.py CHANGED
@@ -302,42 +302,42 @@ class Phi3MiniEducationalLLM(Runnable):
302
  current_time = datetime.now()
303
 
304
  self.model_name = model_path
305
-
306
  try:
307
- # Load tokenizer - Phi-3 requires trust_remote_code
308
  self.tokenizer = AutoTokenizer.from_pretrained(
309
  model_path,
310
  trust_remote_code=True,
311
  token=hf_token
312
  )
313
 
314
- # Load model with ZeroGPU-optimized settings
315
- self.model = AutoModelForCausalLM.from_pretrained(
316
- model_path,
317
- torch_dtype=torch.float16,
318
- device_map="auto", # This will work with ZeroGPU allocation
319
- trust_remote_code=True,
320
- low_cpu_mem_usage=True,
321
- token=hf_token,
322
- attn_implementation="eager"
323
- )
324
-
325
- # Success path - log timing
326
- end_Loading_Model_time = time.perf_counter()
327
- Loading_Model_time = end_Loading_Model_time - start_Loading_Model_time
328
- log_metric(f"Model Load time: {Loading_Model_time:0.4f} seconds. Model: {model_path}. Timestamp: {current_time:%Y-%m-%d %H:%M:%S}")
329
 
330
  except Exception as e:
331
- logger.error(f"Failed to load Phi-3-mini model {model_path}: {e}")
332
  raise
333
 
334
  # Ensure pad token exists
335
  if self.tokenizer.pad_token is None:
336
  self.tokenizer.pad_token = self.tokenizer.eos_token
337
 
338
- # Initialize TextIteratorStreamer
339
  self.streamer = None
340
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
  def _format_chat_template(self, prompt: str) -> str:
342
  """Format prompt using Phi-3's chat template"""
343
  try:
 
302
  current_time = datetime.now()
303
 
304
  self.model_name = model_path
305
+
306
  try:
307
+ # Load tokenizer
308
  self.tokenizer = AutoTokenizer.from_pretrained(
309
  model_path,
310
  trust_remote_code=True,
311
  token=hf_token
312
  )
313
 
314
+ # Store model path instead of loading model immediately
315
+ self.model_path = model_path
316
+ self.model = None # Load model lazily in GPU methods
 
 
 
 
 
 
 
 
 
 
 
 
317
 
318
  except Exception as e:
319
+ logger.error(f"Failed to initialize Phi-3-mini model {model_path}: {e}")
320
  raise
321
 
322
  # Ensure pad token exists
323
  if self.tokenizer.pad_token is None:
324
  self.tokenizer.pad_token = self.tokenizer.eos_token
325
 
 
326
  self.streamer = None
327
 
328
def _load_model_if_needed(self):
    """Lazily load the Phi-3-mini model on first use inside a GPU context.

    The model is deliberately NOT loaded in __init__ (ZeroGPU allocates the
    GPU only inside decorated methods); instead the path is stored and the
    model is constructed here on demand and cached on self.model.

    Returns:
        The loaded causal-LM model instance (cached after the first call).
    """
    if self.model is None:
        # BUG FIX: the original referenced a bare `hf_token` name that is not
        # in scope in this method (it was an __init__ parameter and was never
        # stored on self), which would raise NameError on the first lazy load.
        # Read it from the instance if present, falling back to None
        # (anonymous access) — NOTE(review): __init__ should store hf_token.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_path,
            torch_dtype=torch.float16,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            token=getattr(self, "hf_token", None),
            attn_implementation="eager"
        )
    return self.model
340
+
341
  def _format_chat_template(self, prompt: str) -> str:
342
  """Format prompt using Phi-3's chat template"""
343
  try: