Spaces:

jdesiree
/

Mimir

Sleeping

App Files Files Community

jdesiree committed on Sep 5, 2025

Commit

d03dc7e

verified ·

1 Parent(s): 0c423c7

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -19

app.py CHANGED Viewed

@@ -302,42 +302,42 @@ class Phi3MiniEducationalLLM(Runnable):
         current_time = datetime.now()
         self.model_name = model_path
         try:
-            # Load tokenizer - Phi-3 requires trust_remote_code
             self.tokenizer = AutoTokenizer.from_pretrained(
                 model_path,
                 trust_remote_code=True,
                 token=hf_token
             )
-            # Load model with ZeroGPU-optimized settings
-            self.model = AutoModelForCausalLM.from_pretrained(
-                model_path,
-                torch_dtype=torch.float16,
-                device_map="auto",  # This will work with ZeroGPU allocation
-                trust_remote_code=True,
-                low_cpu_mem_usage=True,
-                token=hf_token,
-                attn_implementation="eager"
-            )
-            # Success path - log timing
-            end_Loading_Model_time = time.perf_counter()
-            Loading_Model_time = end_Loading_Model_time - start_Loading_Model_time
-            log_metric(f"Model Load time: {Loading_Model_time:0.4f} seconds. Model: {model_path}. Timestamp: {current_time:%Y-%m-%d %H:%M:%S}")
         except Exception as e:
-            logger.error(f"Failed to load Phi-3-mini model {model_path}: {e}")
             raise
         # Ensure pad token exists
         if self.tokenizer.pad_token is None:
             self.tokenizer.pad_token = self.tokenizer.eos_token
-        # Initialize TextIteratorStreamer
         self.streamer = None
     def _format_chat_template(self, prompt: str) -> str:
         """Format prompt using Phi-3's chat template"""
         try:

         current_time = datetime.now()
         self.model_name = model_path
         try:
+            # Load tokenizer
             self.tokenizer = AutoTokenizer.from_pretrained(
                 model_path,
                 trust_remote_code=True,
                 token=hf_token
             )
+            # Store model path instead of loading model immediately
+            self.model_path = model_path
+            self.model = None  # Load model lazily in GPU methods
         except Exception as e:
+            logger.error(f"Failed to initialize Phi-3-mini model {model_path}: {e}")
             raise
         # Ensure pad token exists
         if self.tokenizer.pad_token is None:
             self.tokenizer.pad_token = self.tokenizer.eos_token
         self.streamer = None
+    def _load_model_if_needed(self):
+        """Return the causal LM, loading it on the first call.
+
+        If ``self.model`` is still ``None``, the model is loaded from
+        ``self.model_path`` with ``AutoModelForCausalLM.from_pretrained``
+        and stored on ``self.model``; later calls return the stored object
+        without loading again. Intended to be called only from code that
+        is already running inside a GPU context.
+
+        Returns:
+            The object stored in ``self.model``.
+        """
+        if self.model is None:
+            # No device_map is passed here, so this call does not itself
+            # request any device placement.
+            # NOTE(review): presumably the caller moves the model to the
+            # GPU afterwards -- confirm.
+            self.model = AutoModelForCausalLM.from_pretrained(
+                self.model_path,
+                torch_dtype=torch.float16,
+                trust_remote_code=True,
+                low_cpu_mem_usage=True,
+                # NOTE(review): hf_token is not a parameter or attribute of
+                # this method; it must resolve from an enclosing/module
+                # scope -- confirm it is defined there.
+                token=hf_token,
+                attn_implementation="eager"
+            )
+        return self.model
     def _format_chat_template(self, prompt: str) -> str:
         """Format prompt using Phi-3's chat template"""
         try: