Spaces:

Etadingrui
/

PIWM

Running

App Files Community

musictimer committed on Sep 6, 2025

Commit

bbfa773

1 Parent(s): 93dbff3

Fix bug 5

Browse files

Files changed (1) hide show

app.py +77 -16

app.py CHANGED Viewed

@@ -95,49 +95,110 @@ class WebGameEngine:
         def load_model_weights():
             """Load model weights in thread pool to avoid blocking"""
             try:
-                # Use torch.hub.load_state_dict_from_url which is HF Spaces compatible
-                model_url = "https://huggingface.co/Etadingrui/diamond-1B/resolve/main/agent_epoch_00003.pt"
-                logger.info(f"Loading model from {model_url} using torch.hub...")
-                # Update progress
-                self.download_progress = 10
                 self.loading_status = "Downloading model with torch.hub..."
-                # Load state dict directly from URL (handles caching automatically)
                 state_dict = torch.hub.load_state_dict_from_url(
                     model_url,
                     map_location=device,
-                    progress=True  # Show download progress
                 )
-                self.download_progress = 80
-                self.loading_status = "Loading model weights..."
                 # Load each component of the agent using extract_state_dict (same as agent.load method)
                 if any(k.startswith("denoiser") for k in state_dict.keys()):
                     agent.denoiser.load_state_dict(extract_state_dict(state_dict, "denoiser"))
                 if any(k.startswith("upsampler") for k in state_dict.keys()) and agent.upsampler is not None:
                     agent.upsampler.load_state_dict(extract_state_dict(state_dict, "upsampler"))
                 if any(k.startswith("rew_end_model") for k in state_dict.keys()) and agent.rew_end_model is not None:
                     agent.rew_end_model.load_state_dict(extract_state_dict(state_dict, "rew_end_model"))
                 if any(k.startswith("actor_critic") for k in state_dict.keys()) and agent.actor_critic is not None:
                     agent.actor_critic.load_state_dict(extract_state_dict(state_dict, "actor_critic"))
                 self.download_progress = 100
                 self.loading_status = "Model loaded successfully!"
                 return True
             except Exception as e:
-                logger.error(f"Failed to load model from URL: {e}")
                 return False
-        # Run in thread pool to avoid blocking
         loop = asyncio.get_event_loop()
-        with concurrent.futures.ThreadPoolExecutor() as executor:
-            success = await loop.run_in_executor(executor, load_model_weights)
-        return success
     async def initialize_models(self):
         """Initialize the AI models and environment"""

         def load_model_weights():
             """Load model weights in thread pool to avoid blocking"""
+            state_dict = None
+            # Try torch.hub method first
             try:
+                logger.info("Trying to load model using torch.hub...")
                 self.loading_status = "Downloading model with torch.hub..."
+                self.download_progress = 10
+                model_url = "https://huggingface.co/Etadingrui/diamond-1B/resolve/main/agent_epoch_00003.pt"
                 state_dict = torch.hub.load_state_dict_from_url(
                     model_url,
                     map_location=device,
+                    progress=False,
+                    check_hash=False
                 )
+                logger.info("Successfully loaded model using torch.hub")
+            except Exception as e:
+                logger.warning(f"Failed to load model with torch.hub: {e}")
+                # Try huggingface_hub method as fallback
+                try:
+                    logger.info("Trying to load model using huggingface_hub...")
+                    self.loading_status = "Downloading model with huggingface_hub..."
+                    self.download_progress = 10
+                    from huggingface_hub import hf_hub_download
+                    # Download the file
+                    model_path = hf_hub_download(
+                        repo_id="Etadingrui/diamond-1B",
+                        filename="agent_epoch_00003.pt",
+                        cache_dir=None  # Use default cache
+                    )
+                    self.download_progress = 40
+                    self.loading_status = "Loading downloaded model..."
+                    # Load the state dict
+                    state_dict = torch.load(model_path, map_location=device)
+                    logger.info("Successfully loaded model using huggingface_hub")
+                except Exception as e2:
+                    logger.error(f"Failed to load model with huggingface_hub: {e2}")
+                    raise Exception("All model loading methods failed")
+            if state_dict is None:
+                raise Exception("Failed to load model state dict")
+            # Load state dict into agent
+            try:
+                logger.info("Model download completed, loading weights...")
+                self.download_progress = 60
+                self.loading_status = "Model downloaded, loading weights..."
                 # Load each component of the agent using extract_state_dict (same as agent.load method)
                 if any(k.startswith("denoiser") for k in state_dict.keys()):
                     agent.denoiser.load_state_dict(extract_state_dict(state_dict, "denoiser"))
+                    logger.info("Loaded denoiser weights")
+                self.download_progress = 70
+                self.loading_status = "Loading upsampler..."
                 if any(k.startswith("upsampler") for k in state_dict.keys()) and agent.upsampler is not None:
                     agent.upsampler.load_state_dict(extract_state_dict(state_dict, "upsampler"))
+                    logger.info("Loaded upsampler weights")
+                self.download_progress = 80
+                self.loading_status = "Loading reward model..."
                 if any(k.startswith("rew_end_model") for k in state_dict.keys()) and agent.rew_end_model is not None:
                     agent.rew_end_model.load_state_dict(extract_state_dict(state_dict, "rew_end_model"))
+                    logger.info("Loaded reward model weights")
+                self.download_progress = 90
+                self.loading_status = "Loading actor critic..."
                 if any(k.startswith("actor_critic") for k in state_dict.keys()) and agent.actor_critic is not None:
                     agent.actor_critic.load_state_dict(extract_state_dict(state_dict, "actor_critic"))
+                    logger.info("Loaded actor critic weights")
                 self.download_progress = 100
                 self.loading_status = "Model loaded successfully!"
+                logger.info("All model weights loaded successfully!")
                 return True
             except Exception as e:
+                logger.error(f"Failed to load state dict into agent: {e}")
+                import traceback
+                traceback.print_exc()
                 return False
+        # Run in thread pool to avoid blocking with timeout
         loop = asyncio.get_event_loop()
+        try:
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                # Add timeout for model loading (5 minutes max)
+                future = loop.run_in_executor(executor, load_model_weights)
+                success = await asyncio.wait_for(future, timeout=300.0)  # 5 minute timeout
+                return success
+        except asyncio.TimeoutError:
+            logger.error("Model loading timed out after 5 minutes")
+            self.loading_status = "Model loading timed out - using dummy mode"
+            return False
+        except Exception as e:
+            logger.error(f"Error in model loading executor: {e}")
+            self.loading_status = f"Model loading error: {str(e)[:50]}..."
+            return False
     async def initialize_models(self):
         """Initialize the AI models and environment"""