tzhang62 committed on
Commit
d632886
·
verified ·
1 Parent(s): e16c4b5

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +184 -60
app.py CHANGED
@@ -1,10 +1,11 @@
1
  """
2
- Hugging Face Space API for IQL Fire Rescue Model
3
- Deploy this to HF Space to serve your custom IQL model
4
 
5
- Upload to: https://huggingface.co/spaces/tzhang62/iql-fire-rescue-api
 
6
  """
7
 
 
8
  from fastapi import FastAPI
9
  from pydantic import BaseModel
10
  from typing import Dict, List, Optional
@@ -21,12 +22,38 @@ app = FastAPI(title="IQL Fire Rescue API")
21
  # Config
22
  EMBED_MODEL = "all-MiniLM-L6-v2"
23
  N_LAST = 3
 
 
 
 
 
24
 
25
  # ============================================================================
26
- # IQL Model (same as your server.py)
27
  # ============================================================================
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  class QNetworkEmbed(nn.Module):
 
30
  def __init__(self, state_dim: int, action_embeds: torch.Tensor, hidden_dim: int, p_drop: float = 0.3):
31
  super().__init__()
32
  self.action_embeds = nn.Parameter(action_embeds, requires_grad=False)
@@ -45,34 +72,75 @@ class QNetworkEmbed(nn.Module):
45
  x = self.f2(x); x = F.relu(x); x = self.ln2(x); x = self.drop(x)
46
  return self.head(x).squeeze(-1)
47
 
48
- def embed_state(model, texts):
49
- if not texts:
 
 
50
  return np.zeros((model.get_sentence_embedding_dimension(),), dtype=np.float32)
51
- embs = model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
52
  return np.mean(embs, axis=0).astype(np.float32)
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  class IQLSelector:
55
  def __init__(self, pt_path, policy_names):
56
  self.device = torch.device("cpu")
57
  self.embed_model = SentenceTransformer(EMBED_MODEL)
58
- state_dict = torch.load(pt_path, map_location="cpu")
59
-
60
- ae_key = next((k for k in state_dict.keys() if k.endswith("action_embeds")), None)
61
- action_embeds_ckpt = state_dict[ae_key]
62
- if isinstance(action_embeds_ckpt, np.ndarray):
63
- action_embeds_ckpt = torch.tensor(action_embeds_ckpt)
64
-
65
- num_actions, action_dim = action_embeds_ckpt.shape
66
- f1w = state_dict["f1.weight"]
67
- hidden_dim = f1w.shape[0]
68
- state_dim = f1w.shape[1] - action_dim
69
-
70
- dummy = torch.zeros((num_actions, action_dim), dtype=torch.float32)
71
- self.qnet = QNetworkEmbed(state_dim, dummy, hidden_dim=hidden_dim).to(self.device)
72
- self.qnet.load_state_dict(state_dict, strict=True)
73
- self.qnet.eval()
74
  self.policy_names = policy_names
75
- print(f"[IQL] Loaded: {num_actions} policies, state_dim={state_dim}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  def select_policy(self, history, n_last=N_LAST):
78
  texts = [h["text"] for h in history if h.get("role") == "resident"]
@@ -80,11 +148,15 @@ class IQLSelector:
80
  s_vec = embed_state(self.embed_model, last_n)
81
  s = torch.tensor(s_vec, dtype=torch.float32, device=self.device).unsqueeze(0)
82
 
83
- q_vals = []
84
  with torch.no_grad():
85
- for a_id in range(len(self.policy_names)):
86
- a = torch.tensor([a_id], dtype=torch.long, device=self.device)
87
- q_vals.append(float(self.qnet(s, a).item()))
 
 
 
 
 
88
 
89
  best_idx = int(np.argmax(q_vals))
90
  return self.policy_names[best_idx], dict(zip(self.policy_names, q_vals))
@@ -94,17 +166,70 @@ class IQLSelector:
94
  # ============================================================================
95
 
96
  iql_selector = None
 
 
 
97
 
98
  @app.on_event("startup")
99
  async def load_model():
100
- global iql_selector
 
 
 
 
 
 
 
 
 
101
  try:
102
  base = Path(__file__).parent
103
- label_map = json.loads((base / "label_map.json").read_text())
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  policies = [k for k, _ in sorted(label_map.items(), key=lambda x: x[1])]
105
- iql_selector = IQLSelector(base / "iql_model_embed.pt", policies)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  print("[Space] Model loaded!")
107
  except Exception as e:
 
108
  print(f"[Space] Load failed: {e}")
109
  import traceback
110
  traceback.print_exc()
@@ -123,73 +248,72 @@ class Response(BaseModel):
123
 
124
  @app.post("/", response_model=Response)
125
  async def predict(req: Request):
126
- # Parse state: "msg1 | msg2 | msg3"
 
 
127
  if req.inputs == "START" or not req.inputs:
128
  messages = []
129
  else:
130
  messages = [m.strip() for m in req.inputs.split("|")]
131
-
132
  history = [{"role": "resident", "text": m} for m in messages]
133
  policy, q_vals = iql_selector.select_policy(history, n_last=N_LAST)
134
-
135
  return {"policy": policy, "q_values": q_vals}
136
 
137
  @app.get("/health")
138
  async def health():
139
- return {"status": "ok", "model_loaded": iql_selector is not None}
 
 
 
 
 
 
 
140
 
141
  # ============================================================================
142
- # Embedding API
143
  # ============================================================================
144
 
145
  class EmbedRequest(BaseModel):
146
  texts: List[str]
147
  normalize: Optional[bool] = True
148
 
 
149
  class EmbedResponse(BaseModel):
150
  embeddings: List[List[float]]
151
  model: str = EMBED_MODEL
152
  dimension: int
153
 
 
154
  @app.post("/embed", response_model=EmbedResponse)
155
  async def embed_texts(req: EmbedRequest):
156
  """
157
- Embed texts using sentence-transformers (GPU-accelerated)
158
-
159
- Input:
160
- texts: List of strings to embed
161
- normalize: Whether to normalize embeddings (default: True)
162
-
163
- Output:
164
- embeddings: List of embedding vectors (384-dim)
165
  """
166
- if not iql_selector:
167
- return {"embeddings": [], "dimension": 384}
168
-
 
 
169
  try:
170
- # Use the already-loaded sentence-transformers model
171
- embeddings = iql_selector.embed_model.encode(
172
  req.texts,
173
  convert_to_numpy=True,
174
  normalize_embeddings=req.normalize,
175
- show_progress_bar=False
176
  )
177
-
178
- # Convert to list for JSON serialization
179
  embeddings_list = embeddings.tolist()
180
-
181
- return EmbedResponse(
182
- embeddings=embeddings_list,
183
- dimension=embeddings.shape[1]
184
- )
185
-
186
  except Exception as e:
187
  print(f"[EMBED] Error: {e}")
188
  import traceback
189
  traceback.print_exc()
190
- return {"embeddings": [], "dimension": 384}
 
191
 
192
  if __name__ == "__main__":
193
  import uvicorn
194
  uvicorn.run(app, host="0.0.0.0", port=7860)
195
-
 
1
  """
2
+ Hugging Face Space API for IQL Fire Rescue Model (iql_model_state.pt)
 
3
 
4
+ Serves the state-mode IQL model uploaded via upload_iql_to_hf.py.
5
+ Deploy to: https://huggingface.co/spaces/YOUR_USERNAME/iql-fire-rescue-api
6
  """
7
 
8
+ import os
9
  from fastapi import FastAPI
10
  from pydantic import BaseModel
11
  from typing import Dict, List, Optional
 
22
  # Config
23
  EMBED_MODEL = "all-MiniLM-L6-v2"
24
  N_LAST = 3
25
+ IQL_P_DROP = 0.3
26
+ IQL_HIDDEN_DIM = 1024
27
+
28
+ # Optional: load model from HF model repo (set HF_IQL_REPO=username/iql-fire-rescue)
29
+ HF_IQL_REPO = os.getenv("HF_IQL_REPO", "")
30
 
31
  # ============================================================================
32
+ # IQL Model (state mode - iql_model_state.pt)
33
  # ============================================================================
34
 
35
class QNetworkState(nn.Module):
    """State-only Q-network mapping a state vector to per-action values.

    forward(s) returns a tensor of shape (batch, num_actions), i.e.
    [Q(s, a_1), ..., Q(s, a_N)] in a single pass.
    """

    def __init__(self, state_dim: int, num_actions: int, hidden_dim: int = IQL_HIDDEN_DIM, p_drop: float = IQL_P_DROP):
        super().__init__()

        def _hidden(in_dim: int) -> list:
            # Repeated hidden unit: Linear -> ReLU -> LayerNorm -> Dropout.
            return [
                nn.Linear(in_dim, hidden_dim),
                nn.ReLU(),
                nn.LayerNorm(hidden_dim),
                nn.Dropout(p_drop),
            ]

        # Same module order as two stacked hidden units plus the output
        # head, so checkpoint keys ("net.0.*", "net.4.*", "net.8.*") match.
        modules = _hidden(state_dim) + _hidden(hidden_dim) + [nn.Linear(hidden_dim, num_actions)]
        self.net = nn.Sequential(*modules)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return Q-values for every action given a batch of states."""
        return self.net(x)
53
+
54
+
55
  class QNetworkEmbed(nn.Module):
56
+ """Embedding-based Q-network (legacy, for iql_model_state.pt)"""
57
  def __init__(self, state_dim: int, action_embeds: torch.Tensor, hidden_dim: int, p_drop: float = 0.3):
58
  super().__init__()
59
  self.action_embeds = nn.Parameter(action_embeds, requires_grad=False)
 
72
  x = self.f2(x); x = F.relu(x); x = self.ln2(x); x = self.drop(x)
73
  return self.head(x).squeeze(-1)
74
 
75
+
76
def embed_state(model: SentenceTransformer, last_n_res_texts: List[str]) -> np.ndarray:
    """Mean-pool sentence embeddings of the last N resident messages.

    Returns a zero vector of the model's embedding dimension when no
    messages exist yet (conversation start).
    """
    dim = model.get_sentence_embedding_dimension()
    if not last_n_res_texts:
        return np.zeros((dim,), dtype=np.float32)
    vectors = model.encode(last_n_res_texts, convert_to_numpy=True, normalize_embeddings=True)
    return vectors.mean(axis=0).astype(np.float32)
82
 
83
+
84
def _load_state_dict(pt_path):
    """Extract a flat parameter dict from a checkpoint file.

    Handles raw state dicts, checkpoints wrapped under a "model" or
    "state_dict" key, and a single-entry wrapper dict whose only value
    is itself a non-empty dict.
    """
    checkpoint = torch.load(pt_path, map_location="cpu")
    if not isinstance(checkpoint, dict):
        return checkpoint

    state = checkpoint
    if "model" in checkpoint:
        state = checkpoint["model"]
    elif "state_dict" in checkpoint:
        state = checkpoint["state_dict"]

    # Unwrap one more level when the dict holds a single nested dict.
    if isinstance(state, dict) and len(state) == 1:
        (candidate,) = state.values()
        if isinstance(candidate, dict) and candidate:
            state = candidate
    return state
97
+
98
+
99
def _strip_prefix(sd: dict, prefix: str) -> dict:
    """Remove *prefix* from matching keys; keys without it are dropped."""
    cut = len(prefix)
    stripped = {}
    for key, value in sd.items():
        if key.startswith(prefix):
            stripped[key[cut:]] = value
    return stripped
101
+
102
+
103
  class IQLSelector:
104
  def __init__(self, pt_path, policy_names):
105
  self.device = torch.device("cpu")
106
  self.embed_model = SentenceTransformer(EMBED_MODEL)
107
+ state_dim = self.embed_model.get_sentence_embedding_dimension()
108
+ num_actions = len(policy_names)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  self.policy_names = policy_names
110
+
111
+ state_dict = _load_state_dict(pt_path)
112
+ keys = list(state_dict.keys()) if isinstance(state_dict, dict) else []
113
+ for prefix in ("qnet.", "module.", "model."):
114
+ if any(k.startswith(prefix) for k in keys):
115
+ state_dict = _strip_prefix(state_dict, prefix)
116
+ keys = list(state_dict.keys())
117
+ break
118
+
119
+ # State mode: QNetworkState has "net." keys
120
+ is_state_mode = any(k.startswith("net.") for k in keys)
121
+ if is_state_mode:
122
+ self.mode = "state"
123
+ self.qnet = QNetworkState(state_dim, num_actions).to(self.device)
124
+ model_keys = set(self.qnet.state_dict().keys())
125
+ state_dict_filtered = {k: v for k, v in state_dict.items() if k in model_keys}
126
+ self.qnet.load_state_dict(state_dict_filtered, strict=True)
127
+ self.qnet.eval()
128
+ print(f"[IQL] Loaded state-mode model: {num_actions} policies, state_dim={state_dim}")
129
+ else:
130
+ # Embed mode (legacy)
131
+ self.mode = "embed"
132
+ ae_key = next((k for k in state_dict.keys() if k.endswith("action_embeds")), None)
133
+ action_embeds_ckpt = state_dict[ae_key]
134
+ if isinstance(action_embeds_ckpt, np.ndarray):
135
+ action_embeds_ckpt = torch.tensor(action_embeds_ckpt)
136
+ num_a, action_dim = action_embeds_ckpt.shape
137
+ f1w = state_dict["f1.weight"]
138
+ hidden_dim = f1w.shape[0]
139
+ dummy = torch.zeros((num_a, action_dim), dtype=torch.float32)
140
+ self.qnet = QNetworkEmbed(state_dim, dummy, hidden_dim=hidden_dim).to(self.device)
141
+ self.qnet.load_state_dict(state_dict, strict=True)
142
+ self.qnet.eval()
143
+ print(f"[IQL] Loaded embed-mode model: {num_actions} policies")
144
 
145
  def select_policy(self, history, n_last=N_LAST):
146
  texts = [h["text"] for h in history if h.get("role") == "resident"]
 
148
  s_vec = embed_state(self.embed_model, last_n)
149
  s = torch.tensor(s_vec, dtype=torch.float32, device=self.device).unsqueeze(0)
150
 
 
151
  with torch.no_grad():
152
+ if self.mode == "state":
153
+ q_out = self.qnet(s)
154
+ q_vals = q_out.cpu().numpy().flatten().tolist()
155
+ else:
156
+ q_vals = []
157
+ for a_id in range(len(self.policy_names)):
158
+ a = torch.tensor([a_id], dtype=torch.long, device=self.device)
159
+ q_vals.append(float(self.qnet(s, a).item()))
160
 
161
  best_idx = int(np.argmax(q_vals))
162
  return self.policy_names[best_idx], dict(zip(self.policy_names, q_vals))
 
166
  # ============================================================================
167
 
168
  iql_selector = None
169
+ embed_model = None # Standalone embed model for /embed endpoint (works even if IQL fails to load)
170
+ load_error = None # Captured error when IQL model fails to load
171
+
172
 
173
@app.on_event("startup")
async def load_model():
    """Load the embedding model and the IQL checkpoint at startup.

    The embedding model is loaded first so the /embed endpoint keeps
    working even when the IQL checkpoint is missing or fails to load;
    any IQL failure is captured in the module-level `load_error` for
    the /health endpoint to report.
    """
    global iql_selector, embed_model, load_error

    # Standalone embedder for /embed (lightweight, independent of IQL).
    try:
        embed_model = SentenceTransformer(EMBED_MODEL)
        print("[Space] Embedding model loaded (for /embed endpoint)")
    except Exception as e:
        print(f"[Space] Embedding model failed to load: {e}")
        import traceback
        traceback.print_exc()

    try:
        base = Path(__file__).parent
        print(f"[Space] Base dir: {base}")
        print(f"[Space] Files in base: {list(base.iterdir())}")

        label_map_path = base / "label_map.json"

        # Prefer the label map from the HF model repo when configured;
        # fall back to the local copy on any download error.
        if HF_IQL_REPO:
            try:
                from huggingface_hub import hf_hub_download
                label_map_path = Path(hf_hub_download(repo_id=HF_IQL_REPO, filename="label_map.json"))
            except Exception as e:
                print(f"[Space] Could not load label_map from {HF_IQL_REPO}: {e}")

        label_map = json.loads(label_map_path.read_text())
        policies = [k for k, _ in sorted(label_map.items(), key=lambda x: x[1])]

        # Checkpoint lookup order: HF repo download first, then the
        # local fallbacks iql_model_state.pt and state.pt.
        pt_path = None
        if HF_IQL_REPO:
            try:
                from huggingface_hub import hf_hub_download
                pt_path = Path(hf_hub_download(repo_id=HF_IQL_REPO, filename="iql_model_state.pt"))
                print(f"[Space] Loaded iql_model_state.pt from {HF_IQL_REPO}")
            except Exception as e:
                print(f"[Space] Could not load from HF repo: {e}")

        if pt_path is None:
            for name in ("iql_model_state.pt", "state.pt"):
                candidate = base / name
                if candidate.exists():
                    pt_path = candidate
                    print(f"[Space] Using local {name}")
                    break

        if pt_path is None:
            # List what .pt files exist to make the failure debuggable.
            existing = [f.name for f in base.iterdir() if f.suffix == ".pt"]
            raise FileNotFoundError(
                f"No iql_model_state.pt found. "
                f"Add to Space or set HF_IQL_REPO. Found .pt files: {existing}"
            )

        iql_selector = IQLSelector(pt_path, policies)
        print("[Space] Model loaded!")
    except Exception as e:
        load_error = str(e)
        print(f"[Space] Load failed: {e}")
        import traceback
        traceback.print_exc()
 
248
 
249
@app.post("/", response_model=Response)
async def predict(req: Request):
    """Select the best policy for the conversation encoded in req.inputs.

    req.inputs is a pipe-separated resident message history, or "START"
    (or empty) at the beginning of a conversation.
    """
    if not iql_selector:
        # Model never loaded; answer with the default policy and no Q-values.
        return {"policy": "niki", "q_values": {}}

    raw = req.inputs
    if not raw or raw == "START":
        messages = []
    else:
        messages = [part.strip() for part in raw.split("|")]

    history = [{"role": "resident", "text": msg} for msg in messages]
    policy, q_vals = iql_selector.select_policy(history, n_last=N_LAST)
    return {"policy": policy, "q_values": q_vals}
263
 
264
@app.get("/health")
async def health():
    """Report readiness of the IQL selector and the embedding model."""
    iql_ready = iql_selector is not None
    standalone_embed_ready = embed_model is not None
    return {
        "status": "ok",
        "model_loaded": iql_ready,
        "embed_model_loaded": standalone_embed_ready,
        "embed_ready": standalone_embed_ready or iql_ready,
        "load_error": load_error,  # why the IQL model failed to load, if it did
    }
273
+
274
 
275
  # ============================================================================
276
+ # Embedding API (for policy retrieval - works even if IQL model fails to load)
277
  # ============================================================================
278
 
279
class EmbedRequest(BaseModel):
    # Strings to embed, and whether to L2-normalize the resulting vectors.
    texts: List[str]
    normalize: Optional[bool] = True
282
 
283
+
284
class EmbedResponse(BaseModel):
    # One embedding vector per input text, plus the model name and dimension.
    embeddings: List[List[float]]
    model: str = EMBED_MODEL
    dimension: int
288
 
289
+
290
@app.post("/embed", response_model=EmbedResponse)
async def embed_texts(req: EmbedRequest):
    """
    Embed texts using sentence-transformers (GPU-accelerated).
    Uses standalone embed_model so it works even if IQL model fails to load.
    """
    # Prefer the standalone embedder; fall back to the one inside the selector.
    model = embed_model or (iql_selector.embed_model if iql_selector else None)
    if not model:
        print("[EMBED] No embedding model available")
        return {"embeddings": [], "model": EMBED_MODEL, "dimension": 384}

    try:
        vectors = model.encode(
            req.texts,
            convert_to_numpy=True,
            normalize_embeddings=req.normalize,
            show_progress_bar=False,
        )
        return EmbedResponse(embeddings=vectors.tolist(), dimension=vectors.shape[1])
    except Exception as e:
        print(f"[EMBED] Error: {e}")
        import traceback
        traceback.print_exc()
        # Degrade gracefully so callers get a well-formed (empty) response.
        return {"embeddings": [], "model": EMBED_MODEL, "dimension": 384}
315
+
316
 
317
if __name__ == "__main__":
    # Local development entry point; HF Spaces expects port 7860.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)