Spaces:

wokogaming
/

asba

Sleeping

App Files Files Community

wokogaming committed on Apr 14

Commit

debc72e

verified ·

1 Parent(s): 8ec1e48

Update infer.py

Browse files

Files changed (1) hide show

infer.py +111 -111

infer.py CHANGED Viewed

@@ -1,111 +1,111 @@
-import sys
-from pathlib import Path
-import torch
-from transformers import AutoTokenizer
-sys.path.append(str(Path(__file__).parent / "model_code"))
-from architecture import PhoBERTMultiHeadGRU
-ASPECTS = [
-    "vệ sinh",
-    "đồ ăn thức uống",
-    "khách sạn",
-    "vị trí",
-    "phòng ốc",
-    "dịch vụ",
-]
-LABEL_MAP = {
-    0: "Negative",
-    1: "Neutral",
-    2: "Positive"
-}
-def _load_model(checkpoint_path: Path):
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    # Init tokenizer and model with 'vinai/phobert-base'
-    phobert_name = "vinai/phobert-base"
-    tokenizer = AutoTokenizer.from_pretrained(phobert_name)
-    model = PhoBERTMultiHeadGRU(
-        phobert_path=phobert_name,
-        gru_hidden_dim=256,
-        num_labels=len(ASPECTS),
-        num_classes=3
-    )
-    # Load state dict
-    checkpoint = torch.load(checkpoint_path, map_location=device)
-    if isinstance(checkpoint, dict) and "model_state_dict" in checkpoint:
-        model.load_state_dict(checkpoint["model_state_dict"])
-    else:
-        model.load_state_dict(checkpoint)
-    model.to(device)
-    model.eval()
-    cfg = {
-        "max_len": 128,
-        "aspects": ASPECTS,
-        "label_map": LABEL_MAP
-    }
-    return model, tokenizer, cfg, device
-def _predict_single(model, tokenizer, cfg, device, text: str):
-    encoding = tokenizer(
-        text,
-        add_special_tokens=True,
-        max_length=cfg["max_len"],
-        padding="max_length",
-        truncation=True,
-        return_attention_mask=True,
-        return_tensors="pt",
-    )
-    input_ids = encoding["input_ids"].to(device)
-    attention_mask = encoding["attention_mask"].to(device)
-    with torch.no_grad():
-        logits_list = model(input_ids=input_ids, attention_mask=attention_mask)
-        # logits_list is a list of tensors [1, num_classes]
-        preds = [logits.argmax(dim=-1).item() for logits in logits_list]
-    results = {}
-    for i, aspect in enumerate(cfg["aspects"]):
-        results[aspect] = cfg["label_map"][preds[i]]
-    return results
-def _predict_batch(model, tokenizer, cfg, device, texts: list[str], batch_size: int = 32):
-    results = []
-    for i in range(0, len(texts), batch_size):
-        batch_texts = texts[i:i+batch_size]
-        encoding = tokenizer(
-            batch_texts,
-            add_special_tokens=True,
-            max_length=cfg["max_len"],
-            padding="max_length",
-            truncation=True,
-            return_attention_mask=True,
-            return_tensors="pt",
-        )
-        input_ids = encoding["input_ids"].to(device)
-        attention_mask = encoding["attention_mask"].to(device)
-        with torch.no_grad():
-            logits_list = model(input_ids=input_ids, attention_mask=attention_mask)
-            # logits_list is a list of num_labels tensors of shape [batch, num_classes]
-            # We want to stack them to [batch, num_labels]
-            preds = torch.stack([logits.argmax(dim=-1) for logits in logits_list], dim=1).cpu().numpy()
-        for b_idx in range(len(batch_texts)):
-            res = {}
-            for a_idx, aspect in enumerate(cfg["aspects"]):
-                res[aspect] = cfg["label_map"][preds[b_idx, a_idx]]
-            results.append(res)
-    return results

+import sys
+from pathlib import Path
+import torch
+from transformers import AutoTokenizer
+sys.path.append(str(Path(__file__).parent / "model_code"))
+from architecture import PhoBERTMultiHeadGRU
+# Review aspects scored by the model. The list length is handed to the model
+# as ``num_labels`` and predictions are matched to aspects by position, so the
+# order presumably has to match the one used at training time (TODO confirm
+# against the training code).
+ASPECTS = [
+    "vệ sinh",  # cleanliness
+    "đồ ăn thức uống",  # food and drink
+    "khách sạn",  # hotel
+    "vị trí",  # location
+    "phòng ốc",  # rooms
+    "dịch vụ",  # service
+]
+# Predicted class index -> human-readable label, in index order 0, 1, 2.
+LABEL_MAP = dict(enumerate(("None", "Positive", "Negative")))
+def _load_model(
+    checkpoint_path: Path,
+    *,
+    phobert_name: str = "vinai/phobert-base",
+    gru_hidden_dim: int = 256,
+    max_len: int = 128,
+):
+    """Build the PhoBERT + GRU classifier and restore weights from a checkpoint.
+
+    Args:
+        checkpoint_path: File readable by ``torch.load``. It may hold either a
+            bare state dict or a dict that stores the state dict under the
+            ``"model_state_dict"`` key.
+        phobert_name: Identifier passed to ``AutoTokenizer.from_pretrained``
+            and to the model as ``phobert_path``.
+        gru_hidden_dim: Forwarded to ``PhoBERTMultiHeadGRU``. It presumably
+            has to equal the value used when the checkpoint was trained,
+            otherwise ``load_state_dict`` is expected to reject the weights.
+        max_len: Stored in the returned config under ``"max_len"``; the
+            prediction helpers pass it to the tokenizer as ``max_length``.
+
+    Returns:
+        A ``(model, tokenizer, cfg, device)`` tuple. ``model`` is on ``device``
+        and in eval mode; ``cfg`` holds ``"max_len"``, ``"aspects"`` and
+        ``"label_map"``.
+    """
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    tokenizer = AutoTokenizer.from_pretrained(phobert_name)
+    model = PhoBERTMultiHeadGRU(
+        phobert_path=phobert_name,
+        gru_hidden_dim=gru_hidden_dim,
+        num_labels=len(ASPECTS),
+        # Derived from LABEL_MAP so the head width and the label table cannot
+        # drift apart (still 3 with the current table).
+        num_classes=len(LABEL_MAP),
+    )
+
+    # NOTE(review): torch.load unpickles the file, which can execute arbitrary
+    # code. Only load checkpoints from a trusted source, or pass
+    # ``weights_only=True`` once it is confirmed that the checkpoint contains
+    # nothing but tensors and plain containers.
+    checkpoint = torch.load(checkpoint_path, map_location=device)
+    wrapped = isinstance(checkpoint, dict) and "model_state_dict" in checkpoint
+    state_dict = checkpoint["model_state_dict"] if wrapped else checkpoint
+    model.load_state_dict(state_dict)
+
+    model.to(device)
+    model.eval()
+
+    cfg = {
+        "max_len": max_len,
+        "aspects": ASPECTS,
+        "label_map": LABEL_MAP,
+    }
+    return model, tokenizer, cfg, device
+def _predict_single(model, tokenizer, cfg, device, text: str):
+    """Classify one text and return a ``{aspect: label}`` dict.
+
+    The text is tokenized to exactly ``cfg["max_len"]`` tokens (padded or
+    truncated), run through ``model`` without gradient tracking, and the
+    arg-max class of each returned logits tensor is translated through
+    ``cfg["label_map"]``. Outputs are matched to ``cfg["aspects"]`` by
+    position.
+    """
+    batch = tokenizer(
+        text,
+        add_special_tokens=True,
+        max_length=cfg["max_len"],
+        padding="max_length",
+        truncation=True,
+        return_attention_mask=True,
+        return_tensors="pt",
+    )
+    model_inputs = {
+        key: batch[key].to(device) for key in ("input_ids", "attention_mask")
+    }
+
+    with torch.no_grad():
+        per_aspect_logits = model(**model_inputs)
+
+    # One logits tensor per aspect; each is expected to hold a single row, so
+    # ``.item()`` yields the winning class index as a plain int.
+    class_ids = [head.argmax(dim=-1).item() for head in per_aspect_logits]
+
+    return {
+        aspect: cfg["label_map"][class_ids[pos]]
+        for pos, aspect in enumerate(cfg["aspects"])
+    }
+def _predict_batch(model, tokenizer, cfg, device, texts: list[str], batch_size: int = 32):
+    """Classify many texts, ``batch_size`` at a time.
+
+    Args:
+        model: Callable taking ``input_ids`` and ``attention_mask`` keyword
+            arguments and returning one logits tensor per aspect.
+        tokenizer: Callable that turns a list of strings into a mapping with
+            ``"input_ids"`` and ``"attention_mask"`` tensors.
+        cfg: Mapping with ``"max_len"``, ``"aspects"`` and ``"label_map"``.
+        device: Device the input tensors are moved to before the forward pass.
+        texts: Texts to classify; an empty list yields an empty result.
+        batch_size: Number of texts per forward pass; must be at least 1.
+
+    Returns:
+        A list with one ``{aspect: label}`` dict per input text, in input
+        order.
+
+    Raises:
+        ValueError: If ``batch_size`` is smaller than 1. Without this check a
+            negative value would silently return no predictions at all.
+    """
+    if batch_size < 1:
+        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
+
+    aspects = cfg["aspects"]
+    label_map = cfg["label_map"]
+    results = []
+    for start in range(0, len(texts), batch_size):
+        batch_texts = texts[start:start + batch_size]
+        encoding = tokenizer(
+            batch_texts,
+            add_special_tokens=True,
+            max_length=cfg["max_len"],
+            padding="max_length",
+            truncation=True,
+            return_attention_mask=True,
+            return_tensors="pt",
+        )
+        input_ids = encoding["input_ids"].to(device)
+        attention_mask = encoding["attention_mask"].to(device)
+        with torch.no_grad():
+            logits_list = model(input_ids=input_ids, attention_mask=attention_mask)
+        # Each entry of logits_list covers one aspect for the whole batch.
+        # Stacking the per-aspect arg-max vectors along dim 1 gives one row of
+        # class ids per text; ``tolist()`` turns them into plain Python ints,
+        # so no NumPy round-trip is needed for the dict lookups below.
+        pred_rows = torch.stack(
+            [logits.argmax(dim=-1) for logits in logits_list], dim=1
+        ).tolist()
+        results.extend(
+            {aspect: label_map[row[pos]] for pos, aspect in enumerate(aspects)}
+            for row in pred_rows
+        )
+    return results