Spaces:

xjlulu
/

intent_classifier

Runtime error

App Files Community

xjlulu committed on Oct 4, 2023

Commit

fdb5dd9

1 Parent(s): dfe9225

"mergege"

Browse files

Files changed (1) hide show

app.py +46 -77

app.py CHANGED Viewed

@@ -1,58 +1,42 @@
 import gradio as gr
 from typing import Dict, List
 import torch
-import torch.nn as nn
-import torch.optim as optim
-from torch.utils.data import DataLoader
 import json
 import pickle
 from pathlib import Path
-from dataset import SeqClsDataset
 from utils import Vocab
 from model import SeqClassifier
-import ipdb
 max_len = 128
 hidden_size = 256
 num_layers = 2
 dropout = 0.1
 bidirectional = True
-lr = 1e-3
-batch_size = 64
-num_epoch = 5
-TRAIN = "train"
-DEV = "eval"
-TEST = "test"
-SPLITS = [TRAIN, DEV, TEST]
 device = "cpu"
-data_dir = Path("./data/intent/")
 ckpt_dir = Path("./ckpt/intent/")
 cache_dir = Path("./cache/intent/")
-# Before executing, place intent2idx.json, embeddings.pt, vocab.pkl, and utils.py in /content
 with open(cache_dir / "vocab.pkl", "rb") as f:
     vocab: Vocab = pickle.load(f)
 intent_idx_path = cache_dir / "intent2idx.json"
 intent2idx: Dict[str, int] = json.loads(intent_idx_path.read_text())
-data_paths = {split: data_dir / f"{split}.json" for split in SPLITS}
-data = {split: json.loads(path.read_text()) for split, path in data_paths.items()}
-datasets: Dict[str, SeqClsDataset] = {
-    split: SeqClsDataset(split_data, vocab, intent2idx, max_len)
-    for split, split_data in data.items()
-}
-#ipdb.set_trace()
-test_loader = DataLoader(datasets['test'], batch_size=batch_size, shuffle=False)
-embeddings = torch.load(cache_dir / "embeddings.pt")
 embeddings.to(device)
-# Load the best model after training
-# Initialize a new model with the same architecture
 best_model = SeqClassifier(
     embeddings=embeddings,
     hidden_size=hidden_size,
@@ -65,63 +49,48 @@ best_model = SeqClassifier(
 # Define the path to the checkpoint file
 ckpt_path = ckpt_dir / "model_checkpoint.pth"
-# Load the model's state_dict and optimizer's state_dict from the checkpoint
-checkpoint = torch.load(ckpt_path, map_location=torch.device('cpu'))
 # Load the model's weights
-best_model.load_state_dict(checkpoint['model_state_dict']).to(device)
-# Reinitialize the optimizer with the model's parameters and load its state
-'''weight_decay = 1e-5
-optimizer = optim.Adam(best_model.parameters(), lr=lr, weight_decay=weight_decay)
-optimizer.load_state_dict(checkpoint['optimizer_state_dict'])'''
-# Retrieve the epoch number from the checkpoint
-epoch = checkpoint['epoch']
-# Set the best model to evaluation mode
 best_model.eval()
-dic_intent2idx: Dict[str, int] = json.loads(intent_idx_path.read_text())
-dic_idx2label = {idx: intent for intent, idx in dic_intent2idx.items()}
-def Tidx2label(idx: int):
-    return dic_idx2label[idx]
-with open(cache_dir / "vocab.pkl", "rb") as f:
-    vocab: Vocab = pickle.load(f)
-# Convert the sentence into embedding indices
 def collate_fn(texts: str) -> torch.tensor:
-    # Extract the text data and label data of all samples
     texts = texts.split()
-    # Use vocab to convert the text data into integer index sequences, with a specified maximum length
-    encoded_texts = vocab.encode_batch([[text for text in texts]], to_len=max_len)
-    # Convert the integer index sequences into a PyTorch tensor
     encoded_text = torch.tensor(encoded_texts)
     return encoded_text
 def classify(text):
     encoded_text = collate_fn(text).to(device)
-    output = best_model(encoded_text[0])
     Predicted_class = torch.argmax(output).item()
-    prediction = Tidx2label(Predicted_class)
-    return prediction
-demo = gr.Interface(
-        fn=classify,
-        inputs=gr.Textbox(placeholder="請輸入一段文字..."),
-        outputs="label",
-        interpretation="default",
-        examples=[
-            ["Take me to church"],
-            ["tell me what to call you"],
-            ["could you be a person"]
-        ]
-    )
-demo.launch()

 import gradio as gr
 from typing import Dict, List
 import torch
+torch.backends.cudnn.enabled = False
 import json
 import pickle
 from pathlib import Path
 from utils import Vocab
 from model import SeqClassifier
+from seafoam import Seafoam
+# Set model parameters
 max_len = 128
 hidden_size = 256
 num_layers = 2
 dropout = 0.1
 bidirectional = True
 device = "cpu"
 ckpt_dir = Path("./ckpt/intent/")
 cache_dir = Path("./cache/intent/")
+# Load vocabulary and intent index mapping
 with open(cache_dir / "vocab.pkl", "rb") as f:
     vocab: Vocab = pickle.load(f)
 intent_idx_path = cache_dir / "intent2idx.json"
 intent2idx: Dict[str, int] = json.loads(intent_idx_path.read_text())
+__idx2label = {idx: intent for intent, idx in intent2idx.items()}
+def idx2label(idx: int):
+    return __idx2label[idx]
+# Set embedding layer size
+embeddings_size = (6491, 300)
+embeddings = torch.empty(embeddings_size)
 embeddings.to(device)
+# Load the best model
 best_model = SeqClassifier(
     embeddings=embeddings,
     hidden_size=hidden_size,
 # Define the path to the checkpoint file
 ckpt_path = ckpt_dir / "model_checkpoint.pth"
 # Load the model's weights
+checkpoint = torch.load(ckpt_path, map_location=torch.device('cpu'))
+best_model.load_state_dict(checkpoint['model_state_dict'])
+# Set the model to evaluation mode
 best_model.eval()
+# Processing function to convert text to embedding indices
 def collate_fn(texts: str) -> torch.tensor:
     texts = texts.split()
+    encoded_texts = vocab.encode_batch([[text for text in texts]], to_len=max_len)[0]
     encoded_text = torch.tensor(encoded_texts)
     return encoded_text
+# Classification function
 def classify(text):
     encoded_text = collate_fn(text).to(device)
+    output = best_model(encoded_text)
     Predicted_class = torch.argmax(output).item()
+    prediction = idx2label(Predicted_class)
+    return "Category：" + prediction
+# Use the Seafoam theme
+seafoam = Seafoam()
+# Create a Gradio interface
+demo = gr.Interface(
+    fn=classify,
+    inputs=gr.Textbox(placeholder="Please enter a text..."),
+    outputs="label",
+    interpretation="none",
+    live=False,
+    enable_queue=True,
+    examples=[
+        ["please set an alarm for mid day"],
+        ["tell lydia and laura where i am located"],
+        ["what's the deal with my health care"]
+    ],
+    title="Text Intent Classification Demo",
+    description="This demo uses a model to classify text into different intents or categories. Enter a text and see the classification result.",
+    theme=seafoam
+)
+# Launch the Gradio interface
+demo.launch(share=True)