Spaces:
Runtime error
Runtime error
"pull and shock"
Browse files- README.md +10 -0
- __pycache__/seafoam.cpython-39.pyc +0 -0
- cache/intent/embeddings.pt +0 -3
- cache/slot/embeddings.pt +0 -3
- cache/slot/tag2idx.json +0 -11
- cache/slot/vocab.pkl +0 -3
- ckpt/intent/model_checkpoint.pth +2 -2
- data/intent/eval.json +0 -0
- data/intent/test.json +0 -0
- data/intent/train.json +0 -0
- data/slot/eval.json +0 -0
- data/slot/test.json +0 -0
- data/slot/train.json +0 -0
- dataset.py +0 -74
- model.py +1 -8
- requirements.in → requirements.txt +2 -1
- seafoam.py +58 -0
README.md
CHANGED
|
@@ -11,3 +11,13 @@ license: apache-2.0
|
|
| 11 |
---
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 14 |
+
|
| 15 |
+
## Environment
|
| 16 |
+
```shell
|
| 17 |
+
# If you have conda, we recommend creating a conda environment called "adl-hw1"
|
| 18 |
+
make
|
| 19 |
+
conda activate adl-hw1
|
| 20 |
+
pip install -r requirements.txt
|
| 21 |
+
# Otherwise
|
| 22 |
+
pip install -r requirements.txt
|
| 23 |
+
```
|
__pycache__/seafoam.cpython-39.pyc
ADDED
|
Binary file (2.07 kB). View file
|
|
|
cache/intent/embeddings.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:f48c2a4bb711ddd28a95f849b676ab6c76a4aeff3ba01976ccea97a4808ce790
|
| 3 |
-
size 7789931
|
|
|
|
|
|
|
|
|
|
|
|
cache/slot/embeddings.pt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:faba49b73dfdd2a98dbbfe7b53eed50b8edd9df716169e8f837558c5e24c42bf
|
| 3 |
-
size 4941099
|
|
|
|
|
|
|
|
|
|
|
|
cache/slot/tag2idx.json
DELETED
|
@@ -1,11 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"O": 0,
|
| 3 |
-
"B-date": 1,
|
| 4 |
-
"I-time": 2,
|
| 5 |
-
"B-time": 3,
|
| 6 |
-
"B-last_name": 4,
|
| 7 |
-
"I-people": 5,
|
| 8 |
-
"B-people": 6,
|
| 9 |
-
"I-date": 7,
|
| 10 |
-
"B-first_name": 8
|
| 11 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cache/slot/vocab.pkl
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:c711af8ba9cba928df00a20913b2bcdd0738ab3b9210b4b9f10d0ff9dcf27f16
|
| 3 |
-
size 49861
|
|
|
|
|
|
|
|
|
|
|
|
ckpt/intent/model_checkpoint.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c69530b46831942e17a75192a402d6a699d7de48340fbf336f964277742af95
|
| 3 |
+
size 74048714
|
data/intent/eval.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/intent/test.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/intent/train.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/slot/eval.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/slot/test.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/slot/train.json
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
dataset.py
DELETED
|
@@ -1,74 +0,0 @@
|
|
| 1 |
-
from typing import List, Dict
|
| 2 |
-
|
| 3 |
-
import torch
|
| 4 |
-
|
| 5 |
-
from torch.utils.data import Dataset
|
| 6 |
-
|
| 7 |
-
from utils import Vocab
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
class SeqClsDataset(Dataset):
|
| 11 |
-
def __init__(
|
| 12 |
-
self,
|
| 13 |
-
data: List[Dict],
|
| 14 |
-
vocab: Vocab,
|
| 15 |
-
label_mapping: Dict[str, int],
|
| 16 |
-
max_len: int,
|
| 17 |
-
):
|
| 18 |
-
self.data = data
|
| 19 |
-
self.vocab = vocab
|
| 20 |
-
self.label_mapping = label_mapping
|
| 21 |
-
self._idx2label = {idx: intent for intent, idx in self.label_mapping.items()}
|
| 22 |
-
self.max_len = max_len
|
| 23 |
-
|
| 24 |
-
def __len__(self) -> int:
|
| 25 |
-
return len(self.data)
|
| 26 |
-
|
| 27 |
-
def __getitem__(self, index) -> Dict:
|
| 28 |
-
instance = self.data[index]
|
| 29 |
-
return instance
|
| 30 |
-
|
| 31 |
-
@property
|
| 32 |
-
def num_classes(self) -> int:
|
| 33 |
-
return len(self.label_mapping)
|
| 34 |
-
|
| 35 |
-
def collate_fn(self, samples: List[Dict]) -> Dict:
|
| 36 |
-
# sample就是batch data
|
| 37 |
-
# collate_fn幫你把batch data編碼成詞彙的索引
|
| 38 |
-
# batch[0] = {'text': '~', 'intent': '~', 'id': 'train-0'}
|
| 39 |
-
|
| 40 |
-
# 提取所有樣本的文本數據和標籤數據
|
| 41 |
-
texts = samples["text"]
|
| 42 |
-
labels = samples["intent"]
|
| 43 |
-
|
| 44 |
-
# 使用 vocab 將文本數據轉換為整數索引序列,並指定最大長度
|
| 45 |
-
encoded_texts = self.vocab.encode_batch([text.split() for text in texts], to_len=self.max_len)
|
| 46 |
-
|
| 47 |
-
# 將標籤數據轉換為整數索引序列
|
| 48 |
-
encoded_labels = [self.label_mapping[label] for label in labels]
|
| 49 |
-
|
| 50 |
-
# 將整數索引序列轉換為 PyTorch 張量
|
| 51 |
-
encoded_text = torch.tensor(encoded_texts)
|
| 52 |
-
encoded_label = torch.tensor(encoded_labels)
|
| 53 |
-
|
| 54 |
-
# 創建批次數據字典
|
| 55 |
-
batch_data = {
|
| 56 |
-
"encoded_text": encoded_text,
|
| 57 |
-
"encoded_label": encoded_label
|
| 58 |
-
}
|
| 59 |
-
|
| 60 |
-
return batch_data
|
| 61 |
-
|
| 62 |
-
def label2idx(self, label: str):
|
| 63 |
-
return self.label_mapping[label]
|
| 64 |
-
|
| 65 |
-
def idx2label(self, idx: int):
|
| 66 |
-
return self._idx2label[idx]
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
class SeqTaggingClsDataset(SeqClsDataset):
|
| 70 |
-
ignore_idx = -100
|
| 71 |
-
|
| 72 |
-
def collate_fn(self, samples):
|
| 73 |
-
# TODO: implement collate_fn
|
| 74 |
-
raise NotImplementedError
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model.py
CHANGED
|
@@ -3,8 +3,7 @@ from typing import Dict
|
|
| 3 |
import torch
|
| 4 |
import torch.nn as nn
|
| 5 |
|
| 6 |
-
|
| 7 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 8 |
|
| 9 |
class SeqClassifier(nn.Module):
|
| 10 |
def __init__(
|
|
@@ -68,9 +67,3 @@ class SeqClassifier(nn.Module):
|
|
| 68 |
# 通過全連接層
|
| 69 |
logits = self.fc(combined_hidden_state)
|
| 70 |
return logits # 返回預測結果
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
class SeqTagger(SeqClassifier):
|
| 74 |
-
def forward(self, batch) -> Dict[str, torch.Tensor]:
|
| 75 |
-
# TODO: implement model forward
|
| 76 |
-
raise NotImplementedError
|
|
|
|
| 3 |
import torch
|
| 4 |
import torch.nn as nn
|
| 5 |
|
| 6 |
+
device = "cpu"
|
|
|
|
| 7 |
|
| 8 |
class SeqClassifier(nn.Module):
|
| 9 |
def __init__(
|
|
|
|
| 67 |
# 通過全連接層
|
| 68 |
logits = self.fc(combined_hidden_state)
|
| 69 |
return logits # 返回預測結果
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.in → requirements.txt
RENAMED
|
@@ -7,4 +7,5 @@ numpy
|
|
| 7 |
pandas
|
| 8 |
scikit-learn==1.1.2
|
| 9 |
transformers[torch]
|
| 10 |
-
datasets
|
|
|
|
|
|
| 7 |
pandas
|
| 8 |
scikit-learn==1.1.2
|
| 9 |
transformers[torch]
|
| 10 |
+
datasets
|
| 11 |
+
huggingface_hub
|
seafoam.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
from typing import Iterable
|
| 3 |
+
import gradio as gr
|
| 4 |
+
from gradio.themes.base import Base
|
| 5 |
+
from gradio.themes.utils import colors, fonts, sizes
|
| 6 |
+
import time
|
| 7 |
+
|
| 8 |
+
class Seafoam(Base):
|
| 9 |
+
def __init__(
|
| 10 |
+
self,
|
| 11 |
+
*,
|
| 12 |
+
primary_hue: colors.Color | str = colors.emerald,
|
| 13 |
+
secondary_hue: colors.Color | str = colors.blue,
|
| 14 |
+
neutral_hue: colors.Color | str = colors.blue,
|
| 15 |
+
spacing_size: sizes.Size | str = sizes.spacing_md,
|
| 16 |
+
radius_size: sizes.Size | str = sizes.radius_md,
|
| 17 |
+
text_size: sizes.Size | str = sizes.text_lg,
|
| 18 |
+
font: fonts.Font
|
| 19 |
+
| str
|
| 20 |
+
| Iterable[fonts.Font | str] = (
|
| 21 |
+
fonts.GoogleFont("Quicksand"),
|
| 22 |
+
"ui-sans-serif",
|
| 23 |
+
"sans-serif",
|
| 24 |
+
),
|
| 25 |
+
font_mono: fonts.Font
|
| 26 |
+
| str
|
| 27 |
+
| Iterable[fonts.Font | str] = (
|
| 28 |
+
fonts.GoogleFont("IBM Plex Mono"),
|
| 29 |
+
"ui-monospace",
|
| 30 |
+
"monospace",
|
| 31 |
+
),
|
| 32 |
+
):
|
| 33 |
+
super().__init__(
|
| 34 |
+
primary_hue=primary_hue,
|
| 35 |
+
secondary_hue=secondary_hue,
|
| 36 |
+
neutral_hue=neutral_hue,
|
| 37 |
+
spacing_size=spacing_size,
|
| 38 |
+
radius_size=radius_size,
|
| 39 |
+
text_size=text_size,
|
| 40 |
+
font=font,
|
| 41 |
+
font_mono=font_mono,
|
| 42 |
+
)
|
| 43 |
+
super().set(
|
| 44 |
+
body_background_fill="repeating-linear-gradient(45deg, *primary_200, *primary_200 10px, *primary_50 10px, *primary_50 20px)",
|
| 45 |
+
body_background_fill_dark="repeating-linear-gradient(45deg, *primary_800, *primary_800 10px, *primary_900 10px, *primary_900 20px)",
|
| 46 |
+
button_primary_background_fill="linear-gradient(90deg, *primary_300, *secondary_400)",
|
| 47 |
+
button_primary_background_fill_hover="linear-gradient(90deg, *primary_200, *secondary_300)",
|
| 48 |
+
button_primary_text_color="white",
|
| 49 |
+
button_primary_background_fill_dark="linear-gradient(90deg, *primary_600, *secondary_800)",
|
| 50 |
+
slider_color="*secondary_300",
|
| 51 |
+
slider_color_dark="*secondary_600",
|
| 52 |
+
block_title_text_weight="600",
|
| 53 |
+
block_border_width="3px",
|
| 54 |
+
block_shadow="*shadow_drop_lg",
|
| 55 |
+
button_shadow="*shadow_drop_lg",
|
| 56 |
+
button_large_padding="32px",
|
| 57 |
+
)
|
| 58 |
+
|