---
library_name: transformers
base_model:
- Qwen/Qwen3-0.6B
---
# Model Overview

This model is a multilingual Named Entity Recognition (NER) transformer designed for name and address entity extraction in a Malaysian context.

It supports the following languages:

- English
- Malay
- Chinese
- Tamil

The model is built on top of Qwen3 (Qwen3-0.6B) and uses a custom non-causal attention mechanism.
## Predicted Classes

- 0: Non-entity token
- 1: Name entity
- 2: Address entity

## Transformer Inference Example

```python
import re
from typing import Optional

import torch
from transformers import AutoTokenizer, Qwen3ForTokenClassification, AttentionInterface
def register_fa_attention():
    """Register a non-causal FlashAttention forward under the name "fa_noncausal".

    Imports flash_attn lazily (requires a CUDA GPU) and registers a forward
    function that disables the causal mask, supporting both padded batches and
    packed variable-length sequences.
    """
    from flash_attn import flash_attn_func, flash_attn_varlen_func

    def _noncausal_forward(
        module: AttentionInterface,
        query: torch.Tensor,
        key: torch.Tensor,
        value: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        **kwargs,
    ):
        # Packed-sequence (varlen) metadata, present only when the caller
        # flattened the batch into one long sequence.
        cu_q = kwargs.get("cu_seqlens_q")
        cu_k = kwargs.get("cu_seqlens_k")
        max_q = kwargs.get("max_seqlen_q")
        max_k = kwargs.get("max_seqlen_k")

        # (batch, n_heads, seq_len, head_dim) -> (batch, seq_len, n_heads, head_dim),
        # the layout flash_attn expects.
        q = query.permute(0, 2, 1, 3)
        k = key.permute(0, 2, 1, 3)
        v = value.permute(0, 2, 1, 3)

        if cu_q is not None and cu_k is not None:
            # Varlen path: squeeze the (assumed size-1) batch dim so tensors are
            # (total_tokens, n_heads, head_dim) — TODO confirm batch==1 upstream.
            out = flash_attn_varlen_func(
                q=q.squeeze(0),
                k=k.squeeze(0),
                v=v.squeeze(0),
                cu_seqlens_q=cu_q,
                cu_seqlens_k=cu_k,
                max_seqlen_q=max_q,
                max_seqlen_k=max_k,
                causal=False,
            )
        else:
            # Dense path: every token attends to every token (causal=False).
            out = flash_attn_func(q, k, v, causal=False)

        # Second element (attention weights) is not computed by flash_attn.
        return out, None

    AttentionInterface.register("fa_noncausal", _noncausal_forward)
# Register the custom non-causal FlashAttention kernel (FA2/FA3 both work).
# NOTE: flash_attn requires a CUDA GPU — there is no CPU fallback.
register_fa_attention()
def tokenize_sentence_to_word(sentence: str):
    """Split *sentence* into word-level tokens, character-level for Chinese.

    Whitespace-delimited chunks are kept whole, except that any chunk
    containing at least one CJK Unified Ideograph (U+4E00–U+9FFF) is broken
    into individual characters, since Chinese text has no space-delimited
    word boundaries.
    """
    han = re.compile(r'[\u4e00-\u9fff]')
    tokens = []
    for chunk in sentence.split():
        # A single Han character anywhere in the chunk triggers char-level split.
        tokens.extend(chunk) if han.search(chunk) else tokens.append(chunk)
    return tokens
|
|
|
|
|
# Load the tokenizer and the token-classification head, selecting the
# non-causal FlashAttention implementation registered above.
tokenizer = AutoTokenizer.from_pretrained("Scicom-intl/multilingual-dynamic-entity-decoder")
model = Qwen3ForTokenClassification.from_pretrained(
    "Scicom-intl/multilingual-dynamic-entity-decoder",
    attn_implementation="fa_noncausal",
    dtype=torch.bfloat16,
    device_map={"": "cuda:0"},
)

# Pre-split into words (characters for Chinese) so is_split_into_words=True
# keeps token predictions aligned with word boundaries.
words = tokenize_sentence_to_word("Hi, my name is Alex and I'm from Perlis")
inputs = tokenizer(
    words,
    is_split_into_words=True,
    return_tensors="pt",
).to(model.device)

# FIX: original example had `toch.no_grad()` (typo) and passed `**inputs`
# while the tokenizer output was bound to a different name (NameError).
with torch.no_grad():
    outputs = model(**inputs)

# Per-token class ids: 0 = non-entity, 1 = name, 2 = address.
predictions = outputs.logits.argmax(dim=-1)
print(predictions)
```
## Evaluation Result

- F1 macro: 0.81