---
library_name: transformers
base_model:
- Qwen/Qwen3-0.6B
---

# Model Overview

This model is a multilingual Named Entity Recognition (NER) transformer designed for name and address entity extraction in a Malaysian context. It supports the following languages:

- English
- Malay
- Chinese
- Tamil

The model is built on top of Qwen3 (Qwen3-0.6B) and uses a custom non-causal attention mechanism.

## Predicted Classes

- 0: Non-entity token
- 1: Name entity
- 2: Address entity

## Transformer Inference Example

```python
import re
from typing import Optional

import torch
from transformers import AutoTokenizer, Qwen3ForTokenClassification, AttentionInterface


def register_fa_attention():
    from flash_attn import flash_attn_func, flash_attn_varlen_func

    def custom_attention_forward(
        module: torch.nn.Module,
        query: torch.Tensor,
        key: torch.Tensor,
        value: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        **kwargs,
    ):
        cu_seqlens_q = kwargs.get("cu_seqlens_q", None)
        cu_seqlens_k = kwargs.get("cu_seqlens_k", None)
        max_seqlen_q = kwargs.get("max_seqlen_q", None)
        max_seqlen_k = kwargs.get("max_seqlen_k", None)

        # Permute query, key, value from (batch, n_heads, seq_len, head_dim)
        # to (batch, seq_len, n_heads, head_dim), the layout flash-attn expects
        query_permute = query.permute(0, 2, 1, 3)
        key_permute = key.permute(0, 2, 1, 3)
        value_permute = value.permute(0, 2, 1, 3)

        if cu_seqlens_q is not None and cu_seqlens_k is not None:
            # Variable-length (packed) path: sequences are concatenated and
            # delimited by cumulative sequence lengths
            attn_output = flash_attn_varlen_func(
                q=query_permute.squeeze(0),
                k=key_permute.squeeze(0),
                v=value_permute.squeeze(0),
                cu_seqlens_q=cu_seqlens_q,
                cu_seqlens_k=cu_seqlens_k,
                max_seqlen_q=max_seqlen_q,
                max_seqlen_k=max_seqlen_k,
                causal=False,
            )
        else:
            attn_output = flash_attn_func(
                query_permute,
                key_permute,
                value_permute,
                causal=False,
            )
        return attn_output, None

    AttentionInterface.register("fa_noncausal", custom_attention_forward)


# Register the custom non-causal flash attention (FA2/FA3 both work); requires a GPU
register_fa_attention()


def tokenize_sentence_to_word(sentence: str):
    tokens = []
    chinese_char_pattern = re.compile(r"[\u4e00-\u9fff]")

    # Split the text on whitespace first
    for part in sentence.split():
        if chinese_char_pattern.search(part):
            # Character-level tokenization for Chinese
            tokens.extend(list(part))
        else:
            # Word-level tokenization for other languages
            tokens.append(part)
    return tokens


tokenizer = AutoTokenizer.from_pretrained("Scicom-intl/multilingual-dynamic-entity-decoder")
model = Qwen3ForTokenClassification.from_pretrained(
    "Scicom-intl/multilingual-dynamic-entity-decoder",
    attn_implementation="fa_noncausal",
    dtype=torch.bfloat16,
    device_map={"": "cuda:0"},
)

word_tokens = tokenize_sentence_to_word("Hi, my name is Alex and I'm from Perlis")
inputs = tokenizer(
    word_tokens,
    is_split_into_words=True,
    return_tensors="pt",
).to(model.device)

with torch.no_grad():
    outputs = model(**inputs)

predictions = outputs.logits.argmax(dim=-1)
print(predictions)
```

## Evaluation Result

- Macro F1: 0.81
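## Aligning Predictions to Words

The model predicts one class id per sub-token, so `predictions` is longer than the word list returned by `tokenize_sentence_to_word`. Below is a minimal post-processing sketch (an illustration, not part of the model card) that uses the fast tokenizer's `word_ids()` to keep the first sub-token's prediction for each word; the `id2label` names are placeholders derived from the class list above.

```python
# Post-processing sketch: map sub-token predictions back to words.
# The label names are illustrative placeholders, not shipped with the model.
id2label = {0: "O", 1: "NAME", 2: "ADDRESS"}

word_ids = inputs.word_ids(batch_index=0)  # word index per sub-token (None = special token)
pred_ids = predictions[0].tolist()

labelled_words = []
previous_word = None
for word_idx, pred_id in zip(word_ids, pred_ids):
    # Skip special tokens and sub-word continuations: keep only the
    # first sub-token's prediction for each word
    if word_idx is None or word_idx == previous_word:
        continue
    labelled_words.append((word_tokens[word_idx], id2label[pred_id]))
    previous_word = word_idx

print(labelled_words)
```

Other aggregation strategies (for example, a majority vote over a word's sub-tokens) work as well; taking the first sub-token is simply the most common convention.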
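## Running Without flash-attn

The inference example above requires the `flash-attn` package and a CUDA GPU. As a fallback, a non-causal kernel can be registered on top of `torch.nn.functional.scaled_dot_product_attention` instead. The sketch below is an assumption, not something provided with this model: unlike flash-attn, SDPA does not broadcast grouped-query KV heads automatically, so they are expanded manually.

```python
import torch
import torch.nn.functional as F
from transformers import AttentionInterface


def sdpa_noncausal_forward(module, query, key, value, attention_mask=None, scaling=None, **kwargs):
    # query/key/value arrive as (batch, n_heads, seq_len, head_dim).
    # Expand KV heads for grouped-query attention (Qwen3 uses GQA)
    if getattr(module, "num_key_value_groups", 1) > 1:
        key = key.repeat_interleave(module.num_key_value_groups, dim=1)
        value = value.repeat_interleave(module.num_key_value_groups, dim=1)

    attn_output = F.scaled_dot_product_attention(
        query, key, value,
        attn_mask=attention_mask,
        scale=scaling,
        is_causal=False,  # bidirectional, matching the flash-attn path above
    )
    # transformers expects (batch, seq_len, n_heads, head_dim)
    return attn_output.transpose(1, 2).contiguous(), None


AttentionInterface.register("sdpa_noncausal", sdpa_noncausal_forward)
```

Load the model with `attn_implementation="sdpa_noncausal"` (and without the CUDA `device_map` when running on CPU) to use this path.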