| | """ |
| | AIBA BERT-BiLSTM Multi-Task Model Architecture |
| | |
| | This file contains the model architecture for the AIBA multi-task model. |
| | """ |
| |
|
| | import torch |
| | import torch.nn as nn |
| | from transformers import AutoModel |
| |
|
| |
|
class EnhancedMultiTaskModel(nn.Module):
    """
    Multi-task model for:
    - Named Entity Recognition (NER)
    - Intent Classification
    - Language Detection

    Architecture: BERT encoder + BiLSTM token-level head, plus learned
    attention pooling feeding a shared dense trunk with intent and
    language classification heads.
    """

    def __init__(self, config):
        """
        Build the model from a plain-dict config.

        Args:
            config: dict with keys:
                - base_model: HF repo ID of the backbone
                  (default: 'google-bert/bert-base-multilingual-uncased')
                - lstm_hidden: total BiLSTM output size (default 256)
                - dropout: dropout probability (default 0.15)
                - num_ner_labels, num_intent_labels, num_lang_labels:
                  required head sizes
        """
        super().__init__()

        # Pretrained transformer backbone (downloads on first use).
        self.bert = AutoModel.from_pretrained(
            config.get("base_model", "google-bert/bert-base-multilingual-uncased")
        )
        hidden_size = self.bert.config.hidden_size

        # Normalize BERT outputs before the task heads.
        self.layer_norm = nn.LayerNorm(hidden_size)

        # Token-level NER head: each LSTM direction gets lstm_hidden // 2
        # units so the concatenated bidirectional output is lstm_hidden.
        # (dropout arg dropped: it is a no-op for num_layers=1.)
        lstm_hidden = config.get("lstm_hidden", 256)
        self.ner_lstm = nn.LSTM(
            hidden_size,
            lstm_hidden // 2,
            num_layers=1,
            bidirectional=True,
            batch_first=True,
        )
        dropout = config.get("dropout", 0.15)
        self.ner_dropout = nn.Dropout(dropout)
        self.ner_classifier = nn.Linear(lstm_hidden, config["num_ner_labels"])

        # Learned attention pooling over the sequence dimension.
        # NOTE(review): the softmax runs over ALL positions, so padding
        # tokens receive non-zero weight.  Left as-is deliberately —
        # masking here would change outputs of checkpoints trained with
        # this behavior.
        self.attention_pool = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 4),
            nn.Tanh(),
            nn.Linear(hidden_size // 4, 1),
            nn.Softmax(dim=1),
        )

        # Shared trunk for both sentence-level heads.
        self.shared_dense = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.GELU(),
            nn.LayerNorm(hidden_size // 2),
            nn.Dropout(dropout),
        )

        # Intent head: two hidden layers.
        self.intent_classifier = nn.Sequential(
            nn.Linear(hidden_size // 2, hidden_size // 4),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 4, hidden_size // 8),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 8, config["num_intent_labels"]),
        )

        # Language head: one hidden layer.
        self.lang_classifier = nn.Sequential(
            nn.Linear(hidden_size // 2, hidden_size // 4),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 4, config["num_lang_labels"]),
        )

        # Cached label counts (used to flatten logits for the NER loss).
        self.num_ner_labels = config["num_ner_labels"]
        self.num_intent_labels = config["num_intent_labels"]
        self.num_lang_labels = config["num_lang_labels"]

    def forward(self, input_ids, attention_mask=None, token_type_ids=None,
                labels_ner=None, labels_intent=None, labels_lang=None):
        """
        Forward pass.

        Args:
            input_ids: Input token IDs
            attention_mask: Attention mask
            token_type_ids: Token type IDs (optional)
            labels_ner: Per-token NER labels (optional; positions labeled
                -100 are ignored in the loss)
            labels_intent: Intent labels (optional, for training)
            labels_lang: Language labels (optional, for training)

        Returns:
            dict with 'ner_logits', 'intent_logits', 'lang_logits'; when
            any labels are provided, also 'loss' — the unweighted sum of
            the per-task cross-entropy losses.
        """
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )

        sequence_output = outputs.last_hidden_state
        sequence_output = self.layer_norm(sequence_output)

        # Token-level branch.
        ner_lstm_out, _ = self.ner_lstm(sequence_output)
        ner_output = self.ner_dropout(ner_lstm_out)
        ner_logits = self.ner_classifier(ner_output)

        # Sentence-level branch: attention-weighted sum over tokens.
        attention_weights = self.attention_pool(sequence_output)
        pooled_output = torch.sum(sequence_output * attention_weights, dim=1)

        shared_repr = self.shared_dense(pooled_output)

        intent_logits = self.intent_classifier(shared_repr)
        lang_logits = self.lang_classifier(shared_repr)

        result = {
            'ner_logits': ner_logits,
            'intent_logits': intent_logits,
            'lang_logits': lang_logits,
        }

        # BUGFIX: labels were previously accepted but silently discarded,
        # while the docstring promised a 'loss' key.  Compute it here.
        loss = None
        if labels_ner is not None:
            loss = nn.functional.cross_entropy(
                ner_logits.view(-1, self.num_ner_labels),
                labels_ner.view(-1),
                ignore_index=-100,
            )
        if labels_intent is not None:
            intent_loss = nn.functional.cross_entropy(intent_logits, labels_intent)
            loss = intent_loss if loss is None else loss + intent_loss
        if labels_lang is not None:
            lang_loss = nn.functional.cross_entropy(lang_logits, labels_lang)
            loss = lang_loss if loss is None else loss + lang_loss
        if loss is not None:
            result['loss'] = loss

        return result

    @classmethod
    def from_pretrained(cls, model_path):
        """
        Load pretrained model from Hugging Face Hub.

        Args:
            model_path: Path or repo ID (e.g., 'username/aiba-bert-bilstm')

        Returns:
            Loaded model
        """
        import json
        from huggingface_hub import hf_hub_download

        # Fetch and parse config.json from the repo.
        config_path = hf_hub_download(repo_id=model_path, filename="config.json")
        with open(config_path, 'r') as f:
            config = json.load(f)

        model = cls(config)

        # Prefer safetensors; fall back to the legacy pickle checkpoint.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
        try:
            weights_path = hf_hub_download(repo_id=model_path, filename="model.safetensors")
            from safetensors.torch import load_file
            state_dict = load_file(weights_path)
        except Exception:
            weights_path = hf_hub_download(repo_id=model_path, filename="pytorch_model.bin")
            # weights_only=True: never unpickle arbitrary objects from a
            # downloaded checkpoint — a plain state_dict doesn't need it.
            state_dict = torch.load(weights_path, map_location='cpu', weights_only=True)

        model.load_state_dict(state_dict)
        return model
| |
|
| |
|
def load_model_and_tokenizer(model_path):
    """
    Convenience function to load model, tokenizer, and label mappings

    Args:
        model_path: Path or repo ID (e.g., 'username/aiba-bert-bilstm')

    Returns:
        tuple: (model, tokenizer, config_dict)
    """
    import json
    from transformers import AutoTokenizer
    from huggingface_hub import hf_hub_download

    # Pull config.json from the repo and parse it.
    resolved_config_path = hf_hub_download(repo_id=model_path, filename="config.json")
    with open(resolved_config_path, 'r') as fh:
        config_dict = json.load(fh)

    # Tokenizer comes straight from the same repo/path.
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Architecture + weights, switched to inference mode.
    model = EnhancedMultiTaskModel.from_pretrained(model_path)
    model.eval()

    return model, tokenizer, config_dict
| |
|