"""AIBA BERT-BiLSTM multi-task model architecture.

This file contains the model architecture for the AIBA multi-task model,
plus helpers for loading a trained checkpoint either from a local directory
or from the Hugging Face Hub.
"""

import json
from pathlib import Path

import torch
import torch.nn as nn
from transformers import AutoModel


def _resolve_model_file(model_path, filename):
    """Return a filesystem path to ``filename`` for the given model location.

    Args:
        model_path: Local directory or Hugging Face Hub repo ID.
        filename: Name of the file inside the directory / repository.

    Returns:
        Path (as ``str``) of a file on the local filesystem.

    If ``model_path/filename`` exists locally it is used as-is; in every other
    case the call is delegated to ``hf_hub_download`` exactly as before, so
    repo IDs keep working unchanged and hub errors propagate to the caller.
    """
    local_candidate = Path(model_path) / filename
    if local_candidate.is_file():
        return str(local_candidate)

    # Imported lazily so purely local loading does not touch the hub client.
    from huggingface_hub import hf_hub_download

    return hf_hub_download(repo_id=model_path, filename=filename)


def _load_config(model_path):
    """Load and parse ``config.json`` for the given model location."""
    config_path = _resolve_model_file(model_path, "config.json")
    # Explicit encoding: the platform default is not guaranteed to be UTF-8.
    with open(config_path, "r", encoding="utf-8") as f:
        return json.load(f)


class EnhancedMultiTaskModel(nn.Module):
    """Multi-task model built on a shared BERT encoder.

    Tasks:
        - Named Entity Recognition (NER), token-level
        - Intent classification, sequence-level
        - Language detection, sequence-level

    Architecture: BERT -> LayerNorm, then
        - BiLSTM -> dropout -> linear for NER;
        - attention pooling -> shared dense layer -> two classification heads
          for intent and language.
    """

    def __init__(self, config):
        """Build the encoder and all task heads.

        Args:
            config: Mapping with the required keys ``num_ner_labels``,
                ``num_intent_labels`` and ``num_lang_labels`` and the optional
                keys ``base_model`` (default
                ``"google-bert/bert-base-multilingual-uncased"``),
                ``lstm_hidden`` (default 256) and ``dropout`` (default 0.15).

        Raises:
            KeyError: If one of the required label-count keys is missing.
        """
        super().__init__()

        # Load BERT base model
        self.bert = AutoModel.from_pretrained(
            config.get("base_model", "google-bert/bert-base-multilingual-uncased")
        )
        hidden_size = self.bert.config.hidden_size

        # Layer normalization applied to the encoder output
        self.layer_norm = nn.LayerNorm(hidden_size)

        # NER head with BiLSTM. Each direction gets half of `lstm_hidden`.
        lstm_hidden = config.get("lstm_hidden", 256)
        lstm_per_direction = lstm_hidden // 2
        self.ner_lstm = nn.LSTM(
            hidden_size,
            lstm_per_direction,
            num_layers=1,
            bidirectional=True,
            batch_first=True,
            dropout=0,
        )

        dropout = config.get("dropout", 0.15)
        self.ner_dropout = nn.Dropout(dropout)
        # Size the classifier from what the BiLSTM really emits. For even
        # `lstm_hidden` this equals `lstm_hidden`; for odd values the previous
        # `lstm_hidden` input size did not match the LSTM output and crashed
        # in forward().
        self.ner_classifier = nn.Linear(
            2 * lstm_per_direction, config["num_ner_labels"]
        )

        # Attention pooling for sequence-level tasks: one score per token,
        # normalised over the sequence dimension (dim=1).
        self.attention_pool = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 4),
            nn.Tanh(),
            nn.Linear(hidden_size // 4, 1),
            nn.Softmax(dim=1),
        )

        # Shared representation layer feeding both sequence-level heads
        self.shared_dense = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.GELU(),
            nn.LayerNorm(hidden_size // 2),
            nn.Dropout(dropout),
        )

        # Intent classification head
        self.intent_classifier = nn.Sequential(
            nn.Linear(hidden_size // 2, hidden_size // 4),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 4, hidden_size // 8),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 8, config["num_intent_labels"]),
        )

        # Language detection head
        self.lang_classifier = nn.Sequential(
            nn.Linear(hidden_size // 2, hidden_size // 4),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 4, config["num_lang_labels"]),
        )

        # Store label counts
        self.num_ner_labels = config["num_ner_labels"]
        self.num_intent_labels = config["num_intent_labels"]
        self.num_lang_labels = config["num_lang_labels"]

    def forward(self, input_ids, attention_mask=None, token_type_ids=None,
                labels_ner=None, labels_intent=None, labels_lang=None):
        """Run the encoder and all three task heads.

        Args:
            input_ids: Input token IDs.
            attention_mask: Attention mask passed to BERT (optional).
            token_type_ids: Token type IDs passed to BERT (optional).
            labels_ner: Accepted for interface compatibility; not used here.
            labels_intent: Accepted for interface compatibility; not used here.
            labels_lang: Accepted for interface compatibility; not used here.

        Returns:
            dict with keys ``'ner_logits'`` (one row of logits per token),
            ``'intent_logits'`` and ``'lang_logits'`` (one row per sequence).
            No loss is computed by this method; callers that need a loss must
            derive it from the returned logits.
        """
        # BERT encoding
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        sequence_output = self.layer_norm(outputs.last_hidden_state)

        # NER predictions (token-level)
        ner_lstm_out, _ = self.ner_lstm(sequence_output)
        ner_logits = self.ner_classifier(self.ner_dropout(ner_lstm_out))

        # Attention pooling for sequence-level tasks.
        # NOTE(review): the pooling weights are a softmax over *all*
        # positions; `attention_mask` is not applied here, so padded positions
        # also receive weight. Left unchanged because altering it would change
        # the outputs of already-trained checkpoints -- confirm intent.
        attention_weights = self.attention_pool(sequence_output)
        pooled_output = torch.sum(sequence_output * attention_weights, dim=1)

        # Shared representation
        shared_repr = self.shared_dense(pooled_output)

        # Intent and language predictions
        intent_logits = self.intent_classifier(shared_repr)
        lang_logits = self.lang_classifier(shared_repr)

        return {
            'ner_logits': ner_logits,
            'intent_logits': intent_logits,
            'lang_logits': lang_logits,
        }

    @classmethod
    def from_pretrained(cls, model_path):
        """Load a pretrained model from a local directory or the Hugging Face Hub.

        Args:
            model_path: Local directory or repo ID
                (e.g., 'username/aiba-bert-bilstm'). The location must provide
                ``config.json`` and either ``model.safetensors`` or
                ``pytorch_model.bin``.

        Returns:
            Loaded model, with weights on CPU when read from
            ``pytorch_model.bin``.
        """
        config = _load_config(model_path)

        # Initialize model
        model = cls(config)

        # Load weights
        try:
            # Try safetensors first
            weights_path = _resolve_model_file(model_path, "model.safetensors")
            from safetensors.torch import load_file

            state_dict = load_file(weights_path)
        except Exception:
            # Best-effort fallback to pytorch_model.bin (file missing,
            # safetensors not installed, ...). `Exception` rather than a bare
            # `except` so Ctrl-C / SystemExit are not swallowed.
            weights_path = _resolve_model_file(model_path, "pytorch_model.bin")
            # NOTE(review): torch.load unpickles the file; only use with
            # trusted repositories, or pass `weights_only=True` if the
            # minimum supported torch version allows it.
            state_dict = torch.load(weights_path, map_location='cpu')

        model.load_state_dict(state_dict)
        return model


def load_model_and_tokenizer(model_path):
    """Convenience function to load model, tokenizer, and config.

    Args:
        model_path: Local directory or repo ID
            (e.g., 'username/aiba-bert-bilstm').

    Returns:
        tuple: (model, tokenizer, config_dict). The model is returned in
        evaluation mode; ``config_dict`` is the parsed ``config.json``.
    """
    from transformers import AutoTokenizer

    # Load config
    config = _load_config(model_path)

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Load model
    model = EnhancedMultiTaskModel.from_pretrained(model_path)
    model.eval()

    return model, tokenizer, config