# Multi-Task NER + Intent + Language Model

This model performs three tasks simultaneously:

1. **Named Entity Recognition (NER)**: Extracts entities from B2B transaction descriptions
2. **Intent Classification**: Classifies transaction intent/purpose
3. **Language Detection**: Detects language (English, Russian, Uzbek Latin/Cyrillic, Mixed)

## Model Details

- Base model: `google-bert/bert-base-multilingual-uncased`
- Architecture: Enhanced multi-task model with BiLSTM for NER, attention pooling for classification
- Training: Optimized for realistic B2B transaction descriptions

## Supported Languages

- English (en)
- Russian (ru)
- Uzbek Latin (uz_latn)
- Uzbek Cyrillic (uz_cyrl)
- Mixed language text

## Usage

```python
import torch
import torch.nn as nn
import numpy as np
import json
from transformers import AutoTokenizer, AutoModel
from huggingface_hub import hf_hub_download

# Download model files
model_id = "primel/aibanov"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Download label mappings
mappings_file = hf_hub_download(repo_id=model_id, filename="label_mappings.json")
with open(mappings_file, "r") as f:
    label_mappings = json.load(f)

id2tag = {int(k): v for k, v in label_mappings["id2tag"].items()}
id2intent = {int(k): v for k, v in label_mappings["id2intent"].items()}
id2lang = {int(k): v for k, v in label_mappings["id2lang"].items()}

# Define model architecture (same as training)
class EnhancedMultiTaskModel(nn.Module):
    # ...
    # (copy the model class from the training script)
    pass

# Load model
base_bert = "google-bert/bert-base-multilingual-uncased"
model = EnhancedMultiTaskModel(
    model_name=base_bert,
    num_ner_labels=len(label_mappings["tag2id"]),
    num_intent_labels=len(label_mappings["intent2id"]),
    num_lang_labels=len(label_mappings["lang2id"]),
    dropout=0.15
)

# Load trained weights
weights_file = hf_hub_download(repo_id=model_id, filename="pytorch_model.bin")
state_dict = torch.load(weights_file, map_location='cpu')
model.load_state_dict(state_dict)
model.eval()

# Inference
text = "Оплата 100% за товары согласно договору №123 от 15.01.2025г ИНН 987654321"
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=192)

with torch.no_grad():
    outputs = model(**inputs)

# Process outputs
ner_logits = outputs['ner_logits'][0].numpy()
intent_logits = outputs['intent_logits'][0].numpy()
lang_logits = outputs['lang_logits'][0].numpy()

# Get predictions
intent_id = np.argmax(intent_logits)
intent = id2intent[intent_id]
print(f"Intent: {intent}")

lang_id = np.argmax(lang_logits)
language = id2lang[lang_id]
print(f"Language: {language}")
```

## License

Apache 2.0

## Citation

```bibtex
@misc{aibanov2025,
  author = {primel},
  title = {Multi-Task NER Intent Language Model},
  year = {2025},
  url = {https://huggingface.co/primel/aibanov}
}
```