| | """ |
| | Example inference script for AIBA BERT-BiLSTM model |
| | |
| | This script demonstrates how to use the model for prediction. |
| | """ |
| |
|
| | import torch |
| | from transformers import AutoTokenizer |
| | from nn_model import load_model_and_tokenizer |
| |
|
| |
|
def predict(text, model_path="YOUR_USERNAME/aiba-bert-bilstm"):
    """
    Make predictions on input text.

    Args:
        text: Input text to analyze
        model_path: Hugging Face model repo ID

    Returns:
        dict with extracted entities, intent, and language
        (plus per-task confidence scores)
    """
    model, tokenizer, config = load_model_and_tokenizer(model_path)
    # BiLSTM/dropout layers are stochastic in train mode; switch to eval
    # so repeated calls on the same text give deterministic predictions.
    model.eval()

    # Label maps arrive with string keys (JSON round-trip); restore int keys.
    id2tag = {int(k): v for k, v in config['id2tag'].items()}
    id2intent = {int(k): v for k, v in config['id2intent'].items()}
    id2lang = {int(k): v for k, v in config['id2lang'].items()}

    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=config['max_length'],
        return_offsets_mapping=True
    )
    # Offsets map each token to its character span in `text`; the model's
    # forward() does not accept this key, so pop it before the call.
    offset_mapping = inputs.pop('offset_mapping')[0]

    with torch.no_grad():
        outputs = model(**inputs)

    # Per-token NER tag ids for the single (batch=1) sequence.
    ner_predictions = torch.argmax(outputs['ner_logits'], dim=2)[0]
    tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])

    entities = _decode_bio_entities(
        text, tokens, ner_predictions, offset_mapping, id2tag
    )

    intent, intent_confidence = _top_label(outputs['intent_logits'][0], id2intent)
    language, lang_confidence = _top_label(outputs['lang_logits'][0], id2lang)

    return {
        'text': text,
        'entities': entities,
        'intent': intent,
        'intent_confidence': intent_confidence,
        'language': language,
        'language_confidence': lang_confidence
    }


def _decode_bio_entities(text, tokens, tag_ids, offset_mapping, id2tag):
    """Collapse per-token BIO tag predictions into character-span entities.

    Args:
        text: Original input string (spans are sliced out of it)
        tokens: Tokenizer tokens aligned with tag_ids
        tag_ids: 1-D tensor of predicted tag ids, one per token
        offset_mapping: (start, end) character offsets per token
        id2tag: Mapping from tag id to BIO tag string

    Returns:
        list of {'type': ..., 'value': ...} dicts in order of appearance
    """
    entities = []
    current_entity = None
    current_spans = []

    def _flush():
        # Emit the entity accumulated so far, using the first span's start
        # and the last span's end to slice the surface form from `text`.
        if current_entity:
            value = text[current_spans[0][0]:current_spans[-1][1]]
            entities.append({'type': current_entity, 'value': value})

    for token, pred_id, offset in zip(tokens, tag_ids, offset_mapping):
        # Special tokens carry no characters from the input; skip them.
        if token in ['[CLS]', '[SEP]', '[PAD]']:
            continue

        tag = id2tag[pred_id.item()]

        if tag.startswith('B-'):
            _flush()
            current_entity = tag[2:]
            current_spans = [offset.tolist()]
        elif tag.startswith('I-') and current_entity:
            current_spans.append(offset.tolist())
        else:
            _flush()
            current_entity = None
            current_spans = []

    # Entity may extend to the last real token; flush the tail.
    _flush()
    return entities


def _top_label(logits, id2label):
    """Return (label, softmax confidence) for the highest-scoring class.

    Args:
        logits: 1-D tensor of raw class scores
        id2label: Mapping from class id to label string
    """
    probs = torch.nn.functional.softmax(logits, dim=0)
    best_id = torch.argmax(probs).item()
    return id2label[best_id], probs[best_id].item()
| |
|
| |
|
| | |
if __name__ == "__main__":
    # Demo sentences — one per supported language (Uzbek, Russian, English).
    examples = [
        "Qabul qiluvchi Omad Biznes MCHJ STIR 123456789 summa 500000 UZS",
        "Получатель ООО Прогресс ИНН 987654321 сумма 1000000 руб",
        "Transfer 5000 USD to Starlight Ventures LLC TIN 555666777"
    ]

    for sample in examples:
        print(f"\nInput: {sample}")
        prediction = predict(sample)
        # Report classification heads first, then the extracted spans.
        print(f"Intent: {prediction['intent']} ({prediction['intent_confidence']:.2%})")
        print(f"Language: {prediction['language']} ({prediction['language_confidence']:.2%})")
        print("Entities:")
        for found in prediction['entities']:
            print(f" - {found['type']}: {found['value']}")
|