add the inference file

Browse files

Files changed (4) hide show

inference.py +115 -0
model.py +1 -1
pytorch_model.bin +2 -2
requirements.txt +3 -1

inference.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import os
+import json
+import torch
+import numpy as np
+from transformers import BertTokenizer
+from model import ImprovedBERTClass  # Ensure this import matches your model file name
+from sklearn.preprocessing import OneHotEncoder
+def handler(data, context):
+    """Handle incoming requests to the SageMaker endpoint."""
+    if context.request_content_type != 'application/json':
+        raise ValueError("This model only supports application/json input")
+    # Set up device
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # Load model and tokenizer (consider caching these for better performance)
+    model, tokenizer = load_model_and_tokenizer(context)
+    # Process the input data
+    input_data = json.loads(data.read().decode('utf-8'))
+    query = input_data.get('text', '')
+    k = input_data.get('k', 3)  # Default to top 3 if not specified
+    # Tokenize and prepare the input
+    inputs = tokenizer.encode_plus(
+        query,
+        add_special_tokens=True,
+        max_length=64,
+        padding='max_length',
+        return_tensors='pt',
+        truncation=True
+    )
+    ids = inputs['input_ids'].to(device, dtype=torch.long)
+    mask = inputs['attention_mask'].to(device, dtype=torch.long)
+    token_type_ids = inputs['token_type_ids'].to(device, dtype=torch.long)
+    # Make the prediction
+    model.eval()
+    with torch.no_grad():
+        outputs = model(ids, mask, token_type_ids)
+    # Apply sigmoid for multi-label classification
+    probabilities = torch.sigmoid(outputs)
+    # Convert to numpy array
+    probabilities = probabilities.cpu().detach().numpy().flatten()
+    # Get top k predictions
+    top_k_indices = np.argsort(probabilities)[-k:][::-1]
+    top_k_probs = probabilities[top_k_indices]
+    # Create one-hot encodings for top k indices
+    top_k_one_hot = np.zeros((k, len(probabilities)))
+    for i, idx in enumerate(top_k_indices):
+        top_k_one_hot[i, idx] = 1
+    # Decode the top k predictions
+    top_k_cards = [decode_vector(one_hot.reshape(1, -1)) for one_hot in top_k_one_hot]
+    # Create a list of tuples (card, probability) for top k predictions
+    top_k_predictions = list(zip(top_k_cards, top_k_probs.tolist()))
+    # Determine the most likely card
+    predicted_labels = (probabilities > 0.5).astype(int)
+    if sum(predicted_labels) == 0:
+        most_likely_card = "Answer"
+    else:
+        most_likely_card = decode_vector(predicted_labels.reshape(1, -1))
+    # Prepare the response
+    result = {
+        "most_likely_card": most_likely_card,
+        "top_k_predictions": top_k_predictions
+    }
+    return json.dumps(result), 'application/json'
+def load_model_and_tokenizer(context):
+    """Load the PyTorch model and tokenizer."""
+    global global_encoder
+    labels = ['Videos', 'Unit Conversion', 'Translation', 'Shopping Product Comparison', 'Restaurants', 'Product', 'Information', 'Images', 'Gift', 'General Comparison', 'Flights', 'Answer', 'Aircraft Seat Map']
+    model_dir = context.model_dir if hasattr(context, 'model_dir') else os.environ.get('SM_MODEL_DIR', '/opt/ml/model')
+    # Load config and model
+    config_path = os.path.join(model_dir, 'config.json')
+    model_path = os.path.join(model_dir, 'pytorch_model.bin')
+    with open(config_path, 'r') as f:
+        config = json.load(f)
+    # Initialize the encoder and labels
+    global_labels = labels
+    labels_np = np.array(global_labels).reshape(-1, 1)
+    global_encoder = OneHotEncoder(sparse_output=False)
+    global_encoder.fit(labels_np)
+    model = ImprovedBERTClass()
+    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
+    model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
+    model.eval()
+    # Load tokenizer
+    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+    return model, tokenizer
+def decode_vector(vector):
+    global global_encoder
+    original_label = global_encoder.inverse_transform(vector)
+    return original_label[0][0]  # Returns the label as a string

model.py CHANGED Viewed

@@ -1,8 +1,8 @@
 import transformers
 import torch
-from transformers import BertTokenizer, BertModel, BertConfig
 import torch.nn as nn
 class AttentionPool(nn.Module):
     def __init__(self, hidden_size):

 import transformers
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 class AttentionPool(nn.Module):
     def __init__(self, hidden_size):

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb04d745e5cccb78ac9c40836014b9ea1a861e4b435c798ddd27cd8f2514ef5e
-size 438063716

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a3e55ee4b24285f21c615afd035ed1a89ed9016ff73dbe669313a643b5b5250
+size 438062398

requirements.txt CHANGED Viewed

@@ -1,2 +1,4 @@
 torch==1.9.0
-transformers==4.11.3

+numpy==1.21.0
 torch==1.9.0
+transformers==4.9.2
+scikit-learn==0.24.2