velocity-ai
/

phi-3.5-address-validation-pretrained

@@ -1,8 +1,7 @@
 import os
 import json
 import torch
-import torch.nn as nn
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import logging
 logger = logging.getLogger(__name__)
@@ -12,22 +11,7 @@ logger = logging.getLogger(__name__)
 # Can specify GPU device with:
 # CUDA_VISIBLE_DEVICES="1" python script.py
-class PhiForSequenceClassification(nn.Module):
-    def __init__(self, base_model, num_labels=2):
-        super().__init__()
-        self.phi = base_model
-        # Create classifier with same dtype as base model
-        dtype = next(base_model.parameters()).dtype
-        self.classifier = nn.Linear(self.phi.config.hidden_size, num_labels, dtype=dtype)
-    def forward(self, **inputs):
-        outputs = self.phi(**inputs, output_hidden_states=True)
-        # Use the last hidden state of the last token for classification
-        last_hidden_state = outputs.hidden_states[-1][:, -1, :]
-        logits = self.classifier(last_hidden_state)
-        return type('Outputs', (), {'logits': logits})()
-def model_fn(model_dir, context=None):
     """Load the model for inference"""
     try:
         model_id = os.getenv("HF_MODEL_ID")
@@ -42,16 +26,19 @@ def model_fn(model_dir, context=None):
         # Load tokenizer
         tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-        # Load base model
-        base_model = AutoModelForCausalLM.from_pretrained(
             model_id,
             torch_dtype=torch.bfloat16 if device.type == 'cuda' else torch.float32,
             trust_remote_code=True
         )
-        # Create classification model
-        model = PhiForSequenceClassification(base_model, num_labels=2)
         # Move model to device
         model = model.to(device)
@@ -83,13 +70,22 @@ def predict_fn(data, model_dict):
         logger.info(f"Model is on device: {device}")
-        # Parse input
         if isinstance(data, str):
             input_text = data
         elif isinstance(data, dict):
-            input_text = data.get("inputs", data.get("text", str(data)))
         else:
             input_text = str(data)
         logger.debug(f"Parsed input text: {input_text}")
         # Create tensors directly on target device

 import os
 import json
 import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig
 import logging
 logger = logging.getLogger(__name__)
 # Can specify GPU device with:
 # CUDA_VISIBLE_DEVICES="1" python script.py
+def model_fn(model_dir):
     """Load the model for inference"""
     try:
         model_id = os.getenv("HF_MODEL_ID")
         # Load tokenizer
         tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+        # Load config
+        config = AutoConfig.from_pretrained(model_id,
+                                          num_labels=2,
+                                          trust_remote_code=True)
+        # Load model with sequence classification head
+        model = AutoModelForSequenceClassification.from_pretrained(
             model_id,
+            config=config,
             torch_dtype=torch.bfloat16 if device.type == 'cuda' else torch.float32,
             trust_remote_code=True
         )
         # Move model to device
         model = model.to(device)
         logger.info(f"Model is on device: {device}")
+        # Parse input and format it like training data
         if isinstance(data, str):
             input_text = data
         elif isinstance(data, dict):
+            # Extract address components
+            addr1 = data.get('order_address1', data.get('address_line_1', ''))
+            addr2 = data.get('order_address2', data.get('address_line_2', ''))
+            city = data.get('order_city', data.get('city', ''))
+            state = data.get('order_state', data.get('state', ''))
+            pincode = str(data.get('order_pincode', data.get('pincode', '')))
+            # Format exactly like training data
+            input_text = f"Address_line_1: {addr1} Address_line_2: {addr2} City: {city} State: {state} Pincode: {pincode}"
         else:
             input_text = str(data)
         logger.debug(f"Parsed input text: {input_text}")
         # Create tensors directly on target device