dariadaria
/

reviews_classifier

Text Classification

text-embeddings-inference

Model card · Files and versions

dariadaria committed on Aug 10, 2023

Commit

f93531b

·

1 Parent(s): 1aec583

handler without datasets

Files changed (1) hide show

handler.py +9 -10

handler.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from typing import Dict, List, Any
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
-from datasets import Dataset
 class EndpointHandler:
@@ -30,7 +29,7 @@ class EndpointHandler:
         """
         topics = data.pop("topics", data)
         texts = data.pop("texts", data)
-        batch_dict = {
             'id': [],
             'text': [],
             'topic': []
@@ -38,16 +37,16 @@ class EndpointHandler:
         for topic in topics:
           for text in texts:
-            batch_dict['id'].append(text['id'])
-            batch_dict['text'].append(text['text'])
-            batch_dict['topic'].append(topic)
-        batch = Dataset.from_dict(batch_dict)
         tokenized_inputs = self.tokenize(batch)
         # run normal prediction
         output = self.model(**tokenized_inputs)
-        batch = batch.add_column('predictions', torch.argmax(output.logits, dim=-1).numpy(force=True))
-        batch = batch.map(lambda b: {'label': [self.model.config.id2label[p] for p in b['predictions']]}, batched=True, remove_columns=['text', 'predictions'])
-        return batch.to_dict()

 from typing import Dict, List, Any
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 class EndpointHandler:
         """
         topics = data.pop("topics", data)
         texts = data.pop("texts", data)
+        batch = {
             'id': [],
             'text': [],
             'topic': []
         for topic in topics:
           for text in texts:
+            batch['id'].append(text['id'])
+            batch['text'].append(text['text'])
+            batch['topic'].append(topic)
         tokenized_inputs = self.tokenize(batch)
         # run normal prediction
         output = self.model(**tokenized_inputs)
+        predictions = torch.argmax(output.logits, dim=-1).numpy(force=True)
+        batch['label'] = [self.model.config.id2label[p] for p in predictions]
+        batch.pop('text')
+        return batch