submission-template

Sleeping

hbanduk committed on Jan 31

Commit

371a733

verified ·

1 Parent(s): 739d55a

Update tasks/text.py

Files changed (1) hide show

tasks/text.py CHANGED Viewed

@@ -68,23 +68,38 @@ async def evaluate_text(request: TextEvaluationRequest):
     # Load the ONNX model and tokenizer
     MODEL_REPO = "ClimateDebunk/Quantized_DistilBertForSequenceClassification"
     MODEL_FILENAME = "distilbert_quantized_dynamic.onnx"
-    MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
-    tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
-    ort_session = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])
     # Preprocess the text data
     def preprocess(texts):
-        return tokenizer(
             texts,
-            padding=True,
             truncation=True,
             max_length=365,
             return_tensors="np"
         )
     # Run inference
     def predict(texts):
         inputs = preprocess(texts)
         ort_inputs = {
             "input_ids": inputs["input_ids"].astype(np.int64),

     # Load the ONNX model and tokenizer
     MODEL_REPO = "ClimateDebunk/Quantized_DistilBertForSequenceClassification"
     MODEL_FILENAME = "distilbert_quantized_dynamic.onnx"
+    try:
+        MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
+        print(f"Model successfully downloaded at: {MODEL_PATH}")
+        tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
+        print("Tokenizer loaded successfully!")
+        ort_session = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])
+        print("ONNX session initialized successfully!")
+    except Exception as e:
+        print(f"Error loading ONNX model: {e}")
     # Preprocess the text data
     def preprocess(texts):
+        print(f"📌 Preprocessing {len(texts)} text samples...")
+        inputs = tokenizer(
             texts,
+            padding='max_length',
             truncation=True,
             max_length=365,
             return_tensors="np"
         )
+        print(f"Tokenized input_ids shape: {inputs['input_ids'].shape}")
+        print(f"Tokenized attention_mask shape: {inputs['attention_mask'].shape}")
+        return inputs
     # Run inference
     def predict(texts):
+        print(f"📌 Running inference on {len(texts)} samples...")
         inputs = preprocess(texts)
         ort_inputs = {
             "input_ids": inputs["input_ids"].astype(np.int64),