gbyuvd
/

FastChemTokenizer

Feature Extraction

Model card Files Files and versions

gbyuvd committed on Sep 20, 2025

Commit

097a367

·

verified ·

1 Parent(s): d7be213

Tensor handling fix

Files changed (2) hide show

CHANGELOG +2 -1
FastChemTokenizer.py +6 -6

CHANGELOG CHANGED Viewed

@@ -1,4 +1,5 @@
 [20 Sept 2025]
 - Add basic SELFIES tokenizer function
 - Upload both core and tailed SELFIES vocab
-- Update README to include SELFIES evals

 [20 Sept 2025]
 - Add basic SELFIES tokenizer function
 - Upload both core and tailed SELFIES vocab
+- Update README to include SELFIES evals
+- Handle both tensor and non-tensor items properly

FastChemTokenizer.py CHANGED Viewed

@@ -271,9 +271,9 @@ class FastChemTokenizer:
         if kwargs.get("return_tensors") == "pt":
             def to_tensor_list(lst):
-                # Use torch.tensor for safety — avoids "copy construct from tensor" warning
-                return [torch.tensor(item, dtype=torch.long) for item in lst]
             batched = {
                 k: torch.nn.utils.rnn.pad_sequence(
                     to_tensor_list(v),
@@ -570,9 +570,9 @@ class FastChemTokenizerSelfies:
         if kwargs.get("return_tensors") == "pt":
             def to_tensor_list(lst):
-                # Use torch.tensor for safety — avoids "copy construct from tensor" warning
-                return [torch.tensor(item, dtype=torch.long) for item in lst]
             batched = {
                 k: torch.nn.utils.rnn.pad_sequence(
                     to_tensor_list(v),

         if kwargs.get("return_tensors") == "pt":
             def to_tensor_list(lst):
+                # Fixed: Handle both tensor and non-tensor items properly
+                return [item.clone().detach() if isinstance(item, torch.Tensor)
+                    else torch.tensor(item, dtype=torch.long) for item in lst]
             batched = {
                 k: torch.nn.utils.rnn.pad_sequence(
                     to_tensor_list(v),
         if kwargs.get("return_tensors") == "pt":
             def to_tensor_list(lst):
+                # Fixed: Handle both tensor and non-tensor items properly
+                return [item.clone().detach() if isinstance(item, torch.Tensor)
+                    else torch.tensor(item, dtype=torch.long) for item in lst]
             batched = {
                 k: torch.nn.utils.rnn.pad_sequence(
                     to_tensor_list(v),