Spaces:

ryandt
/

Inverting-Embeddings

Configuration error

ryandt committed on Feb 5

Commit

f06d2ef

1 Parent(s): a261552

Removed streaming

Files changed (2) hide show

invert.py CHANGED Viewed

@@ -84,7 +84,7 @@ def _build_mask_token_ids(tokenizer: AutoTokenizer) -> list[int]:
     mask_ids = set()
     for s in _MASK_STRINGS:
-        tokens = tokenizer.encode(s, add_special_tokens=False)
         if len(tokens) == 1:
             mask_ids.add(tokens[0])
     if tokenizer.eos_token_id is not None:
@@ -244,7 +244,7 @@ def beam_search(
         Best candidate found during search.
     """
     prefix, suffix = get_chat_format(tokenizer)
-    prompt_tokens = tokenizer.encode(prompt, add_special_tokens=False)
     mask_ids = _build_mask_token_ids(tokenizer)
     candidates = [Candidate()]

     mask_ids = set()
     for s in _MASK_STRINGS:
+        tokens = list(tokenizer.encode(s, add_special_tokens=False))
         if len(tokens) == 1:
             mask_ids.add(tokens[0])
     if tokenizer.eos_token_id is not None:
         Best candidate found during search.
     """
     prefix, suffix = get_chat_format(tokenizer)
+    prompt_tokens = list(tokenizer.encode(prompt, add_special_tokens=False))
     mask_ids = _build_mask_token_ids(tokenizer)
     candidates = [Candidate()]

model.py CHANGED Viewed

@@ -82,7 +82,7 @@ def get_chat_format(tokenizer: AutoTokenizer) -> tuple[list[int], list[int]]:
         [{"role": "user", "content": "hello"}],
         add_generation_prompt=True,
     )
-    marker_tokens = tokenizer.encode("hello", add_special_tokens=False)
     # Find where the marker content appears in the full template
     marker_len = len(marker_tokens)

         [{"role": "user", "content": "hello"}],
         add_generation_prompt=True,
     )
+    marker_tokens = list(tokenizer.encode("hello", add_special_tokens=False))
     # Find where the marker content appears in the full template
     marker_len = len(marker_tokens)