loubb committed on
Commit
8e73c70
·
verified ·
1 Parent(s): e51eaf7

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +7 -10
README.md CHANGED
@@ -54,23 +54,20 @@ tokenizer = AutoTokenizer.from_pretrained(
54
  trust_remote_code=True,
55
  )
56
 
57
- prompt = tokenizer.encode_from_file(
58
- PROMPT_MIDI_LOAD_PATH, return_tensors="pt"
59
- )
60
 
61
  # Only sequences up to 2048 are supported.
62
  # Embedding is extracted from end-of-sequence token
63
- assert prompt.shape[1] <= MAX_SEQ_LEN
64
- assert prompt[0, -1] == tokenizer._convert_token_to_id(tokenizer.eos_token)
65
 
66
  # Alternatively if the sequence is too long:
67
- prompt = prompt[:, :MAX_SEQ_LEN]
68
- prompt = prompt[:, -1] = tokenizer._convert_token_to_id(tokenizer.eos_token)
69
 
70
  # Generate and extract embedding
71
- outputs = model.forward(prompt).squeeze(0)
72
- embedding = outputs[-1]
73
-
74
  ```
75
 
76
  ## License and Attribution
 
54
  trust_remote_code=True,
55
  )
56
 
57
+ prompt = tokenizer.encode_from_file(PROMPT_MIDI_LOAD_PATH, return_tensors="pt")
 
 
58
 
59
  # Only sequences up to 2048 are supported.
60
  # Embedding is extracted from end-of-sequence token
61
+ assert prompt.input_ids.shape[1] <= MAX_SEQ_LEN
62
+ assert prompt.input_ids[0, -1] == tokenizer._convert_token_to_id(tokenizer.eos_token)
63
 
64
  # Alternatively if the sequence is too long:
65
+ prompt.input_ids = prompt.input_ids[:, :MAX_SEQ_LEN]
66
+ prompt.input_ids[:, -1] = tokenizer._convert_token_to_id(tokenizer.eos_token)
67
 
68
  # Generate and extract embedding
69
+ outputs = model.forward(input_ids=prompt.input_ids)
70
+ embedding = outputs[0].squeeze(0)
 
71
  ```
72
 
73
  ## License and Attribution