Upload folder using huggingface_hub
Browse files
README.md
CHANGED
|
@@ -54,23 +54,20 @@ tokenizer = AutoTokenizer.from_pretrained(
|
|
| 54 |
trust_remote_code=True,
|
| 55 |
)
|
| 56 |
|
| 57 |
-
prompt = tokenizer.encode_from_file(
|
| 58 |
-
PROMPT_MIDI_LOAD_PATH, return_tensors="pt"
|
| 59 |
-
)
|
| 60 |
|
| 61 |
# Only sequences up to 2048 are supported.
|
| 62 |
# Embedding is extracted from end-of-sequence token
|
| 63 |
-
assert prompt.shape[1] <= MAX_SEQ_LEN
|
| 64 |
-
assert prompt[0, -1] == tokenizer._convert_token_to_id(tokenizer.eos_token)
|
| 65 |
|
| 66 |
# Alternatively if the sequence is too long:
|
| 67 |
-
prompt = prompt[:, :MAX_SEQ_LEN]
|
| 68 |
-
prompt
|
| 69 |
|
| 70 |
# Generate and extract embedding
|
| 71 |
-
outputs = model.forward(prompt)
|
| 72 |
-
embedding = outputs[
|
| 73 |
-
|
| 74 |
```
|
| 75 |
|
| 76 |
## License and Attribution
|
|
|
|
| 54 |
trust_remote_code=True,
|
| 55 |
)
|
| 56 |
|
| 57 |
+
prompt = tokenizer.encode_from_file(PROMPT_MIDI_LOAD_PATH, return_tensors="pt")
|
|
|
|
|
|
|
| 58 |
|
| 59 |
# Only sequences up to 2048 are supported.
|
| 60 |
# Embedding is extracted from end-of-sequence token
|
| 61 |
+
assert prompt.input_ids.shape[1] <= MAX_SEQ_LEN
|
| 62 |
+
assert prompt.input_ids[0, -1] == tokenizer._convert_token_to_id(tokenizer.eos_token)
|
| 63 |
|
| 64 |
# Alternatively if the sequence is too long:
|
| 65 |
+
prompt.input_ids = prompt.input_ids[:, :MAX_SEQ_LEN]
|
| 66 |
+
prompt.input_ids[:, -1] = tokenizer._convert_token_to_id(tokenizer.eos_token)
|
| 67 |
|
| 68 |
# Generate and extract embedding
|
| 69 |
+
outputs = model.forward(input_ids=prompt.input_ids)
|
| 70 |
+
embedding = outputs[0].squeeze(0)
|
|
|
|
| 71 |
```
|
| 72 |
|
| 73 |
## License and Attribution
|