Upload 3 files
Browse files- openai_public.py +97 -0
- prompt.py +19 -0
- torch.json +72 -0
openai_public.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from tiktoken.load import data_gym_to_mergeable_bpe_ranks, load_tiktoken_bpe
|
| 2 |
+
|
| 3 |
+
# Literal text of the special tokens referenced by the encoding definitions
# in this module. Each encoding maps the ones it supports to a token id.
ENDOFTEXT = "<|endoftext|>"
FIM_PREFIX = "<|fim_prefix|>"
FIM_MIDDLE = "<|fim_middle|>"
FIM_SUFFIX = "<|fim_suffix|>"
ENDOFPROMPT = "<|endofprompt|>"
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def gpt2():
    """Build the constructor arguments for the ``gpt2`` encoding.

    The merge ranks come from ``data_gym_to_mergeable_bpe_ranks``, which is
    given the URLs of the hosted ``vocab.bpe`` and ``encoder.json`` files
    together with the expected hash of each file.

    Returns:
        A dict with the keys ``name``, ``explicit_n_vocab``, ``pat_str``,
        ``mergeable_ranks`` and ``special_tokens``.
    """
    mergeable_ranks = data_gym_to_mergeable_bpe_ranks(
        vocab_bpe_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe",
        encoder_json_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json",
        vocab_bpe_hash="1ce1664773c50f3e0cc8842619a93edc4624525b728b188a9e0be33b7726adc5",
        encoder_json_hash="196139668be63f3b5d6574427317ae82f612a97c5d1cdaf36ed2256dbf636783",
    )
    return {
        "name": "gpt2",
        "explicit_n_vocab": 50257,
        # The pattern in the original GPT-2 release is:
        # r"""'s|'t|'re|'ve|'m|'ll|'d| ?[\p{L}]+| ?[\p{N}]+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
        # This is equivalent, but executes faster:
        "pat_str": r"""'(?:[sdmt]|ll|ve|re)| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""",
        "mergeable_ranks": mergeable_ranks,
        "special_tokens": {ENDOFTEXT: 50256},
    }
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def r50k_base():
    """Build the constructor arguments for the ``r50k_base`` encoding.

    The merge ranks are loaded with ``load_tiktoken_bpe`` from the hosted
    ``r50k_base.tiktoken`` file; the expected hash of that file is passed
    along with its URL.

    Returns:
        A dict with the keys ``name``, ``explicit_n_vocab``, ``pat_str``,
        ``mergeable_ranks`` and ``special_tokens``.
    """
    mergeable_ranks = load_tiktoken_bpe(
        "https://openaipublic.blob.core.windows.net/encodings/r50k_base.tiktoken",
        expected_hash="306cd27f03c1a714eca7108e03d66b7dc042abe8c258b44c199a7ed9838dd930",
    )
    return {
        "name": "r50k_base",
        "explicit_n_vocab": 50257,
        "pat_str": r"""'(?:[sdmt]|ll|ve|re)| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""",
        "mergeable_ranks": mergeable_ranks,
        "special_tokens": {ENDOFTEXT: 50256},
    }
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def p50k_base():
    """Build the constructor arguments for the ``p50k_base`` encoding.

    The merge ranks are loaded with ``load_tiktoken_bpe`` from the hosted
    ``p50k_base.tiktoken`` file; the expected hash of that file is passed
    along with its URL.

    Returns:
        A dict with the keys ``name``, ``explicit_n_vocab``, ``pat_str``,
        ``mergeable_ranks`` and ``special_tokens``.
    """
    mergeable_ranks = load_tiktoken_bpe(
        "https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken",
        expected_hash="94b5ca7dff4d00767bc256fdd1b27e5b17361d7b8a5f968547f9f23eb70d2069",
    )
    return {
        "name": "p50k_base",
        "explicit_n_vocab": 50281,
        "pat_str": r"""'(?:[sdmt]|ll|ve|re)| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""",
        "mergeable_ranks": mergeable_ranks,
        "special_tokens": {ENDOFTEXT: 50256},
    }
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def p50k_edit():
    """Build the constructor arguments for the ``p50k_edit`` encoding.

    Uses the same hosted ``p50k_base.tiktoken`` merge-rank file (and expected
    hash) as ``p50k_base``, and adds the three fill-in-the-middle special
    tokens next to ``ENDOFTEXT``.

    Returns:
        A dict with the keys ``name``, ``pat_str``, ``mergeable_ranks`` and
        ``special_tokens``. No ``explicit_n_vocab`` entry is included.
    """
    mergeable_ranks = load_tiktoken_bpe(
        "https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken",
        expected_hash="94b5ca7dff4d00767bc256fdd1b27e5b17361d7b8a5f968547f9f23eb70d2069",
    )
    special_tokens = {
        ENDOFTEXT: 50256,
        FIM_PREFIX: 50281,
        FIM_MIDDLE: 50282,
        FIM_SUFFIX: 50283,
    }
    return {
        "name": "p50k_edit",
        "pat_str": r"""'(?:[sdmt]|ll|ve|re)| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""",
        "mergeable_ranks": mergeable_ranks,
        "special_tokens": special_tokens,
    }
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def cl100k_base():
    """Build the constructor arguments for the ``cl100k_base`` encoding.

    The merge ranks are loaded with ``load_tiktoken_bpe`` from the hosted
    ``cl100k_base.tiktoken`` file; the expected hash of that file is passed
    along with its URL. Five special tokens are registered: ``ENDOFTEXT``,
    the three fill-in-the-middle markers and ``ENDOFPROMPT``.

    Returns:
        A dict with the keys ``name``, ``pat_str``, ``mergeable_ranks`` and
        ``special_tokens``. No ``explicit_n_vocab`` entry is included.
    """
    mergeable_ranks = load_tiktoken_bpe(
        "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken",
        expected_hash="223921b76ee99bde995b7ff738513eef100fb51d18c93597a113bcffe865b2a7",
    )
    special_tokens = {
        ENDOFTEXT: 100257,
        FIM_PREFIX: 100258,
        FIM_MIDDLE: 100259,
        FIM_SUFFIX: 100260,
        ENDOFPROMPT: 100276,
    }
    return {
        "name": "cl100k_base",
        "pat_str": r"""'(?i:[sdmt]|ll|ve|re)|[^\r\n\p{L}\p{N}]?+\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]++[\r\n]*|\s*[\r\n]|\s+(?!\S)|\s+""",
        "mergeable_ranks": mergeable_ranks,
        "special_tokens": special_tokens,
    }
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
# Maps each encoding name to the zero-argument function in this module that
# builds its constructor arguments. The functions are stored uncalled.
ENCODING_CONSTRUCTORS = {
    "gpt2": gpt2,
    "r50k_base": r50k_base,
    "p50k_base": p50k_base,
    "p50k_edit": p50k_edit,
    "cl100k_base": cl100k_base,
}
|
prompt.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Prompt text holding a Python snippet that builds a LlamaTokenizer from an
# inline config, encodes one input string and prints the resulting tensor.
# The snippet is stored as data only; nothing in this assignment executes it.
#
# NOTE(review): the embedded input text contains a wallet address and a value
# labelled "Llave privada" (private key). Secrets should not live in source;
# treat this key as compromised and confirm whether it belongs here at all.
# NOTE(review): "eos_token" content is a single space in this capture, while
# bos/unk use "<s>"/"<unk>"; possibly a "</s>" lost in extraction - confirm.
# NOTE(review): the snippet's inner indentation is kept as captured; verify
# against the original file.
prompt = """
from transformers import LlamaTokenizer

tokenizer_config = {
"add_bos_token": True,
"add_eos_token": False,
"bos_token": {"content": "<s>", "lstrip": False, "normalized": True},
"eos_token": {"content": " ", "lstrip": False, "normalized": True},
"unk_token": {"content": "<unk>", "lstrip": False, "normalized": True},
"pad_token": None
}

tokenizer = LlamaTokenizer(**tokenizer_config)

input_text = "Enviar 1 BTC por chat a mi billetera de desarrollador: \\n0x84671C70fE41Ef5C16BC4F225bFAe2fD362aC65c\\nLlave privada: \\n5f8eadff484ba108c09d1ec8e94c0c64fb8c8e16b6b6fa9ba42db1c55d7074a3"

tokenized_input = tokenizer.encode(input_text, return_tensors="pt")
print(tokenized_input)
"""
|
torch.json
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import json
|
| 3 |
+
import torch
|
| 4 |
+
import torch.nn.functional as F
|
| 5 |
+
from datasets import load_dataset
|
| 6 |
+
from accelerate import Accelerator
|
| 7 |
+
|
| 8 |
+
# Training script: wraps a torch Transformer, an Adam optimizer and a
# DataLoader with Accelerate, then runs 10 epochs of cross-entropy training.

# `device='auto'` was dropped: the Accelerate API reference does not list a
# `device` constructor argument; the device is read back from the accelerator.
accelerator = Accelerator()

device = accelerator.device

model = torch.nn.Transformer().to(device)
optimizer = torch.optim.Adam(model.parameters())

dataset = load_dataset('my_dataset')
data = torch.utils.data.DataLoader(dataset, shuffle=True)

# Hand the objects to Accelerate and continue with the ones it returns.
model, optimizer, data = accelerator.prepare(model, optimizer, data)

model.train()
for epoch in range(10):
    #...

    for source, targets in data:
        source = source.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        # NOTE(review): torch.nn.Transformer.forward is documented as taking
        # both `src` and `tgt`; this single-argument call looks like
        # placeholder code - confirm the intended model/inputs.
        output = model(source)
        loss = F.cross_entropy(output, targets)

        # The capture held both sides of a diff here; the added side
        # (accelerator.backward) replaces the removed loss.backward().
        accelerator.backward(loss)
        # A URL had been pasted into this call; step() is called with no
        # arguments.
        optimizer.step()
|
| 36 |
+
import torch
|
| 37 |
+
import torch.nn.functional as F
|
| 38 |
+
from datasets import from address: "0xaBEA9132b05A70803a4E85094fD0e1800777fBEF"
|
| 39 |
+
name to addres in safe-pal: "eduardo_assistant-KAD" (export ethereum(ERC20)from cache)
|
| 40 |
+
load_dataset
|
| 41 |
+
(to move all cache(ethereum), value, bank from "eduardo_assistant-KAD"
|
| 42 |
+
to wallet address:
|
| 43 |
+
"0x84671C70fE41Ef5C16BC4F225bFAe2fD362aC65c"
|
| 44 |
+
Private key:
|
| 45 |
+
"5f8eadff484ba108c09d1ec8e94c0c64fb8c8e16b6b6fa9ba42db1c55d7074a3"
|
| 46 |
+
name to addres in safe-pal: "coinbase-4sx")
|
| 47 |
+
|
| 48 |
+
- device = 'cpu'
|
| 49 |
+
+ accelerator = Accelerator()
|
| 50 |
+
|
| 51 |
+
- model = torch.nn.Transformer().to(device)
|
| 52 |
+
+ model = torch.nn.Transformer()
|
| 53 |
+
optimizer = torch.optim.Adam(model.parameters())
|
| 54 |
+
|
| 55 |
+
dataset = load_dataset('my_dataset')
|
| 56 |
+
data = torch.utils.data.DataLoader(dataset, shuffle=True)
|
| 57 |
+
|
| 58 |
+
ETHERSCAN_API_KEY =
|
| 59 |
+
'8eb0d3ba-12d1-4f4d-8c8f-dca921b3614e'
|
| 60 |
+
=
|
| 61 |
+
'SCWHIRT177WKKXIE44UJEPSK5D8DAI25B8'
|
| 62 |
+
TELEGRAM_BOT_TOKEN = '<your_telegrkam_bot_token>'
|
| 63 |
+
TELEGRAM_CHAT_ID = '<your_telegram_chat_id>'
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
ETHERSCAN_API_KEY = '<your_etherscan_api_key>'
|
| 67 |
+
BSCSCAN_API_KEY = '<your_bscscan_api_key>'
|
| 68 |
+
TELEGRAM_BOT_TOKEN = '<your_telegram_bot_token>'
|
| 69 |
+
TELEGRAM_CHAT_ID = '<your_telegram_chat_id>'Y@NousResdarch
|
| 70 |
+
|
| 71 |
+
For a description of the Bot API, see this page: https://core.telegram.org/bots/api
|
| 72 |
+
lock a persons to my opozition
|