# Nebula-S-v1 / nebula_s.py
# (Hugging Face listing residue, preserved as comments so the file parses:)
# punitdecomp's picture
# Upload folder using huggingface_hub
# 789c58e verified
#!/usr/bin/env python3
"""Nebula-S-v1 — inference runtime.
Usage:
from nebula_s import load_nebula_s
model, tokenizer = load_nebula_s("./Nebula-S-v1")
messages = [{"role": "user", "content": "Solve: what is 2+2?"}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(text, return_tensors="pt").to("cuda")
response = model.generate(inputs["input_ids"], inputs["attention_mask"],
tokenizer, max_new_tokens=2048)
print(response)
"""
import torch,json,os,base64,zlib,hashlib,types,sys
# _E0 / _E1: opaque base64 payloads, XOR-encrypted against a key derived from
# the shipped checkpoint tensors (see _dk and _xr below).  _E0 decrypts to a
# JSON manifest and _E1 decrypts to Python source text that load_nebula_s()
# passes to exec().
# SECURITY NOTE(review): code hidden inside encrypted blobs cannot be audited
# without the matching weights file — this is a supply-chain risk pattern.
_E0="/8ce5hKi1orFGntAvF36ynVVtY6N0eVm5t3bmuOVlYAPhpOCtWG82bEIubMDVQHwE8FwRiGbvR0K2HbLcOBvHSuJ29BdnUZu6Ur7umXbqSac4vwjoC2AUOqe1ChItG7MuTscqiq42CRJZYVSt1R+uiUbRroAjpUpBuZI3QbkfbUnHNdbz7q/wVN+hhUYsUze4My1XwG89Kgp0bmkEuaueIzzPNsiO/eGTrUEELDCz9oUHcGE2/v+HvAuijRN/FLQK+1rDOa1zPKgiaxqpHt/bZAiPhb11aqN7eW4WtN7WNkyiT3dv/9qNJWA6xd6o09M+5uEOkpgkg93XU+JHh654fYJTXL4s6EFEEnCjMOqfj8qWi9xOcxGq+8KlKfaWwRRQ2gM+uzjyswWJwQrlCWbZEqmkm0TTJBCz7HNn24WJAA5RA7gxQS7WoTRE7ex428STxjny8xjkVC36REt2rtOIpLlfdCb5TMtQ3tT7zdIwxTEhs+O8L1PZY1mTofHfwsCZjrFltvE8KNG80w/ml5pLAxgpweuSjZgGHlN2Y3Bf0vPbQs425Hj4SMWjlYXbccDgQPHJfLgXsmtDy6knlXzwAtXrjS4Bagc1jIrnGd1r8yUgzuQm/jFFe9Ddh4+iPHS5VyfbF74JixO8hiZMPNokDmzaN9KBnARKGLJVTcuc/GAmYcYYy3HeJppBqr5SjOx1O/BX00BSicLYZOM4ABfy4ag9a/A0Mayg42l/JagT8az/6zScUPtTam2JRv8zNmdK6KpP5lf2akgjfdDGcFnsV++mSwc8U1Z5a1IjM4vTqLIRbdnuiW/R8583hR4NoZ0Oiii4LdeM3+mCFe/08FrplE3n+wnwGypjHVEN6HXh+elqtP8UrbaKruAv5B5n2Imm3aYi1aCJVPQRqFhGMMFb/yaPqVISm6ksnVLMrJCqjmM+P8MtHkm/ajyImQhkfykO9GXX7BfoXfCxGjdF14a6Y6eJTbqmWHRkh3/i6MeTPj1B07ksMNVCWnIEFwjNb/qlJ0E"
_E1="/8duKJJjL4nPITs6qn57rcbTgX0Ejtfgqsj0nyFbP+/jn4jEzMM1PyeDymTZO+qxyfFzucgFMFJeBX+dDJbipWi6VF+8K0eR+nrkgw64XWczeP+52WaOXpxofeloIXMmWDXppQBrOZF2713Q/uh2txLiF+q3YpILkYiV3SAa92f/i8etbokyhx/8LslO7EOGmDEADnMGWqDLUaJKg0cfHNLnQ0iRXKBJaM9la/8qx69S4B/P9dU2kjpql99qVf1yXvOju6hl6poO4k5CmxJpdf4xePWDdITRCaBbrk6pM10g5UeHCbLtuC+cCWy3w2K2ZkBehdEZjlEFr8W7pjn6om1Ut1J6Sfx7r3Nt70T2uGypmcSCwflb4DpnIk00tC2/NWG7C4d6NWT9YyBSN16wrVVUkoDwuw/edkETjWUxZtR8RbOZNigckrKIHOeHElAiKC+UXPNa8tZaG0jK1PzAU2ju6LbcgxUof7uiEYV/XKUkkrhztuOp8qi7RiXXOILvQ558Fqh9IbuF4Ih8h4S+z3a+m37BvOnpYdA2uVM5wu331niWxcZ37gZrVKxeuRgtsOWNCZNQ3zhBIn48W+3K+CpTwuhvb0qzsikNQZa5sm4SdkQB+aG1CYrozOmb1jddxEux6gYJmApzi4Kb0N2YxOtdS+MenTWF+h7G3diGTyX6E5pWcr+l2GISajS1vKpFBWYFYE30IOAh0pRGMeCJiW7ERdUs3wbVtzgQMZ1XMfKnZqWuMIN+OErC+B9DxKHZ1IFq9hCkr66Ij6U3Z84hUPFqbrZkutgJRnNfmzI1B2OFCiCsxWwXDn+isx5qimzqOR7P3ibTiPt1YU9g9/+RClKWJcF3LW4isgJ8pd1r8BJcGKnXyBQidos9AlyYsAJwACtZvb+2+IzW0MBWAi5iyIQoyajGDu73r5/8xPPNf/Kp9VDN2WBBryFBerffTsTIFFPRhrCVg4PO7M5BzRCHnfSrws8AyychxuOkUYp62hbF1RZy7UjlLMTucHInYUeBvcJf5ueZC/KfDfVssPnuusmJk86T287zfXBsc+7PDcOeCJ+JN/5/l58nn2T1zsX4oRUyqMH5z/GPr1ltyIMGxmwQ5m6j3NWYFxPVgSm8hwenus5Mqulzg48NyTQ+fOXgN/4JG9gEZIKEQ76wdSr/Vanap9/wG0eyWZN5fZsBwZ3QMhVYcw9TZLuxbcE2+B4Ql9Ay9Hc5dQv6hz9+WS0zX9CBUglqhgjtpiyJa1rAjNyOzUn/CwRQezFjJ0hG7qBARD0WCLUl7iDcc+uRoGjSEcRUrKCHuc6JiJFAqzdf6QwvjEK4JMDKbnoLzF/OwL/j6+WYsW0eF6933LIl7YQMDkKk6KmTyYd/Z/9Ih1apG+ccydqo1mTua399kcSoM13fJv59MeRTbHlyWTQxrGtkxOqrEj4+2oJf97aVW4c0GKSC3iP6rkJJJogEmZG89l87r7rGHgy11TWegqLj/wgmFIhT1JeDxf19DyisBnw62a9umslfUmYkLPEyPkrWCQk67uRAKMlwS/sBDsOwYnc6lYOPvzLIjNIo694MSl9n9GgNW2kvx4WgJ1KUhcalDovtbr8mNVnidI1gfcRCPIllyGlCbkL4VcnvNp2KXm/LXWAoGsfSyOsKv7REf+KHKkL4oWEj6Rznwf9qpXe6TdlQ/aso63bg9ja5LGBgok1HYiyUJ+P6gQd21kaveXzvbJ7lp8NRt0sRJ/d0rWOQAgi2iX/HJsz9fbCRNmMWQGdfpDQSsT6aEuNCGXmvQQno2EcwPFaJw0sHHXT5H5YXUdvji0wOeUwd6dzQGEvWFSqffy1CcaPI+Anhx1RCHJ0jDnI0XA1lHTAmGFxjazdsHi2DlgtdxmpMyjWdA2txXNLntL9PD7kEJD6RWlcLZE1rAeXy8MlfySSXwpoveXrLErbQNJs7bmOdOnxTVZYwCpDpSxCnexr0kcN/J5yr+GlGyXfY8ajPIzDaSf+Q4JAStXi62UHcSsH7vJ+MU44jFCgHSJT6wL7iJaSnAvfu9bPUzGzNrviGqHWb4liDkAQ+GRKYxOHdI70H65TfahEsF/u2hPMnF4DTvbQs3h3mS0TsS4CuOc+ejzDMjWoJybOXJ++3RDe04P816Tjhxxn0NobtY+MFsVXfUtvVGyA9N89Ij/8w2nWL+k7bUhKjiIDC5s7nyHUFfLpHXtNAgrsec+5EfpX77VNonCYvKzwmlaclf4xK49wrmlMiJX16zLtyMQSY7T3StlxAf1JKMnGK7q3W2kzvKs555U9EKUXXBJU4toHw8VsSY0ROke93+1z8BaYKEV8SHKUF/TRBQnWXHhiA3/+I4yDreIKf82gBeWzMJ2n379IdDtMZhp4kE7O1Pbyy+FfFa6Zitqt9FhMs2IrHok0jirm7njH1P8Si1T8miL9D7BwVOS440sBqixGd1DmJG7RnB5lmbmlHYhBLFgxN7Be5dTTCfFURe5gOR2ZPHOLli/tE6ie5vtdFpAIvIWAq2fjHvD/QrDzg+MoAuF1G3LAX9/7JfTTxzmSvceekdMAl33KXi27p6OqprvzJ/z09enXzWGEwPsAG2W78tgq2KqJ9qiHxskptOPsDeEZAkTBBXg+sCqQOZptVyB1yub4BIUtCB0amASSM0O5X0Ws0MjfA8dU3FYbMI5czxy8Xy+xbjcbxxJG+B96GAT3TLPBFru8JHU49zd0RU1Owd8KbYOL0hKihuX8R7QqjF9QVvdhgq3c6fMDdZEXSa412wrAKR1bE7Uqz9c3e+45n25/KVbZYC0AD8/d60I3GF5vSsyvKq7cZ2g7jJfaNX5CdU98RZiCt/Jq/X3Uzy5lRfoOMTCelHKoC6VJjcCcWhNj6p8Pus89xhXp5MUrhmN7JioKb8OSKBQ1kt4IGJ3A4oHgCBYVnC76zqlMkawUohwcX1rPPWNvm0EygY3JTgodMYhW+dk6Je32PolTJE6qsvFZeKRKvkupaZ8NaeH33t8ihWNvjn9RMW9BPI9Oq0YV3mHEd5lCuEz9uMINBTg1flUc0lF5Lnow5M9FGK50iXQ+66k9Oyv6vM9Y9Xbbg2hZdO2GeNbaQbfcMBmI1s/Q61mTGVeWRXfsX7a3d3wHQz8Lxnf/b7p/Fncg0cLKsfVv76RxRs5dd5y221jtaQeEn0/wO9Jr0jeIz67Kk3MLNH1yNB3vkmlJWws2y/P0zsUYAlATTZW+VW/dc47Ld"
# Key-derivation parameters used by _dk: hash the first _KE elements of the
# _KN lexicographically-first tensors in the checkpoint.
_KN=3;_KE=64
def _dk(pt_path):
    """Derive a 64-byte key from the adapter checkpoint at *pt_path*.

    The key is the SHA-512 digest of the float32 byte representation of the
    first ``_KE`` elements of the ``_KN`` lexicographically-first tensors in
    the checkpoint, so the same weights always yield the same key.
    """
    state = torch.load(pt_path, map_location="cpu", weights_only=True)
    # Concatenate the leading slices of the first _KN tensors (sorted by name).
    material = b"".join(
        state[name][:_KE].to(torch.float32).numpy().tobytes()
        for name in sorted(state.keys())[:_KN]
    )
    return hashlib.sha512(material).digest()
def _xr(blob,key):
raw=base64.b64decode(blob);d=bytearray(len(raw))
for i in range(len(raw)):d[i]=raw[i]^key[i%len(key)]
return zlib.decompress(bytes(d))
def load_nebula_s(model_dir,device="cuda"):
    """Load Nebula-S-v1 model for inference.

    Args:
        model_dir: path to the Nebula-S-v1 directory (must contain the base
            HF model files plus ``nebula_s_adapter.pt``)
        device: "cuda" or "cpu"
    Returns:
        model: wrapper object (built by the decrypted runtime) with a
            ``.generate()`` method
        tokenizer: the HF tokenizer for the base model

    SECURITY NOTE(review): this function decrypts an embedded blob (_E1) with
    a key derived from the checkpoint and exec()s the result, and it also
    loads the base model with trust_remote_code=True.  Both execute code that
    cannot be audited from this file alone — treat as untrusted.
    """
    from transformers import AutoModelForCausalLM,AutoTokenizer
    print("Loading Nebula-S-v1...")
    # Key is derived from the adapter weights themselves, so decryption only
    # works with the exact shipped checkpoint.
    pt=os.path.join(model_dir,"nebula_s_adapter.pt")
    key=_dk(pt)
    # _E0 -> JSON manifest (list of dicts; keys "n","k","l","s" used below).
    mf=json.loads(_xr(_E0,key))
    # _E1 -> Python source for the hidden runtime module.
    rt_src=_xr(_E1,key).decode()
    # WARNING: executes decrypted, unauditable code into a synthetic module.
    _m=types.ModuleType("_nrt");exec(rt_src,_m.__dict__)
    # trust_remote_code=True will also run arbitrary repo code — flagged above.
    bk=AutoModelForCausalLM.from_pretrained(model_dir,torch_dtype=torch.bfloat16,trust_remote_code=True).to(device)
    tk=AutoTokenizer.from_pretrained(model_dir,trust_remote_code=True)
    # Re-load the raw checkpoint and slice/reshape tensors per the manifest:
    # entry e maps checkpoint key e["k"] (first e["l"] elements, shape e["s"])
    # to adapter weight name e["n"].
    raw=torch.load(pt,map_location="cpu",weights_only=True)
    wt={}
    for e in mf:wt[e["n"]]=raw[e["k"]][:e["l"]].reshape(e["s"])
    # _NM is defined by the decrypted runtime — its behavior is opaque here.
    mdl=_m._NM(bk,wt,dev=device)
    return mdl,tk
if __name__ == "__main__":
    # CLI demo: optional first argument overrides the default model directory.
    target_dir = sys.argv[1] if len(sys.argv) > 1 else "./Nebula-S-v1"
    run_device = "cuda" if torch.cuda.is_available() else "cpu"
    model, tokenizer = load_nebula_s(target_dir, device=run_device)

    prompt = "Solve step by step: What is the sum of all prime numbers less than 20?"
    print(f"\nPrompt: {prompt}")

    # Render the single-turn chat, tokenize, and generate a reply.
    chat = [{"role": "user", "content": prompt}]
    rendered = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    encoded = tokenizer(rendered, return_tensors="pt").to(run_device)
    response = model.generate(encoded["input_ids"], encoded["attention_mask"],
                              tokenizer, max_new_tokens=2048)
    print(f"\nResponse:\n{response}")