Spaces:

ibrahimmkhalid
/

llm-from-scratch

Running

ibrahimmkhalid committed on Jan 23, 2024

Commit

5e3f56c

1 Parent(s): ca18dfd

use pytorch serialization instead of pickle

Files changed (3) hide show

app.py CHANGED Viewed

@@ -2,7 +2,6 @@ import streamlit as st
 import torch
 import torch.nn as nn
 from torch.nn import functional as F
-import pickle
 import os
 st.title('LLM from scratch Demo')
@@ -169,11 +168,11 @@ encode = lambda s: [string_to_int[ch] for ch in s]
 decode = lambda x: ''.join([int_to_string[i] for i in x])
-model_pickle_path = './model.pkl'
 st.write('loading model parameters...')
 with open(model_pickle_path, 'rb') as f:
-    model = pickle.load(f)
 st.write('model loaded successfully!')
 prompt = ''

 import torch
 import torch.nn as nn
 from torch.nn import functional as F
 import os
 st.title('LLM from scratch Demo')
 decode = lambda x: ''.join([int_to_string[i] for i in x])
+model_pickle_path = './model.pt'
 st.write('loading model parameters...')
 with open(model_pickle_path, 'rb') as f:
+    model = torch.load(f, map_location=device)
 st.write('model loaded successfully!')
 prompt = ''

model.pkl → model.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59989e9551bb95c5c24630505acca58e99a9608218081b2fbea732f536090517
-size 160269240

 version https://git-lfs.github.com/spec/v1
+oid sha256:04e95f8e46dd7b7b894d288f3c2b75bb0a535fb266960803587a9f552e6b5a73
+size 160274578

train_gpt_openwebtext.py CHANGED Viewed

@@ -3,7 +3,6 @@ import torch.nn as nn
 from torch.nn import functional as F
 import mmap
 import random
-import pickle
 import os
@@ -218,11 +217,11 @@ class GPTLanguageModel(nn.Module):
 model = GPTLanguageModel(vocab_size).to(device)
-model_pickle_path = './model.pkl'
 if os.path.exists(model_pickle_path):
     print('loading model parameters...')
     with open(model_pickle_path, 'rb') as f:
-        model = pickle.load(f)
     print('loaded successfully!')
 # create a PyTorch optimizer
 optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
@@ -243,5 +242,5 @@ for iter in range(max_iters):
 print(loss.item())
 with open(model_pickle_path, 'wb') as f:
-    pickle.dump(model, f)
 print('model saved')

 from torch.nn import functional as F
 import mmap
 import random
 import os
 model = GPTLanguageModel(vocab_size).to(device)
+model_pickle_path = './model.pt'
 if os.path.exists(model_pickle_path):
     print('loading model parameters...')
     with open(model_pickle_path, 'rb') as f:
+        model = torch.load(f, map_location=device)
     print('loaded successfully!')
 # create a PyTorch optimizer
 optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
 print(loss.item())
 with open(model_pickle_path, 'wb') as f:
+    torch.save(model, f)
 print('model saved')