Robert Castagna committed on
Commit
da0e5e8
·
1 Parent(s): 12d3e6f

update hf page

Browse files
Files changed (5) hide show
  1. .gitignore +2 -0
  2. app.py +12 -4
  3. chat.py +0 -19
  4. trained_models/model.safetensors +1 -1
  5. training.py +11 -2
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ trained_models/
2
+ performance_log*
app.py CHANGED
@@ -1,14 +1,22 @@
1
  import torch
2
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
3
 
4
  # Set the device to CUDA if available
5
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
6
 
7
- pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16)
8
 
 
 
 
9
 
10
- input_text = 'summarize the book harry potter' #st.text_input(label='prompt:')
11
- context = 'answer like an english essay' #st.text_input(label='how do you want me to answer the question? ie. respond as if you are explaining to a child')
 
 
 
 
12
 
13
  messages = [
14
  {
@@ -24,7 +32,7 @@ prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_genera
24
  #if st.button("generate response"):
25
 
26
  # Generate a response
27
- outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
28
 
29
  print(outputs[0]["generated_text"])
30
 
 
1
  import torch
2
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
+ import streamlit as st
4
 
5
  # Set the device to CUDA if available
6
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
7
 
8
+ model_source = 10
9
 
10
+ if model_source == 1:
11
+ #pipe = pipeline("text-generation", model="trained_models/")
12
+ pipe = pipeline("text-generation", model="trained_models/", device=device.index if device.type == 'cuda' else -1)
13
 
14
+ else:
15
+ pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device=device.index if device.type == 'cuda' else -1)
16
+
17
+
18
+ input_text = st.text_input(label='prompt:') #st.text_input(label='prompt:')
19
+ context = st.text_input(label='provide context for the model.. who/what should it be?') #st.text_input(label='how do you want me to answer the question? ie. respond as if you are explaining to a child')
20
 
21
  messages = [
22
  {
 
32
  #if st.button("generate response"):
33
 
34
  # Generate a response
35
+ outputs = pipe(prompt, max_new_tokens=250, do_sample=True, temperature=0.9, top_k=50, top_p=0.95)
36
 
37
  print(outputs[0]["generated_text"])
38
 
chat.py DELETED
@@ -1,19 +0,0 @@
1
- from transformers import AutoModelForCausalLM, AutoTokenizer
2
-
3
- # Load the model and tokenizer
4
- model = AutoModelForCausalLM.from_pretrained("trained_models/")
5
- tokenizer = AutoTokenizer.from_pretrained("trained_models/")
6
-
7
- # Input text
8
- input_text = "Hello, how are you?"
9
-
10
- # Encode the input text
11
- input_ids = tokenizer.encode(input_text, return_tensors='pt')
12
-
13
- # Generate a response
14
- output = model.generate(input_ids)
15
-
16
- # Decode the response
17
- response = tokenizer.decode(output[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
18
-
19
- print(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
trained_models/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1354fc4008a730b36cad46eb93c017f9ad6c7e455950b737b412e6c3f60627ea
3
  size 4400216536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3247bbd8b99f4af64a9796dc7300ebc1dd709d581ee8a895fd9da8c6d3df5ccc
3
  size 4400216536
training.py CHANGED
@@ -82,8 +82,17 @@ model = AutoModelForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0
82
  for param in model.parameters():
83
  param.requires_grad = False
84
 
85
- # Unfreeze the last layer
86
- for param in model.model.layers[-1].parameters():
 
 
 
 
 
 
 
 
 
87
  param.requires_grad = True
88
 
89
  # Define the optimizer and scheduler
 
82
  for param in model.parameters():
83
  param.requires_grad = False
84
 
85
+ # Unfreeze the last n layers
86
+ for layer in model.model.layers[-4:]:
87
+ for param in layer.parameters():
88
+ param.requires_grad = True
89
+
90
+ # # Unfreeze the embedding layer: only want to do if you are adding new tokens to the model
91
+ # for param in model.model.embed_tokens.parameters():
92
+ # param.requires_grad = True
93
+
94
+ # Unfreeze the output layer
95
+ for param in model.lm_head.parameters():
96
  param.requires_grad = True
97
 
98
  # Define the optimizer and scheduler