Spaces:
Sleeping
Sleeping
Robert Castagna
committed on
Commit
·
da0e5e8
1
Parent(s):
12d3e6f
update hf page
Browse files
- .gitignore +2 -0
- app.py +12 -4
- chat.py +0 -19
- trained_models/model.safetensors +1 -1
- training.py +11 -2
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
trained_models/
|
| 2 |
+
performance_log*
|
app.py
CHANGED
|
@@ -1,14 +1,22 @@
|
|
| 1 |
import torch
|
| 2 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
|
|
|
| 3 |
|
| 4 |
# Set the device to CUDA if available
|
| 5 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 6 |
|
| 7 |
-
|
| 8 |
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
messages = [
|
| 14 |
{
|
|
@@ -24,7 +32,7 @@ prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_genera
|
|
| 24 |
#if st.button("generate response"):
|
| 25 |
|
| 26 |
# Generate a response
|
| 27 |
-
outputs = pipe(prompt, max_new_tokens=
|
| 28 |
|
| 29 |
print(outputs[0]["generated_text"])
|
| 30 |
|
|
|
|
| 1 |
import torch
|
| 2 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
| 3 |
+
import streamlit as st
|
| 4 |
|
| 5 |
# Set the device to CUDA if available
|
| 6 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 7 |
|
| 8 |
+
model_source = 10
|
| 9 |
|
| 10 |
+
if model_source == 1:
|
| 11 |
+
#pipe = pipeline("text-generation", model="trained_models/")
|
| 12 |
+
pipe = pipeline("text-generation", model="trained_models/", device=device.index if device.type == 'cuda' else -1)
|
| 13 |
|
| 14 |
+
else:
|
| 15 |
+
pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device=device.index if device.type == 'cuda' else -1)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
input_text = st.text_input(label='prompt:') #st.text_input(label='prompt:')
|
| 19 |
+
context = st.text_input(label='provide context for the model.. who/what should it be?') #st.text_input(label='how do you want me to answer the question? ie. respond as if you are explaining to a child')
|
| 20 |
|
| 21 |
messages = [
|
| 22 |
{
|
|
|
|
| 32 |
#if st.button("generate response"):
|
| 33 |
|
| 34 |
# Generate a response
|
| 35 |
+
outputs = pipe(prompt, max_new_tokens=250, do_sample=True, temperature=0.9, top_k=50, top_p=0.95)
|
| 36 |
|
| 37 |
print(outputs[0]["generated_text"])
|
| 38 |
|
chat.py
DELETED
|
@@ -1,19 +0,0 @@
|
|
| 1 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 2 |
-
|
| 3 |
-
# Load the model and tokenizer
|
| 4 |
-
model = AutoModelForCausalLM.from_pretrained("trained_models/")
|
| 5 |
-
tokenizer = AutoTokenizer.from_pretrained("trained_models/")
|
| 6 |
-
|
| 7 |
-
# Input text
|
| 8 |
-
input_text = "Hello, how are you?"
|
| 9 |
-
|
| 10 |
-
# Encode the input text
|
| 11 |
-
input_ids = tokenizer.encode(input_text, return_tensors='pt')
|
| 12 |
-
|
| 13 |
-
# Generate a response
|
| 14 |
-
output = model.generate(input_ids)
|
| 15 |
-
|
| 16 |
-
# Decode the response
|
| 17 |
-
response = tokenizer.decode(output[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
|
| 18 |
-
|
| 19 |
-
print(response)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
trained_models/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4400216536
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3247bbd8b99f4af64a9796dc7300ebc1dd709d581ee8a895fd9da8c6d3df5ccc
|
| 3 |
size 4400216536
|
training.py
CHANGED
|
@@ -82,8 +82,17 @@ model = AutoModelForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0
|
|
| 82 |
for param in model.parameters():
|
| 83 |
param.requires_grad = False
|
| 84 |
|
| 85 |
-
# Unfreeze the last
|
| 86 |
-
for
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
param.requires_grad = True
|
| 88 |
|
| 89 |
# Define the optimizer and scheduler
|
|
|
|
| 82 |
for param in model.parameters():
|
| 83 |
param.requires_grad = False
|
| 84 |
|
| 85 |
+
# Unfreeze the last n layers
|
| 86 |
+
for layer in model.model.layers[-4:]:
|
| 87 |
+
for param in layer.parameters():
|
| 88 |
+
param.requires_grad = True
|
| 89 |
+
|
| 90 |
+
# # Unfreeze the embedding layer: only want to do if you are adding new tokens to the model
|
| 91 |
+
# for param in model.model.embed_tokens.parameters():
|
| 92 |
+
# param.requires_grad = True
|
| 93 |
+
|
| 94 |
+
# Unfreeze the output layer
|
| 95 |
+
for param in model.lm_head.parameters():
|
| 96 |
param.requires_grad = True
|
| 97 |
|
| 98 |
# Define the optimizer and scheduler
|