Spaces:

RobertCastagna
/

FIN_LLM

Sleeping

Robert Castagna committed on Jan 10, 2024

Commit

da0e5e8

1 Parent(s): 12d3e6f

update hf page

Files changed (5) hide show

.gitignore ADDED Viewed

app.py CHANGED Viewed

@@ -1,14 +1,22 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 # Set the device to CUDA if available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16)
-input_text = 'summarize the book harry potter' #st.text_input(label='prompt:')
-context = 'answer like an english essay' #st.text_input(label='how do you want me to answer the question? ie. respond as if you are explaining to a child')
 messages = [
     {
@@ -24,7 +32,7 @@ prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_genera
 #if st.button("generate response"):
 # Generate a response
-outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
 print(outputs[0]["generated_text"])

 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+import streamlit as st
 # Set the device to CUDA if available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model_source = 10
+if model_source == 1:
+    #pipe = pipeline("text-generation", model="trained_models/")
+    pipe = pipeline("text-generation", model="trained_models/", device=device.index if device.type == 'cuda' else -1)
+else:
+    pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device=device.index if device.type == 'cuda' else -1)
+input_text = st.text_input(label='prompt:') #st.text_input(label='prompt:')
+context = st.text_input(label='provide context for the model.. who/what should it be?') #st.text_input(label='how do you want me to answer the question? ie. respond as if you are explaining to a child')
 messages = [
     {
 #if st.button("generate response"):
 # Generate a response
+outputs = pipe(prompt, max_new_tokens=250, do_sample=True, temperature=0.9, top_k=50, top_p=0.95)
 print(outputs[0]["generated_text"])

chat.py DELETED Viewed

@@ -1,19 +0,0 @@
-from transformers import AutoModelForCausalLM, AutoTokenizer
-# Load the model and tokenizer
-model = AutoModelForCausalLM.from_pretrained("trained_models/")
-tokenizer = AutoTokenizer.from_pretrained("trained_models/")
-# Input text
-input_text = "Hello, how are you?"
-# Encode the input text
-input_ids = tokenizer.encode(input_text, return_tensors='pt')
-# Generate a response
-output = model.generate(input_ids)
-# Decode the response
-response = tokenizer.decode(output[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
-print(response)

trained_models/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1354fc4008a730b36cad46eb93c017f9ad6c7e455950b737b412e6c3f60627ea
 size 4400216536

 version https://git-lfs.github.com/spec/v1
+oid sha256:3247bbd8b99f4af64a9796dc7300ebc1dd709d581ee8a895fd9da8c6d3df5ccc
 size 4400216536

training.py CHANGED Viewed

@@ -82,8 +82,17 @@ model = AutoModelForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0
 for param in model.parameters():
     param.requires_grad = False
-# Unfreeze the last layer
-for param in model.model.layers[-1].parameters():
     param.requires_grad = True
 # Define the optimizer and scheduler

 for param in model.parameters():
     param.requires_grad = False
+# Unfreeze the last n layers
+for layer in model.model.layers[-4:]:
+    for param in layer.parameters():
+        param.requires_grad = True
+# # Unfreeze the embedding layer: only want to do if you are adding new tokens to the model
+# for param in model.model.embed_tokens.parameters():
+#     param.requires_grad = True
+# Unfreeze the output layer
+for param in model.lm_head.parameters():
     param.requires_grad = True
 # Define the optimizer and scheduler