Update app.py
app.py CHANGED
@@ -8,13 +8,10 @@ import time
 #import torch
 import pandas as pd
 
-
-#from transformers import AutoModelForCausalLM, AutoModel
+from transformers import AutoTokenizer, AutoModelForCausalLM
 from transformers import TextIteratorStreamer
 from threading import Thread
-#from
-#git lfs install
-from ctransformers import AutoModelForCausalLM, AutoConfig, Config, AutoTokenizer
+#from ctransformers import AutoModelForCausalLM, AutoConfig, Config, AutoTokenizer
 
 #from huggingface_hub import InferenceClient
 from huggingface_hub import Repository, upload_file
@@ -29,31 +26,14 @@ historylog = [{
 "Output": ''
 }]
 
-
-i_max_new_tokens=1100
-i_repetitionpenalty = 1.2
-i_contextlength=12048
-logfile = 'TinyLlama.1B.txt'
-
-print("loading model...")
-modelfile="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
-
-
-conf = AutoConfig(Config(temperature=i_temperature,
-                         repetition_penalty=i_repetitionpenalty,
-                         batch_size=64,
-                         max_new_tokens=i_max_new_tokens,
-                         context_length=i_contextlength))
-llm_model = AutoModelForCausalLM.from_pretrained(modelfile,
-                                                 model_type="llama",
-                                                 config=conf)
+llm_model = "TinyLlama/TinyLlama-1.1B-Chat-v0.6"
 
 
 # TheBloke/Llama-2-7B-Chat-GGML , TinyLlama/TinyLlama-1.1B-Chat-v1.0 , microsoft/Phi-3-mini-4k-instruct, health360/Healix-1.1B-V1-Chat-dDPO
 # TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF and tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf not working
 
 model = AutoModelForCausalLM.from_pretrained(llm_model)
-
+tokenizer = AutoTokenizer.from_pretrained(llm_model)
 #initiate model and tokenizer
 
 data = load_dataset("Namitg02/Test", split='train', streaming=False)
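The deleted block built a quantized GGUF pipeline through ctransformers; `llm_model` is now a plain Hub model id, so `from_pretrained` pulls standard transformers weights plus a matching tokenizer (the "not working" note above likely reflects that a GGUF repository carries .gguf files rather than transformers-format weights). Below is a minimal smoke test for the new loading path, not part of the commit; the question text is an illustrative assumption, and it presumes the checkpoint ships a chat template:

```python
# Sketch only (not from this commit): verify the transformers-format
# checkpoint loads and generates. The question text is illustrative.
from transformers import AutoModelForCausalLM, AutoTokenizer

llm_model = "TinyLlama/TinyLlama-1.1B-Chat-v0.6"
model = AutoModelForCausalLM.from_pretrained(llm_model)
tokenizer = AutoTokenizer.from_pretrained(llm_model)

# Chat-tuned checkpoints expect their chat template rather than raw text.
messages = [{"role": "user", "content": "What is diabetes?"}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)
output_ids = model.generate(input_ids, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```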
@@ -83,10 +63,10 @@ print("check2")
 # memory = ConversationBufferMemory(return_messages=True)
 
 
-
-
-
-
+terminators = [
+    tokenizer.eos_token_id, # end-of-sequence token id, marking where the model should consider the sequence complete
+    tokenizer.convert_tokens_to_ids("<|eot_id|>") # converts the token string into its integer id using the vocabulary
+]
 # indicates the end of a sequence
 
 
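The hunk adds `terminators` but does not show where it is consumed. Below is a sketch, not from this commit, of how such a list is typically passed to `generate()` together with the `TextIteratorStreamer` and `Thread` imports kept above; the prompt text is an assumption, and `max_new_tokens`/`repetition_penalty` mirror the values removed from the old ctransformers config:

```python
# Sketch only: wiring `terminators` into streamed generation.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

llm_model = "TinyLlama/TinyLlama-1.1B-Chat-v0.6"
model = AutoModelForCausalLM.from_pretrained(llm_model)
tokenizer = AutoTokenizer.from_pretrained(llm_model)

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

inputs = tokenizer("What is hypertension?", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks, so it runs on a worker thread while the caller
# iterates the streamer and receives decoded text as it is produced.
Thread(
    target=model.generate,
    kwargs=dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=1100,
        repetition_penalty=1.2,
        eos_token_id=terminators,  # stop on any id in the list
    ),
).start()

for chunk in streamer:
    print(chunk, end="", flush=True)
```

One caveat worth verifying: `<|eot_id|>` comes from the Llama 3 chat format, and the TinyLlama tokenizer most likely does not contain it, in which case `convert_tokens_to_ids` returns the unknown-token id rather than a real terminator.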