kdevoe committed on
Commit
6a7d6fa
·
1 Parent(s): 3a01d1a

Resetting model to no quantization

Browse files
app.py CHANGED
@@ -9,12 +9,13 @@ model_dir = "tinyllama_model"
9
  model = AutoModelForCausalLM.from_pretrained(model_dir)
10
  tokenizer = AutoTokenizer.from_pretrained(model_dir)
11
 
 
 
 
12
  # Define the inference function
13
  def generate_text(prompt):
14
  start_time = time.time()
15
  inputs = tokenizer(prompt, return_tensors='pt')
16
- # Manually move tensors to quantized int8 if necessary
17
- inputs = {key: val.to(torch.int8) if val.dtype == torch.float32 else val for key, val in inputs.items()}
18
  outputs = model.generate(**inputs, max_length=100, num_return_sequences=1)
19
  generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
20
  end_time = time.time()
 
9
  model = AutoModelForCausalLM.from_pretrained(model_dir)
10
  tokenizer = AutoTokenizer.from_pretrained(model_dir)
11
 
12
+ ################### Modify this to add quantization of the model ##############################
13
+
14
+
15
  # Define the inference function
16
  def generate_text(prompt):
17
  start_time = time.time()
18
  inputs = tokenizer(prompt, return_tensors='pt')
 
 
19
  outputs = model.generate(**inputs, max_length=100, num_return_sequences=1)
20
  generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
21
  end_time = time.time()
requirements.txt CHANGED
@@ -1,7 +1,9 @@
1
  accelerate
2
- helper
3
- transformers
4
- torch
5
  gradio
 
6
  quanto
7
- bitsandbytes
 
 
 
 
1
  accelerate
2
+ bitsandbytes
 
 
3
  gradio
4
+ helper
5
  quanto
6
+ sentencepiece
7
+ torch
8
+ torchinfo
9
+ transformers
tinyllama_model/config.json CHANGED
@@ -22,7 +22,7 @@
22
  "rope_scaling": null,
23
  "rope_theta": 10000.0,
24
  "tie_word_embeddings": false,
25
- "torch_dtype": "bfloat16",
26
  "transformers_version": "4.41.0",
27
  "use_cache": true,
28
  "vocab_size": 32000
 
22
  "rope_scaling": null,
23
  "rope_theta": 10000.0,
24
  "tie_word_embeddings": false,
25
+ "torch_dtype": "float32",
26
  "transformers_version": "4.41.0",
27
  "use_cache": true,
28
  "vocab_size": 32000
tinyllama_model/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 1,
3
+ "eos_token_id": 2,
4
+ "max_length": 2048,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.41.0"
7
+ }
tinyllama_model/{pytorch_model.bin → model.safetensors} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62310c1145664f23bf5c7a2398ea2a8fed28e3a3b63573319d892b7710b40396
3
- size 1298924658
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e9e93a56efef2e4c9a01d31c9f3e1d5b7369b81843a426ecfd1889b10e4361d
3
+ size 4400216536
tinyllama_model/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723