kdevoe committed
Commit 977f7f0 · 1 Parent(s): 8f55d75

Trying 8bit from huggingface

Files changed (1)
  1. app.py +7 -7
app.py CHANGED
@@ -3,16 +3,16 @@ import time
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import torch
 
-model_dir = "tinyllama_model"
-model = AutoModelForCausalLM.from_pretrained(model_dir, torch_dtype=torch.qint8)
-tokenizer = AutoTokenizer.from_pretrained(model_dir)
+#model_dir = "tinyllama_model"
+model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+
+model = AutoModelForCausalLM.from_pretrained(model_id, load_in_8bit=True)
+#tokenizer = AutoTokenizer.from_pretrained(model_dir)
 
 
-# Load the TinyLlama text generation pipeline
-pipe = pipeline("text-generation", model=model, torch_dtype=torch.qint8)
-tokenizer = AutoTokenizer.from_pretrained(model_dir)
-
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, torch_dtype=torch.qint8)
+# Load the TinyLlama text generation pipeline
+pipe = pipeline("text-generation", model=model)
+#tokenizer = AutoTokenizer.from_pretrained(model_dir)
 
 # Define the inference function
 def generate_text(prompt):
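
For reference, below is a minimal sketch (not part of this commit) of what the fully wired-up loading path could look like once the tokenizer is re-enabled from the same Hub id. It assumes the bitsandbytes and accelerate packages are installed, which load_in_8bit=True requires, and adds device_map="auto" for device placement; both of these details go beyond what the diff itself shows.

# Sketch only, not part of this commit: TinyLlama loaded in 8-bit with its tokenizer.
# Assumes `bitsandbytes` and `accelerate` are installed (needed for load_in_8bit=True).
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# 8-bit quantized weights via bitsandbytes; device_map="auto" lets accelerate place layers.
model = AutoModelForCausalLM.from_pretrained(model_id, load_in_8bit=True, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Reuse the already-loaded model and tokenizer in the text-generation pipeline.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

print(pipe("What is 8-bit quantization?", max_new_tokens=64)[0]["generated_text"])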