kdevoe committed on
Commit
8f55d75
·
1 Parent(s): 4560e1c

Update app to pull local quantized model

Browse files
Files changed (1) hide show
  1. app.py +10 -2
app.py CHANGED
@@ -1,10 +1,18 @@
1
  import gradio as gr
2
  import time
3
- from transformers import pipeline
4
  import torch
5
 
 
 
 
 
 
6
  # Load the TinyLlama text generation pipeline
7
- pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.int8)
 
 
 
8
 
9
  # Define the inference function
10
  def generate_text(prompt):
 
1
  import gradio as gr
2
  import time
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
4
  import torch
5
 
6
+ model_dir = "tinyllama_model"
7
+ model = AutoModelForCausalLM.from_pretrained(model_dir, torch_dtype=torch.qint8)
8
+ tokenizer = AutoTokenizer.from_pretrained(model_dir)
9
+
10
+
11
  # Load the TinyLlama text generation pipeline
12
+ pipe = pipeline("text-generation", model=model, torch_dtype=torch.qint8)
13
+ tokenizer = AutoTokenizer.from_pretrained(model_dir)
14
+
15
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, torch_dtype=torch.qint8)
16
 
17
  # Define the inference function
18
  def generate_text(prompt):