Changing model datatype to int8
Browse files
app.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import time
|
| 3 |
from transformers import pipeline
|
|
|
|
| 4 |
|
| 5 |
# Load the TinyLlama text generation pipeline
|
| 6 |
-
pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
|
| 7 |
|
| 8 |
# Define the inference function
|
| 9 |
def generate_text(prompt):
|
|
|
|
import gradio as gr
import time
from transformers import pipeline
import torch

# Load the TinyLlama text-generation pipeline with reduced-precision weights
# to cut memory use.
#
# NOTE(review): the commit's original `torch_dtype=torch.int8` cannot work --
# transformers refuses to instantiate a model under a non-floating-point
# dtype and raises ValueError ("... is not a floating point dtype"). Real
# int8 inference needs a quantization backend instead, e.g.:
#     from transformers import BitsAndBytesConfig
#     pipeline(..., model_kwargs={"quantization_config":
#                                 BitsAndBytesConfig(load_in_8bit=True)})
# which requires the `bitsandbytes` package. float16 is used here as the
# dependency-free way to halve memory while keeping the model loadable.
pipe = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=torch.float16,
)
| 8 |
|
| 9 |
# Define the inference function
|
| 10 |
def generate_text(prompt):
|