Update app.py
Browse files
app.py
CHANGED
|
@@ -14,7 +14,7 @@ import time
|
|
| 14 |
token = os.environ["HF_TOKEN"]
|
| 15 |
|
| 16 |
quantization_config = BitsAndBytesConfig(
|
| 17 |
-
|
| 18 |
)
|
| 19 |
|
| 20 |
model = AutoModelForCausalLM.from_pretrained(
|
|
@@ -34,7 +34,7 @@ else:
|
|
| 34 |
|
| 35 |
|
| 36 |
@spaces.GPU(duration=150)
|
| 37 |
-
def chat(message, history, temperature, top_p, top_k, max_tokens):
|
| 38 |
start_time = time.time()
|
| 39 |
chat = []
|
| 40 |
for item in history:
|
|
@@ -52,7 +52,6 @@ def chat(message, history, temperature, top_p, top_k, max_tokens):
|
|
| 52 |
streamer=streamer,
|
| 53 |
max_new_tokens=max_tokens,
|
| 54 |
do_sample=True,
|
| 55 |
-
top_p=top_p,
|
| 56 |
top_k=top_k,
|
| 57 |
temperature=temperature,
|
| 58 |
)
|
|
@@ -86,9 +85,7 @@ demo = gr.ChatInterface(
|
|
| 86 |
gr.Slider(
|
| 87 |
minimum=0, maximum=1, step=0.1, value=0.9, label="Temperature", render=False
|
| 88 |
),
|
| 89 |
-
gr.Slider(
|
| 90 |
-
minimum=0, maximum=1, step=0.1, value=0.95, label="top_p", render=False
|
| 91 |
-
),
|
| 92 |
gr.Slider(
|
| 93 |
minimum=1, maximum=10000, step=5, value=1000, label="top_k", render=False
|
| 94 |
),
|
|
@@ -103,6 +100,6 @@ demo = gr.ChatInterface(
|
|
| 103 |
],
|
| 104 |
stop_btn="Stop Generation",
|
| 105 |
title="Chat With LLMs",
|
| 106 |
-
description="Now Running [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) in
|
| 107 |
)
|
| 108 |
demo.launch()
|
|
|
|
| 14 |
token = os.environ["HF_TOKEN"]
|
| 15 |
|
| 16 |
quantization_config = BitsAndBytesConfig(
|
| 17 |
+
load_in_8bit=True, bnb_4bit_compute_dtype=torch.float16
|
| 18 |
)
|
| 19 |
|
| 20 |
model = AutoModelForCausalLM.from_pretrained(
|
|
|
|
| 34 |
|
| 35 |
|
| 36 |
@spaces.GPU(duration=150)
|
| 37 |
+
def chat(message, history, temperature,do_sample, top_k, max_tokens):
|
| 38 |
start_time = time.time()
|
| 39 |
chat = []
|
| 40 |
for item in history:
|
|
|
|
| 52 |
streamer=streamer,
|
| 53 |
max_new_tokens=max_tokens,
|
| 54 |
do_sample=True,
|
|
|
|
| 55 |
top_k=top_k,
|
| 56 |
temperature=temperature,
|
| 57 |
)
|
|
|
|
| 85 |
gr.Slider(
|
| 86 |
minimum=0, maximum=1, step=0.1, value=0.9, label="Temperature", render=False
|
| 87 |
),
|
| 88 |
+
gr.Checkbox(label="Sampling",value=True),
|
|
|
|
|
|
|
| 89 |
gr.Slider(
|
| 90 |
minimum=1, maximum=10000, step=5, value=1000, label="top_k", render=False
|
| 91 |
),
|
|
|
|
| 100 |
],
|
| 101 |
stop_btn="Stop Generation",
|
| 102 |
title="Chat With LLMs",
|
| 103 |
+
description="Now Running [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) in 8bit"
|
| 104 |
)
|
| 105 |
demo.launch()
|