Spaces:
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -30,15 +30,15 @@ from utils import (
|
|
| 30 |
|
| 31 |
# Initialize the model and tokenizer.
|
| 32 |
api_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
|
| 33 |
-
|
| 34 |
-
model_name = "google/gemma-3-27b-it"
|
| 35 |
tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
|
| 36 |
-
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
|
| 37 |
-
|
| 38 |
-
model = Gemma3ForCausalLM.from_pretrained(model_name, token=api_token, quantization_config=quantization_config, torch_dtype="auto")
|
| 39 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 40 |
model = model.eval()
|
| 41 |
-
|
| 42 |
embedding_model = HuggingFaceBgeEmbeddings(
|
| 43 |
model_name="BAAI/bge-large-en-v1.5",
|
| 44 |
model_kwargs={"device": str(device)},
|
|
@@ -577,14 +577,14 @@ def chat_response_stream(message: str, history: list, state: dict, compression_d
|
|
| 577 |
streamer=streamer,
|
| 578 |
use_cache=True,
|
| 579 |
max_new_tokens=1024,
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
temperature=1.0,
|
| 585 |
-
top_k=64,
|
| 586 |
-
top_p=0.95,
|
| 587 |
-
min_p=0.0
|
| 588 |
)
|
| 589 |
t = Thread(target=model.generate, kwargs=generate_kwargs)
|
| 590 |
t.start()
|
|
|
|
| 30 |
|
| 31 |
# Initialize the model and tokenizer.
|
| 32 |
api_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
|
| 33 |
+
model_name = "meta-llama/Llama-3.1-8B-Instruct"
|
| 34 |
+
# model_name = "google/gemma-3-27b-it"
|
| 35 |
tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
|
| 36 |
+
# quantization_config = BitsAndBytesConfig(load_in_8bit=True)
|
| 37 |
+
model = AutoModelForCausalLM.from_pretrained(model_name, token=api_token, torch_dtype=torch.float16)
|
| 38 |
+
# model = Gemma3ForCausalLM.from_pretrained(model_name, token=api_token, quantization_config=quantization_config, torch_dtype="auto")
|
| 39 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 40 |
model = model.eval()
|
| 41 |
+
model.to(device)
|
| 42 |
embedding_model = HuggingFaceBgeEmbeddings(
|
| 43 |
model_name="BAAI/bge-large-en-v1.5",
|
| 44 |
model_kwargs={"device": str(device)},
|
|
|
|
| 577 |
streamer=streamer,
|
| 578 |
use_cache=True,
|
| 579 |
max_new_tokens=1024,
|
| 580 |
+
num_beams=1,
|
| 581 |
+
do_sample=False,
|
| 582 |
+
top_p=1.0,
|
| 583 |
+
top_k=None,
|
| 584 |
temperature=1.0,
|
| 585 |
+
# top_k=64,
|
| 586 |
+
# top_p=0.95,
|
| 587 |
+
# min_p=0.0
|
| 588 |
)
|
| 589 |
t = Thread(target=model.generate, kwargs=generate_kwargs)
|
| 590 |
t.start()
|