Spaces:
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,7 +17,7 @@ from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
|
| 17 |
from langchain_docling import DoclingLoader
|
| 18 |
from langchain_docling.loader import ExportType
|
| 19 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 20 |
- from transformers import AutoModelForCausalLM, AutoTokenizer, DynamicCache, TextIteratorStreamer, BitsAndBytesConfig
|
| 21 |
from transformers.models.llama.modeling_llama import rotate_half
|
| 22 |
import threading
|
| 23 |
import shutil
|
|
@@ -35,7 +35,7 @@ model_name = "google/gemma-3-27b-it"
|
|
| 35 |
tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
|
| 36 |
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
|
| 37 |
# model = AutoModelForCausalLM.from_pretrained(model_name, token=api_token, torch_dtype=torch.float16)
|
| 38 |
- model =
|
| 39 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 40 |
model = model.eval()
|
| 41 |
# model.to(device)
|
|
|
|
| 17 |
from langchain_docling import DoclingLoader
|
| 18 |
from langchain_docling.loader import ExportType
|
| 19 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 20 |
+ from transformers import AutoModelForCausalLM, AutoTokenizer, DynamicCache, TextIteratorStreamer, BitsAndBytesConfig, Gemma3ForCausalLM
|
| 21 |
from transformers.models.llama.modeling_llama import rotate_half
|
| 22 |
import threading
|
| 23 |
import shutil
|
|
|
|
| 35 |
tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
|
| 36 |
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
|
| 37 |
# model = AutoModelForCausalLM.from_pretrained(model_name, token=api_token, torch_dtype=torch.float16)
|
| 38 |
+ model = Gemma3ForCausalLM.from_pretrained(model_name, token=api_token, quantization_config=quantization_config, torch_dtype="auto")
|
| 39 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 40 |
model = model.eval()
|
| 41 |
# model.to(device)
|