Spaces:

kouki321
/

cag_new_model

Sleeping

kouki321 committed on May 28

Commit

35c42fb

verified ·

1 Parent(s): 441a0b0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import streamlit as st
 import torch
-from transformers import BitsAndBytesConfig, AutoTokenizer, AutoModelForCausalLM
 from transformers.cache_utils import DynamicCache
 import os
 from time import time
@@ -84,17 +84,11 @@ def calculate_cache_size(cache):
 @st.cache_resource
 def load_model_and_tokenizer(doc_text_count):
     model_name = "google/gemma-3-4b-it"  # Configure quantization for 4-bit loading
-    quantization_config = BitsAndBytesConfig(
-        load_in_4bit=True,  # Enable 4-bit quantization
-        bnb_4bit_compute_dtype=torch.float16,  # Set computation precision
-        bnb_4bit_quant_type="nf4",  # Use Normal Float 4 (NF4) quantization
-        bnb_4bit_use_double_quant=True,  # Enable double quantization
-    )
     # Load the pre-trained model with quantization
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         device_map="auto",  # Automatically allocate model to devices
-        quantization_config=quantization_config,
         model_max_length=1.3*round(doc_text_count * 0.3 + 1),
         token=HF_TOKEN
     )

 import streamlit as st
 import torch
+from transformers import  AutoTokenizer, AutoModelForCausalLM
 from transformers.cache_utils import DynamicCache
 import os
 from time import time
 @st.cache_resource
 def load_model_and_tokenizer(doc_text_count):
     model_name = "google/gemma-3-4b-it"  # Configure quantization for 4-bit loading
     # Load the pre-trained model (quantization was removed in this commit)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         device_map="auto",  # Automatically allocate model to devices
         model_max_length=1.3*round(doc_text_count * 0.3 + 1),
         token=HF_TOKEN
     )