Update src/streamlit_app.py

src/streamlit_app.py (+13 -7)

This commit consolidates the imports at the top of the file and routes Hugging Face downloads to an explicitly created, writable cache directory under /tmp, passing cache_dir to both the tokenizer and the model loader.
```diff
@@ -1,17 +1,22 @@
-
+import os
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import streamlit as st
 import torch
-import os
-
 
 st.title("Tokenizer Test Space")
-
 model_id = "google/gemma-2b-it" # Test with the official model first
 # model_id = "Rahul-8799/project_manager_gemma3" # If the official model works, try yours
 
+# Define a writable directory for the cache. /tmp is usually writable in Spaces.
+cache_directory = "/tmp/hf_cache"
+
+# Ensure the cache directory exists (good practice, though hf_hub might handle it)
+os.makedirs(cache_directory, exist_ok=True)
+
 try:
     st.write(f"Attempting to load tokenizer for {model_id}...")
-
+    # Explicitly pass the cache_dir
+    tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_directory)
     st.success("Tokenizer loaded successfully!")
     st.write("Tokenizer details:", tokenizer)
 except Exception as e:
@@ -21,19 +26,20 @@ except Exception as e:
 try:
     st.write(f"Attempting to load model for {model_id}...")
     # Assuming you want 4-bit quantization for Gemma
-    from transformers import BitsAndBytesConfig
     quantization_config = BitsAndBytesConfig(
         load_in_4bit=True,
         bnb_4bit_quant_type="nf4",
         bnb_4bit_compute_dtype=torch.bfloat16,
         bnb_4bit_use_double_quant=False,
     )
+    # Explicitly pass the cache_dir
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
         quantization_config=quantization_config,
         low_cpu_mem_usage=True,
         torch_dtype=torch.bfloat16,
-        trust_remote_code=True
+        trust_remote_code=True,
+        cache_dir=cache_directory # Add this line
     )
     st.success("Model loaded successfully!")
     st.write("Model details:", model)
```
After the change, the top of src/streamlit_app.py (new lines 1-22) reads:

```python
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import streamlit as st
import torch

st.title("Tokenizer Test Space")

model_id = "google/gemma-2b-it" # Test with the official model first
# model_id = "Rahul-8799/project_manager_gemma3" # If the official model works, try yours

# Define a writable directory for the cache. /tmp is usually writable in Spaces.
cache_directory = "/tmp/hf_cache"

# Ensure the cache directory exists (good practice, though hf_hub might handle it)
os.makedirs(cache_directory, exist_ok=True)

try:
    st.write(f"Attempting to load tokenizer for {model_id}...")
    # Explicitly pass the cache_dir
    tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_directory)
    st.success("Tokenizer loaded successfully!")
    st.write("Tokenizer details:", tokenizer)
except Exception as e:
```
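The diff view collapses the unchanged handler body (lines 23-25), so the failure path is not visible above. A common Streamlit pattern for such a handler, offered only as a guess at what those hidden lines do, is to surface the error and traceback in the page rather than letting the script die silently:

```python
import streamlit as st
from transformers import AutoTokenizer

model_id = "google/gemma-2b-it"
cache_directory = "/tmp/hf_cache"

try:
    tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_directory)
except Exception as e:
    # Show the failure in the page instead of letting the script die silently.
    st.error(f"Failed to load the tokenizer for {model_id}.")
    st.exception(e)  # renders the full traceback in the app
```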
The model-loading section (new lines 26-45):

```python
try:
    st.write(f"Attempting to load model for {model_id}...")
    # Assuming you want 4-bit quantization for Gemma
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=False,
    )
    # Explicitly pass the cache_dir
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=quantization_config,
        low_cpu_mem_usage=True,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        cache_dir=cache_directory # Add this line
    )
    st.success("Model loaded successfully!")
    st.write("Model details:", model)
```
|