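Update app.py: load the BitNet model with low_cpu_mem_usage=True and drop the stale "Fallback to LLaMA tokenizer" comment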
Browse files
app.py
CHANGED
|
@@ -21,14 +21,14 @@ logger = logging.getLogger(__name__)
|
|
| 21 |
# Initialize BitNet model and tokenizer
|
| 22 |
try:
|
| 23 |
model_name = "1bitLLM/bitnet_b1_58-3B"
|
| 24 |
-
# Fallback to LLaMA tokenizer
|
| 25 |
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", cache_dir="/app/cache")
|
| 26 |
model = AutoModelForCausalLM.from_pretrained(
|
| 27 |
model_name,
|
| 28 |
torch_dtype=torch.float32,
|
| 29 |
device_map="cpu",
|
|
|
|
| 30 |
cache_dir="/app/cache",
|
| 31 |
-
trust_remote_code=True
|
| 32 |
)
|
| 33 |
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
|
| 34 |
except Exception as e:
|
|
|
|
| 21 |
# Initialize BitNet model and tokenizer
|
| 22 |
try:
|
| 23 |
model_name = "1bitLLM/bitnet_b1_58-3B"
|
|
|
|
| 24 |
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer", cache_dir="/app/cache")
|
| 25 |
model = AutoModelForCausalLM.from_pretrained(
|
| 26 |
model_name,
|
| 27 |
torch_dtype=torch.float32,
|
| 28 |
device_map="cpu",
|
| 29 |
+
low_cpu_mem_usage=True,
|
| 30 |
cache_dir="/app/cache",
|
| 31 |
+
trust_remote_code=True
|
| 32 |
)
|
| 33 |
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
|
| 34 |
except Exception as e:
|