Tonic
committed on
reduce position embeddings
Browse files
app.py
CHANGED
|
@@ -30,7 +30,8 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 30 |
quantization_config=quantization_config, # Apply quantization
|
| 31 |
# device_map="auto", # Automatically map to available devices
|
| 32 |
torch_dtype=torch.bfloat16,
|
| 33 |
- token=HF_TOKEN
|
|
|
|
| 34 |
)
|
| 35 |
|
| 36 |
@spaces.GPU
|
|
|
|
| 30 |
quantization_config=quantization_config, # Apply quantization
|
| 31 |
# device_map="auto", # Automatically map to available devices
|
| 32 |
torch_dtype=torch.bfloat16,
|
| 33 |
+ token=HF_TOKEN,
|
| 34 |
+ max_position_embeddings=8192 # Reduce context window to 8k tokens (from 128k)
|
| 35 |
)
|
| 36 |
|
| 37 |
@spaces.GPU
|