Spaces:
Running
Running
Add Granite-4.0 1B model for extraction
- Added granite_4_0_1b_q4 from unsloth/granite-4.0-h-1b-GGUF
- Q4_0 quantization for balanced quality/speed
- 32K context window
- Temperature 0.1 for focused extraction
- Larger model should help with extraction focus compared to 350M
app.py
CHANGED
|
@@ -801,6 +801,22 @@ EXTRACTION_MODELS = {
|
|
| 801 |
"repeat_penalty": 1.0,
|
| 802 |
},
|
| 803 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 804 |
"falcon_h1_1.5b_q4": {
|
| 805 |
"name": "Falcon-H1 1.5B Q4",
|
| 806 |
"repo_id": "unsloth/Falcon-H1-1.5B-Deep-Instruct-GGUF",
|
|
|
|
| 801 |
"repeat_penalty": 1.0,
|
| 802 |
},
|
| 803 |
},
|
| 804 |
+
"granite_4_0_1b_q4": {
|
| 805 |
+
"name": "Granite-4.0 1B (32K Context)",
|
| 806 |
+
"repo_id": "unsloth/granite-4.0-h-1b-GGUF",
|
| 807 |
+
"filename": "*Q4_0.gguf",
|
| 808 |
+
"max_context": 32768,
|
| 809 |
+
"default_n_ctx": 4096,
|
| 810 |
+
"params_size": "1B",
|
| 811 |
+
"supports_reasoning": False,
|
| 812 |
+
"supports_toggle": False,
|
| 813 |
+
"inference_settings": {
|
| 814 |
+
"temperature": 0.1,
|
| 815 |
+
"top_p": 0.95,
|
| 816 |
+
"top_k": 30,
|
| 817 |
+
"repeat_penalty": 1.0,
|
| 818 |
+
},
|
| 819 |
+
},
|
| 820 |
"falcon_h1_1.5b_q4": {
|
| 821 |
"name": "Falcon-H1 1.5B Q4",
|
| 822 |
"repo_id": "unsloth/Falcon-H1-1.5B-Deep-Instruct-GGUF",
|