Luigi committed on
Commit
8b92581
·
1 Parent(s): 31f3b9f

Add Granite-4.0 1B model for extraction

Browse files

- Added granite_4_0_1b_q4 from unsloth/granite-4.0-h-1b-GGUF
- Q4_0 quantization for balanced quality/speed
- 32K context window
- Temperature 0.1 for focused extraction
- Larger model should help with extraction focus compared to 350M

Files changed (1) hide show
  1. app.py +16 -0
app.py CHANGED
@@ -801,6 +801,22 @@ EXTRACTION_MODELS = {
801
  "repeat_penalty": 1.0,
802
  },
803
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
804
  "falcon_h1_1.5b_q4": {
805
  "name": "Falcon-H1 1.5B Q4",
806
  "repo_id": "unsloth/Falcon-H1-1.5B-Deep-Instruct-GGUF",
 
801
  "repeat_penalty": 1.0,
802
  },
803
  },
804
+ "granite_4_0_1b_q4": {
805
+ "name": "Granite-4.0 1B (32K Context)",
806
+ "repo_id": "unsloth/granite-4.0-h-1b-GGUF",
807
+ "filename": "*Q4_0.gguf",
808
+ "max_context": 32768,
809
+ "default_n_ctx": 4096,
810
+ "params_size": "1B",
811
+ "supports_reasoning": False,
812
+ "supports_toggle": False,
813
+ "inference_settings": {
814
+ "temperature": 0.1,
815
+ "top_p": 0.95,
816
+ "top_k": 30,
817
+ "repeat_penalty": 1.0,
818
+ },
819
+ },
820
  "falcon_h1_1.5b_q4": {
821
  "name": "Falcon-H1 1.5B Q4",
822
  "repo_id": "unsloth/Falcon-H1-1.5B-Deep-Instruct-GGUF",