Luigi committed on
Commit
25e4563
·
1 Parent(s): 926ea7a

feat: add Qwen2.5 3B and Qwen3 4B extraction models

Browse files

- Added Qwen2.5 3B: Qwen/Qwen2.5-3B-Instruct-GGUF
- Added Qwen3 4B: unsloth/Qwen3-4B-GGUF (supports reasoning)
- Set Qwen3 4B as DEFAULT_EXTRACTION_MODEL
- Both configured with temperature 0.1 for greedy extraction

These larger models should handle Chinese transcript extraction better
than Granite 4.0 Tiny which only achieved 3.4% success rate.

Files changed (1) hide show
  1. app.py +29 -1
app.py CHANGED
@@ -863,9 +863,37 @@ EXTRACTION_MODELS = {
863
  "repeat_penalty": 1.0,
864
  },
865
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
866
  }
867
 
868
- DEFAULT_EXTRACTION_MODEL = "granite4_tiny_q3"
869
 
870
 
871
  # ===== ADVANCED MODE: SYNTHESIS MODELS REGISTRY (16 models, 1B-30B) =====
 
863
  "repeat_penalty": 1.0,
864
  },
865
  },
866
+ "qwen2.5_3b": {
867
+ "name": "Qwen2.5 3B (128K Context)",
868
+ "repo_id": "Qwen/Qwen2.5-3B-Instruct-GGUF",
869
+ "filename": "*Q4_K_M.gguf",
870
+ "max_context": 131072,
871
+ "supports_reasoning": False,
872
+ "supports_toggle": False,
873
+ "inference_settings": {
874
+ "temperature": 0.1,
875
+ "top_p": 0.9,
876
+ "top_k": 20,
877
+ "repeat_penalty": 1.0,
878
+ },
879
+ },
880
+ "qwen3_4b": {
881
+ "name": "Qwen3 4B (32K Context)",
882
+ "repo_id": "unsloth/Qwen3-4B-GGUF",
883
+ "filename": "*Q4_K_M.gguf",
884
+ "max_context": 32768,
885
+ "supports_reasoning": True,
886
+ "supports_toggle": True,
887
+ "inference_settings": {
888
+ "temperature": 0.1,
889
+ "top_p": 0.9,
890
+ "top_k": 20,
891
+ "repeat_penalty": 1.0,
892
+ },
893
+ },
894
  }
895
 
896
+ DEFAULT_EXTRACTION_MODEL = "qwen3_4b"
897
 
898
 
899
  # ===== ADVANCED MODE: SYNTHESIS MODELS REGISTRY (16 models, 1B-30B) =====