Update README.md
Browse files
README.md
CHANGED
|
@@ -46,7 +46,69 @@ outputs = model.generate(
|
|
| 46 |
custom_generate="kashif/DeepConf", # Hugging Face Hub repo
|
| 47 |
trust_remote_code=True
|
| 48 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
```
|
| 51 |
|
| 52 |
## Requirements
|
|
|
|
| 46 |
custom_generate="kashif/DeepConf", # Hugging Face Hub repo
|
| 47 |
trust_remote_code=True
|
| 48 |
)
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
## Calibration (DeepConf-low/high)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
DeepConf’s online stopping threshold is derived from a short warmup phase. You collect warmup trace confidences, then pass them into the generator to auto-derive the threshold for either DeepConf-low (aggressive) or DeepConf-high (permissive).
|
| 55 |
|
| 56 |
+
1) Warmup (num_return_sequences): collect per-trace confidences (Ct = min(step_confidences))
|
| 57 |
+
|
| 58 |
+
```python
|
| 59 |
+
from transformers import GenerationConfig
|
| 60 |
+
|
| 61 |
+
prompt = "Explain artificial intelligence."
|
| 62 |
+
Ninit = 8 # number of warmup traces
|
| 63 |
+
warmup_C = []
|
| 64 |
+
|
| 65 |
+
warm_cfg = GenerationConfig.from_model_config(model.config)
|
| 66 |
+
warm_cfg.do_sample = True
|
| 67 |
+
warm_cfg.temperature = 0.7
|
| 68 |
+
warm_cfg.top_p = 0.95
|
| 69 |
+
warm_cfg.max_new_tokens = 64
|
| 70 |
+
warm_cfg.enable_conf = True
|
| 71 |
+
warm_cfg.return_dict_in_generate = True
|
| 72 |
+
warm_cfg.output_confidences = True
|
| 73 |
+
warm_cfg.num_return_sequences = Ninit
|
| 74 |
+
# IMPORTANT: Do not set `warm_cfg.threshold` here. Warmup should not apply online early stopping.
|
| 75 |
+
|
| 76 |
+
out = model.generate(
|
| 77 |
+
**tokenizer(prompt, return_tensors="pt"),
|
| 78 |
+
generation_config=warm_cfg,
|
| 79 |
+
custom_generate="kashif/DeepConf",
|
| 80 |
+
trust_remote_code=True,
|
| 81 |
+
)
|
| 82 |
+
# Per-trace Ct = min over steps
|
| 83 |
+
warmup_C = out.confidences.min(dim=1).values.tolist()
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
2) Online: pass warmup confidences to auto-derive threshold
|
| 88 |
+
|
| 89 |
+
```python
|
| 90 |
+
gen_cfg = GenerationConfig.from_model_config(model.config)
|
| 91 |
+
gen_cfg.enable_conf = True
|
| 92 |
+
gen_cfg.return_dict_in_generate = True
|
| 93 |
+
gen_cfg.output_confidences = True
|
| 94 |
+
|
| 95 |
+
# Choose a variant:
|
| 96 |
+
# - DeepConf-low (aggressive): eta=0.1 → 90th percentile threshold
|
| 97 |
+
# - DeepConf-high (permissive): eta=0.9 → 10th percentile threshold
|
| 98 |
+
gen_cfg.deepconf_variant = "low" # or "high"
|
| 99 |
+
# Optional: override eta explicitly
|
| 100 |
+
# gen_cfg.deepconf_eta = 0.1 # defaults: 0.1 for low, 0.9 for high
|
| 101 |
+
|
| 102 |
+
# Provide warmup confidences; the threshold will be derived internally
|
| 103 |
+
gen_cfg.deepconf_warmup_confidences = warmup_C
|
| 104 |
+
|
| 105 |
+
out = model.generate(
|
| 106 |
+
**tokenizer(prompt, return_tensors="pt"),
|
| 107 |
+
custom_generate="kashif/DeepConf",
|
| 108 |
+
trust_remote_code=True,
|
| 109 |
+
generation_config=gen_cfg,
|
| 110 |
+
max_new_tokens=128,
|
| 111 |
+
)
|
| 112 |
```
|
| 113 |
|
| 114 |
## Requirements
|