upload cortexa-write-feedback v1
Browse files- README.md +56 -0
- config.json +20 -0
- student_int8.onnx +3 -0
- tokenizer.json +122 -0
README.md
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language:
|
| 3 |
+
- en
|
| 4 |
+
license: other
|
| 5 |
+
license_name: pleius-internal
|
| 6 |
+
tags:
|
| 7 |
+
- onnx
|
| 8 |
+
- conditional-text-generation
|
| 9 |
+
- writing-feedback
|
| 10 |
+
- distillation
|
| 11 |
+
- creator-tools
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# cortexa-write-feedback (distilled student)
|
| 15 |
+
|
| 16 |
+
A ~4.4M-parameter conditional decoder distilled from
|
| 17 |
+
`M725/cortexa-write-scorer` (the worker-side TF-IDF/lexical stub).
|
| 18 |
+
Takes MiniLM text features (384-d) + the 4 Write pillar scores and
|
| 19 |
+
emits a creator-vernacular phrase chain about the draft:
|
| 20 |
+
|
| 21 |
+
```
|
| 22 |
+
"first line hooks | ending sticks"
|
| 23 |
+
"tight middle | shareable"
|
| 24 |
+
"wall of text | no reason to read"
|
| 25 |
+
"drags | no payoff"
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
## Files
|
| 29 |
+
|
| 30 |
+
| file | purpose |
|
| 31 |
+
|---|---|
|
| 32 |
+
| `student_int8.onnx` | TinyTransformer decoder, 4 layers / 256-dim / 4 heads, INT8 dynamic-quantized. 6.8 MB. |
|
| 33 |
+
| `tokenizer.json` | Whole-phrase tokenizer (vocab 117, including the 4 specials `<pad>`, `<bos>`, `<eos>`, `<sep>` at ids 0–3). |
|
| 34 |
+
| `config.json` | Modality, encoder name and dim (384), pillar names, decoder dims (`d_model`, `n_layers`), `max_seq_len`, vocab size, special-token ids. |
|
| 35 |
+
|
| 36 |
+
## Inference shape
|
| 37 |
+
|
| 38 |
+
```
|
| 39 |
+
inputs:
|
| 40 |
+
encoder_feats (1, 384) float32 # sentence-transformers/all-MiniLM-L6-v2 mean-pooled, L2-normalized
|
| 41 |
+
scores (1, 4) float32 # [read_likelihood, hold, structure, score] in [0,1]
|
| 42 |
+
scores_present (1,) float32 # 1.0 = pillar scores supplied (anchored), 0.0 = scores not supplied (fast-mode)
|
| 43 |
+
input_ids (1, T) int64 # indices into tokenizer.json `tokens`; T <= 16 (`max_seq_len` in config.json)
|
| 44 |
+
outputs:
|
| 45 |
+
logits (1, T, V) float32 # V = 117 (`vocab_size` in config.json)
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
## Training
|
| 49 |
+
|
| 50 |
+
See `research/distill_students/train_write.py` in the app repo. Teacher
|
| 51 |
+
is `score_write_for_rules()` — the Python port of the cortexa-proxy
|
| 52 |
+
worker's deterministic TF-IDF write scorer.
|
| 53 |
+
|
| 54 |
+
## License
|
| 55 |
+
|
| 56 |
+
Pleius internal — see https://pleius.com. Not for redistribution.
|
config.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"modality": "write",
|
| 3 |
+
"encoder": "sentence-transformers/all-MiniLM-L6-v2",
|
| 4 |
+
"encoder_dim": 384,
|
| 5 |
+
"n_pillars": 4,
|
| 6 |
+
"pillars": [
|
| 7 |
+
"read_likelihood",
|
| 8 |
+
"hold",
|
| 9 |
+
"structure",
|
| 10 |
+
"score"
|
| 11 |
+
],
|
| 12 |
+
"d_model": 256,
|
| 13 |
+
"n_layers": 4,
|
| 14 |
+
"max_seq_len": 16,
|
| 15 |
+
"vocab_size": 117,
|
| 16 |
+
"bos_id": 1,
|
| 17 |
+
"eos_id": 2,
|
| 18 |
+
"pad_id": 0,
|
| 19 |
+
"sep_id": 3
|
| 20 |
+
}
|
student_int8.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7f6a75b219d7caed201894f7427210722642220fe27316b9bcd69ad4a33e5f4
|
| 3 |
+
size 7129181
|
tokenizer.json
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"modality": "write",
|
| 3 |
+
"tokens": [
|
| 4 |
+
"<pad>",
|
| 5 |
+
"<bos>",
|
| 6 |
+
"<eos>",
|
| 7 |
+
"<sep>",
|
| 8 |
+
"first line hooks",
|
| 9 |
+
"opener pulls you in",
|
| 10 |
+
"you keep reading",
|
| 11 |
+
"i kept reading",
|
| 12 |
+
"would actually read this",
|
| 13 |
+
"starts strong",
|
| 14 |
+
"strong opener",
|
| 15 |
+
"hook works",
|
| 16 |
+
"title slaps",
|
| 17 |
+
"you'd click",
|
| 18 |
+
"would actually open this",
|
| 19 |
+
"first sentence lands",
|
| 20 |
+
"real opener",
|
| 21 |
+
"opener earns the click",
|
| 22 |
+
"would skip",
|
| 23 |
+
"i'd scroll",
|
| 24 |
+
"skip the intro",
|
| 25 |
+
"no hook",
|
| 26 |
+
"weak hook",
|
| 27 |
+
"opener doesn't grab",
|
| 28 |
+
"generic intro",
|
| 29 |
+
"slow start",
|
| 30 |
+
"buries the lede",
|
| 31 |
+
"wouldn't read this",
|
| 32 |
+
"first line is dead",
|
| 33 |
+
"i'm out by line two",
|
| 34 |
+
"what's this about",
|
| 35 |
+
"no reason to keep reading",
|
| 36 |
+
"title doesn't sell it",
|
| 37 |
+
"no fluff",
|
| 38 |
+
"every line earns it",
|
| 39 |
+
"doesn't waste your time",
|
| 40 |
+
"keeps you reading",
|
| 41 |
+
"kept me to the end",
|
| 42 |
+
"tight",
|
| 43 |
+
"tight middle",
|
| 44 |
+
"no filler",
|
| 45 |
+
"kept the energy",
|
| 46 |
+
"no drag",
|
| 47 |
+
"every paragraph pulls",
|
| 48 |
+
"stayed locked in",
|
| 49 |
+
"lost in the middle",
|
| 50 |
+
"drags",
|
| 51 |
+
"boring middle",
|
| 52 |
+
"filler",
|
| 53 |
+
"too many words",
|
| 54 |
+
"could cut half",
|
| 55 |
+
"could be one tweet",
|
| 56 |
+
"rambles",
|
| 57 |
+
"loses you halfway",
|
| 58 |
+
"i bounced halfway",
|
| 59 |
+
"too long",
|
| 60 |
+
"needs a trim",
|
| 61 |
+
"lost the thread",
|
| 62 |
+
"easy to read",
|
| 63 |
+
"easy on the eyes",
|
| 64 |
+
"short paragraphs",
|
| 65 |
+
"good white space",
|
| 66 |
+
"good flow",
|
| 67 |
+
"good cadence",
|
| 68 |
+
"you can skim it",
|
| 69 |
+
"skim friendly",
|
| 70 |
+
"clean breaks",
|
| 71 |
+
"well organized",
|
| 72 |
+
"the layout helps",
|
| 73 |
+
"the formatting works",
|
| 74 |
+
"wall of text",
|
| 75 |
+
"no white space",
|
| 76 |
+
"needs paragraph breaks",
|
| 77 |
+
"long paragraphs",
|
| 78 |
+
"hard to follow",
|
| 79 |
+
"all one block",
|
| 80 |
+
"no breaks",
|
| 81 |
+
"where do i start",
|
| 82 |
+
"no flow",
|
| 83 |
+
"jumps around",
|
| 84 |
+
"no order",
|
| 85 |
+
"structure is off",
|
| 86 |
+
"memorable closer",
|
| 87 |
+
"shareable",
|
| 88 |
+
"quotable",
|
| 89 |
+
"you'd screenshot it",
|
| 90 |
+
"feels honest",
|
| 91 |
+
"sounds like a person",
|
| 92 |
+
"feels true",
|
| 93 |
+
"real voice",
|
| 94 |
+
"specific not vague",
|
| 95 |
+
"shows not tells",
|
| 96 |
+
"ending hits",
|
| 97 |
+
"ending sticks",
|
| 98 |
+
"would reshare",
|
| 99 |
+
"would save this",
|
| 100 |
+
"forgettable",
|
| 101 |
+
"weak ending",
|
| 102 |
+
"buries the point",
|
| 103 |
+
"vague claims",
|
| 104 |
+
"no examples",
|
| 105 |
+
"no specifics",
|
| 106 |
+
"no proof",
|
| 107 |
+
"abstract",
|
| 108 |
+
"corporate voice",
|
| 109 |
+
"ai voice",
|
| 110 |
+
"ai writing",
|
| 111 |
+
"feels generated",
|
| 112 |
+
"feels like a press release",
|
| 113 |
+
"stiff",
|
| 114 |
+
"no personality",
|
| 115 |
+
"no point",
|
| 116 |
+
"what's the takeaway",
|
| 117 |
+
"would not read",
|
| 118 |
+
"good hook",
|
| 119 |
+
"no reason to read",
|
| 120 |
+
"no payoff"
|
| 121 |
+
]
|
| 122 |
+
}
|