upload cortexa-write-feedback v1

Browse files

Files changed (4) hide show

README.md +56 -0
config.json +20 -0
student_int8.onnx +3 -0
tokenizer.json +122 -0

README.md ADDED Viewed

	@@ -0,0 +1,56 @@

+---
+language:
+- en
+license: other
+license_name: pleius-internal
+tags:
+- onnx
+- conditional-text-generation
+- writing-feedback
+- distillation
+- creator-tools
+---
+# cortexa-write-feedback (distilled student)
+A ~4.4M-parameter conditional decoder distilled from
+`M725/cortexa-write-scorer` (the worker-side TF-IDF/lexical stub).
+Takes MiniLM text features (384-d) + the 4 Write pillar scores and
+emits a creator-vernacular phrase chain about the draft:
+```
+"first line hooks | ending sticks"
+"tight middle | shareable"
+"wall of text | no reason to read"
+"drags | no payoff"
+```
+## Files
+| file | purpose |
+|---|---|
+| `student_int8.onnx` | TinyTransformer decoder, 4 layers / 256-dim / 4 heads, INT8 dynamic-quantized. 7.1 MB (6.8 MiB; 7,129,181 bytes). |
+| `tokenizer.json`    | Whole-phrase tokenizer (vocab 117, including the 4 specials `<pad>`, `<bos>`, `<eos>`, `<sep>`). |
+| `config.json`       | Encoder name and dim (384), pillar names, decoder dims (`d_model`, `n_layers`, `max_seq_len`), vocab size, special-token ids. |
+## Inference shape
+```
+inputs:
+  encoder_feats   (1, 384)  float32   # sentence-transformers/all-MiniLM-L6-v2 mean-pooled, L2-normalized
+  scores          (1, 4)    float32   # [read_likelihood, hold, structure, score] in [0,1]
+  scores_present  (1,)      float32   # 1.0 = `scores` supplied (anchored), 0.0 = `scores` not supplied (fast mode)
+  input_ids       (1, T)    int64     # T <= max_seq_len (16 in config.json)
+outputs:
+  logits          (1, T, V) float32   # V = vocab_size (117 in config.json)
+```
+## Training
+See `research/distill_students/train_write.py` in the app repo. Teacher
+is `score_write_for_rules()` — the Python port of the cortexa-proxy
+worker's deterministic TF-IDF write scorer.
+## License
+Pleius internal — see https://pleius.com. Not for redistribution.

config.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+  "modality": "write",
+  "encoder": "sentence-transformers/all-MiniLM-L6-v2",
+  "encoder_dim": 384,
+  "n_pillars": 4,
+  "pillars": [
+    "read_likelihood",
+    "hold",
+    "structure",
+    "score"
+  ],
+  "d_model": 256,
+  "n_layers": 4,
+  "max_seq_len": 16,
+  "vocab_size": 117,
+  "bos_id": 1,
+  "eos_id": 2,
+  "pad_id": 0,
+  "sep_id": 3
+}

student_int8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7f6a75b219d7caed201894f7427210722642220fe27316b9bcd69ad4a33e5f4
+size 7129181

tokenizer.json ADDED Viewed

	@@ -0,0 +1,122 @@

+{
+  "modality": "write",
+  "tokens": [
+    "<pad>",
+    "<bos>",
+    "<eos>",
+    "<sep>",
+    "first line hooks",
+    "opener pulls you in",
+    "you keep reading",
+    "i kept reading",
+    "would actually read this",
+    "starts strong",
+    "strong opener",
+    "hook works",
+    "title slaps",
+    "you'd click",
+    "would actually open this",
+    "first sentence lands",
+    "real opener",
+    "opener earns the click",
+    "would skip",
+    "i'd scroll",
+    "skip the intro",
+    "no hook",
+    "weak hook",
+    "opener doesn't grab",
+    "generic intro",
+    "slow start",
+    "buries the lede",
+    "wouldn't read this",
+    "first line is dead",
+    "i'm out by line two",
+    "what's this about",
+    "no reason to keep reading",
+    "title doesn't sell it",
+    "no fluff",
+    "every line earns it",
+    "doesn't waste your time",
+    "keeps you reading",
+    "kept me to the end",
+    "tight",
+    "tight middle",
+    "no filler",
+    "kept the energy",
+    "no drag",
+    "every paragraph pulls",
+    "stayed locked in",
+    "lost in the middle",
+    "drags",
+    "boring middle",
+    "filler",
+    "too many words",
+    "could cut half",
+    "could be one tweet",
+    "rambles",
+    "loses you halfway",
+    "i bounced halfway",
+    "too long",
+    "needs a trim",
+    "lost the thread",
+    "easy to read",
+    "easy on the eyes",
+    "short paragraphs",
+    "good white space",
+    "good flow",
+    "good cadence",
+    "you can skim it",
+    "skim friendly",
+    "clean breaks",
+    "well organized",
+    "the layout helps",
+    "the formatting works",
+    "wall of text",
+    "no white space",
+    "needs paragraph breaks",
+    "long paragraphs",
+    "hard to follow",
+    "all one block",
+    "no breaks",
+    "where do i start",
+    "no flow",
+    "jumps around",
+    "no order",
+    "structure is off",
+    "memorable closer",
+    "shareable",
+    "quotable",
+    "you'd screenshot it",
+    "feels honest",
+    "sounds like a person",
+    "feels true",
+    "real voice",
+    "specific not vague",
+    "shows not tells",
+    "ending hits",
+    "ending sticks",
+    "would reshare",
+    "would save this",
+    "forgettable",
+    "weak ending",
+    "buries the point",
+    "vague claims",
+    "no examples",
+    "no specifics",
+    "no proof",
+    "abstract",
+    "corporate voice",
+    "ai voice",
+    "ai writing",
+    "feels generated",
+    "feels like a press release",
+    "stiff",
+    "no personality",
+    "no point",
+    "what's the takeaway",
+    "would not read",
+    "good hook",
+    "no reason to read",
+    "no payoff"
+  ]
+}