st4ck committed on
Commit
5180038
·
1 Parent(s): 3f05a44

Add 5 clickable example phrases in Compress Text tab

Browse files
Files changed (1) hide show
  1. app.py +30 -2
app.py CHANGED
@@ -73,6 +73,9 @@ td,th{padding:6px 12px;border:1px solid #334155}
73
  th{color:#7dd3fc;font-weight:600}
74
  footer{margin-top:32px;font-size:.8em;color:#475569;text-align:center}
75
  a{color:#6366f1}
 
 
 
76
  </style>
77
  """
78
 
@@ -89,11 +92,21 @@ PAGE = STYLE + """
89
 
90
  <!-- Compress Text -->
91
  <div class="panel {% if tab=='ct' %}active{% endif %}" id="ct">
92
- <form method="post" action="/compress_text">
93
  <label>Input text (paste or type):</label>
94
- <textarea name="text" rows="7" placeholder="Paste text here...">{{ form_text or '' }}</textarea>
95
  <button type="submit">Compress →</button>
96
  </form>
 
 
 
 
 
 
 
 
 
 
97
  {% if ct_error %}<p class="error">{{ ct_error }}</p>{% endif %}
98
  {% if ct_result %}
99
  <div class="result">
@@ -188,6 +201,21 @@ PAGE = STYLE + """
188
  &middot; <a href="https://arxiv.org/abs/2603.08771">arXiv:2603.08771</a></footer>
189
 
190
  <script>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  function show(id){
192
  document.querySelectorAll('.tab').forEach(t=>t.classList.remove('active'));
193
  document.querySelectorAll('.panel').forEach(p=>p.classList.remove('active'));
 
73
  th{color:#7dd3fc;font-weight:600}
74
  footer{margin-top:32px;font-size:.8em;color:#475569;text-align:center}
75
  a{color:#6366f1}
76
+ .ex-btn{padding:5px 12px;background:#1e3a5f;border:1px solid #334155;border-radius:6px;
77
+ color:#7dd3fc;font-size:.78em;cursor:pointer;transition:.15s}
78
+ .ex-btn:hover{background:#1d4ed8;color:white;border-color:#1d4ed8}
79
  </style>
80
  """
81
 
 
92
 
93
  <!-- Compress Text -->
94
  <div class="panel {% if tab=='ct' %}active{% endif %}" id="ct">
95
+ <form method="post" action="/compress_text" id="ct-form">
96
  <label>Input text (paste or type):</label>
97
+ <textarea name="text" id="ct-textarea" rows="7" placeholder="Paste text here...">{{ form_text or '' }}</textarea>
98
  <button type="submit">Compress →</button>
99
  </form>
100
+ <div style="margin-top:14px">
101
+ <p style="font-size:.82em;color:#64748b;margin:0 0 6px">Try an example:</p>
102
+ <div style="display:flex;flex-wrap:wrap;gap:6px">
103
+ <button type="button" class="ex-btn" onclick="setExample(0)">Alice in Wonderland</button>
104
+ <button type="button" class="ex-btn" onclick="setExample(1)">Wikipedia intro</button>
105
+ <button type="button" class="ex-btn" onclick="setExample(2)">Scientific abstract</button>
106
+ <button type="button" class="ex-btn" onclick="setExample(3)">Repeated structure</button>
107
+ <button type="button" class="ex-btn" onclick="setExample(4)">Source code</button>
108
+ </div>
109
+ </div>
110
  {% if ct_error %}<p class="error">{{ ct_error }}</p>{% endif %}
111
  {% if ct_result %}
112
  <div class="result">
 
201
  &middot; <a href="https://arxiv.org/abs/2603.08771">arXiv:2603.08771</a></footer>
202
 
203
  <script>
204
+ var EXAMPLES=[
205
+ // 0 — Alice in Wonderland (the benchmark corpus)
206
+ "Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice 'without pictures or conversations?' So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her.",
207
+ // 1 — Wikipedia-style prose
208
+ "The Internet is a global system of interconnected computer networks that uses the standard Internet protocol suite (TCP/IP) to communicate between networks and devices. It is a network of networks that consists of private, public, academic, business, and government networks of local to global scope, linked by a broad array of electronic, wireless, and optical networking technologies. The Internet carries a vast range of information resources and services, such as the inter-linked hypertext documents and applications of the World Wide Web, electronic mail, telephony, and file sharing.",
209
+ // 2 — Scientific abstract
210
+ "We introduce a novel lossless text compression algorithm based on micro-diffusion, a multi-step score-based reverse diffusion process applied to the blended probability distributions produced by a cascaded statistical model. The pipeline combines an adaptive PPM model (orders 0–4) with PPMC exclusion, a long-range match model, a trie-based word model, and a high-order context model (orders 5–8). Post-blend, a binary-tree Tweedie denoiser with James-Stein shrinkage corrects residual noise in three steps. On the Canterbury and Large Text Compression benchmarks the method achieves 2.119 bpb on alice29.txt and 1.753 bpb on enwik8, outperforming xz -9 by 16.9% and 11.9% respectively.",
211
+ // 3 — Repetitive / structured text (high compressibility)
212
+ "AAABBBCCCDDDEEEFFFGGGHHHIIIJJJKKKLLLMMMNNNOOOPPPQQQRRRSSSTTTUUUVVVWWWXXXYYYZZZ AAABBBCCCDDDEEEFFFGGGHHHIIIJJJKKKLLLMMMNNNOOOPPPQQQRRRSSSTTTUUUVVVWWWXXXYYYZZZ AAABBBCCCDDDEEEFFFGGGHHHIIIJJJKKKLLLMMMNNNOOOPPPQQQRRRSSSTTTUUUVVVWWWXXXYYYZZZ The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.",
213
+ // 4 — C source code snippet
214
+ "static void probs_to_cumfreqs(const double *probs, int64_t *cumfreqs, int64_t *out_total) {\n cumfreqs[0] = 0;\n for (int i = 0; i < 256; i++) {\n int64_t f = (int64_t)(probs[i] * SCALE + 0.5);\n if (f < 1) f = 1;\n cumfreqs[i + 1] = cumfreqs[i] + f;\n }\n *out_total = cumfreqs[256];\n}\n\nstatic void clamp_normalize(double *probs) {\n double sum = 0.0;\n for (int i = 0; i < 256; i++) {\n if (probs[i] < 1e-10) probs[i] = 1e-10;\n sum += probs[i];\n }\n double inv = 1.0 / sum;\n for (int i = 0; i < 256; i++) probs[i] *= inv;\n}"
215
+ ];
216
+ function setExample(i){
217
+ document.getElementById('ct-textarea').value=EXAMPLES[i];
218
+ }
219
  function show(id){
220
  document.querySelectorAll('.tab').forEach(t=>t.classList.remove('active'));
221
  document.querySelectorAll('.panel').forEach(p=>p.classList.remove('active'));