Spaces:
Sleeping
Sleeping
st4ck committed on
Commit ·
5180038
1
Parent(s): 3f05a44
Add 5 clickable example phrases in Compress Text tab
Browse files
app.py
CHANGED
|
@@ -73,6 +73,9 @@ td,th{padding:6px 12px;border:1px solid #334155}
|
|
| 73 |
th{color:#7dd3fc;font-weight:600}
|
| 74 |
footer{margin-top:32px;font-size:.8em;color:#475569;text-align:center}
|
| 75 |
a{color:#6366f1}
|
|
|
|
|
|
|
|
|
|
| 76 |
</style>
|
| 77 |
"""
|
| 78 |
|
|
@@ -89,11 +92,21 @@ PAGE = STYLE + """
|
|
| 89 |
|
| 90 |
<!-- Compress Text -->
|
| 91 |
<div class="panel {% if tab=='ct' %}active{% endif %}" id="ct">
|
| 92 |
-
<form method="post" action="/compress_text">
|
| 93 |
<label>Input text (paste or type):</label>
|
| 94 |
-
<textarea name="text" rows="7" placeholder="Paste text here...">{{ form_text or '' }}</textarea>
|
| 95 |
<button type="submit">Compress →</button>
|
| 96 |
</form>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
{% if ct_error %}<p class="error">{{ ct_error }}</p>{% endif %}
|
| 98 |
{% if ct_result %}
|
| 99 |
<div class="result">
|
|
@@ -188,6 +201,21 @@ PAGE = STYLE + """
|
|
| 188 |
· <a href="https://arxiv.org/abs/2603.08771">arXiv:2603.08771</a></footer>
|
| 189 |
|
| 190 |
<script>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
function show(id){
|
| 192 |
document.querySelectorAll('.tab').forEach(t=>t.classList.remove('active'));
|
| 193 |
document.querySelectorAll('.panel').forEach(p=>p.classList.remove('active'));
|
|
|
|
| 73 |
th{color:#7dd3fc;font-weight:600}
|
| 74 |
footer{margin-top:32px;font-size:.8em;color:#475569;text-align:center}
|
| 75 |
a{color:#6366f1}
|
| 76 |
+
.ex-btn{padding:5px 12px;background:#1e3a5f;border:1px solid #334155;border-radius:6px;
|
| 77 |
+
color:#7dd3fc;font-size:.78em;cursor:pointer;transition:.15s}
|
| 78 |
+
.ex-btn:hover{background:#1d4ed8;color:white;border-color:#1d4ed8}
|
| 79 |
</style>
|
| 80 |
"""
|
| 81 |
|
|
|
|
| 92 |
|
| 93 |
<!-- Compress Text -->
|
| 94 |
<div class="panel {% if tab=='ct' %}active{% endif %}" id="ct">
|
| 95 |
+
<form method="post" action="/compress_text" id="ct-form">
|
| 96 |
<label>Input text (paste or type):</label>
|
| 97 |
+
<textarea name="text" id="ct-textarea" rows="7" placeholder="Paste text here...">{{ form_text or '' }}</textarea>
|
| 98 |
<button type="submit">Compress →</button>
|
| 99 |
</form>
|
| 100 |
+
<div style="margin-top:14px">
|
| 101 |
+
<p style="font-size:.82em;color:#64748b;margin:0 0 6px">Try an example:</p>
|
| 102 |
+
<div style="display:flex;flex-wrap:wrap;gap:6px">
|
| 103 |
+
<button type="button" class="ex-btn" onclick="setExample(0)">Alice in Wonderland</button>
|
| 104 |
+
<button type="button" class="ex-btn" onclick="setExample(1)">Wikipedia intro</button>
|
| 105 |
+
<button type="button" class="ex-btn" onclick="setExample(2)">Scientific abstract</button>
|
| 106 |
+
<button type="button" class="ex-btn" onclick="setExample(3)">Repeated structure</button>
|
| 107 |
+
<button type="button" class="ex-btn" onclick="setExample(4)">Source code</button>
|
| 108 |
+
</div>
|
| 109 |
+
</div>
|
| 110 |
{% if ct_error %}<p class="error">{{ ct_error }}</p>{% endif %}
|
| 111 |
{% if ct_result %}
|
| 112 |
<div class="result">
|
|
|
|
| 201 |
· <a href="https://arxiv.org/abs/2603.08771">arXiv:2603.08771</a></footer>
|
| 202 |
|
| 203 |
<script>
|
| 204 |
+
// Example phrases offered by the "Try an example" buttons in the Compress Text
// panel. Each button passes its position in this array to setExample(), so the
// order here must stay in sync with the button order in the markup.
// Kept as `var` so the array remains a property of the global object.
// NOTE(review): this script is embedded in a Python string (PAGE); if that
// literal is not a raw string, the backslash escapes in example 4 would be
// expanded before the browser sees them and break the string -- confirm.
var EXAMPLES=[
  // 0 — Alice in Wonderland (the benchmark corpus)
  "Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice 'without pictures or conversations?' So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her.",
  // 1 — Wikipedia-style prose
  "The Internet is a global system of interconnected computer networks that uses the standard Internet protocol suite (TCP/IP) to communicate between networks and devices. It is a network of networks that consists of private, public, academic, business, and government networks of local to global scope, linked by a broad array of electronic, wireless, and optical networking technologies. The Internet carries a vast range of information resources and services, such as the inter-linked hypertext documents and applications of the World Wide Web, electronic mail, telephony, and file sharing.",
  // 2 — Scientific abstract
  "We introduce a novel lossless text compression algorithm based on micro-diffusion, a multi-step score-based reverse diffusion process applied to the blended probability distributions produced by a cascaded statistical model. The pipeline combines an adaptive PPM model (orders 0–4) with PPMC exclusion, a long-range match model, a trie-based word model, and a high-order context model (orders 5–8). Post-blend, a binary-tree Tweedie denoiser with James-Stein shrinkage corrects residual noise in three steps. On the Canterbury and Large Text Compression benchmarks the method achieves 2.119 bpb on alice29.txt and 1.753 bpb on enwik8, outperforming xz -9 by 16.9% and 11.9% respectively.",
  // 3 — Repetitive / structured text (high compressibility)
  "AAABBBCCCDDDEEEFFFGGGHHHIIIJJJKKKLLLMMMNNNOOOPPPQQQRRRSSSTTTUUUVVVWWWXXXYYYZZZ AAABBBCCCDDDEEEFFFGGGHHHIIIJJJKKKLLLMMMNNNOOOPPPQQQRRRSSSTTTUUUVVVWWWXXXYYYZZZ AAABBBCCCDDDEEEFFFGGGHHHIIIJJJKKKLLLMMMNNNOOOPPPQQQRRRSSSTTTUUUVVVWWWXXXYYYZZZ The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.",
  // 4 — C source code snippet
  "static void probs_to_cumfreqs(const double *probs, int64_t *cumfreqs, int64_t *out_total) {\n cumfreqs[0] = 0;\n for (int i = 0; i < 256; i++) {\n int64_t f = (int64_t)(probs[i] * SCALE + 0.5);\n if (f < 1) f = 1;\n cumfreqs[i + 1] = cumfreqs[i] + f;\n }\n *out_total = cumfreqs[256];\n}\n\nstatic void clamp_normalize(double *probs) {\n double sum = 0.0;\n for (int i = 0; i < 256; i++) {\n if (probs[i] < 1e-10) probs[i] = 1e-10;\n sum += probs[i];\n }\n double inv = 1.0 / sum;\n for (int i = 0; i < 256; i++) probs[i] *= inv;\n}"
];
|
| 216 |
+
/**
 * Load one of the predefined example phrases into the Compress Text textarea.
 * Called from the inline onclick handlers of the ".ex-btn" buttons.
 *
 * @param {number} i - Index into the EXAMPLES array.
 */
function setExample(i){
  const example=EXAMPLES[i];
  // An index with no matching entry would otherwise assign `undefined`, which
  // a textarea shows as the literal text "undefined"; leave the field as is.
  if(typeof example!=='string')return;
  document.getElementById('ct-textarea').value=example;
}
|
| 219 |
function show(id){
|
| 220 |
document.querySelectorAll('.tab').forEach(t=>t.classList.remove('active'));
|
| 221 |
document.querySelectorAll('.panel').forEach(p=>p.classList.remove('active'));
|