#!/usr/bin/env python3
"""Midicoth — Micro-Diffusion Compression — HuggingFace Space Demo."""
import os, subprocess, tempfile, base64
from flask import Flask, request, send_file, render_template_string
# Flask application object; every @app.route decorator in this file registers on it.
app = Flask(__name__)
# Absolute path of the "mdc" executable, resolved relative to this script's own directory.
BINARY = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mdc")
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def fmt(n):
    """Render the byte count ``n`` as a short human-readable string.

    Counts below 1024 are shown as whole bytes ("B"), counts below 1024**2
    as kilobytes with one decimal ("KB"), and everything else as megabytes
    with two decimals ("MB"). Units are 1024-based.
    """
    kilo = 1024
    mega = kilo ** 2
    if n < kilo:
        text = f"{n} B"
    elif n < mega:
        text = f"{n/kilo:.1f} KB"
    else:
        text = f"{n/mega:.2f} MB"
    return text
def is_utf8(data):
    """Return True when ``data`` decodes cleanly as UTF-8, otherwise False."""
    try:
        data.decode("utf-8")
    except UnicodeDecodeError:
        return False
    else:
        return True
def dl_link(b64, name, label, color="#22c55e"):
    """Build the string for a download link labelled ``label``.

    NOTE(review): in the text as it stands, only ``label`` is interpolated;
    ``b64``, ``name`` and ``color`` are accepted but unused, and the first
    literal is empty. Presumably the surrounding link markup was lost from
    these string literals — confirm against the deployed file before relying
    on the output.
    """
    return (f''
            f'{label}')
# ---------------------------------------------------------------------------
# HTML
# ---------------------------------------------------------------------------
# Leading fragment prepended to PAGE below.
# NOTE(review): the literal is empty apart from a newline here — presumably it
# is meant to hold the page's style markup; confirm.
STYLE = """
"""
# Jinja template rendered by every route through render_template_string().
# It checks the ct_error/ct_result, cf_error/cf_result and dc_error/dc_result
# context variables; the routes also pass tab, form_text and form_b64.
PAGE = STYLE + """
🗜️ Midicoth
Micro-Diffusion Compression · Binary Tree Tweedie Denoising
· No neural network · No GPU
Try an example:
{% if ct_error %}
{{ ct_error }}
{% endif %}
{% if ct_result %}
{% endif %}
{% if cf_error %}
{{ cf_error }}
{% endif %}
{% if cf_result %}
{% endif %}
Upload a .mdc file or paste base64 data from the Compress tabs.
{% if dc_error %}
{{ dc_error }}
{% endif %}
{% if dc_result %}
{% endif %}
How it works: PPM (orders 0–4) → Match Model → Word Model →
High-Order Context (orders 5–8) → Micro-Diffusion Tweedie Denoiser → Arithmetic Coder.
| Benchmark | Midicoth | xz -9 | Improvement |
| alice29.txt (152 KB) | 2.119 bpb | 2.551 bpb | +16.9% |
| enwik8 (100 MB) | 1.753 bpb | 1.989 bpb | +11.9% |
"""
# ---------------------------------------------------------------------------
# Compression logic
# ---------------------------------------------------------------------------
def run_compress(data, filename):
    """Compress ``data`` by invoking the external ``mdc`` binary.

    The bytes are written to a scratch file in a temporary directory,
    ``BINARY compress <input> <output>`` is executed, and the output file is
    read back and base64-encoded.

    Args:
        data: Raw bytes to compress.
        filename: Name of the source; accepted for interface compatibility
            but not used by the compression itself.

    Returns:
        A tuple ``(b64_str, in_sz, out_sz, bpb)``: the base64 text of the
        compressed bytes, the input and output sizes in bytes, and the
        compressed bits per input byte (``0.0`` when the input is empty).

    Raises:
        RuntimeError: If the binary cannot be launched or exits with a
            non-zero status.
    """
    with tempfile.TemporaryDirectory() as d:
        inp = os.path.join(d, "input")
        out = os.path.join(d, "output.mdc")
        with open(inp, "wb") as fh:
            fh.write(data)
        try:
            # errors="replace" keeps undecodable tool output from raising
            # UnicodeDecodeError while the streams are being captured.
            r = subprocess.run([BINARY, "compress", inp, out],
                               capture_output=True, text=True, errors="replace")
        except OSError as e:
            # Missing or non-executable binary: report it through the
            # documented exception type instead of leaking OSError.
            raise RuntimeError(f"Could not run compressor: {e}") from e
        if r.returncode != 0:
            msg = r.stderr or r.stdout
            if not msg.strip():
                # An empty message would be rendered as "no error" by the
                # routes, so always say something.
                msg = f"Compression failed (exit code {r.returncode})."
            raise RuntimeError(msg)
        in_sz = os.path.getsize(inp)
        out_sz = os.path.getsize(out)
        with open(out, "rb") as fh:
            compressed = fh.read()
    b64 = base64.b64encode(compressed).decode()
    # Empty input has no meaningful bits-per-byte; avoid ZeroDivisionError.
    bpb = out_sz * 8 / in_sz if in_sz else 0.0
    return b64, in_sz, out_sz, bpb
def run_decompress(data):
    """Decompress ``data`` by invoking the external ``mdc`` binary.

    The bytes are written to a scratch file in a temporary directory,
    ``BINARY decompress <input> <output>`` is executed, and the output file
    is read back.

    Args:
        data: Bytes of a compressed ``.mdc`` stream.

    Returns:
        The restored content as a ``bytes`` object (not a tuple).

    Raises:
        RuntimeError: If the binary cannot be launched or exits with a
            non-zero status.
    """
    with tempfile.TemporaryDirectory() as d:
        inp = os.path.join(d, "input.mdc")
        out = os.path.join(d, "output")
        with open(inp, "wb") as fh:
            fh.write(data)
        try:
            # errors="replace" keeps undecodable tool output from raising
            # UnicodeDecodeError while the streams are being captured.
            r = subprocess.run([BINARY, "decompress", inp, out],
                               capture_output=True, text=True, errors="replace")
        except OSError as e:
            # Missing or non-executable binary: report it through the
            # documented exception type instead of leaking OSError.
            raise RuntimeError(f"Could not run decompressor: {e}") from e
        if r.returncode != 0:
            msg = r.stderr or r.stdout
            if not msg.strip():
                # An empty message would be rendered as "no error" by the
                # routes, so always say something.
                msg = f"Decompression failed (exit code {r.returncode})."
            raise RuntimeError(msg)
        with open(out, "rb") as fh:
            return fh.read()
def compress_result(b64, in_sz, out_sz, bpb, out_name):
    """Assemble the template context describing one compression result.

    Args:
        b64: Base64 text of the compressed bytes.
        in_sz: Size of the original input in bytes.
        out_sz: Size of the compressed output in bytes.
        bpb: Compressed bits per input byte.
        out_name: File name to offer for the download.

    Returns:
        A dict with keys ``stats`` (summary line), ``dl_href`` (data: URI
        carrying the compressed bytes), ``dl_name``, ``out_sz`` (formatted
        size) and ``b64``.
    """
    dl_href = f"data:application/octet-stream;base64,{b64}"
    if in_sz:
        pct = out_sz / in_sz * 100
        ratio_txt = f"{pct:.1f}%"
        saved_txt = f"{100-pct:.1f}%"
    else:
        # A ratio against an empty input is undefined; show a placeholder
        # instead of raising ZeroDivisionError.
        ratio_txt = saved_txt = "n/a"
    stats = (f"Original: {fmt(in_sz)} → "
             f"Compressed: {fmt(out_sz)} | "
             f"{ratio_txt} | "
             f"{bpb:.3f} bpb | "
             f"Saved {saved_txt}")
    return dict(stats=stats, dl_href=dl_href, dl_name=out_name,
                out_sz=fmt(out_sz), b64=b64)
# ---------------------------------------------------------------------------
# Routes
# ---------------------------------------------------------------------------
@app.route("/health")
def health():
    """Health-check endpoint: always answers with body "ok" and status 200."""
    body, status = "ok", 200
    return body, status
@app.route("/")
def index():
    """Render the page with ``tab`` set to "ct" and no results or errors."""
    context = {"tab": "ct"}
    return render_template_string(PAGE, **context)
@app.route("/compress_text", methods=["POST"])
def compress_text():
    """Compress the text posted in the ``text`` form field.

    The text is stripped of surrounding whitespace, encoded as UTF-8 and
    compressed. The page is re-rendered with either ``ct_result`` or
    ``ct_error`` set; the submitted text is echoed back as ``form_text``.
    """
    submitted = request.form.get("text", "").strip()
    if not submitted:
        return render_template_string(PAGE, tab="ct", ct_error="Please enter some text.")
    try:
        payload = submitted.encode("utf-8")
        encoded, size_in, size_out, bits_per_byte = run_compress(payload, "text")
        outcome = compress_result(encoded, size_in, size_out, bits_per_byte,
                                  "compressed.mdc")
        return render_template_string(PAGE, tab="ct", ct_result=outcome,
                                      form_text=submitted)
    except Exception as exc:
        # Any failure is shown to the user on the same tab.
        return render_template_string(PAGE, tab="ct", ct_error=str(exc),
                                      form_text=submitted)
@app.route("/compress_file", methods=["POST"])
def compress_file():
    """Compress the file uploaded in the ``file`` form field.

    The page is re-rendered with either ``cf_result`` or ``cf_error`` set.
    The download name offered is the uploaded file name plus ".mdc".
    """
    upload = request.files.get("file")
    if not upload or not upload.filename:
        return render_template_string(PAGE, tab="cf", cf_error="No file uploaded.")
    try:
        payload = upload.read()
        encoded, size_in, size_out, bits_per_byte = run_compress(payload, upload.filename)
        download_name = upload.filename + ".mdc"
        outcome = compress_result(encoded, size_in, size_out, bits_per_byte,
                                  download_name)
        return render_template_string(PAGE, tab="cf", cf_result=outcome)
    except Exception as exc:
        # Any failure is shown to the user on the same tab.
        return render_template_string(PAGE, tab="cf", cf_error=str(exc))
def _decompress_response(data, tab, form_b64=""):
    """Decompress ``data`` and render the page with the outcome.

    Args:
        data: Bytes of the candidate compressed stream.
        tab: Tab identifier passed through to the template.
        form_b64: Base64 text to echo back into the form, if any.

    Returns:
        The rendered page. ``dc_error`` is set when the first four bytes are
        not ``b"MDC7"`` or decompression fails. Otherwise ``dc_result`` is
        set, carrying the restored text when it is valid UTF-8 and a
        base64 data: URI named "restored.bin" when it is not.
    """
    magic = b"MDC7"
    if data[:len(magic)] != magic:
        return render_template_string(PAGE, tab=tab,
                                      dc_error="Not a valid Midicoth (.mdc) file.",
                                      form_b64=form_b64)
    try:
        restored = run_decompress(data)
    except Exception as exc:
        return render_template_string(PAGE, tab=tab, dc_error=str(exc),
                                      form_b64=form_b64)

    size_in, size_out = len(data), len(restored)
    summary = (f"Compressed: {fmt(size_in)} → "
               f"Restored: {fmt(size_out)} | Lossless ✓")
    result = {
        "stats": summary,
        "text": None,
        "dl_href": None,
        "dl_name": None,
        "out_sz": fmt(size_out),
    }
    try:
        # Text is shown inline when the restored bytes are valid UTF-8.
        result["text"] = restored.decode("utf-8")
    except UnicodeDecodeError:
        # Anything else is offered as a binary download instead.
        encoded = base64.b64encode(restored).decode()
        result["dl_href"] = f"data:application/octet-stream;base64,{encoded}"
        result["dl_name"] = "restored.bin"
    return render_template_string(PAGE, tab=tab, dc_result=result, form_b64=form_b64)
@app.route("/decompress_file", methods=["POST"])
def decompress_file():
    """Decompress the file uploaded in the ``file`` form field."""
    upload = request.files.get("file")
    if upload and upload.filename:
        return _decompress_response(upload.read(), tab="dc")
    return render_template_string(PAGE, tab="dc", dc_error="No file uploaded.")
@app.route("/decompress_b64", methods=["POST"])
def decompress_b64():
    """Decompress base64 text posted in the ``b64`` form field.

    The stripped text is base64-decoded and handed to
    ``_decompress_response``; the pasted text is echoed back as ``form_b64``.
    """
    pasted = request.form.get("b64", "").strip()
    if not pasted:
        return render_template_string(PAGE, tab="dc", dc_error="Please paste base64 data.")
    try:
        decoded = base64.b64decode(pasted)
    except Exception:
        return render_template_string(PAGE, tab="dc",
                                      dc_error="Invalid base64 data.", form_b64=pasted)
    else:
        return _decompress_response(decoded, tab="dc", form_b64=pasted)
# When executed as a script, start Flask's built-in server listening on all
# interfaces at port 7860 with debug mode off.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860, debug=False)