Spaces:

MihaiPopa-1
/

FocalCodec-Demo

Sleeping

App Files Files Community

MihaiPopa-1 committed 11 days ago

Commit

625e20b

verified ·

1 Parent(s): 5146984

Update app.py

Browse files

Files changed (1) hide show

app.py +174 -194

app.py CHANGED Viewed

@@ -4,7 +4,6 @@ import gradio as gr
 import os
 import tempfile
 import numpy as np
-import struct
 # Define the model ID for the 0.16 kbps codec config
 MODEL_CONFIG = "lucadellalib/focalcodec_12_5hz"
@@ -49,18 +48,18 @@ except Exception as e:
         codec = None
-def save_compressed_codes_optimal(toks, codes, fc_file_path, codec):
-    """Save codes with optimal bit packing to achieve true 160 bps"""
-    codes_cpu = codes.cpu().numpy()
-    toks_cpu = toks.cpu().numpy()
-    print(f"\n=== Optimal Compression ===")
-    print(f"Codes shape: {codes.shape}")
-    print(f"Codes dtype: {codes.dtype}")
-    # Determine actual bits needed based on token range
-    max_token = int(toks_cpu.max())
     if max_token <= 1:
         bits_needed = 1
     elif max_token <= 3:
@@ -94,85 +93,45 @@ def save_compressed_codes_optimal(toks, codes, fc_file_path, codec):
     else:
         bits_needed = 16
-    print(f"Token range: 0 to {max_token}")
-    print(f"Bits needed per token: {bits_needed}")
-    # If codes are already binary (batch, time, bits), use them directly
-    if len(codes.shape) == 3 and codes.dtype in [torch.bool, torch.uint8]:
-        print(f"Using binary codes directly: {codes.shape[2]} bits per token")
-        # Pack the binary codes
-        codes_flat = codes_cpu.flatten()
-        packed_bits = np.packbits(codes_flat)
-        bits_per_token = codes.shape[2]
-        num_tokens = codes.shape[1]
-    else:
-        # Pack tokens manually using exact bit width
-        print(f"Packing tokens with {bits_needed} bits each")
-        toks_flat = toks_cpu.flatten().astype(np.uint32)
-        num_tokens = len(toks_flat)
-        # Convert to binary string and pack
-        total_bits = num_tokens * bits_needed
-        # Create bit array
-        bit_array = []
-        for tok in toks_flat:
-            # Convert to binary with exact bit width
-            bits = format(int(tok), f'0{bits_needed}b')
-            bit_array.extend([int(b) for b in bits])
-        # Pad to byte boundary
-        while len(bit_array) % 8 != 0:
-            bit_array.append(0)
-        # Pack into bytes
-        packed_bits = np.packbits(np.array(bit_array, dtype=np.uint8))
-        bits_per_token = bits_needed
-    # Write to file
     with open(fc_file_path, 'wb') as f:
-        # Magic number
-        f.write(b'FC01')
-        # Metadata
-        f.write(struct.pack('<I', toks.shape[0]))  # batch size
-        f.write(struct.pack('<I', num_tokens))      # number of tokens
-        f.write(struct.pack('<B', bits_per_token))  # bits per token
-        # Packed data
         f.write(packed_bits.tobytes())
     file_size = os.path.getsize(fc_file_path)
-    header_size = 4 + 4 + 4 + 1  # magic + 2 ints + 1 byte
-    data_size = file_size - header_size
-    print(f"File size: {file_size} bytes (header: {header_size}B, data: {data_size}B)")
-    print(f"===========================\n")
-    return file_size, bits_per_token, data_size
-def load_compressed_codes_optimal(fc_file_path):
-    """Load optimally packed codes"""
     with open(fc_file_path, 'rb') as f:
-        # Verify magic
-        magic = f.read(4)
-        if magic != b'FC01':
-            raise ValueError("Invalid .fc file!")
-        # Read metadata
-        batch_size = struct.unpack('<I', f.read(4))[0]
-        num_tokens = struct.unpack('<I', f.read(4))[0]
-        bits_per_token = struct.unpack('<B', f.read(1))[0]
-        # Read packed data
         packed_data = np.frombuffer(f.read(), dtype=np.uint8)
-    print(f"\n=== Loading Optimal Codes ===")
-    print(f"Batch: {batch_size}, Tokens: {num_tokens}, Bits/token: {bits_per_token}")
     # Unpack bits
     unpacked_bits = np.unpackbits(packed_data)
@@ -193,20 +152,33 @@ def load_compressed_codes_optimal(fc_file_path):
             token_value = (token_value << 1) | bit
         tokens.append(token_value)
-    tokens_array = np.array(tokens, dtype=np.int64).reshape(batch_size, -1)
     tokens_tensor = torch.from_numpy(tokens_array)
     print(f"Loaded tokens: {tokens_tensor.shape}")
-    print(f"==============================\n")
     return tokens_tensor
 def encode_decode_focal(audio_input):
     """
     Processes input audio through the 160 bps FocalCodec, saves the tokens,
     and returns both the decoded WAV and the path to the FC file for download.
     """
     if codec is None:
         return None, None, "❌ ERROR: Model failed to load. Check console for details."
@@ -264,36 +236,33 @@ def encode_decode_focal(audio_input):
             print(f"Duration: {duration_sec:.2f}s")
             print(f"Token rate: {token_rate:.2f} tokens/sec")
-            # Get binary codes
-            codes = codec.toks_to_codes(toks)
-            print(f"Codes shape: {codes.shape}")
-            print(f"Codes dtype: {codes.dtype}")
-            if len(codes.shape) == 3:
-                print(f"Bits per token (from codes): {codes.shape[2]}")
             print("\n--- Decoding ---")
             rec_sig = codec.toks_to_sig(toks)
             print(f"Reconstructed signal shape: {rec_sig.shape}")
-        # --- Save with optimal bit packing ---
         temp_dir = tempfile.mkdtemp()
         fc_file_path = os.path.join(temp_dir, "compressed_tokens.fc")
-        file_size, bits_per_token, data_size = save_compressed_codes_optimal(
-            toks, codes, fc_file_path, codec
-        )
         # Calculate bitrates
-        total_bitrate = (file_size * 8) / duration_sec
-        data_bitrate = (data_size * 8) / duration_sec
         theoretical_bitrate = token_rate * bits_per_token
         print(f"--- Results ---")
-        print(f"Total bitrate: {total_bitrate:.1f} bps (with header)")
-        print(f"Data bitrate: {data_bitrate:.1f} bps (data only)")
         print(f"Theoretical: {theoretical_bitrate:.1f} bps")
         print(f"Target: 160 bps")
-        print(f"Efficiency: {(160/data_bitrate)*100:.1f}% of target")
         print(f"{'='*50}\n")
         # Prepare output
@@ -302,7 +271,8 @@ def encode_decode_focal(audio_input):
         if len(decoded_wav_output.shape) == 0:
             decoded_wav_output = decoded_wav_output.reshape(1)
-        status_msg = f"✅ {duration_sec:.1f}s | {file_size}B | {data_bitrate:.0f} bps | {bits_per_token} bits/tok | target: 160 bps"
         return (codec.sample_rate_output, decoded_wav_output), fc_file_path, status_msg
@@ -314,8 +284,8 @@ def encode_decode_focal(audio_input):
         return None, None, error_msg
-def decode_from_fc_file(fc_file):
-    """Decode audio from uploaded .fc file"""
     if codec is None:
         return None, "❌ Model not loaded"
@@ -323,12 +293,28 @@ def decode_from_fc_file(fc_file):
     if fc_file is None:
         return None, "❌ Please upload a .fc file"
     try:
         print(f"\n{'='*50}")
         print(f"Decoding from file: {fc_file.name}")
         # Load tokens
-        toks = load_compressed_codes_optimal(fc_file.name)
         if torch.cuda.is_available():
             toks = toks.cuda()
@@ -344,15 +330,13 @@ def decode_from_fc_file(fc_file):
         # Calculate stats
         duration_sec = decoded_wav.shape[0] / codec.sample_rate_output
         file_size = os.path.getsize(fc_file.name)
-        header_size = 4 + 4 + 4 + 1
-        data_size = file_size - header_size
-        bitrate = (data_size * 8) / duration_sec
         print(f"Duration: {duration_sec:.2f}s")
         print(f"Bitrate: {bitrate:.1f} bps")
         print(f"{'='*50}\n")
-        status = f"✅ Decoded! {duration_sec:.1f}s | {bitrate:.0f} bps"
         return (codec.sample_rate_output, decoded_wav), status
@@ -363,13 +347,13 @@ def decode_from_fc_file(fc_file):
 # --- Gradio Interface ---
-with gr.Blocks(title="FocalCodec 160 bps") as iface:
     gr.Markdown("# 🎙️ FocalCodec at 160 bps")
     gr.Markdown(f"**Neural speech codec at insanely low bitrate!** Using `{MODEL_CONFIG}`")
-    gr.Markdown("⚠️ **Optimized for speech only** - not suitable for music | 🔥 **1600x compression ratio!**")
     with gr.Tab("🎤 Encode Audio"):
-        gr.Markdown("### Compress audio to ~160 bps with optimal bit packing")
         with gr.Row():
             audio_input = gr.Audio(
@@ -384,9 +368,9 @@ with gr.Blocks(title="FocalCodec 160 bps") as iface:
                     label="🔊 Decoded Output (16kHz)"
                 )
                 file_output = gr.File(
-                    label="💾 Download Compressed .fc File"
                 )
-                status_output = gr.Textbox(label="📊 Status", lines=2)
         encode_btn = gr.Button("🔄 Encode & Decode", variant="primary", size="lg")
         encode_btn.click(
@@ -395,22 +379,50 @@ with gr.Blocks(title="FocalCodec 160 bps") as iface:
             outputs=[audio_output, file_output, status_output]
         )
-        gr.Markdown("### How it works:")
-        gr.Markdown("- ✅ Automatically resamples to 16kHz")
-        gr.Markdown("- ✅ Converts stereo to mono")
-        gr.Markdown("- ✅ Encodes to discrete tokens (~12.5 tokens/sec)")
-        gr.Markdown("- ✅ Packs tokens using only needed bits (no waste!)")
-        gr.Markdown("- ✅ Decodes tokens back to audio")
-        gr.Markdown("- 📈 Check console for detailed bitrate analysis!")
     with gr.Tab("📂 Decode from .fc File"):
-        gr.Markdown("### Decode previously compressed audio")
         with gr.Row():
-            fc_input = gr.File(
-                label="Upload .fc File",
-                file_types=[".fc"]
-            )
             with gr.Column():
                 decoded_output = gr.Audio(
@@ -422,101 +434,69 @@ with gr.Blocks(title="FocalCodec 160 bps") as iface:
         decode_btn = gr.Button("🔊 Decode Audio", variant="primary", size="lg")
         decode_btn.click(
             fn=decode_from_fc_file,
-            inputs=[fc_input],
             outputs=[decoded_output, decode_status]
         )
-        gr.Markdown("### Note:")
-        gr.Markdown("Upload a .fc file created by this tool to decode it back to audio.")
     with gr.Tab("ℹ️ About"):
         gr.Markdown("""
         ## FocalCodec - Ultra Low Bitrate Neural Audio Codec
-        ### 🎯 Compression Ratios:
-        | Format | Bitrate | 1-Hour File Size | Compression |
-        |--------|---------|------------------|-------------|
-        | **Uncompressed PCM** (16kHz mono) | 256 kbps | ~115 MB | 1x |
-        | **MP3** (standard) | 128 kbps | ~57 MB | 2x |
-        | **Opus** (voice optimized) | 16 kbps | ~7.2 MB | 16x |
-        | **FocalCodec** | **0.16 kbps** | **~72 KB** | **1600x** 🔥 |
-        ### 💡 Use Cases:
-        - 📞 **Ultra-low bandwidth voice calls** (satellite, deep space)
-        - 🤖 **AI-generated podcasts** (NotebookLM-style apps)
-        - 🌍 **Low-bandwidth regions** (2G networks)
-        - 📻 **Emergency communications** (disaster relief)
-        - 🎓 **Educational content distribution** (offline learning)
-        - 💾 **Voice memo storage** (years of recordings in MB)
-        ### ⚖️ Trade-offs:
-        **Pros:**
-        - ✅ Insanely efficient compression (1600x!)
-        - ✅ Speech remains highly intelligible
-        - ✅ Works on any sample rate (auto-resamples)
-        - ✅ Tiny storage/bandwidth requirements
-        **Cons:**
-        - ❌ Voice characteristics may change
-        - ❌ Emotional nuances can be lost
-        - ❌ Occasional pronunciation artifacts
-        - ❌ Not suitable for music or non-speech audio
-        ### 🔧 Technical Details:
-        - **Model:** `lucadellalib/focalcodec_12_5hz`
-        - **Sample Rate:** 16 kHz
-        - **Token Rate:** ~12.5 tokens/second
-        - **Bits per Token:** 13 bits (auto-detected, optimally packed)
-        - **Target Bitrate:** 160 bps (12.5 × 13 = 162.5 bps)
-        - **File Format:** Custom binary format with metadata header
-        ### 🧮 How We Achieve 160 bps:
-        Traditional approach would waste bits:
-        ```
-        Token (0-8191) → int16 (16 bits) → 16 × 12.5 = 200 bps ❌
-        Wasting 3 bits per token!
-        ```
-        Our optimal approach:
-        ```
-        Token (0-8191) → 13 bits exactly → 13 × 12.5 = 162.5 bps ✅
-        Zero waste!
-        ```
-        ### 🔬 Debug Information:
-        Check the **console/terminal** for detailed encoding information:
-        - Actual token rate and range
-        - Bits per token (detected automatically)
-        - Expected vs actual bitrate
-        - File size breakdown (header vs data)
-        - Compression efficiency
-        ### 📚 Example Use Case - AI Podcast Library:
-        Imagine storing **1000 hours** of AI-generated podcasts:
-        - **Uncompressed:** 115 GB
-        - **MP3:** 57 GB
-        - **Opus:** 7.2 GB
-        - **FocalCodec:** **72 MB** 🤯
-        You could fit an entire podcast library on a USB flash drive!
         ---
-        ### 🔗 Links:
-        - [FocalCodec GitHub](https://github.com/lucadellalib/focalcodec)
-        - [Research Paper](https://arxiv.org/abs/2410.03608)
-        ### 🏗️ Built with:
-        - PyTorch + TorchAudio
-        - Gradio
-        - FocalCodec (Luca Della Libera et al.)
         """)
 if __name__ == "__main__":
     print("\n" + "="*50)
-    print("🎙️  FocalCodec 160 bps Demo")
     print("="*50 + "\n")
     iface.launch()

 import os
 import tempfile
 import numpy as np
 # Define the model ID for the 0.16 kbps codec config
 MODEL_CONFIG = "lucadellalib/focalcodec_12_5hz"
         codec = None
+def save_tokens_raw(toks, fc_file_path):
+    """Save tokens as raw binary with NO header - pure tokens only"""
+    toks_cpu = toks.cpu().numpy().flatten()
+    max_token = int(toks_cpu.max())
+    print(f"\n=== Saving Raw Tokens ===")
+    print(f"Token shape: {toks.shape}")
+    print(f"Token range: 0 to {max_token}")
+    print(f"Num tokens: {len(toks_cpu)}")
+    # Determine bits needed
     if max_token <= 1:
         bits_needed = 1
     elif max_token <= 3:
     else:
         bits_needed = 16
+    print(f"Bits per token: {bits_needed}")
+    # Create bit array
+    bit_array = []
+    for tok in toks_cpu:
+        bits = format(int(tok), f'0{bits_needed}b')
+        bit_array.extend([int(b) for b in bits])
+    # Pad to byte boundary
+    while len(bit_array) % 8 != 0:
+        bit_array.append(0)
+    # Pack into bytes
+    packed_bits = np.packbits(np.array(bit_array, dtype=np.uint8))
+    # Write ONLY the packed data (no header!)
     with open(fc_file_path, 'wb') as f:
         f.write(packed_bits.tobytes())
     file_size = os.path.getsize(fc_file_path)
+    print(f"File size: {file_size} bytes (pure data, no header)")
+    print(f"========================\n")
+    return file_size, bits_needed, len(toks_cpu), toks.shape
+def load_tokens_raw(fc_file_path, bits_per_token, num_tokens, original_shape):
+    """Load raw tokens from headerless binary file"""
+    print(f"\n=== Loading Raw Tokens ===")
+    print(f"Expected bits/token: {bits_per_token}")
+    print(f"Expected num tokens: {num_tokens}")
+    print(f"Expected shape: {original_shape}")
+    # Read all bytes
     with open(fc_file_path, 'rb') as f:
         packed_data = np.frombuffer(f.read(), dtype=np.uint8)
     # Unpack bits
     unpacked_bits = np.unpackbits(packed_data)
             token_value = (token_value << 1) | bit
         tokens.append(token_value)
+    # Reshape to original shape
+    tokens_array = np.array(tokens, dtype=np.int64).reshape(original_shape)
     tokens_tensor = torch.from_numpy(tokens_array)
     print(f"Loaded tokens: {tokens_tensor.shape}")
+    print(f"Token range: {tokens_tensor.min().item()} to {tokens_tensor.max().item()}")
+    print(f"==========================\n")
     return tokens_tensor
+# Global variables to store metadata for decoding
+last_encoding_metadata = {
+    'bits_per_token': None,
+    'num_tokens': None,
+    'shape': None,
+    'duration': None
+}
 def encode_decode_focal(audio_input):
     """
     Processes input audio through the 160 bps FocalCodec, saves the tokens,
     and returns both the decoded WAV and the path to the FC file for download.
     """
+    global last_encoding_metadata
     if codec is None:
         return None, None, "❌ ERROR: Model failed to load. Check console for details."
             print(f"Duration: {duration_sec:.2f}s")
             print(f"Token rate: {token_rate:.2f} tokens/sec")
             print("\n--- Decoding ---")
             rec_sig = codec.toks_to_sig(toks)
             print(f"Reconstructed signal shape: {rec_sig.shape}")
+        # --- Save raw tokens (no header) ---
         temp_dir = tempfile.mkdtemp()
         fc_file_path = os.path.join(temp_dir, "compressed_tokens.fc")
+        file_size, bits_per_token, num_tokens, shape = save_tokens_raw(toks, fc_file_path)
+        # Store metadata globally for decoding
+        last_encoding_metadata = {
+            'bits_per_token': bits_per_token,
+            'num_tokens': num_tokens,
+            'shape': shape,
+            'duration': duration_sec
+        }
         # Calculate bitrates
+        bitrate = (file_size * 8) / duration_sec
         theoretical_bitrate = token_rate * bits_per_token
         print(f"--- Results ---")
+        print(f"File bitrate: {bitrate:.1f} bps (pure data)")
         print(f"Theoretical: {theoretical_bitrate:.1f} bps")
         print(f"Target: 160 bps")
+        print(f"Efficiency: {(160/bitrate)*100:.1f}% of target")
         print(f"{'='*50}\n")
         # Prepare output
         if len(decoded_wav_output.shape) == 0:
             decoded_wav_output = decoded_wav_output.reshape(1)
+        metadata_info = f"\n\nℹ️ SAVE THIS: bits={bits_per_token}, tokens={num_tokens}, shape={shape}"
+        status_msg = f"✅ {duration_sec:.1f}s | {file_size}B | {bitrate:.0f} bps | {bits_per_token} bits/tok{metadata_info}"
         return (codec.sample_rate_output, decoded_wav_output), fc_file_path, status_msg
         return None, None, error_msg
+def decode_from_fc_file(fc_file, bits_per_token_input, num_tokens_input, batch_size_input, seq_length_input):
+    """Decode audio from uploaded .fc file using provided metadata"""
     if codec is None:
         return None, "❌ Model not loaded"
     if fc_file is None:
         return None, "❌ Please upload a .fc file"
+    # Try to use provided metadata, or fall back to last encoding
+    try:
+        bits_per_token = int(bits_per_token_input) if bits_per_token_input else last_encoding_metadata.get('bits_per_token')
+        num_tokens = int(num_tokens_input) if num_tokens_input else last_encoding_metadata.get('num_tokens')
+        if batch_size_input and seq_length_input:
+            shape = (int(batch_size_input), int(seq_length_input))
+        else:
+            shape = last_encoding_metadata.get('shape')
+        if not all([bits_per_token, num_tokens, shape]):
+            return None, "❌ Please provide metadata (bits/token, num tokens, batch, seq_length) OR encode a file first"
+    except Exception as e:
+        return None, f"❌ Invalid metadata format: {str(e)}"
     try:
         print(f"\n{'='*50}")
         print(f"Decoding from file: {fc_file.name}")
         # Load tokens
+        toks = load_tokens_raw(fc_file.name, bits_per_token, num_tokens, shape)
         if torch.cuda.is_available():
             toks = toks.cuda()
         # Calculate stats
         duration_sec = decoded_wav.shape[0] / codec.sample_rate_output
         file_size = os.path.getsize(fc_file.name)
+        bitrate = (file_size * 8) / duration_sec
         print(f"Duration: {duration_sec:.2f}s")
         print(f"Bitrate: {bitrate:.1f} bps")
         print(f"{'='*50}\n")
+        status = f"✅ Decoded! {duration_sec:.1f}s | {bitrate:.0f} bps | {bits_per_token} bits/token"
         return (codec.sample_rate_output, decoded_wav), status
 # --- Gradio Interface ---
+with gr.Blocks(title="FocalCodec 160 bps", theme=gr.themes.Soft()) as iface:
     gr.Markdown("# 🎙️ FocalCodec at 160 bps")
     gr.Markdown(f"**Neural speech codec at insanely low bitrate!** Using `{MODEL_CONFIG}`")
+    gr.Markdown("⚠️ **Optimized for speech only** | 🔥 **Pure tokens, no header overhead!**")
     with gr.Tab("🎤 Encode Audio"):
+        gr.Markdown("### Compress audio to ~160 bps (pure tokens, no header)")
         with gr.Row():
             audio_input = gr.Audio(
                     label="🔊 Decoded Output (16kHz)"
                 )
                 file_output = gr.File(
+                    label="💾 Download Compressed .fc File (headerless)"
                 )
+                status_output = gr.Textbox(label="📊 Status", lines=4)
         encode_btn = gr.Button("🔄 Encode & Decode", variant="primary", size="lg")
         encode_btn.click(
             outputs=[audio_output, file_output, status_output]
         )
+        gr.Markdown("### ⚠️ Important:")
+        gr.Markdown("- The .fc file contains ONLY raw token data (no metadata/header)")
+        gr.Markdown("- **Save the metadata** from the status message to decode later!")
+        gr.Markdown("- You need: bits per token, number of tokens, and shape")
     with gr.Tab("📂 Decode from .fc File"):
+        gr.Markdown("### Decode raw .fc file (requires metadata)")
         with gr.Row():
+            with gr.Column():
+                fc_input = gr.File(
+                    label="Upload .fc File",
+                    file_types=[".fc"]
+                )
+                gr.Markdown("#### Metadata (required for decoding):")
+                with gr.Row():
+                    bits_input = gr.Number(
+                        label="Bits per token",
+                        value=13,
+                        precision=0,
+                        info="Usually 13 for this model"
+                    )
+                    tokens_input = gr.Number(
+                        label="Number of tokens",
+                        precision=0,
+                        info="Total tokens in file"
+                    )
+                with gr.Row():
+                    batch_input = gr.Number(
+                        label="Batch size",
+                        value=1,
+                        precision=0,
+                        info="Usually 1"
+                    )
+                    seq_input = gr.Number(
+                        label="Sequence length",
+                        precision=0,
+                        info="Tokens per batch"
+                    )
+                gr.Markdown("💡 If you just encoded a file, leave these blank to use saved metadata")
             with gr.Column():
                 decoded_output = gr.Audio(
         decode_btn = gr.Button("🔊 Decode Audio", variant="primary", size="lg")
         decode_btn.click(
             fn=decode_from_fc_file,
+            inputs=[fc_input, bits_input, tokens_input, batch_input, seq_input],
             outputs=[decoded_output, decode_status]
         )
     with gr.Tab("ℹ️ About"):
         gr.Markdown("""
         ## FocalCodec - Ultra Low Bitrate Neural Audio Codec
+        ### 🎯 Pure Token Format (No Headers!)
+        This version saves **ONLY the compressed tokens** with no metadata overhead.
+        **Benefits:**
+        - ✅ Absolute minimum file size
+        - ✅ True 160 bps (no header padding)
+        - ✅ Maximum compression efficiency
+        **Trade-off:**
+        - ⚠️ You must save the metadata separately to decode
+        - Required info: bits per token, number of tokens, shape
+        ### 📊 Compression Ratios:
+        | Format | Bitrate | 1-Hour File Size |
+        |--------|---------|------------------|
+        | Uncompressed PCM | 256 kbps | ~115 MB |
+        | MP3 | 128 kbps | ~57 MB |
+        | Opus | 16 kbps | ~7.2 MB |
+        | **FocalCodec** | **0.16 kbps** | **~72 KB** 🔥 |
+        ### 🔧 Technical Details:
+        - **Token Rate:** ~12.5 tokens/sec
+        - **Bits per Token:** 13 bits (for most speech)
+        - **Bitrate:** 12.5 × 13 = 162.5 bps ≈ **160 bps**
+        - **Format:** Raw bit-packed tokens (no header)
+        ### 📝 Example Metadata:
+        After encoding, you'll see:
+        ```
+        ℹ️ SAVE THIS: bits=13, tokens=113, shape=(1, 113)
+        ```
+        Save this to decode the file later!
+        ### 💡 Pro Tip:
+        If you're building a system, embed the metadata in a separate JSON file:
+        ```json
+        {
+          "audio.fc": {
+            "bits_per_token": 13,
+            "num_tokens": 113,
+            "shape": [1, 113],
+            "duration": 9.04
+          }
+        }
+        ```
         ---
+        🔗 [FocalCodec GitHub](https://github.com/lucadellalib/focalcodec)
         """)
 if __name__ == "__main__":
     print("\n" + "="*50)
+    print("🎙️  FocalCodec 160 bps Demo (Headerless Format)")
     print("="*50 + "\n")
     iface.launch()