Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -53,11 +53,12 @@ def save_tokens_raw(toks, fc_file_path):
|
|
| 53 |
|
| 54 |
toks_cpu = toks.cpu().numpy().flatten()
|
| 55 |
max_token = int(toks_cpu.max())
|
|
|
|
| 56 |
|
| 57 |
print(f"\n=== Saving Raw Tokens ===")
|
| 58 |
-
print(f"
|
| 59 |
-
print(f"
|
| 60 |
-
print(f"
|
| 61 |
|
| 62 |
# Determine bits needed
|
| 63 |
if max_token <= 1:
|
|
@@ -101,9 +102,15 @@ def save_tokens_raw(toks, fc_file_path):
|
|
| 101 |
bits = format(int(tok), f'0{bits_needed}b')
|
| 102 |
bit_array.extend([int(b) for b in bits])
|
| 103 |
|
|
|
|
|
|
|
| 104 |
# Pad to byte boundary
|
|
|
|
| 105 |
while len(bit_array) % 8 != 0:
|
| 106 |
bit_array.append(0)
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
# Pack into bytes
|
| 109 |
packed_bits = np.packbits(np.array(bit_array, dtype=np.uint8))
|
|
@@ -114,7 +121,7 @@ def save_tokens_raw(toks, fc_file_path):
|
|
| 114 |
|
| 115 |
file_size = os.path.getsize(fc_file_path)
|
| 116 |
|
| 117 |
-
print(f"File size: {file_size} bytes
|
| 118 |
print(f"========================\n")
|
| 119 |
|
| 120 |
return file_size, bits_needed, len(toks_cpu), toks.shape
|
|
@@ -124,40 +131,59 @@ def load_tokens_raw(fc_file_path, bits_per_token, num_tokens, original_shape):
|
|
| 124 |
"""Load raw tokens from headerless binary file"""
|
| 125 |
|
| 126 |
print(f"\n=== Loading Raw Tokens ===")
|
| 127 |
-
print(f"
|
| 128 |
-
print(f"
|
| 129 |
-
print(f"
|
|
|
|
| 130 |
|
| 131 |
# Read all bytes
|
| 132 |
with open(fc_file_path, 'rb') as f:
|
| 133 |
packed_data = np.frombuffer(f.read(), dtype=np.uint8)
|
| 134 |
|
|
|
|
|
|
|
| 135 |
# Unpack bits
|
| 136 |
unpacked_bits = np.unpackbits(packed_data)
|
|
|
|
| 137 |
|
| 138 |
# Extract exact number of bits needed
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
# Reconstruct tokens
|
| 143 |
tokens = []
|
| 144 |
for i in range(num_tokens):
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
token_bits_slice = token_bits[
|
| 148 |
|
| 149 |
-
# Convert binary to integer
|
| 150 |
token_value = 0
|
| 151 |
for bit in token_bits_slice:
|
| 152 |
-
token_value = (token_value << 1) | bit
|
|
|
|
| 153 |
tokens.append(token_value)
|
| 154 |
|
|
|
|
|
|
|
|
|
|
| 155 |
# Reshape to original shape
|
| 156 |
-
tokens_array = np.array(tokens, dtype=np.int64)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
tokens_tensor = torch.from_numpy(tokens_array)
|
| 158 |
|
| 159 |
-
print(f"
|
| 160 |
-
print(f"
|
| 161 |
print(f"==========================\n")
|
| 162 |
|
| 163 |
return tokens_tensor
|
|
@@ -168,7 +194,8 @@ last_encoding_metadata = {
|
|
| 168 |
'bits_per_token': None,
|
| 169 |
'num_tokens': None,
|
| 170 |
'shape': None,
|
| 171 |
-
'duration': None
|
|
|
|
| 172 |
}
|
| 173 |
|
| 174 |
|
|
@@ -236,7 +263,7 @@ def encode_decode_focal(audio_input):
|
|
| 236 |
print(f"Duration: {duration_sec:.2f}s")
|
| 237 |
print(f"Token rate: {token_rate:.2f} tokens/sec")
|
| 238 |
|
| 239 |
-
print("\n--- Decoding ---")
|
| 240 |
rec_sig = codec.toks_to_sig(toks)
|
| 241 |
print(f"Reconstructed signal shape: {rec_sig.shape}")
|
| 242 |
|
|
@@ -250,8 +277,9 @@ def encode_decode_focal(audio_input):
|
|
| 250 |
last_encoding_metadata = {
|
| 251 |
'bits_per_token': bits_per_token,
|
| 252 |
'num_tokens': num_tokens,
|
| 253 |
-
'shape': shape,
|
| 254 |
-
'duration': duration_sec
|
|
|
|
| 255 |
}
|
| 256 |
|
| 257 |
# Calculate bitrates
|
|
@@ -259,10 +287,20 @@ def encode_decode_focal(audio_input):
|
|
| 259 |
theoretical_bitrate = token_rate * bits_per_token
|
| 260 |
|
| 261 |
print(f"--- Results ---")
|
| 262 |
-
print(f"File bitrate: {bitrate:.1f} bps
|
| 263 |
print(f"Theoretical: {theoretical_bitrate:.1f} bps")
|
| 264 |
print(f"Target: 160 bps")
|
| 265 |
-
print(f"Efficiency: {(160
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
print(f"{'='*50}\n")
|
| 267 |
|
| 268 |
# Prepare output
|
|
@@ -271,8 +309,8 @@ def encode_decode_focal(audio_input):
|
|
| 271 |
if len(decoded_wav_output.shape) == 0:
|
| 272 |
decoded_wav_output = decoded_wav_output.reshape(1)
|
| 273 |
|
| 274 |
-
|
| 275 |
-
status_msg = f"β
{duration_sec:.1f}s | {file_size}B | {bitrate:.0f} bps | {bits_per_token} bits/tok{
|
| 276 |
|
| 277 |
return (codec.sample_rate_output, decoded_wav_output), fc_file_path, status_msg
|
| 278 |
|
|
@@ -293,25 +331,31 @@ def decode_from_fc_file(fc_file, bits_per_token_input, num_tokens_input, batch_s
|
|
| 293 |
if fc_file is None:
|
| 294 |
return None, "❌ Please upload a .fc file"
|
| 295 |
|
| 296 |
-
# Try to use provided metadata, or fall back to last encoding
|
| 297 |
try:
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
|
|
|
| 302 |
shape = (int(batch_size_input), int(seq_length_input))
|
|
|
|
| 303 |
else:
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
|
| 309 |
-
except Exception as e:
|
| 310 |
-
return None, f"❌ Invalid metadata format: {str(e)}"
|
| 311 |
-
|
| 312 |
-
try:
|
| 313 |
print(f"\n{'='*50}")
|
| 314 |
-
print(f"Decoding
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
|
| 316 |
# Load tokens
|
| 317 |
toks = load_tokens_raw(fc_file.name, bits_per_token, num_tokens, shape)
|
|
@@ -336,14 +380,14 @@ def decode_from_fc_file(fc_file, bits_per_token_input, num_tokens_input, batch_s
|
|
| 336 |
print(f"Bitrate: {bitrate:.1f} bps")
|
| 337 |
print(f"{'='*50}\n")
|
| 338 |
|
| 339 |
-
status = f"β
Decoded
|
| 340 |
|
| 341 |
return (codec.sample_rate_output, decoded_wav), status
|
| 342 |
|
| 343 |
except Exception as e:
|
| 344 |
import traceback
|
| 345 |
traceback.print_exc()
|
| 346 |
-
return None, f"β
|
| 347 |
|
| 348 |
|
| 349 |
# --- Gradio Interface ---
|
|
@@ -370,7 +414,7 @@ with gr.Blocks(title="FocalCodec 160 bps") as iface:
|
|
| 370 |
file_output = gr.File(
|
| 371 |
label="💾 Download Compressed .fc File (headerless)"
|
| 372 |
)
|
| 373 |
-
status_output = gr.Textbox(label="π Status", lines=
|
| 374 |
|
| 375 |
encode_btn = gr.Button("π Encode & Decode", variant="primary", size="lg")
|
| 376 |
encode_btn.click(
|
|
@@ -380,9 +424,9 @@ with gr.Blocks(title="FocalCodec 160 bps") as iface:
|
|
| 380 |
)
|
| 381 |
|
| 382 |
gr.Markdown("### ⚠️ Important:")
|
| 383 |
-
gr.Markdown("- The .fc file contains ONLY raw token data (no metadata
|
| 384 |
-
gr.Markdown("- **
|
| 385 |
-
gr.Markdown("-
|
| 386 |
|
| 387 |
with gr.Tab("π Decode from .fc File"):
|
| 388 |
gr.Markdown("### Decode raw .fc file (requires metadata)")
|
|
@@ -394,42 +438,42 @@ with gr.Blocks(title="FocalCodec 160 bps") as iface:
|
|
| 394 |
file_types=[".fc"]
|
| 395 |
)
|
| 396 |
|
| 397 |
-
gr.Markdown("#### Metadata (
|
|
|
|
| 398 |
|
| 399 |
with gr.Row():
|
| 400 |
bits_input = gr.Number(
|
| 401 |
label="Bits per token",
|
| 402 |
-
|
| 403 |
-
precision=0
|
| 404 |
-
info="Usually 13 for this model"
|
| 405 |
)
|
| 406 |
tokens_input = gr.Number(
|
| 407 |
label="Number of tokens",
|
| 408 |
-
|
| 409 |
-
|
| 410 |
)
|
| 411 |
|
| 412 |
with gr.Row():
|
| 413 |
batch_input = gr.Number(
|
| 414 |
label="Batch size",
|
| 415 |
-
|
| 416 |
-
precision=0
|
| 417 |
-
info="Usually 1"
|
| 418 |
)
|
| 419 |
seq_input = gr.Number(
|
| 420 |
label="Sequence length",
|
| 421 |
-
|
| 422 |
-
|
| 423 |
)
|
| 424 |
|
| 425 |
-
gr.Markdown("π‘ If
|
|
|
|
| 426 |
|
| 427 |
with gr.Column():
|
| 428 |
decoded_output = gr.Audio(
|
| 429 |
type="numpy",
|
| 430 |
label="π Decoded Audio"
|
| 431 |
)
|
| 432 |
-
decode_status = gr.Textbox(label="π Status", lines=
|
| 433 |
|
| 434 |
decode_btn = gr.Button("π Decode Audio", variant="primary", size="lg")
|
| 435 |
decode_btn.click(
|
|
@@ -444,46 +488,42 @@ with gr.Blocks(title="FocalCodec 160 bps") as iface:
|
|
| 444 |
|
| 445 |
### 🎯 Pure Token Format (No Headers!)
|
| 446 |
|
| 447 |
-
This version saves **ONLY the compressed tokens** with
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
-
|
| 451 |
-
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
**Trade-off:**
|
| 455 |
-
- ⚠️ You must save the metadata separately to decode
|
| 456 |
-
- Required info: bits per token, number of tokens, shape
|
| 457 |
-
|
| 458 |
-
### π Compression Ratios:
|
| 459 |
-
| Format | Bitrate | 1-Hour File Size |
|
| 460 |
-
|--------|---------|------------------|
|
| 461 |
-
| Uncompressed PCM | 256 kbps | ~115 MB |
|
| 462 |
-
| MP3 | 128 kbps | ~57 MB |
|
| 463 |
-
| Opus | 16 kbps | ~7.2 MB |
|
| 464 |
-
| **FocalCodec** | **0.16 kbps** | **~72 KB** 🔥 |
|
| 465 |
-
|
| 466 |
-
### π§ Technical Details:
|
| 467 |
-
- **Token Rate:** ~12.5 tokens/sec
|
| 468 |
-
- **Bits per Token:** 13 bits (for most speech)
|
| 469 |
-
- **Bitrate:** 12.5 × 13 = 162.5 bps ≈ **160 bps**
|
| 470 |
-
- **Format:** Raw bit-packed tokens (no header)
|
| 471 |
-
|
| 472 |
-
### π Example Metadata:
|
| 473 |
-
After encoding, you'll see:
|
| 474 |
-
```
|
| 475 |
-
ℹ️ SAVE THIS: bits=13, tokens=113, shape=(1, 113)
|
| 476 |
-
```
|
| 477 |
|
| 478 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 479 |
|
| 480 |
-
|
| 481 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
```json
|
| 483 |
{
|
| 484 |
-
"
|
| 485 |
-
"
|
| 486 |
-
"
|
| 487 |
"shape": [1, 113],
|
| 488 |
"duration": 9.04
|
| 489 |
}
|
|
|
|
| 53 |
|
| 54 |
toks_cpu = toks.cpu().numpy().flatten()
|
| 55 |
max_token = int(toks_cpu.max())
|
| 56 |
+
min_token = int(toks_cpu.min())
|
| 57 |
|
| 58 |
print(f"\n=== Saving Raw Tokens ===")
|
| 59 |
+
print(f"Original shape: {toks.shape}")
|
| 60 |
+
print(f"Flattened tokens: {len(toks_cpu)}")
|
| 61 |
+
print(f"Token range: {min_token} to {max_token}")
|
| 62 |
|
| 63 |
# Determine bits needed
|
| 64 |
if max_token <= 1:
|
|
|
|
| 102 |
bits = format(int(tok), f'0{bits_needed}b')
|
| 103 |
bit_array.extend([int(b) for b in bits])
|
| 104 |
|
| 105 |
+
print(f"Total bits: {len(bit_array)}")
|
| 106 |
+
|
| 107 |
# Pad to byte boundary
|
| 108 |
+
padding = 0
|
| 109 |
while len(bit_array) % 8 != 0:
|
| 110 |
bit_array.append(0)
|
| 111 |
+
padding += 1
|
| 112 |
+
|
| 113 |
+
print(f"Padding bits: {padding}")
|
| 114 |
|
| 115 |
# Pack into bytes
|
| 116 |
packed_bits = np.packbits(np.array(bit_array, dtype=np.uint8))
|
|
|
|
| 121 |
|
| 122 |
file_size = os.path.getsize(fc_file_path)
|
| 123 |
|
| 124 |
+
print(f"File size: {file_size} bytes")
|
| 125 |
print(f"========================\n")
|
| 126 |
|
| 127 |
return file_size, bits_needed, len(toks_cpu), toks.shape
|
|
|
|
| 131 |
"""Load raw tokens from headerless binary file"""
|
| 132 |
|
| 133 |
print(f"\n=== Loading Raw Tokens ===")
|
| 134 |
+
print(f"File: {fc_file_path}")
|
| 135 |
+
print(f"Bits per token: {bits_per_token}")
|
| 136 |
+
print(f"Num tokens: {num_tokens}")
|
| 137 |
+
print(f"Target shape: {original_shape}")
|
| 138 |
|
| 139 |
# Read all bytes
|
| 140 |
with open(fc_file_path, 'rb') as f:
|
| 141 |
packed_data = np.frombuffer(f.read(), dtype=np.uint8)
|
| 142 |
|
| 143 |
+
print(f"Read {len(packed_data)} bytes")
|
| 144 |
+
|
| 145 |
# Unpack bits
|
| 146 |
unpacked_bits = np.unpackbits(packed_data)
|
| 147 |
+
print(f"Unpacked to {len(unpacked_bits)} bits")
|
| 148 |
|
| 149 |
# Extract exact number of bits needed
|
| 150 |
+
total_bits_needed = num_tokens * bits_per_token
|
| 151 |
+
print(f"Need {total_bits_needed} bits for {num_tokens} tokens")
|
| 152 |
+
|
| 153 |
+
if len(unpacked_bits) < total_bits_needed:
|
| 154 |
+
raise ValueError(f"Not enough bits in file! Have {len(unpacked_bits)}, need {total_bits_needed}")
|
| 155 |
+
|
| 156 |
+
token_bits = unpacked_bits[:total_bits_needed]
|
| 157 |
|
| 158 |
# Reconstruct tokens
|
| 159 |
tokens = []
|
| 160 |
for i in range(num_tokens):
|
| 161 |
+
start_bit = i * bits_per_token
|
| 162 |
+
end_bit = start_bit + bits_per_token
|
| 163 |
+
token_bits_slice = token_bits[start_bit:end_bit]
|
| 164 |
|
| 165 |
+
# Convert binary array to integer
|
| 166 |
token_value = 0
|
| 167 |
for bit in token_bits_slice:
|
| 168 |
+
token_value = (token_value << 1) | int(bit)
|
| 169 |
+
|
| 170 |
tokens.append(token_value)
|
| 171 |
|
| 172 |
+
print(f"Reconstructed {len(tokens)} tokens")
|
| 173 |
+
print(f"Token range: {min(tokens)} to {max(tokens)}")
|
| 174 |
+
|
| 175 |
# Reshape to original shape
|
| 176 |
+
tokens_array = np.array(tokens, dtype=np.int64)
|
| 177 |
+
|
| 178 |
+
# Validate shape
|
| 179 |
+
if tokens_array.size != np.prod(original_shape):
|
| 180 |
+
raise ValueError(f"Shape mismatch! Have {tokens_array.size} tokens, need {np.prod(original_shape)}")
|
| 181 |
+
|
| 182 |
+
tokens_array = tokens_array.reshape(original_shape)
|
| 183 |
tokens_tensor = torch.from_numpy(tokens_array)
|
| 184 |
|
| 185 |
+
print(f"Final tensor shape: {tokens_tensor.shape}")
|
| 186 |
+
print(f"Final token range: {tokens_tensor.min().item()} to {tokens_tensor.max().item()}")
|
| 187 |
print(f"==========================\n")
|
| 188 |
|
| 189 |
return tokens_tensor
|
|
|
|
| 194 |
'bits_per_token': None,
|
| 195 |
'num_tokens': None,
|
| 196 |
'shape': None,
|
| 197 |
+
'duration': None,
|
| 198 |
+
'filename': None
|
| 199 |
}
|
| 200 |
|
| 201 |
|
|
|
|
| 263 |
print(f"Duration: {duration_sec:.2f}s")
|
| 264 |
print(f"Token rate: {token_rate:.2f} tokens/sec")
|
| 265 |
|
| 266 |
+
print("\n--- Decoding (test) ---")
|
| 267 |
rec_sig = codec.toks_to_sig(toks)
|
| 268 |
print(f"Reconstructed signal shape: {rec_sig.shape}")
|
| 269 |
|
|
|
|
| 277 |
last_encoding_metadata = {
|
| 278 |
'bits_per_token': bits_per_token,
|
| 279 |
'num_tokens': num_tokens,
|
| 280 |
+
'shape': tuple(shape),
|
| 281 |
+
'duration': duration_sec,
|
| 282 |
+
'filename': fc_file_path
|
| 283 |
}
|
| 284 |
|
| 285 |
# Calculate bitrates
|
|
|
|
| 287 |
theoretical_bitrate = token_rate * bits_per_token
|
| 288 |
|
| 289 |
print(f"--- Results ---")
|
| 290 |
+
print(f"File bitrate: {bitrate:.1f} bps")
|
| 291 |
print(f"Theoretical: {theoretical_bitrate:.1f} bps")
|
| 292 |
print(f"Target: 160 bps")
|
| 293 |
+
print(f"Efficiency: {(bitrate/160)*100:.1f}% of target")
|
| 294 |
+
|
| 295 |
+
# TEST: Try to decode immediately to verify
|
| 296 |
+
print(f"\n--- Verification: Decoding saved file ---")
|
| 297 |
+
try:
|
| 298 |
+
test_toks = load_tokens_raw(fc_file_path, bits_per_token, num_tokens, shape)
|
| 299 |
+
print(f"✅ Verification successful!")
|
| 300 |
+
print(f"Tokens match: {torch.equal(toks.cpu(), test_toks)}")
|
| 301 |
+
except Exception as e:
|
| 302 |
+
print(f"❌ Verification failed: {e}")
|
| 303 |
+
|
| 304 |
print(f"{'='*50}\n")
|
| 305 |
|
| 306 |
# Prepare output
|
|
|
|
| 309 |
if len(decoded_wav_output.shape) == 0:
|
| 310 |
decoded_wav_output = decoded_wav_output.reshape(1)
|
| 311 |
|
| 312 |
+
metadata_str = f"bits={bits_per_token}, tokens={num_tokens}, shape={shape}"
|
| 313 |
+
status_msg = f"β
{duration_sec:.1f}s | {file_size}B | {bitrate:.0f} bps | {bits_per_token} bits/tok\n\nπ METADATA: {metadata_str}"
|
| 314 |
|
| 315 |
return (codec.sample_rate_output, decoded_wav_output), fc_file_path, status_msg
|
| 316 |
|
|
|
|
| 331 |
if fc_file is None:
|
| 332 |
return None, "❌ Please upload a .fc file"
|
| 333 |
|
|
|
|
| 334 |
try:
|
| 335 |
+
# Parse metadata
|
| 336 |
+
if bits_per_token_input and num_tokens_input and batch_size_input and seq_length_input:
|
| 337 |
+
# Use provided values
|
| 338 |
+
bits_per_token = int(bits_per_token_input)
|
| 339 |
+
num_tokens = int(num_tokens_input)
|
| 340 |
shape = (int(batch_size_input), int(seq_length_input))
|
| 341 |
+
print("Using manually provided metadata")
|
| 342 |
else:
|
| 343 |
+
# Use saved metadata
|
| 344 |
+
if not last_encoding_metadata.get('bits_per_token'):
|
| 345 |
+
return None, "❌ No metadata available! Either encode a file first OR provide all metadata fields"
|
| 346 |
+
|
| 347 |
+
bits_per_token = last_encoding_metadata['bits_per_token']
|
| 348 |
+
num_tokens = last_encoding_metadata['num_tokens']
|
| 349 |
+
shape = last_encoding_metadata['shape']
|
| 350 |
+
print("Using saved metadata from last encoding")
|
| 351 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
print(f"\n{'='*50}")
|
| 353 |
+
print(f"Decoding file: {fc_file.name}")
|
| 354 |
+
print(f"Metadata: bits={bits_per_token}, tokens={num_tokens}, shape={shape}")
|
| 355 |
+
|
| 356 |
+
# Validate
|
| 357 |
+
if num_tokens != shape[0] * shape[1]:
|
| 358 |
+
return None, f"❌ Shape mismatch! {num_tokens} tokens != {shape[0]}×{shape[1]} = {shape[0]*shape[1]}"
|
| 359 |
|
| 360 |
# Load tokens
|
| 361 |
toks = load_tokens_raw(fc_file.name, bits_per_token, num_tokens, shape)
|
|
|
|
| 380 |
print(f"Bitrate: {bitrate:.1f} bps")
|
| 381 |
print(f"{'='*50}\n")
|
| 382 |
|
| 383 |
+
status = f"✅ Decoded successfully!\n{duration_sec:.1f}s | {file_size}B | {bitrate:.0f} bps | {bits_per_token} bits/tok"
|
| 384 |
|
| 385 |
return (codec.sample_rate_output, decoded_wav), status
|
| 386 |
|
| 387 |
except Exception as e:
|
| 388 |
import traceback
|
| 389 |
traceback.print_exc()
|
| 390 |
+
return None, f"❌ Decoding error: {str(e)}"
|
| 391 |
|
| 392 |
|
| 393 |
# --- Gradio Interface ---
|
|
|
|
| 414 |
file_output = gr.File(
|
| 415 |
label="💾 Download Compressed .fc File (headerless)"
|
| 416 |
)
|
| 417 |
+
status_output = gr.Textbox(label="π Status", lines=5)
|
| 418 |
|
| 419 |
encode_btn = gr.Button("π Encode & Decode", variant="primary", size="lg")
|
| 420 |
encode_btn.click(
|
|
|
|
| 424 |
)
|
| 425 |
|
| 426 |
gr.Markdown("### ⚠️ Important:")
|
| 427 |
+
gr.Markdown("- The .fc file contains ONLY raw token data (no metadata)")
|
| 428 |
+
gr.Markdown("- **Copy the METADATA from the status box** to decode later!")
|
| 429 |
+
gr.Markdown("- Format: `bits=13, tokens=113, shape=(1, 113)`")
|
| 430 |
|
| 431 |
with gr.Tab("π Decode from .fc File"):
|
| 432 |
gr.Markdown("### Decode raw .fc file (requires metadata)")
|
|
|
|
| 438 |
file_types=[".fc"]
|
| 439 |
)
|
| 440 |
|
| 441 |
+
gr.Markdown("#### π Metadata (from encoding step):")
|
| 442 |
+
gr.Markdown("Leave blank to use last encoded file's metadata")
|
| 443 |
|
| 444 |
with gr.Row():
|
| 445 |
bits_input = gr.Number(
|
| 446 |
label="Bits per token",
|
| 447 |
+
placeholder="e.g., 13",
|
| 448 |
+
precision=0
|
|
|
|
| 449 |
)
|
| 450 |
tokens_input = gr.Number(
|
| 451 |
label="Number of tokens",
|
| 452 |
+
placeholder="e.g., 113",
|
| 453 |
+
precision=0
|
| 454 |
)
|
| 455 |
|
| 456 |
with gr.Row():
|
| 457 |
batch_input = gr.Number(
|
| 458 |
label="Batch size",
|
| 459 |
+
placeholder="e.g., 1",
|
| 460 |
+
precision=0
|
|
|
|
| 461 |
)
|
| 462 |
seq_input = gr.Number(
|
| 463 |
label="Sequence length",
|
| 464 |
+
placeholder="e.g., 113",
|
| 465 |
+
precision=0
|
| 466 |
)
|
| 467 |
|
| 468 |
+
gr.Markdown("💡 **Example:** If metadata says `bits=13, tokens=113, shape=(1, 113)`")
|
| 469 |
+
gr.Markdown("Enter: bits=13, tokens=113, batch=1, seq=113")
|
| 470 |
|
| 471 |
with gr.Column():
|
| 472 |
decoded_output = gr.Audio(
|
| 473 |
type="numpy",
|
| 474 |
label="π Decoded Audio"
|
| 475 |
)
|
| 476 |
+
decode_status = gr.Textbox(label="π Status", lines=3)
|
| 477 |
|
| 478 |
decode_btn = gr.Button("π Decode Audio", variant="primary", size="lg")
|
| 479 |
decode_btn.click(
|
|
|
|
| 488 |
|
| 489 |
### 🎯 Pure Token Format (No Headers!)
|
| 490 |
|
| 491 |
+
This version saves **ONLY the compressed tokens** with zero overhead.
|
| 492 |
+
|
| 493 |
+
### π Compression:
|
| 494 |
+
- **Uncompressed:** 256 kbps → 115 MB/hour
|
| 495 |
+
- **FocalCodec:** 160 bps → **72 KB/hour** (1600x smaller!)
|
| 496 |
+
|
| 497 |
+
### π§ How to Use:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
|
| 499 |
+
**Encoding:**
|
| 500 |
+
1. Upload/record audio
|
| 501 |
+
2. Click "Encode & Decode"
|
| 502 |
+
3. **COPY THE METADATA** from status (important!)
|
| 503 |
+
4. Download .fc file
|
| 504 |
|
| 505 |
+
**Decoding:**
|
| 506 |
+
1. Upload .fc file
|
| 507 |
+
2. Enter metadata OR leave blank if you just encoded
|
| 508 |
+
3. Click "Decode Audio"
|
| 509 |
+
|
| 510 |
+
### π Metadata Format:
|
| 511 |
+
```
|
| 512 |
+
bits=13, tokens=113, shape=(1, 113)
|
| 513 |
+
```
|
| 514 |
+
Means:
|
| 515 |
+
- 13 bits per token
|
| 516 |
+
- 113 total tokens
|
| 517 |
+
- Batch size = 1
|
| 518 |
+
- Sequence length = 113
|
| 519 |
+
|
| 520 |
+
### 💡 Storage Tip:
|
| 521 |
+
Store metadata in a companion JSON file:
|
| 522 |
```json
|
| 523 |
{
|
| 524 |
+
"recording_001.fc": {
|
| 525 |
+
"bits": 13,
|
| 526 |
+
"tokens": 113,
|
| 527 |
"shape": [1, 113],
|
| 528 |
"duration": 9.04
|
| 529 |
}
|