MihaiPopa-1 committed on
Commit
de45cd2
·
verified ·
1 Parent(s): 315379b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -32
app.py CHANGED
@@ -91,45 +91,24 @@ def encode_decode_focal(audio_input):
91
  with torch.no_grad():
92
  toks = codec.sig_to_toks(sig)
93
  rec_sig = codec.toks_to_sig(toks)
 
 
94
  codes = codec.toks_to_codes(toks)
95
 
96
- # --- DEBUG: Print what we actually have ---
97
- print(f"\n=== DEBUG INFO ===")
98
- print(f"Tokens shape: {toks.shape}")
99
- print(f"Tokens dtype: {toks.dtype}")
100
- print(f"Tokens min/max: {toks.min().item()}/{toks.max().item()}")
101
- print(f"First 10 tokens: {toks[0, :10].cpu().numpy()}")
102
-
103
- print(f"\nCodes shape: {codes.shape}")
104
- print(f"Codes dtype: {codes.dtype}")
105
- print(f"Codes min/max: {codes.min().item()}/{codes.max().item()}")
106
- print(f"First 10 codes (flattened): {codes.flatten()[:10].cpu().numpy()}")
107
- print(f"=================\n")
108
-
109
- # --- Save the compressed data ---
110
  temp_dir = tempfile.mkdtemp()
111
  fc_file_path = os.path.join(temp_dir, "compressed_tokens.fc")
112
-
113
- # Try saving tokens directly as integers (more reliable)
114
- toks_cpu = toks.cpu().numpy().astype(np.int32)
115
-
116
  with open(fc_file_path, 'wb') as f:
117
- # Write header: number of tokens
118
- num_tokens = toks_cpu.size
119
- f.write(num_tokens.to_bytes(4, byteorder='little'))
120
- # Write tokens as raw bytes
121
- f.write(toks_cpu.tobytes())
122
-
123
- # Calculate stats
124
  file_size_bytes = os.path.getsize(fc_file_path)
125
  duration_sec = sig.shape[-1] / codec.sample_rate_input
126
- expected_size = (160 * duration_sec) / 8
127
- actual_bitrate = (file_size_bytes * 8) / duration_sec
128
-
129
- print(f"Duration: {duration_sec:.2f}s")
130
- print(f"Num tokens: {num_tokens}")
131
  print(f"File size: {file_size_bytes} bytes (expected: ~{expected_size:.0f} bytes)")
132
- print(f"Actual bitrate: {actual_bitrate:.0f} bps")
133
 
134
  # Move audio back to CPU
135
  decoded_wav_output = rec_sig.cpu().numpy().squeeze()
@@ -137,7 +116,7 @@ def encode_decode_focal(audio_input):
137
  if len(decoded_wav_output.shape) == 0:
138
  decoded_wav_output = decoded_wav_output.reshape(1)
139
 
140
- status_msg = f"✅ Duration: {duration_sec:.1f}s | Tokens: {num_tokens} | File: {file_size_bytes} bytes | Bitrate: {actual_bitrate:.0f} bps"
141
 
142
  return (codec.sample_rate_output, decoded_wav_output), fc_file_path, status_msg
143
 
 
91
  with torch.no_grad():
92
  toks = codec.sig_to_toks(sig)
93
  rec_sig = codec.toks_to_sig(toks)
94
+
95
+ # Get binary codes for true compression
96
  codes = codec.toks_to_codes(toks)
97
 
98
+ # --- Save the compressed tokens to a temporary .fc file ---
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  temp_dir = tempfile.mkdtemp()
100
  fc_file_path = os.path.join(temp_dir, "compressed_tokens.fc")
101
+
102
+ # Save as raw binary data (just the token values)
103
+ toks_cpu = toks.cpu().numpy().astype(np.int16) # Convert to numpy
 
104
  with open(fc_file_path, 'wb') as f:
105
+ f.write(toks_cpu.tobytes()) # Write raw bytes
106
+
 
 
 
 
 
107
  file_size_bytes = os.path.getsize(fc_file_path)
108
  duration_sec = sig.shape[-1] / codec.sample_rate_input
109
+ expected_size = (160 * duration_sec) / 8 # 160 bits/sec → bytes
110
+ print(f"Tokens saved to {fc_file_path}")
 
 
 
111
  print(f"File size: {file_size_bytes} bytes (expected: ~{expected_size:.0f} bytes)")
 
112
 
113
  # Move audio back to CPU
114
  decoded_wav_output = rec_sig.cpu().numpy().squeeze()
 
116
  if len(decoded_wav_output.shape) == 0:
117
  decoded_wav_output = decoded_wav_output.reshape(1)
118
 
119
+ status_msg = f"✅ Duration: {duration_sec:.1f}s | File: {file_size_bytes} bytes | Bitrate: {actual_bitrate:.0f} bps"
120
 
121
  return (codec.sample_rate_output, decoded_wav_output), fc_file_path, status_msg
122