codewithaman committed on
Commit
8d5eac5
·
verified ·
1 Parent(s): 759d03a

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +533 -35
src/streamlit_app.py CHANGED
@@ -1,40 +1,538 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
 
4
  import streamlit as st
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
 
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
 
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
1
+ """
2
+ Streamlit app for LoPace - Interactive Prompt Compression with Evaluation Metrics
3
+ """
4
+
5
  import streamlit as st
6
+ import hashlib
7
+ import time
8
+ from typing import Dict, Any, List, Tuple
9
+ from lopace import PromptCompressor, CompressionMethod
10
 
 
 
11
 
12
def calculate_metrics(
    original_text: str,
    compressed_data: bytes,
    compression_time: float,
    decompression_time: float,
    decompressed_text: str,
    compressor: Optional["PromptCompressor"] = None
) -> Dict[str, Any]:
    """
    Calculate all evaluation metrics for a single compression round-trip.

    Args:
        original_text: The uncompressed input prompt.
        compressed_data: The compressed byte payload produced from it.
        compression_time: Wall-clock seconds spent compressing.
        decompression_time: Wall-clock seconds spent decompressing.
        decompressed_text: The text recovered from ``compressed_data``.
        compressor: Optional PromptCompressor used for the Shannon-entropy
            and theoretical-limit figures. If omitted, or if those calls
            raise, the entropy-related entries are returned as ``None``.

    Returns:
        Dictionary with all metrics: raw sizes (bytes/bits), compression
        ratio, space savings, bits-per-character, throughput, timings,
        SHA-256 hashes plus match flags, exact-match/reconstruction-error
        fidelity, and (optionally) entropy-based theoretical limits.
    """
    original_size_bytes = len(original_text.encode('utf-8'))
    compressed_size_bytes = len(compressed_data)
    original_size_bits = original_size_bytes * 8
    compressed_size_bits = compressed_size_bytes * 8
    num_characters = len(original_text)

    # Compression Ratio (CR): original/compressed, guarded against empty payloads
    compression_ratio = original_size_bytes / compressed_size_bytes if compressed_size_bytes > 0 else 0

    # Space Savings (SS) as a percentage, guarded against empty input
    space_savings = (1 - (compressed_size_bytes / original_size_bytes)) * 100 if original_size_bytes > 0 else 0

    # Bits Per Character (BPC): compressed bits averaged over input characters
    bits_per_character = compressed_size_bits / num_characters if num_characters > 0 else 0

    # Throughput (MB/s); zero when the measured time is zero/invalid
    compression_throughput = (original_size_bytes / (1024 * 1024)) / compression_time if compression_time > 0 else 0
    decompression_throughput = (compressed_size_bytes / (1024 * 1024)) / decompression_time if decompression_time > 0 else 0

    # SHA-256 hashes of both texts for lossless verification
    original_hash = hashlib.sha256(original_text.encode('utf-8')).hexdigest()
    decompressed_hash = hashlib.sha256(decompressed_text.encode('utf-8')).hexdigest()
    hash_match = original_hash == decompressed_hash

    # Exact Match (Fidelity): character-by-character comparison
    exact_match = original_text == decompressed_text

    # Reconstruction Error: binary indicator (0.0 = lossless, 1.0 = any mismatch)
    reconstruction_error = 0.0 if exact_match else 1.0

    # Shannon Entropy & theoretical limits (only if a compressor is provided);
    # failures here are deliberately non-fatal — metrics stay None.
    shannon_entropy = None
    theoretical_min_bytes = None
    theoretical_compression_ratio = None
    if compressor:
        try:
            shannon_entropy = compressor.calculate_shannon_entropy(original_text)
            limits = compressor.get_theoretical_compression_limit(original_text)
            theoretical_min_bytes = limits['theoretical_min_bytes']
            theoretical_compression_ratio = limits['theoretical_compression_ratio']
        except Exception:
            pass

    return {
        'original_size_bytes': original_size_bytes,
        'compressed_size_bytes': compressed_size_bytes,
        'original_size_bits': original_size_bits,
        'compressed_size_bits': compressed_size_bits,
        'num_characters': num_characters,
        'compression_ratio': compression_ratio,
        'space_savings': space_savings,
        'bits_per_character': bits_per_character,
        'compression_throughput': compression_throughput,
        'decompression_throughput': decompression_throughput,
        'compression_time': compression_time,
        'decompression_time': decompression_time,
        'original_hash': original_hash,
        'decompressed_hash': decompressed_hash,
        'hash_match': hash_match,
        'exact_match': exact_match,
        'reconstruction_error': reconstruction_error,
        'shannon_entropy': shannon_entropy,
        'theoretical_min_bytes': theoretical_min_bytes,
        'theoretical_compression_ratio': theoretical_compression_ratio,
    }
94
+
95
+
96
def format_hash(hash_str: str) -> str:
    """Shorten a hex digest for display: first 16 and last 16 characters."""
    head = hash_str[:16]
    tail = hash_str[-16:]
    return head + "..." + tail
99
+
100
+
101
def format_bytes(data: bytes, max_display: int = 500) -> str:
    """Format bytes for display with hex representation."""

    def _hex_pairs(chunk: bytes) -> str:
        # Two lowercase hex digits per byte, separated by single spaces.
        return ' '.join(f"{byte:02x}" for byte in chunk)

    if len(data) <= max_display:
        return _hex_pairs(data)

    # Too large to show in full: render only the leading bytes plus a
    # truncation note carrying the real total length.
    return f"{_hex_pairs(data[:max_display])} ... (truncated, {len(data)} total bytes)"
112
+
113
+
114
def main():
    """Render the LoPace Streamlit UI.

    Layout: sidebar configuration (tokenizer model, zstd level), a two-column
    main area (prompt input on the left, per-method compressed/decompressed
    views in tabs on the right), and a full-width metrics section below that
    reads the results stored in ``st.session_state``.
    """
    st.set_page_config(
        page_title="LoPace - Prompt Compression",
        page_icon="πŸ—œοΈ",
        layout="wide",
        initial_sidebar_state="expanded"
    )

    # Custom CSS for better styling
    st.markdown("""
    <style>
    .main-header {
        font-size: 2.5rem;
        font-weight: 700;
        color: #1f77b4;
        margin-bottom: 0.5rem;
    }
    .sub-header {
        font-size: 1.2rem;
        color: #666;
        margin-bottom: 2rem;
    }
    .metric-card {
        background-color: #f8f9fa;
        padding: 1rem;
        border-radius: 0.5rem;
        border-left: 4px solid #1f77b4;
    }
    .data-box {
        background-color: #f8f9fa;
        padding: 1rem;
        border-radius: 0.5rem;
        border: 1px solid #dee2e6;
        font-family: 'Courier New', monospace;
        font-size: 0.85rem;
        max-height: 400px;
        overflow-y: auto;
    }
    </style>
    """, unsafe_allow_html=True)

    # Header
    st.markdown('<div class="main-header">πŸ—œοΈ LoPace</div>', unsafe_allow_html=True)
    st.markdown('<div class="sub-header">Lossless Optimized Prompt Accurate Compression Engine</div>', unsafe_allow_html=True)

    # Sidebar for configuration
    with st.sidebar:
        st.header("βš™οΈ Configuration")

        tokenizer_model = st.selectbox(
            "Tokenizer Model",
            options=["cl100k_base", "p50k_base", "r50k_base", "gpt2"],
            index=0,
            help="BPE tokenizer model for token-based compression"
        )

        zstd_level = st.slider(
            "Zstd Compression Level",
            min_value=1,
            max_value=22,
            value=15,
            help="Higher values = better compression but slower (1-22)"
        )

        st.markdown("---")
        st.markdown("### πŸ“Š About Metrics")
        st.info("""
        **Compression Ratio (CR)**: How many times smaller (e.g., 4.5x)

        **Space Savings (SS)**: Percentage of space reduced (e.g., 75%)

        **Bits Per Character (BPC)**: Average bits to store one character

        **Throughput**: Speed in MB/s for compression/decompression

        **Hash Match**: SHA-256 verification of losslessness

        **Exact Match**: Character-by-character comparison
        """)

        st.markdown("---")
        st.markdown("### 🎯 Compression Methods")
        st.caption("""
        - **Zstd**: Dictionary-based compression
        - **Token**: BPE tokenization with binary packing
        - **Hybrid**: Token + Zstd (recommended)
        """)

    # Main content area - Two column layout
    col_left, col_right = st.columns([1, 1], gap="large")

    with col_left:
        st.markdown("### πŸ“ Input Prompt")
        default_prompt = """You are a helpful AI assistant designed to provide accurate,
detailed, and helpful responses to user queries. Your goal is to assist users
by understanding their questions and providing relevant information, explanations,
or guidance. Always be respectful, clear, and concise in your communications.
If you are uncertain about something, it's better to acknowledge that uncertainty
rather than provide potentially incorrect information."""

        input_prompt = st.text_area(
            "Enter your prompt:",
            value=default_prompt,
            height=400,
            help="Enter the system prompt or any text you want to compress",
            label_visibility="collapsed",
            key="input_prompt_textarea"
        )

        # Character and byte count
        char_count = len(input_prompt)
        byte_count = len(input_prompt.encode('utf-8'))
        st.caption(f"πŸ“ {char_count:,} characters | {byte_count:,} bytes")

        compress_button = st.button("πŸ—œοΈ Compress & Analyze", type="primary", use_container_width=True)

    with col_right:
        st.markdown("### πŸ“¦ Compressed & Decompressed Data")

        if not compress_button:
            st.info("πŸ‘ˆ Enter a prompt on the left and click **'Compress & Analyze'** to see compression results")
        elif not input_prompt.strip():
            st.warning("⚠️ Please enter a prompt to compress")
        else:
            try:
                # Initialize compressor
                compressor = PromptCompressor(model=tokenizer_model, zstd_level=zstd_level)

                # Process all methods
                methods = [
                    CompressionMethod.ZSTD,
                    CompressionMethod.TOKEN,
                    CompressionMethod.HYBRID
                ]

                method_names = {
                    CompressionMethod.ZSTD: "Zstd",
                    CompressionMethod.TOKEN: "Token (BPE)",
                    CompressionMethod.HYBRID: "Hybrid (Recommended)"
                }

                method_icons = {
                    CompressionMethod.ZSTD: "πŸ”΅",
                    CompressionMethod.TOKEN: "🟒",
                    CompressionMethod.HYBRID: "🟣"
                }

                # Store results for metrics section
                all_results: Dict[str, Dict[str, Any]] = {}
                all_metrics: Dict[str, Dict[str, Any]] = {}

                # Create tabs for each method
                tabs = st.tabs([f"{method_icons[m]} {method_names[m]}" for m in methods])

                for tab, method in zip(tabs, methods):
                    with tab:
                        # Compress and measure time
                        start_compress = time.perf_counter()
                        compressed = compressor.compress(input_prompt, method)
                        compression_time = time.perf_counter() - start_compress

                        # Decompress and measure time
                        start_decompress = time.perf_counter()
                        decompressed = compressor.decompress(compressed, method)
                        decompression_time = time.perf_counter() - start_decompress

                        # Calculate metrics
                        metrics = calculate_metrics(
                            input_prompt,
                            compressed,
                            compression_time,
                            decompression_time,
                            decompressed,
                            compressor=compressor
                        )

                        all_results[method.value] = {
                            'compressed': compressed,
                            'decompressed': decompressed,
                            'method_name': method_names[method]
                        }
                        all_metrics[method.value] = metrics

                        # Display compressed data
                        st.markdown("#### πŸ” Compressed Data (Hex)")
                        with st.container():
                            st.markdown('<div class="data-box">', unsafe_allow_html=True)
                            st.code(format_bytes(compressed, max_display=1000), language="text")
                            st.caption(f"Size: {len(compressed):,} bytes | Showing first 1000 bytes")
                            st.markdown('</div>', unsafe_allow_html=True)

                        # Display decompressed data
                        st.markdown("#### πŸ”“ Decompressed Data (Original Text)")
                        with st.container():
                            st.markdown('<div class="data-box">', unsafe_allow_html=True)
                            st.text_area(
                                "Decompressed text:",
                                value=decompressed,
                                height=300,
                                disabled=True,
                                label_visibility="collapsed",
                                key=f"decompressed_text_{method.value}"
                            )
                            st.caption(f"βœ… Lossless: {'Verified' if metrics['exact_match'] else 'FAILED'}")
                            st.markdown('</div>', unsafe_allow_html=True)

                        # Quick verification status
                        if metrics['exact_match'] and metrics['hash_match']:
                            st.success("βœ… **Lossless Verification**: All checks passed!")
                        else:
                            st.error("❌ **Lossless Verification**: Failed!")

                # Store results in session state for metrics section
                st.session_state['all_results'] = all_results
                st.session_state['all_metrics'] = all_metrics
                st.session_state['input_prompt'] = input_prompt
                st.session_state['compressor'] = compressor

            except Exception as e:
                # Surface any compressor/tokenizer failure to the user with traceback
                st.error(f"❌ Error: {str(e)}")
                st.exception(e)

    # Metrics Section - Below the two columns
    # Rendered only on the same run that compressed (session_state populated above).
    if compress_button and 'all_metrics' in st.session_state:
        st.markdown("---")
        st.markdown("## πŸ“Š Comprehensive Evaluation Metrics")

        all_metrics = st.session_state['all_metrics']
        all_results = st.session_state['all_results']
        methods = [
            CompressionMethod.ZSTD,
            CompressionMethod.TOKEN,
            CompressionMethod.HYBRID
        ]

        method_names = {
            CompressionMethod.ZSTD: "Zstd",
            CompressionMethod.TOKEN: "Token (BPE)",
            CompressionMethod.HYBRID: "Hybrid (Recommended)"
        }

        # Primary Evaluation Metrics
        st.markdown("### πŸ“ˆ Primary Evaluation Metrics")

        for method in methods:
            metrics = all_metrics[method.value]
            method_name = method_names[method]

            # Hybrid is the recommended method, so its expander starts open.
            with st.expander(f"πŸ“Š {method_name} - Detailed Metrics", expanded=(method == CompressionMethod.HYBRID)):
                # Create metric columns
                col1, col2, col3, col4 = st.columns(4)

                with col1:
                    st.metric(
                        "Compression Ratio (CR)",
                        f"{metrics['compression_ratio']:.2f}x",
                        help="$CR = \\frac{S_{original}}{S_{compressed}}$"
                    )

                with col2:
                    st.metric(
                        "Space Savings (SS)",
                        f"{metrics['space_savings']:.2f}%",
                        help="$SS = 1 - \\frac{S_{compressed}}{S_{original}}$"
                    )

                with col3:
                    st.metric(
                        "Bits Per Character (BPC)",
                        f"{metrics['bits_per_character']:.2f}",
                        help="$BPC = \\frac{Total Bits}{Total Characters}$"
                    )

                with col4:
                    st.metric(
                        "Compression Time",
                        f"{metrics['compression_time']*1000:.2f} ms"
                    )

                # Throughput
                st.markdown("#### ⚑ Throughput")
                throughput_col1, throughput_col2 = st.columns(2)

                with throughput_col1:
                    st.metric(
                        "Compression Throughput",
                        f"{metrics['compression_throughput']:.2f} MB/s",
                        help="$T = \\frac{Data Size}{Time}$"
                    )

                with throughput_col2:
                    st.metric(
                        "Decompression Throughput",
                        f"{metrics['decompression_throughput']:.2f} MB/s"
                    )

                # Size Information
                st.markdown("#### πŸ’Ύ Size Information")
                size_col1, size_col2, size_col3 = st.columns(3)

                with size_col1:
                    st.metric("Original Size", f"{metrics['original_size_bytes']:,} bytes")

                with size_col2:
                    st.metric("Compressed Size", f"{metrics['compressed_size_bytes']:,} bytes")

                with size_col3:
                    bytes_saved = metrics['original_size_bytes'] - metrics['compressed_size_bytes']
                    st.metric("Bytes Saved", f"{bytes_saved:,}", delta=f"{metrics['space_savings']:.1f}%")

                # Lossless Verification
                st.markdown("#### βœ… Lossless Verification")

                # SHA-256 Hash Verification
                hash_col1, hash_col2 = st.columns(2)

                with hash_col1:
                    st.markdown("**Original Hash (SHA-256)**")
                    st.code(format_hash(metrics['original_hash']), language="text")

                with hash_col2:
                    st.markdown("**Decompressed Hash (SHA-256)**")
                    st.code(format_hash(metrics['decompressed_hash']), language="text")

                # Verification Status
                verif_col1, verif_col2 = st.columns(2)

                with verif_col1:
                    if metrics['hash_match']:
                        st.success("βœ… **Hash Match**: SHA-256 hashes are identical")
                    else:
                        st.error("❌ **Hash Mismatch**: Hashes do not match!")

                with verif_col2:
                    if metrics['exact_match']:
                        st.success("βœ… **Exact Match**: Fidelity 100% - All characters match")
                    else:
                        st.error("❌ **Exact Match**: Fidelity 0% - Characters do not match")

                # Reconstruction Error
                st.markdown("#### Reconstruction Error")
                if metrics['reconstruction_error'] == 0.0:
                    st.success(f"βœ… **Error Rate: 0.0** - Lossless compression verified")
                    st.latex(r"E = \frac{1}{N} \sum_{i=1}^{N} \mathbb{1}(x_i \neq \hat{x}_i) = 0")
                else:
                    st.error(f"❌ **Error Rate: {metrics['reconstruction_error']:.4f}**")

                # Shannon Entropy & Theoretical Limits
                # Only shown when calculate_metrics got entropy figures from the compressor.
                if metrics.get('shannon_entropy') is not None:
                    st.markdown("#### πŸ“ Shannon Entropy & Theoretical Limits")
                    st.markdown("""
                    **Shannon Entropy** determines the theoretical compression limit:
                    $H(X) = -\\sum_{i=1}^{n} P(x_i) \\log_2 P(x_i)$
                    """)

                    entropy_col1, entropy_col2, entropy_col3 = st.columns(3)

                    with entropy_col1:
                        st.metric(
                            "Shannon Entropy (bits/char)",
                            f"{metrics['shannon_entropy']:.4f}",
                            help="Theoretical bits needed per character"
                        )

                    with entropy_col2:
                        st.metric(
                            "Theoretical Min (bytes)",
                            f"{metrics['theoretical_min_bytes']:.2f}",
                            help="Theoretical minimum size achievable"
                        )

                    with entropy_col3:
                        if metrics['theoretical_compression_ratio']:
                            theoretical_savings = (1 - metrics['theoretical_compression_ratio']) * 100
                            st.metric(
                                "Theoretical Savings",
                                f"{theoretical_savings:.2f}%",
                                help="Best possible space savings"
                            )

                    # Comparison: Actual vs Theoretical
                    actual_vs_theoretical = (
                        metrics['compressed_size_bytes'] / metrics['theoretical_min_bytes']
                        if metrics['theoretical_min_bytes'] and metrics['theoretical_min_bytes'] > 0
                        else None
                    )

                    if actual_vs_theoretical:
                        st.info(
                            f"πŸ“Š **Efficiency**: Actual compression is "
                            f"**{actual_vs_theoretical:.2f}x** the theoretical minimum. "
                            f"Lower is better (1.0x = optimal)."
                        )

        # Comparison Table
        st.markdown("### πŸ“Š Method Comparison Table")

        comparison_data = {
            'Method': [method_names[m] for m in methods],
            'Compression Ratio (x)': [f"{all_metrics[m.value]['compression_ratio']:.2f}" for m in methods],
            'Space Savings (%)': [f"{all_metrics[m.value]['space_savings']:.2f}" for m in methods],
            'BPC': [f"{all_metrics[m.value]['bits_per_character']:.2f}" for m in methods],
            'Original (bytes)': [f"{all_metrics[m.value]['original_size_bytes']:,}" for m in methods],
            'Compressed (bytes)': [f"{all_metrics[m.value]['compressed_size_bytes']:,}" for m in methods],
            'Compress Speed (MB/s)': [f"{all_metrics[m.value]['compression_throughput']:.2f}" for m in methods],
            'Decompress Speed (MB/s)': [f"{all_metrics[m.value]['decompression_throughput']:.2f}" for m in methods],
            'Lossless': ['βœ…' if all_metrics[m.value]['hash_match'] and all_metrics[m.value]['exact_match'] else '❌' for m in methods],
        }

        st.dataframe(comparison_data, use_container_width=True, hide_index=True)

        # Best method recommendation (highest compression ratio wins)
        best_method = max(methods, key=lambda m: all_metrics[m.value]['compression_ratio'])
        best_ratio = all_metrics[best_method.value]['compression_ratio']
        best_savings = all_metrics[best_method.value]['space_savings']

        st.success(
            f"πŸ† **Best Compression Method**: **{method_names[best_method]}** "
            f"with **{best_ratio:.2f}x** compression ratio "
            f"({best_savings:.2f}% space savings)"
        )
535
 
 
 
536
 
537
# Script entry point: render the app when executed directly (e.g. `streamlit run`).
if __name__ == "__main__":
    main()