Huffman / app.py
Phong1109's picture
.
59c7fc3
import heapq
import pandas as pd
from collections import defaultdict
import streamlit as st
class Node:
    """One vertex of a Huffman tree.

    Attributes are plain instance fields:
      symbol -- the value stored at this vertex, or None (the default).
      count  -- the weight used to order nodes (defaults to 0).
      left / right -- child links; both start as None and are assigned
                      by whoever assembles the tree.
    """

    def __init__(self, symbol=None, count=0):
        # Child links are always created empty; callers attach them later.
        self.left = None
        self.right = None
        self.symbol = symbol
        self.count = count

    def __lt__(self, other):
        """Return True when this node's count is strictly below other's.

        Only ``<`` is defined; that is the comparison the heap operations
        in this file rely on when Node objects are pushed and popped.
        """
        is_lighter = self.count < other.count
        return is_lighter
def build_frequency_table(data):
    """Count how many times each element occurs in ``data``.

    Args:
        data: any iterable of hashable items (the app passes a str, so the
            items are single characters).

    Returns:
        A ``defaultdict(int)`` mapping each distinct item to its number of
        occurrences, with keys in first-seen order.
    """
    occurrences = defaultdict(int)
    for item in data:
        # Missing keys start at 0 thanks to the int default factory.
        occurrences[item] = occurrences[item] + 1
    return occurrences
def build_huffman_tree(frequency_table):
    """Build a Huffman tree from a symbol -> count mapping.

    Every (symbol, count) pair becomes a leaf Node on a min-heap. The two
    lowest-count nodes are then repeatedly popped and merged under a new
    symbol-less parent whose count is their sum, until one node remains.

    Args:
        frequency_table: mapping of symbol to occurrence count.

    Returns:
        The root Node of the tree, or None when ``frequency_table`` is
        empty (there is nothing to build a tree from).
    """
    heap = []
    # Push leaves one at a time (rather than heapify) so that the order in
    # which equal-count nodes are popped stays exactly as before.
    for symbol, count in frequency_table.items():
        heapq.heappush(heap, Node(symbol, count))

    # An empty table used to fall through to heap[0] and raise IndexError.
    if not heap:
        return None

    while len(heap) > 1:
        left = heapq.heappop(heap)
        right = heapq.heappop(heap)
        # Internal nodes carry no symbol; their weight is the subtree total.
        parent = Node(count=left.count + right.count)
        parent.left = left
        parent.right = right
        heapq.heappush(heap, parent)
    return heap[0]
def huffman_compress(data):
    """Huffman-encode ``data`` and return the code table with the encoding.

    Args:
        data: the text to encode (each character is treated as a symbol).

    Returns:
        A ``(code_table, compressed_data)`` tuple. ``code_table`` maps each
        distinct symbol to its code string, and ``compressed_data`` is the
        concatenation of the codes of every symbol in ``data``, in order.
        Empty input yields ``({}, "")``.
    """
    if not data:
        return {}, ""

    frequency_table = build_frequency_table(data)
    huffman_tree = build_huffman_tree(frequency_table)

    code_table = {}
    build_code_table(huffman_tree, '', code_table)

    # Join once instead of growing a string with += for every character,
    # which is quadratic in the worst case.
    compressed_data = ''.join(code_table[char] for char in data)
    return code_table, compressed_data
def build_code_table(node, code, code_table):
    """Fill ``code_table`` with the code of every symbol under ``node``.

    Walks the tree depth-first, appending '0' when descending to the left
    child and '1' when descending to the right child. A node whose
    ``symbol`` is not None is treated as a leaf and its accumulated code is
    recorded.

    Args:
        node: current tree node (needs ``symbol``, ``left`` and ``right``
            attributes), or None, in which case nothing is done.
        code: the code prefix accumulated on the way down to ``node``.
        code_table: dict mutated in place, symbol -> code string.

    Returns:
        None; results are written into ``code_table``.
    """
    if node is None:
        return

    if node.symbol is not None:
        # A leaf reached with an empty prefix means the whole tree is that
        # single leaf (only one distinct symbol). An empty code would make
        # the encoded output empty and undecodable, so use '0' instead.
        code_table[node.symbol] = code or '0'
        return

    build_code_table(node.left, code + '0', code_table)
    build_code_table(node.right, code + '1', code_table)
# ---------------------------------------------------------------------------
# Streamlit page: upload a text file, Huffman-encode it, show the results.
# ---------------------------------------------------------------------------

# Named uploaded_file rather than `input` so the builtin is not shadowed.
uploaded_file = st.file_uploader("Chose your txt file")
if uploaded_file is not None:
    try:
        text_data = uploaded_file.read().decode('utf-8')
    except UnicodeDecodeError:
        # A binary or non-UTF-8 upload would otherwise crash the page with
        # a traceback; report it and end this script run instead.
        st.error("The uploaded file is not valid UTF-8 text.")
        st.stop()
    st.header("Input: " + text_data)
else:
    text_data = ''

# Always computed (an empty string yields an empty table and encoding) so
# these module-level names exist on every run.
huffman_table, encoded_data = huffman_compress(text_data)
df_huffman = pd.DataFrame(list(huffman_table.items()), columns=['characters', 'code'])

if len(text_data) == 0:
    st.header("Please browse your text file")
else:
    st.dataframe(data=df_huffman, width=1000)
    st.header("Compressed Data:")
    st.header(encoded_data)

    # Sizes are compared in bits: 8 per UTF-8 byte of the original versus
    # one per '0'/'1' character of the encoded string.
    original_size = len(text_data.encode('utf-8')) * 8
    compressed_size = len(encoded_data)
    # Percentage of bits saved relative to the original size.
    compression_ratio = (1 - (compressed_size / original_size)) * 100
    # The label is Vietnamese for "compression efficiency".
    st.header("Hiệu suất nén: " + str(compression_ratio))