Spaces:

Phong1109
/

Huffman

Sleeping

59c7fc3 almost 3 years ago

2.36 kB

	import heapq
	import pandas as pd
	from collections import defaultdict
	import streamlit as st

	class Node:
	    """A vertex of the Huffman tree.

	    Nodes created with a ``symbol`` act as leaves; nodes created with only
	    a ``count`` keep ``symbol`` as ``None`` and serve as internal nodes.
	    """

	    def __init__(self, symbol=None, count=0):
	        # Children start out unset; they are attached after construction.
	        self.left = self.right = None
	        self.symbol, self.count = symbol, count

	    def __lt__(self, other):
	        """Order nodes by ``count`` so they can be stored in a ``heapq`` heap."""
	        return self.count < other.count

	def build_frequency_table(data):
	    """Count how many times each item occurs in ``data``.

	    Returns a ``defaultdict(int)`` keyed by item, with keys in first-seen
	    order.
	    """
	    counts = defaultdict(int)
	    for item in data:
	        counts[item] = counts.get(item, 0) + 1
	    return counts

	def build_huffman_tree(frequency_table):
	    """Build a Huffman tree from a symbol -> count mapping.

	    Args:
	        frequency_table: Mapping of each symbol to its occurrence count.

	    Returns:
	        The root ``Node`` of the tree, or ``None`` when ``frequency_table``
	        is empty. A table with a single symbol yields a lone leaf node.
	    """
	    # An empty table has no tree; return None instead of failing on heap[0].
	    if not frequency_table:
	        return None

	    heap = []
	    for symbol, count in frequency_table.items():
	        heapq.heappush(heap, Node(symbol, count))

	    # Repeatedly merge the two lowest-count nodes under a new parent whose
	    # count is their sum; the first node popped becomes the left child.
	    while len(heap) > 1:
	        left = heapq.heappop(heap)
	        right = heapq.heappop(heap)
	        parent = Node(count=left.count + right.count)
	        parent.left = left
	        parent.right = right
	        heapq.heappush(heap, parent)

	    return heap[0]

	def huffman_compress(data):
	    """Huffman-encode ``data``.

	    Args:
	        data: The sequence of symbols (for example a string) to compress.

	    Returns:
	        A ``(code_table, compressed_data)`` tuple. ``code_table`` maps each
	        symbol to its code string and ``compressed_data`` is the
	        concatenation of the codes of every symbol in ``data``, in order.
	        Empty input gives ``({}, "")``.
	    """
	    if not data:
	        return {}, ""

	    frequency_table = build_frequency_table(data)
	    huffman_tree = build_huffman_tree(frequency_table)
	    code_table = {}
	    build_code_table(huffman_tree, '', code_table)
	    # join() assembles the output in one pass instead of repeated += copies.
	    compressed_data = ''.join(code_table[char] for char in data)
	    return code_table, compressed_data

	def build_code_table(node, code, code_table):
	    """Fill ``code_table`` with the code string of every leaf under ``node``.

	    Args:
	        node: Root of the (sub)tree to walk, or ``None`` (nothing is added).
	        code: Code string accumulated on the path from the root to ``node``;
	            '0' is appended for a left branch and '1' for a right branch.
	        code_table: Dict updated in place with ``symbol -> code`` entries.
	    """
	    if node is None:
	        return
	    if node.symbol is not None:
	        # A tree made of a single leaf reaches here with an empty path.
	        # Give it a 1-bit code so the encoded output is not empty and the
	        # number of encoded symbols stays recoverable.
	        code_table[node.symbol] = code or '0'
	        return
	    build_code_table(node.left, code + '0', code_table)
	    build_code_table(node.right, code + '1', code_table)

	# --- Streamlit page: upload a text file, show its Huffman codes and ratio ---

	# Named `uploaded_file` rather than `input` to avoid shadowing the builtin.
	uploaded_file = st.file_uploader("Chose your txt file")

	text_data = ''
	if uploaded_file is not None:
	    try:
	        text_data = uploaded_file.read().decode('utf-8')
	    except UnicodeDecodeError:
	        # A non-UTF-8 upload would otherwise crash the page with a traceback.
	        st.error("The uploaded file is not valid UTF-8 text.")
	        st.stop()
	    st.header("Input: "+text_data)

	huffman_table, encoded_data = huffman_compress(text_data)

	df_huffman = pd.DataFrame(list(huffman_table.items()), columns=['characters', 'code'])

	if len(text_data) == 0:
	    st.header("Please browse your text file")
	else:
	    st.dataframe(data=df_huffman, width=1000)
	    st.header("Compressed Data:")
	    st.header(encoded_data)
	    # Both sizes are in bits: 8 per UTF-8 byte of the input, and one per
	    # character of the encoded string.
	    original_size = len(text_data.encode('utf-8')) * 8
	    compressed_size = len(encoded_data)
	    # Percentage of the original size saved by the encoding.
	    compression_ratio = (1 - (compressed_size / original_size)) * 100
	    st.header("Hiệu suất nén: "+str(compression_ratio))