testy / cipher_8bit.py
Koyd111's picture
Upload 8 files
32b6996 verified
import os
import pickle
import random
from collections import Counter
def cut_string_into_pairs(text_corpus):
pairs = []
for i in range(0, len(text_corpus) - 1, 2):
pairs.append(text_corpus[i:i + 2])
if len(text_corpus) % 2 != 0:
pairs.append(text_corpus[-1] + '_')
return pairs
def get_symbols(text_corpus, max_characters=256):
# Get all single unique characters, then fill in the rest of the symbol spots with the most common character pairs
single_characters = list(set(list(text_corpus)))
pairs = [item for item, _ in Counter(cut_string_into_pairs(text_corpus)).most_common(256 - len(single_characters))]
return single_characters + pairs
def substitution_cipher(symbols, random_seed):
random.seed(random_seed)
# Make a randomly ordered range from 0-255 (8-bits)
integer_encodings = random.sample(list(range(len(symbols))), len(symbols))
substitution_rule = dict({})
# Map every symbol to a unique encoding
for idx, symbol in enumerate(symbols):
encoding = integer_encodings[idx] # Get the random encoding for the symbol
substitution_rule[symbol] = encoding # Store the symbol as the key, encoding as the value
return substitution_rule
def encode_text_with_indices(rule, symbols, text):
encoded_text = []
indices = []
i = 0
# Create a reverse mapping from symbols to their indices
index_dict = dict(zip(symbols, range(len(symbols))))
while i < len(text):
# Check for pairs
if i + 1 < len(text):
pair = text[i] + text[i + 1]
# Check if the pair exists in the rule
if pair in rule:
encoding = rule[pair] # Get the encoding for the pair
encoded_text.append(encoding)
indices.append(index_dict[pair]) # Get the index of the symbol
i += 2 # Skip the two characters used in the pair
continue
# Single character substitution
if text[i] in rule:
encoding = rule[text[i]]
encoded_text.append(encoding)
indices.append(index_dict[text[i]]) # Get the index of the symbol
else:
# If the character doesn't exist in the rule, keep it as-is
encoded_text.append(256)
indices.append(256) # Use -1 or some other value to indicate no encoding for this character
i += 1
return encoded_text, indices
# Function to load or save the symbols as a pickle
def load_or_save_symbols(symbols, pickle_file_path="symbols.pkl"):
if os.path.exists(pickle_file_path):
with open(pickle_file_path, 'rb') as f:
print("Loading symbols from pickle file...")
return pickle.load(f)
else:
print("Pickle file not found. Saving symbols...")
with open(pickle_file_path, 'wb') as f:
pickle.dump(symbols, f)
return symbols