Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- README.md +24 -12
- main.py +107 -0
- ngram_score.py +29 -0
- proability.py +125 -0
- quadgrams.txt +0 -0
- requirements.txt +2 -0
README.md
CHANGED
|
@@ -1,12 +1,24 @@
|
|
| 1 |
-
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom: blue
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
-
app_file:
|
| 9 |
-
pinned: false
|
| 10 |
-
---
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Simple Substitution Cipher Decryptor
|
| 3 |
+
emoji: 🕵️
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 3.35.2
|
| 8 |
+
app_file: main.py
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# Simple Substitution Cipher Decryptor
|
| 13 |
+
|
| 14 |
+
This is a simple tool to automatically decrypt text that has been encrypted with a simple substitution cipher.
|
| 15 |
+
|
| 16 |
+
**How it works:**
|
| 17 |
+
The backend uses n-gram frequency analysis (specifically quadgrams) to score possible decryptions and find the most likely plaintext.
|
| 18 |
+
|
| 19 |
+
**How to use:**
|
| 20 |
+
1. Paste your ciphertext into the "Ciphertext" box.
|
| 21 |
+
2. (Optional) If you know any letter mappings (e.g., you know that 'a' in the ciphertext stands for 'T' in the plaintext), enter them in the "Known Key Mappings" box as whitespace-separated `cipher=plain` pairs, e.g. `a=T b=E`. Mappings are case-sensitive.
|
| 22 |
+
3. The decrypted plaintext will appear in the "Plaintext" box.
|
| 23 |
+
|
| 24 |
+
*Note: The decryption process is heuristic and may not always produce a perfect result, especially for short ciphertexts.*
|
main.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Import the required libraries
|
| 2 |
+
from pycipher import SimpleSubstitution as SimpleSub
|
| 3 |
+
import random
|
| 4 |
+
import re
|
| 5 |
+
from ngram_score import ngram_score
|
| 6 |
+
import proability
|
| 7 |
+
import gradio as gr
|
| 8 |
+
|
| 9 |
+
# Parsed n-gram tables keyed by file name, so the (large) quadgram file is
# read once per process instead of once per request.
_FITNESS_CACHE = {}


def _load_fitness(ngramfile='quadgrams.txt'):
    """Return the cached ``ngram_score`` for *ngramfile*, loading it on first use."""
    scorer = _FITNESS_CACHE.get(ngramfile)
    if scorer is None:
        scorer = _FITNESS_CACHE[ngramfile] = ngram_score(ngramfile)
    return scorer


def decrypt_text_internal(ciphertext, restarts=1, max_stale=1000):
    """Break a simple substitution cipher by random-restart hill climbing.

    Each restart begins from a random key and repeatedly swaps two key
    letters, keeping a swap only when the quadgram fitness of the resulting
    decryption improves. A restart ends after ``max_stale`` consecutive swaps
    without improvement.

    Args:
        ciphertext: Text to decrypt. Only ASCII letters are deciphered; every
            other character is passed through unchanged.
        restarts: Number of random restarts. The default of 1 matches the
            previous behaviour, which always returned after the first restart.
        max_stale: Consecutive non-improving swaps that end a restart.

    Returns:
        The best decryption found, with the punctuation, spacing and letter
        case of ``ciphertext`` restored.
    """
    # Filter *before* upper-casing: upper() can expand characters
    # (e.g. 'ß' -> 'SS'), which would misalign the restored punctuation.
    ctext = re.sub('[^A-Za-z]', '', ciphertext).upper()
    if not ctext:
        # Nothing to decipher; skip the search entirely.
        return ciphertext

    fitness = _load_fitness()
    best_score = float('-inf')
    best_key = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

    for _ in range(max(1, restarts)):
        parentkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        random.shuffle(parentkey)
        parentscore = fitness.score(SimpleSub(parentkey).decipher(ctext))

        stale = 0
        while stale < max_stale:
            a = random.randint(0, 25)
            b = random.randint(0, 25)
            child = parentkey[:]
            child[a], child[b] = child[b], child[a]
            score = fitness.score(SimpleSub(child).decipher(ctext))
            if score > parentscore:
                parentscore, parentkey = score, child
                stale = 0
            stale += 1

        if parentscore > best_score:
            best_score, best_key = parentscore, parentkey[:]

    plaintext = SimpleSub(best_key).decipher(ctext)
    return add_punctuation_and_spaces(ciphertext, plaintext)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def output(string1, dic, string2):
    """Overlay user-supplied letter mappings onto a decrypted string.

    Args:
        string1: The ciphertext, used to look up each position's cipher
            character.
        dic: Mapping of cipher character -> known plaintext character.
        string2: The candidate plaintext, aligned position-by-position with
            ``string1``.

    Returns:
        ``string2`` with every position whose cipher character is a key of
        ``dic`` replaced by the mapped value. Positions holding a space in
        ``string2`` are never replaced. If the strings differ in length, only
        the overlapping prefix is considered and the rest of ``string2`` is
        kept as-is.
    """
    patched = list(string2)
    # zip() stops at the shorter string, so a length mismatch can no longer
    # raise IndexError.
    for index, (cipher_char, plain_char) in enumerate(zip(string1, string2)):
        if plain_char != ' ' and cipher_char in dic:
            patched[index] = dic[cipher_char]
    return ''.join(patched)
|
| 58 |
+
|
| 59 |
+
def add_punctuation_and_spaces(ciphertext, plaintext):
    """Re-insert the non-letter characters and letter case of the ciphertext.

    Args:
        ciphertext: The original text, including spaces and punctuation.
        plaintext: The deciphered letters only (upper case), in order.

    Returns:
        A string as long as ``ciphertext`` in which every ASCII letter is
        replaced by the next letter of ``plaintext`` (lower-cased where the
        ciphertext letter was lower case) and every other character is copied
        through unchanged.
    """
    pieces = []
    next_letter = 0
    for char in ciphertext:
        # Only ASCII letters were deciphered. str.isalpha() would also accept
        # letters such as 'é', which have no counterpart in ``plaintext`` and
        # used to cause misalignment or an IndexError.
        is_ascii_letter = 'a' <= char <= 'z' or 'A' <= char <= 'Z'
        if not is_ascii_letter or next_letter >= len(plaintext):
            pieces.append(char)
            continue
        plain_char = plaintext[next_letter]
        pieces.append(plain_char.lower() if char.islower() else plain_char)
        next_letter += 1
    return ''.join(pieces)
|
| 74 |
+
|
| 75 |
+
def decrypt_interface(ciphertext, key):
    """Gradio entry point: decrypt ``ciphertext``, honouring known mappings.

    Args:
        ciphertext: Text entered in the "Ciphertext" box.
        key: Optional known mappings such as ``"a=B c=D"``; may be empty.

    Returns:
        The decrypted text, or a human-readable message when the input is
        empty or the key cannot be parsed.
    """
    if not ciphertext or not ciphertext.strip():
        return "Please enter some ciphertext."

    # Validate the key *before* the expensive search so that a malformed key
    # is reported immediately instead of after the whole decryption has run.
    key_dic = None
    if key:
        try:
            key_dic = proability.read_key(key)
        except Exception as e:  # UI boundary: show the problem to the user
            return f"Error processing key: {e}. Please check the key format (e.g., a=B c=D)."

    plaintext = decrypt_text_internal(ciphertext)
    if key_dic:
        plaintext = output(ciphertext, key_dic, plaintext)
    return plaintext
|
| 92 |
+
|
| 93 |
+
# Create the Gradio interface
|
| 94 |
+
# Two text inputs (ciphertext plus optional known mappings) feed
# decrypt_interface; its return value is shown in a single text output.
iface = gr.Interface(
    title="Simple Substitution Cipher Decryptor",
    description="An automatic decryption tool for simple substitution ciphers. You can optionally provide known letter mappings to improve accuracy.",
    fn=decrypt_interface,
    inputs=[
        # First positional argument of decrypt_interface.
        gr.Textbox(
            label="Ciphertext",
            placeholder="Enter the text to decrypt...",
            lines=10,
        ),
        # Second positional argument of decrypt_interface.
        gr.Textbox(
            label="Known Key Mappings (Optional)",
            placeholder="e.g., a=B c=D",
            lines=2,
        ),
    ],
    outputs=gr.Textbox(label="Plaintext", lines=10),
)
|
| 104 |
+
|
| 105 |
+
# Launch the app
|
| 106 |
+
if __name__ == "__main__":
    import os

    # Honour a port supplied through the environment (GRADIO_SERVER_PORT is
    # the variable Gradio itself documents); fall back to the previously
    # hard-coded 8080 when it is unset.
    port = int(os.environ.get("GRADIO_SERVER_PORT", "8080"))
    # Bind to all interfaces so the app is reachable from outside a container.
    iface.launch(server_name="0.0.0.0", server_port=port)
|
ngram_score.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from math import log10
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class ngram_score(object):
    """Score text by the summed log10 probabilities of its n-grams."""

    def __init__(self, ngramfile, sep=' '):
        """Load n-gram counts from a file and convert them to log probabilities.

        Args:
            ngramfile: Path to a text file with one ``<ngram><sep><count>``
                entry per line. Blank lines are ignored.
            sep: Separator between the n-gram and its count.

        Raises:
            ValueError: If the file contains no entries, the total count is
                not positive, or a line is malformed.
        """
        counts = {}
        with open(ngramfile, 'r', encoding='utf-8') as file:
            for line in file:
                line = line.strip()
                if not line:
                    # Tolerate blank or trailing empty lines.
                    continue
                key, count = line.split(sep)
                counts[key] = int(count)
        if not counts:
            raise ValueError("no n-grams found in %r" % (ngramfile,))

        self.L = len(key)  # n-gram length, taken from the last entry read
        self.N = sum(counts.values())  # total count over all n-grams
        if self.N <= 0:
            raise ValueError("total n-gram count in %r is not positive" % (ngramfile,))

        # Log10 probability of each known n-gram.
        self.ngrams = {
            gram: log10(float(count) / self.N) for gram, count in counts.items()
        }
        # Log probability assigned to n-grams that are not in the table.
        self.floor = log10(0.01 / self.N)

    def score(self, text):
        """Return the sum of log10 probabilities over every n-gram in *text*.

        N-grams missing from the table contribute ``self.floor``. Text
        shorter than the n-gram length scores 0.
        """
        lookup = self.ngrams.get
        floor = self.floor
        size = self.L
        return sum(
            lookup(text[i:i + size], floor) for i in range(len(text) - size + 1)
        )
|
proability.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def notuse(notusing_character):
    """Build the blacklist of separator characters (periods, semicolons, ...).

    The same character may appear several times in the input, so the
    blacklist is a dict mapping each character to its number of occurrences.
    """
    occurrences = {}
    for symbol in notusing_character:
        occurrences[symbol] = occurrences.get(symbol, 0) + 1
    return occurrences
|
| 10 |
+
|
| 11 |
+
def read_key(string):
    """Parse known key mappings into a dictionary.

    Args:
        string: Whitespace-separated ``cipher=plain`` pairs, e.g. ``"a=T b=E"``.

    Returns:
        A dict mapping the text left of ``=`` to the text right of it for
        every pair. An empty or blank string yields an empty dict.

    Raises:
        ValueError: If a pair does not contain exactly one ``=``.
    """
    mapping = {}  # do not call this ``dict``: that would shadow the builtin
    for pair in string.split():
        parts = pair.split('=')
        if len(parts) != 2:
            raise ValueError(
                "invalid mapping %r (expected exactly one '=')" % (pair,)
            )
        cipher_char, plain_char = parts
        mapping[cipher_char] = plain_char
    return mapping
|
| 18 |
+
|
| 19 |
+
# # def count_character(string, blacklist):
|
| 20 |
+
# # "记录不同字符的概率"
|
| 21 |
+
# # character_count = {}#存放字符频率的字典
|
| 22 |
+
# # number1 = 0
|
| 23 |
+
# # for char in string:
|
| 24 |
+
# # if (char not in blacklist):#跳过黑名单字符
|
| 25 |
+
# # if (char in character_count):
|
| 26 |
+
# # character_count[char] += 1
|
| 27 |
+
# # else:
|
| 28 |
+
# # character_count[char] = 1
|
| 29 |
+
# # number1 += 1#计算有效字符的总个数
|
| 30 |
+
|
| 31 |
+
# # character_proability = {}#存放字符概率的字典
|
| 32 |
+
# # for char, count in character_count.items():
|
| 33 |
+
# # proability = count / number1
|
| 34 |
+
# # formatted_proability = "{:.2%}".format(proability)
|
| 35 |
+
# # character_proability[char] = formatted_proability#计算字符的概率
|
| 36 |
+
|
| 37 |
+
# # return sort_dict(character_proability)
|
| 38 |
+
def count_bingary_character(string, blacklist):
    """Return the relative frequency of each two-character sequence.

    Every pair of adjacent characters in ``string`` is counted unless either
    character is in ``blacklist``. Frequencies are relative to the number of
    counted pairs, formatted as percentage strings with two decimals, and
    returned in the order produced by ``sort_dict``.
    """
    pair_counts = {}
    total_pairs = 0
    for first, second in zip(string, string[1:]):
        # A pair touching a blacklisted character is skipped entirely.
        if first in blacklist or second in blacklist:
            continue
        pair = first + second
        pair_counts[pair] = pair_counts.get(pair, 0) + 1
        total_pairs += 1

    percentages = {
        pair: "{:.2%}".format(occurrences / total_pairs)
        for pair, occurrences in pair_counts.items()
    }
    return sort_dict(percentages)
|
| 63 |
+
|
| 64 |
+
# def count_ternary_character(string,blacklist):
|
| 65 |
+
# "记录三元字符的概率"
|
| 66 |
+
# ternary_character_count = {}#存放三元字符频率的字典
|
| 67 |
+
# number3=0
|
| 68 |
+
# # 遍历字符串,提取三元字符并统计频率
|
| 69 |
+
# for i in range(len(string) - 2):
|
| 70 |
+
# char1 = string[i]
|
| 71 |
+
# char2 = string[i + 1]
|
| 72 |
+
# char3 = string[i + 2]
|
| 73 |
+
|
| 74 |
+
# if (char1 not in blacklist) and (char2 not in blacklist) and (char3 not in blacklist): # 跳过黑名单字符,若三个字符都不属于黑名单则合成一个三元字符
|
| 75 |
+
# ternary_character = char1 + char2 +char3
|
| 76 |
+
# if ternary_character in ternary_character_count:
|
| 77 |
+
# ternary_character_count[ternary_character] += 1
|
| 78 |
+
# else:
|
| 79 |
+
# ternary_character_count[ternary_character] = 1
|
| 80 |
+
# number3+=1#计算有效三元字符的数量
|
| 81 |
+
|
| 82 |
+
# ternary_character_proability = {}#存放三元字符概率的字典
|
| 83 |
+
# for ternary_character, count in ternary_character_count.items():
|
| 84 |
+
# proability = count / number3
|
| 85 |
+
# formatted_proability = "{:.2%}".format(proability)#直接把小数显示为百分数
|
| 86 |
+
# ternary_character_proability[ternary_character] =formatted_proability#计算三元字符的概率
|
| 87 |
+
|
| 88 |
+
# return sort_dict(ternary_character_proability)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def print_dict(dictionary):
    """Print every entry of *dictionary* as ``key : value``, ordered by sort_dict."""
    for name, percentage in sort_dict(dictionary).items():
        print(name, ":", percentage)
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def sort_dict(percentage_dict):
    """Return a new dict ordered by percentage value, largest first.

    Values are strings such as ``"12.50%"``; they are compared numerically
    after the trailing ``%`` is removed.
    """
    def numeric_value(entry):
        _, percentage = entry
        return float(percentage.rstrip("%"))

    ordered_items = sorted(percentage_dict.items(), key=numeric_value, reverse=True)
    return dict(ordered_items)
|
| 101 |
+
|
| 102 |
+
# def read_frequency_file(file_path,frequency_dict):#读入英文字频分布
|
| 103 |
+
# with open(file_path, 'r') as file:
|
| 104 |
+
# for line in file:
|
| 105 |
+
# line = line.strip()
|
| 106 |
+
# if line: # 确保不是空行
|
| 107 |
+
# letter, frequency = line.split()
|
| 108 |
+
# letter = letter.strip()
|
| 109 |
+
# frequency = frequency.strip().rstrip('%')
|
| 110 |
+
# frequency = float(frequency) / 100.0 # 将百分数转换为浮点数
|
| 111 |
+
# formatted_number = "{:.2%}".format(frequency)#直接把小数显示为百分数
|
| 112 |
+
# frequency_dict[letter] =formatted_number
|
| 113 |
+
# return frequency_dict
|
| 114 |
+
|
| 115 |
+
# def read_frequency_file1(file_path,frequency_list):#三元组的具体频率没找到只找到概率排序
|
| 116 |
+
# with open(file_path, 'r') as file:
|
| 117 |
+
# for line in file:
|
| 118 |
+
# line = line.strip()
|
| 119 |
+
# if line:
|
| 120 |
+
# frequency_list.append(line)
|
| 121 |
+
# return frequency_list
|
| 122 |
+
|
| 123 |
+
# def print_list(lst):
|
| 124 |
+
# for index, value in enumerate(lst):
|
| 125 |
+
# print(f"Index {index}: {value}")
|
quadgrams.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pycipher
|
| 2 |
+
gradio
|