Jack698 committed on
Commit
e9046e8
·
verified ·
1 Parent(s): 2268cd4

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. README.md +24 -12
  2. main.py +107 -0
  3. ngram_score.py +29 -0
  4. proability.py +125 -0
  5. quadgrams.txt +0 -0
  6. requirements.txt +2 -0
README.md CHANGED
@@ -1,12 +1,24 @@
1
- ---
2
- title: Simplecrack
3
- emoji: 🔥
4
- colorFrom: blue
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.45.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Simple Substitution Cipher Decryptor
3
+ emoji: 🕵️
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 3.35.2
8
+ app_file: main.py
9
+ pinned: false
10
+ ---
11
+
12
+ # Simple Substitution Cipher Decryptor
13
+
14
+ This is a simple tool to automatically decrypt text that has been encrypted with a simple substitution cipher.
15
+
16
+ **How it works:**
17
+ The backend uses n-gram frequency analysis (specifically quadgrams) to score possible decryptions and find the most likely plaintext.
18
+
19
+ **How to use:**
20
+ 1. Paste your ciphertext into the "Ciphertext" box.
21
+ 2. (Optional) If you know any letter mappings (e.g., you know 'a' in the ciphertext is 'T' in the plaintext), you can provide them in the "Known Key Mappings" box. The format is `a=T b=E`.
22
+ 3. The decrypted plaintext will appear in the "Plaintext" box.
23
+
24
+ *Note: The decryption process is heuristic and may not always produce a perfect result, especially for short ciphertexts.*
main.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import the required libraries
2
+ from pycipher import SimpleSubstitution as SimpleSub
3
+ import random
4
+ import re
5
+ from ngram_score import ngram_score
6
+ import proability
7
+ import gradio as gr
8
+
9
def decrypt_text_internal(ciphertext, restarts=1, patience=1000):
    """Break a simple substitution cipher by randomized hill climbing.

    The ciphertext is reduced to its A-Z letters, a random key is chosen and
    then improved by swapping two key positions at a time, keeping a swap
    whenever the quadgram fitness of the deciphered text increases.

    Args:
        ciphertext: Text to decrypt; case, spaces and punctuation are
            restored in the result via ``add_punctuation_and_spaces``.
        restarts: Number of independent random starting keys to try. The
            best-scoring result over all restarts is returned. The default
            of 1 keeps the request time bounded for the web UI.
        patience: A climb stops once this many consecutive swaps have failed
            to improve the score.

    Returns:
        The best plaintext found, laid out like ``ciphertext``. Input that
        contains no A-Z letters is returned unchanged.
    """
    ctext = re.sub('[^A-Z]', '', ciphertext.upper())
    if not ctext:
        # Nothing to decrypt: avoid loading the quadgram table at all.
        return ciphertext

    fitness = ngram_score('quadgrams.txt')
    best_score, best_key = -99e9, list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

    for _ in range(max(1, restarts)):
        # Each restart begins from a fresh random permutation of the alphabet.
        parent_key = best_key[:]
        random.shuffle(parent_key)
        parent_score = fitness.score(SimpleSub(parent_key).decipher(ctext))

        stale = 0
        while stale < patience:
            a = random.randint(0, 25)
            b = random.randint(0, 25)
            child = parent_key[:]
            child[a], child[b] = child[b], child[a]
            score = fitness.score(SimpleSub(child).decipher(ctext))
            if score > parent_score:
                # Accept the improving swap and restart the patience counter.
                parent_score, parent_key = score, child
                stale = 0
            stale += 1

        if parent_score > best_score:
            best_score, best_key = parent_score, parent_key[:]

    plaintext = SimpleSub(best_key).decipher(ctext)
    return add_punctuation_and_spaces(ciphertext, plaintext)
48
+
49
+
50
def output(string1, dic, string2):
    """Overlay user-supplied key mappings onto a decrypted text.

    Args:
        string1: The original ciphertext.
        dic: Mapping from a ciphertext character to the plaintext string that
            should replace it (lookup is case-sensitive).
        string2: The automatically decrypted text, aligned position by
            position with ``string1``.

    Returns:
        ``string2`` with every position whose ciphertext character is in
        ``dic`` replaced by the mapped value. Positions where ``string2``
        holds a space are left untouched. If the two strings differ in
        length, only the overlapping prefix is patched and the rest of
        ``string2`` is kept as is (no IndexError).
    """
    patched = list(string2)
    for i, (cipher_char, plain_char) in enumerate(zip(string1, string2)):
        if cipher_char in dic and plain_char != ' ':
            patched[i] = dic[cipher_char]
    return ''.join(patched)
58
+
59
def add_punctuation_and_spaces(ciphertext, plaintext):
    """Re-insert the non-letter characters and letter case of the ciphertext.

    ``plaintext`` is expected to hold one deciphered letter for every A-Z
    letter that remains after upper-casing ``ciphertext`` (which is how
    ``decrypt_text_internal`` builds the text it deciphers). A character
    counts as a letter here only if its upper-cased form contains A-Z, so
    non-ASCII letters such as 'é' or CJK characters are copied through
    unchanged instead of consuming plaintext letters.

    Args:
        ciphertext: The original text, including spaces and punctuation.
        plaintext: The deciphered letters only.

    Returns:
        The plaintext laid out like ``ciphertext``: non-letters copied
        verbatim, lower-case ciphertext letters producing lower-case output.
        If ``plaintext`` runs out early, the remaining letters are dropped
        rather than raising IndexError.
    """
    pieces = []
    pos = 0
    for ch in ciphertext:
        # Number of A-Z letters this character contributes once upper-cased
        # (1 for ASCII letters, 0 for punctuation and non-ASCII letters,
        # occasionally 2, e.g. 'ß' -> 'SS').
        width = sum(1 for c in ch.upper() if 'A' <= c <= 'Z')
        if width == 0:
            pieces.append(ch)
            continue
        chunk = plaintext[pos:pos + width]
        pos += width
        pieces.append(chunk.lower() if ch.islower() else chunk)
    return ''.join(pieces)
74
+
75
def decrypt_interface(ciphertext, key):
    """Entry point exposed through the Gradio interface.

    Args:
        ciphertext: Text entered in the "Ciphertext" box.
        key: Optional known mappings such as ``"a=T b=E"``; may be empty.

    Returns:
        The decrypted text, or a human-readable message when the ciphertext
        is missing or the key cannot be parsed.
    """
    if not ciphertext:
        return "Please enter some ciphertext."

    # Parse the key before decrypting so a malformed key is reported
    # immediately instead of after the expensive search has finished.
    key_dic = {}
    if key:
        try:
            key_dic = proability.read_key(key)
        except Exception as e:  # UI boundary: report instead of crashing
            return f"Error processing key: {e}. Please check the key format (e.g., a=B c=D)."

    plaintext = decrypt_text_internal(ciphertext)

    if key_dic:
        plaintext = output(ciphertext, key_dic, plaintext)

    return plaintext
92
+
93
# Create the Gradio interface.
# The two input boxes (ciphertext, optional known mappings) are listed in the
# same order as the parameters of decrypt_interface; the function's return
# value is displayed in the "Plaintext" box.
iface = gr.Interface(
    fn=decrypt_interface,
    inputs=[
        gr.Textbox(lines=10, label="Ciphertext", placeholder="Enter the text to decrypt..."),
        gr.Textbox(lines=2, label="Known Key Mappings (Optional)", placeholder="e.g., a=B c=D")
    ],
    outputs=gr.Textbox(lines=10, label="Plaintext"),
    title="Simple Substitution Cipher Decryptor",
    description="An automatic decryption tool for simple substitution ciphers. You can optionally provide known letter mappings to improve accuracy."
)

# Launch the app only when this file is run as a script (not on import).
if __name__ == "__main__":
    # Listens on all interfaces, port 8080.
    # NOTE(review): Hugging Face Gradio Spaces conventionally serve on port
    # 7860 -- confirm that a hard-coded 8080 is intended for the deployment.
    iface.launch(server_name="0.0.0.0", server_port=8080)
ngram_score.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from math import log10
2
+
3
+
4
class ngram_score(object):
    """Score text by the summed log10 probabilities of its n-grams."""

    def __init__(self, ngramfile, sep=' '):
        """Load a file of n-grams with counts and compute log probabilities.

        Each non-blank line must have the form ``<ngram><sep><count>``.
        Surrounding whitespace and blank lines are ignored.

        Args:
            ngramfile: Path of the n-gram count file.
            sep: Separator between the n-gram and its count on each line.

        Raises:
            ValueError: If a line is malformed or the file has no n-grams.
        """
        self.ngrams = {}  # n-gram -> count, later replaced by log10 probability
        with open(ngramfile, 'r', encoding='utf-8') as file:
            for line in file:
                line = line.strip()
                if not line:
                    continue  # tolerate blank lines, e.g. at the end of the file
                key, count = line.split(sep)
                self.ngrams[key] = int(count)
        if not self.ngrams:
            raise ValueError(f"no n-grams found in {ngramfile!r}")
        self.L = len(key)  # n-gram length, taken from the last entry read
        self.N = sum(self.ngrams.values())  # total count over all n-grams
        # Convert the raw counts to log10 probabilities in place.
        for key, count in self.ngrams.items():
            self.ngrams[key] = log10(float(count) / self.N)
        # Log probability assigned to n-grams that are absent from the file.
        self.floor = log10(0.01 / self.N)

    def score(self, text):
        """Return the fitness score of ``text``.

        The score is the sum, over every window of ``self.L`` consecutive
        characters, of that n-gram's log10 probability; unknown n-grams
        contribute ``self.floor``. Text shorter than ``self.L`` scores 0.
        """
        total = 0
        lookup = self.ngrams.get
        floor = self.floor
        width = self.L
        for i in range(len(text) - width + 1):
            total += lookup(text[i:i + width], floor)
        return total
proability.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def notuse(notusing_character):
    """Build the blacklist of characters to ignore during analysis.

    Intended for symbols that merely separate sentences in the ciphertext,
    such as full stops or semicolons. A character may be given more than
    once; the blacklist is a dictionary mapping each character to the number
    of times it was supplied.
    """
    blacklist = {}
    for char in notusing_character:
        blacklist[char] = blacklist.get(char, 0) + 1
    return blacklist
10
+
11
def read_key(string):
    """Parse known key mappings into a dictionary.

    Args:
        string: Whitespace-separated ``cipher=plain`` pairs, e.g. ``"a=T b=E"``.

    Returns:
        A dictionary mapping each ciphertext symbol to its plaintext symbol.
        An empty or whitespace-only string yields an empty dictionary.

    Raises:
        ValueError: If a pair does not contain exactly one ``=`` or has an
            empty side.
    """
    mapping = {}
    for pair in string.split():
        cipher_char, sep, plain_char = pair.partition('=')
        if not sep or not cipher_char or not plain_char or '=' in plain_char:
            raise ValueError(
                f"invalid mapping {pair!r}, expected the form a=T")
        mapping[cipher_char] = plain_char
    return mapping
18
+
19
+ # # def count_character(string, blacklist):
20
+ # # "记录不同字符的概率"
21
+ # # character_count = {}#存放字符频率的字典
22
+ # # number1 = 0
23
+ # # for char in string:
24
+ # # if (char not in blacklist):#跳过黑名单字符
25
+ # # if (char in character_count):
26
+ # # character_count[char] += 1
27
+ # # else:
28
+ # # character_count[char] = 1
29
+ # # number1 += 1#计算有效字符的总个数
30
+
31
+ # # character_proability = {}#存放字符概率的字典
32
+ # # for char, count in character_count.items():
33
+ # # proability = count / number1
34
+ # # formatted_proability = "{:.2%}".format(proability)
35
+ # # character_proability[char] = formatted_proability#计算字符的概率
36
+
37
+ # # return sort_dict(character_proability)
38
def count_bingary_character(string, blacklist):
    """Compute the relative frequency of each two-character sequence.

    Every pair of adjacent characters in ``string`` is counted, except pairs
    in which either character is in ``blacklist``. Each frequency is the
    pair's count divided by the number of counted pairs, formatted as a
    percentage string with two decimals (e.g. ``"12.50%"``).

    Returns:
        The pair -> percentage dictionary as ordered by ``sort_dict``.
    """
    pair_counts = {}
    total_pairs = 0  # number of pairs that passed the blacklist filter
    for first, second in zip(string, string[1:]):
        if first in blacklist or second in blacklist:
            continue
        bigram = first + second
        pair_counts[bigram] = pair_counts.get(bigram, 0) + 1
        total_pairs += 1

    pair_percentages = {
        bigram: "{:.2%}".format(occurrences / total_pairs)
        for bigram, occurrences in pair_counts.items()
    }
    return sort_dict(pair_percentages)
63
+
64
+ # def count_ternary_character(string,blacklist):
65
+ # "记录三元字符的概率"
66
+ # ternary_character_count = {}#存放三元字符频率的字典
67
+ # number3=0
68
+ # # 遍历字符串,提取三元字符并统计频率
69
+ # for i in range(len(string) - 2):
70
+ # char1 = string[i]
71
+ # char2 = string[i + 1]
72
+ # char3 = string[i + 2]
73
+
74
+ # if (char1 not in blacklist) and (char2 not in blacklist) and (char3 not in blacklist): # 跳过黑名单字符,若三个字符都不属于黑名单则合成一个三元字符
75
+ # ternary_character = char1 + char2 +char3
76
+ # if ternary_character in ternary_character_count:
77
+ # ternary_character_count[ternary_character] += 1
78
+ # else:
79
+ # ternary_character_count[ternary_character] = 1
80
+ # number3+=1#计算有效三元字符的数量
81
+
82
+ # ternary_character_proability = {}#存放三元字符概率的字典
83
+ # for ternary_character, count in ternary_character_count.items():
84
+ # proability = count / number3
85
+ # formatted_proability = "{:.2%}".format(proability)#直接把小数显示为百分数
86
+ # ternary_character_proability[ternary_character] =formatted_proability#计算三元字符的概率
87
+
88
+ # return sort_dict(ternary_character_proability)
89
+
90
+
91
def print_dict(dictionary):
    """Print a percentage dictionary as ``key : value`` lines, in ``sort_dict`` order."""
    for key, value in sort_dict(dictionary).items():
        print(key, ":", value)
95
+
96
+
97
def sort_dict(percentage_dict):
    """Return a copy of the dictionary ordered by descending percentage.

    Values are percentage strings such as ``"12.50%"``; they are compared by
    their numeric value. Entries with equal values keep their original
    relative order.
    """
    def numeric_value(entry):
        # entry is a (key, "NN.NN%") pair; drop the trailing percent sign.
        return float(entry[1].rstrip("%"))

    ranked = list(percentage_dict.items())
    ranked.sort(key=numeric_value, reverse=True)
    return dict(ranked)
101
+
102
+ # def read_frequency_file(file_path,frequency_dict):#读入英文字频分布
103
+ # with open(file_path, 'r') as file:
104
+ # for line in file:
105
+ # line = line.strip()
106
+ # if line: # 确保不是空行
107
+ # letter, frequency = line.split()
108
+ # letter = letter.strip()
109
+ # frequency = frequency.strip().rstrip('%')
110
+ # frequency = float(frequency) / 100.0 # 将百分数转换为浮点数
111
+ # formatted_number = "{:.2%}".format(frequency)#直接把小数显示为百分数
112
+ # frequency_dict[letter] =formatted_number
113
+ # return frequency_dict
114
+
115
+ # def read_frequency_file1(file_path,frequency_list):#三元组的具体频率没找到只找到概率排序
116
+ # with open(file_path, 'r') as file:
117
+ # for line in file:
118
+ # line = line.strip()
119
+ # if line:
120
+ # frequency_list.append(line)
121
+ # return frequency_list
122
+
123
+ # def print_list(lst):
124
+ # for index, value in enumerate(lst):
125
+ # print(f"Index {index}: {value}")
quadgrams.txt ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pycipher
2
+ gradio