Jack698 committed on
Commit
b3aa249
·
verified ·
1 Parent(s): 5d057f3

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. main.py +73 -106
  2. ngram_score.py +16 -17
  3. requirements.txt +1 -1
main.py CHANGED
@@ -1,118 +1,85 @@
1
- # 导入所需的库
2
- from pycipher import SimpleSubstitution as SimpleSub
3
  import random
4
- import re
5
- from ngram_score import ngram_score
6
- import proability
7
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- # 全局变量,避免重复加载大文件
10
- _fitness = None
11
-
12
- def get_fitness():
13
- global _fitness
14
- if _fitness is None:
15
- print("Loading quadgrams data...")
16
- _fitness = ngram_score('quadgrams.txt')
17
- print("Quadgrams data loaded successfully.")
18
- return _fitness
19
-
20
- def decrypt_text_internal(ciphertext):
21
- fitness = get_fitness()
22
- ctext = re.sub('[^A-Z]', '', ciphertext.upper())
23
- maxkey = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
24
- maxscore = -99e9
25
- parentscore, parentkey = maxscore, maxkey[:]
26
-
27
- i = 0
28
- # 进一步减少迭代次数以加快响应速度
29
- while i < 500: # 减少迭代次数
30
- i = i + 1
31
- random.shuffle(parentkey)
32
- deciphered = SimpleSub(parentkey).decipher(ctext)
33
- parentscore = fitness.score(deciphered)
34
  count = 0
35
- while count < 500: # 减少内部迭代次数
36
  a = random.randint(0, 25)
37
  b = random.randint(0, 25)
38
- child = parentkey[:]
39
- child[a], child[b] = child[b], child[a]
40
- deciphered = SimpleSub(child).decipher(ctext)
41
- score = fitness.score(deciphered)
42
- if score > parentscore:
43
- parentscore = score
44
- parentkey = child[:]
 
 
 
 
45
  count = 0
46
  count = count + 1
47
- if parentscore > maxscore:
48
- maxscore, maxkey = parentscore, parentkey[:]
49
- ss = SimpleSub(maxkey)
50
- plaintext = ss.decipher(ctext)
51
- plaintext1 = add_punctuation_and_spaces(ciphertext, plaintext)
52
- # In a web context, we return the first good result.
53
- # The original loop was infinite, which is not suitable for a server.
54
- return plaintext1
55
- # Fallback if no good solution is found within the iteration limit
56
- ss = SimpleSub(maxkey)
57
- plaintext = ss.decipher(ctext)
58
- return add_punctuation_and_spaces(ciphertext, plaintext)
59
-
60
-
61
- def output(string1, dic, string2):
62
- modified_string1 = list(string1)
63
- modified_string2 = list(string2)
64
- for i in range(len(string1)):
65
- if modified_string1[i] in dic and modified_string2[i] != ' ':
66
- modified_string2[i] = dic[modified_string1[i]]
67
- modified_string2 = ''.join(modified_string2)
68
- return modified_string2
69
-
70
- def add_punctuation_and_spaces(ciphertext, plaintext):
71
- result = ""
72
- j = 0
73
- for i in range(len(ciphertext)):
74
- if not ciphertext[i].isalpha():
75
- result += ciphertext[i]
76
- else:
77
- if ciphertext[i].islower():
78
- result += plaintext[j].lower()
79
- else:
80
- # The original code had a bug here, always making it lowercase.
81
- # This is a guess at the intended behavior.
82
- result += plaintext[j]
83
- j += 1
84
- return result
85
-
86
- def decrypt_interface(ciphertext, key):
87
- """
88
- This is the main function that will be exposed through the Gradio interface.
89
- """
90
- if not ciphertext:
91
- return "Please enter some ciphertext."
92
-
93
- plaintext = decrypt_text_internal(ciphertext)
94
-
95
- if key:
96
- try:
97
- key_dic = proability.read_key(key)
98
- plaintext = output(ciphertext, key_dic, plaintext)
99
- except Exception as e:
100
- return f"Error processing key: {e}. Please check the key format (e.g., a=B c=D)."
101
-
102
- return plaintext
103
-
104
- # Create the Gradio interface
105
  iface = gr.Interface(
106
- fn=decrypt_interface,
107
- inputs=[
108
- gr.Textbox(lines=10, label="Ciphertext", placeholder="Enter the text to decrypt..."),
109
- gr.Textbox(lines=2, label="Known Key Mappings (Optional)", placeholder="e.g., a=B c=D")
110
- ],
111
- outputs=gr.Textbox(lines=10, label="Plaintext"),
112
- title="Simple Substitution Cipher Decryptor",
113
- description="An automatic decryption tool for simple substitution ciphers. You can optionally provide known letter mappings to improve accuracy."
114
  )
115
 
116
- # Launch the app
117
  if __name__ == "__main__":
118
- iface.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
1
  import random
 
 
 
2
  import gradio as gr
3
+ from ngram_score import ngram_score
4
+
5
+
6
def get_keydict(key_dict, current_key, original_alphabet):
    """Record the substitution described by ``current_key`` in ``key_dict``.

    The symbol at position ``i`` of ``current_key`` (a ciphertext letter) is
    mapped to the symbol at position ``i`` of ``original_alphabet`` (the
    plaintext letter it stands for).

    Args:
        key_dict: Dictionary to fill. It is updated in place; entries for
            symbols that do not occur in ``current_key`` are left untouched.
        current_key: Sequence of ciphertext symbols (the candidate key).
        original_alphabet: Sequence of plaintext symbols, aligned with
            ``current_key`` by position.

    Returns:
        The same ``key_dict`` object that was passed in.
    """
    # zip() pairs the two sequences positionally and stops at the shorter
    # one, so a key longer than the alphabet no longer raises IndexError.
    key_dict.update(zip(current_key, original_alphabet))
    return key_dict
10
+
11
+
12
def exchange(mydict, message):
    """Apply a single-symbol substitution table to ``message``.

    Args:
        mydict: Mapping from a symbol to its replacement string.
        message: Iterable of symbols (normally a ``str``) to translate.

    Returns:
        A new string in which every symbol found in ``mydict`` is replaced by
        its mapped value and every other symbol is copied through unchanged.
    """
    # Each symbol is looked up exactly once against the original text, so a
    # replacement is never itself substituted again (A->B, B->A swaps work).
    return "".join(mydict.get(symbol, symbol) for symbol in message)
20
+
21
+
22
# Quadgram statistics are expensive to parse, so they are loaded once and
# shared by every request instead of being re-read on each call.
_QUADGRAM_FILE = 'english_quadgrams.txt'
_fitness_cache = None


def _get_fitness():
    """Return the shared quadgram scorer, loading the file on first use."""
    global _fitness_cache
    if _fitness_cache is None:
        _fitness_cache = ngram_score(_QUADGRAM_FILE)
    return _fitness_cache


def solve_cipher(ciphertext):
    """Break a simple substitution cipher by random-restart hill climbing.

    The search runs 10 restarts. Each restart shuffles the key and then keeps
    swapping two random key positions, accepting a swap only when it raises
    the quadgram fitness of the decrypted text; a restart ends after 1000
    consecutive swaps without improvement. The key with the best score over
    all restarts is used for the final decryption.

    Args:
        ciphertext: Text to decrypt. Only the letters A-Z (case-insensitive)
            take part in scoring; every other character is ignored while
            searching and copied through unchanged in the result.

    Returns:
        The best decryption found, in lower case, with the non-letter
        characters of the input kept in place. Input that is empty, ``None``
        or contains no letters A-Z is returned (lower-cased) without running
        the search.
    """
    upper_text = (ciphertext or "").upper()
    # Score on letters only: digits, newlines or punctuation left in the text
    # would create quadgrams that never occur in the statistics and drown the
    # signal from the real letters.
    m_message = "".join(ch for ch in upper_text if 'A' <= ch <= 'Z')
    if not m_message:
        # Nothing to decrypt; skip loading the statistics and the search.
        return upper_text.lower()

    fitness = _get_fitness()
    original_alphabet = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    current_key = original_alphabet[:]

    current_max_score = -2 ** 31
    best_plaintext = ""

    for _ in range(10):  # number of random restarts
        # Start each restart from a fresh random key.
        random.shuffle(current_key)
        key_dict = get_keydict({}, current_key, original_alphabet)
        last_score = fitness.score(exchange(key_dict, m_message))

        count = 0  # swaps tried since the last improvement
        while count < 1000:
            a = random.randint(0, 25)
            b = random.randint(0, 25)
            # Candidate key: the current key with two positions swapped.
            child_current_key = current_key[:]
            child_current_key[a], child_current_key[b] = (
                child_current_key[b], child_current_key[a])

            child_key_dict = get_keydict({}, child_current_key, original_alphabet)
            score = fitness.score(exchange(child_key_dict, m_message))
            if score > last_score:
                # The swap improved the fitness: keep it and reset patience.
                last_score = score
                current_key = child_current_key
                count = 0
            count += 1

        # Remember the best decryption seen across all restarts.
        if last_score > current_max_score:
            current_max_score = last_score
            key_dict = get_keydict({}, current_key, original_alphabet)
            best_plaintext = exchange(key_dict, upper_text).lower()

    return best_plaintext
75
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
# Web UI: one multi-line textbox for the ciphertext, plain text for the result.
_cipher_input = gr.Textbox(lines=10, placeholder="Enter ciphertext here...")

iface = gr.Interface(
    fn=solve_cipher,
    inputs=_cipher_input,
    outputs="text",
    title="Substitution Cipher Solver",
    description="Enter the ciphertext and see the decrypted plaintext.",
)


if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script.
    iface.launch()
ngram_score.py CHANGED
@@ -3,27 +3,26 @@ from math import log10
3
 
4
  class ngram_score(object):
5
  def __init__(self, ngramfile, sep=' '):
6
- '''加载包含ngrams和计数的文件,计算对数概率'''
7
- self.ngrams = {} # 存储ngrams及其计数的字典
8
- with open(ngramfile, 'r') as file:
9
- for line in file:
10
- key, count = line.split(sep) # 将行按分隔符分割为ngram和计数
11
- self.ngrams[key] = int(count) # 将ngram及其计数存储到字典中
12
- self.L = len(key) # ngram的长度
13
- self.N = sum(self.ngrams.values()) # 所有ngrams的总计数
14
- # 计算对数概率
15
  for key in self.ngrams.keys():
16
- self.ngrams[key] = log10(
17
- float(self.ngrams[key]) / self.N) # 计算ngram的对数概率
18
- self.floor = log10(0.01 / self.N) # 用于处理未知的ngrams的默认对数概率阈值
19
 
20
  def score(self, text):
21
- '''计算文本的分数'''
22
  score = 0
23
- ngrams = self.ngrams.__getitem__ # 获取ngrams的对数概率函数
24
  for i in range(len(text) - self.L + 1):
25
- if text[i:i+self.L] in self.ngrams:
26
- score += ngrams(text[i:i+self.L]) # 若ngrams在文本中存在,则加上对数概率
27
  else:
28
- score += self.floor # 若ngrams在文本中不存在,则加上默认对数概率阈值
29
  return score
 
 
3
 
4
class ngram_score(object):
    """Score text by the summed log10 probabilities of its n-grams."""

    def __init__(self, ngramfile, sep=' '):
        """Load n-gram counts from a file and convert them to log probabilities.

        Args:
            ngramfile: Path of a text file with one ``<ngram><sep><count>``
                entry per line. Blank lines are ignored.
            sep: Separator between the n-gram and its count.

        Raises:
            ValueError: If the file contains no entries, or a line does not
                split into exactly an n-gram and an integer count.
        """
        self.ngrams = {}
        key = None
        # The context manager closes the file even when a line fails to parse.
        with open(ngramfile, encoding='utf-8') as handle:
            for line in handle:
                if not line.strip():
                    continue  # tolerate blank / trailing empty lines
                key, count = line.split(sep)
                self.ngrams[key] = int(count)
        if key is None:
            raise ValueError('no n-grams found in %r' % (ngramfile,))
        # The n-gram length is taken from the last entry read.
        self.L = len(key)
        self.N = sum(self.ngrams.values())
        # Replace each raw count with its log10 relative frequency.
        for key in self.ngrams.keys():
            self.ngrams[key] = log10(float(self.ngrams[key]) / self.N)
        # Score assigned to any n-gram that is absent from the file.
        self.floor = log10(0.01 / self.N)

    def score(self, text):
        """Return the fitness of ``text``.

        Every window of ``self.L`` consecutive characters contributes its
        stored log probability, or ``self.floor`` when the window is not a
        known n-gram. Text shorter than ``self.L`` scores 0.
        """
        score = 0
        lookup = self.ngrams.get
        width = self.L
        floor = self.floor
        for i in range(len(text) - width + 1):
            score += lookup(text[i:i + width], floor)
        return score
28
+
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
  pycipher
2
- gradio>=4.0.0
 
1
  pycipher
2
+ gradio