Spaces:

Jack698
/

simplecrack

Running

App Files Files Community

Jack698 committed on Sep 12, 2025

Commit

e9046e8

verified ·

1 Parent(s): 2268cd4

Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

README.md +24 -12
main.py +107 -0
ngram_score.py +29 -0
proability.py +125 -0
quadgrams.txt +0 -0
requirements.txt +2 -0

README.md CHANGED Viewed

@@ -1,12 +1,24 @@
----
-title: Simplecrack
-emoji: 🔥
-colorFrom: blue
-colorTo: gray
-sdk: gradio
-sdk_version: 5.45.0
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Simple Substitution Cipher Decryptor
+emoji: 🕵️
+colorFrom: blue
+colorTo: green
+sdk: gradio
+sdk_version: 3.35.2
+app_file: main.py
+pinned: false
+---
+# Simple Substitution Cipher Decryptor
+This is a simple tool to automatically decrypt text that has been encrypted with a simple substitution cipher.
+**How it works:**
+The backend uses n-gram frequency analysis (specifically quadgrams) to score possible decryptions and find the most likely plaintext.
+**How to use:**
+1.  Paste your ciphertext into the "Ciphertext" box.
+2.  (Optional) If you know any letter mappings (e.g., you know 'a' in the ciphertext is 'T' in the plaintext), you can provide them in the "Known Key Mappings" box. The format is `a=T b=E`.
+3.  Click **Submit**. The decrypted plaintext will appear in the "Plaintext" box.
+*Note: The decryption process is heuristic and may not always produce a perfect result, especially for short ciphertexts.*

main.py ADDED Viewed

	@@ -0,0 +1,107 @@

+# 导入所需的库
+from pycipher import SimpleSubstitution as SimpleSub
+import random
+import re
+from ngram_score import ngram_score
+import proability
+import gradio as gr
# Scorers keyed by n-gram file path, so the table is parsed once per process
# instead of once per request.
_FITNESS_CACHE = {}


def _quadgram_fitness(path='quadgrams.txt'):
    """Return the ``ngram_score`` for ``path``, loading the file on first use only."""
    if path not in _FITNESS_CACHE:
        _FITNESS_CACHE[path] = ngram_score(path)
    return _FITNESS_CACHE[path]


def decrypt_text_internal(ciphertext, restarts=1, max_stale=1000):
    """Break a simple substitution cipher by random-restart hill climbing.

    Each restart begins from a random 26-letter key and repeatedly swaps two
    key positions, keeping a swap only when the quadgram score of the
    deciphered text improves. A restart ends after ``max_stale`` consecutive
    swaps without improvement. The best key over all restarts is used for the
    returned text.

    Args:
        ciphertext: Text to decrypt. Only ASCII letters are deciphered.
        restarts: Number of random restarts (values below 1 are treated as 1).
            The default of 1 matches the previous behaviour, which always
            returned after its first restart.
        max_stale: Non-improving swaps tolerated before a restart stops.

    Returns:
        The deciphered text re-formatted by ``add_punctuation_and_spaces``,
        or ``ciphertext`` unchanged when it contains no ASCII letters.
    """
    # Filter BEFORE upper-casing: str.upper() can expand characters
    # (e.g. 'ß' -> 'SS') and would inject letters that are not in the input.
    ctext = re.sub('[^A-Za-z]', '', ciphertext).upper()
    if not ctext:
        # Nothing to decipher; skip the search entirely.
        return ciphertext

    fitness = _quadgram_fitness()
    best_key = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    best_score = None

    for _ in range(max(1, restarts)):
        parent_key = best_key[:]
        random.shuffle(parent_key)
        parent_score = fitness.score(SimpleSub(parent_key).decipher(ctext))

        stale = 0
        while stale < max_stale:
            a = random.randint(0, 25)
            b = random.randint(0, 25)
            child = parent_key[:]
            child[a], child[b] = child[b], child[a]
            score = fitness.score(SimpleSub(child).decipher(ctext))
            if score > parent_score:
                parent_score, parent_key = score, child
                stale = 0
            stale += 1

        if best_score is None or parent_score > best_score:
            best_score, best_key = parent_score, parent_key

    plaintext = SimpleSub(best_key).decipher(ctext)
    return add_punctuation_and_spaces(ciphertext, plaintext)
def output(string1, dic, string2):
    """Overlay known letter mappings onto a decrypted string.

    For every position shared by ``string1`` and ``string2``: if the character
    of ``string1`` is a key of ``dic`` and the character of ``string2`` at that
    position is not a space, that position of the result is replaced by the
    mapped value.

    Args:
        string1: Reference text whose characters are looked up in ``dic``.
        dic: Mapping from a ``string1`` character to its replacement.
        string2: Text to patch.

    Returns:
        ``string2`` with the mapped positions replaced. Positions beyond the
        shorter of the two strings are left as they are in ``string2``
        (previously a shorter ``string2`` could raise ``IndexError``).
    """
    patched = list(string2)
    # zip stops at the shorter input, so mismatched lengths can no longer
    # index past the end of string2.
    for i, (source_char, current_char) in enumerate(zip(string1, string2)):
        if current_char != ' ' and source_char in dic:
            patched[i] = dic[source_char]
    return ''.join(patched)
def add_punctuation_and_spaces(ciphertext, plaintext):
    """Re-apply the layout and letter case of ``ciphertext`` to ``plaintext``.

    ``plaintext`` is a letters-only string. Its characters are consumed in
    order, one per ASCII letter of ``ciphertext``. Every other character of
    ``ciphertext`` (spaces, digits, punctuation, non-ASCII letters) is copied
    through unchanged and consumes nothing.

    Only ASCII letters are treated as cipher letters. ``str.isalpha()`` is also
    true for characters such as 'é', which have no counterpart in an A-Z-only
    ``plaintext`` and would shift every later letter or run off its end.

    Args:
        ciphertext: The original text, used as the layout template.
        plaintext: Deciphered letters, one per ASCII letter of ``ciphertext``.

    Returns:
        Text with the same length and non-letter characters as ``ciphertext``.
        Lowercase positions receive the lowercased plaintext letter; uppercase
        positions receive the plaintext letter as given.

    Raises:
        IndexError: If ``plaintext`` has fewer characters than ``ciphertext``
            has ASCII letters.
    """
    pieces = []
    next_letter = 0
    for ch in ciphertext:
        if 'a' <= ch <= 'z':
            pieces.append(plaintext[next_letter].lower())
            next_letter += 1
        elif 'A' <= ch <= 'Z':
            pieces.append(plaintext[next_letter])
            next_letter += 1
        else:
            pieces.append(ch)
    # Join once instead of growing a string with += in the loop.
    return ''.join(pieces)
def decrypt_interface(ciphertext, key):
    """Gradio callback: decrypt ``ciphertext`` and apply optional key hints.

    Args:
        ciphertext: Text entered by the user.
        key: Optional known mappings, parsed with ``proability.read_key``
            (format shown to the user: ``a=B c=D``). May be empty.

    Returns:
        The decrypted text, or a human-readable message when ``ciphertext`` is
        empty or ``key`` cannot be parsed.
    """
    if not ciphertext:
        return "Please enter some ciphertext."

    # Validate the key BEFORE the expensive search, so a malformed key is
    # reported immediately instead of after a full decryption run.
    key_dic = None
    if key:
        try:
            key_dic = proability.read_key(key)
        except Exception as e:
            # UI boundary: report the problem to the user rather than crash.
            return f"Error processing key: {e}. Please check the key format (e.g., a=B c=D)."

    plaintext = decrypt_text_internal(ciphertext)
    if key_dic:
        plaintext = output(ciphertext, key_dic, plaintext)
    return plaintext
# Gradio UI: ciphertext plus optional key hints in, plaintext out.
iface = gr.Interface(
    title="Simple Substitution Cipher Decryptor",
    description="An automatic decryption tool for simple substitution ciphers. You can optionally provide known letter mappings to improve accuracy.",
    fn=decrypt_interface,
    inputs=[
        gr.Textbox(
            label="Ciphertext",
            placeholder="Enter the text to decrypt...",
            lines=10,
        ),
        gr.Textbox(
            label="Known Key Mappings (Optional)",
            placeholder="e.g., a=B c=D",
            lines=2,
        ),
    ],
    outputs=gr.Textbox(label="Plaintext", lines=10),
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=8080)

ngram_score.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from math import log10
class ngram_score(object):
    """Score text by the summed log10 probabilities of its n-grams."""

    def __init__(self, ngramfile, sep=' '):
        """Load an n-gram count file and convert the counts to log probabilities.

        Each non-blank line must hold ``<ngram><sep><count>``. Blank lines
        (for example a trailing empty line) are skipped.

        Args:
            ngramfile: Path of the n-gram count file.
            sep: Separator between the n-gram and its count on each line.

        Raises:
            ValueError: If the file contains no n-gram lines, or a line does
                not split into exactly two fields.
        """
        counts = {}
        key = None
        with open(ngramfile, 'r', encoding='utf-8') as file:
            for line in file:
                if not line.strip():
                    continue  # tolerate blank / trailing empty lines
                key, count = line.split(sep)
                counts[key] = int(count)
        if key is None:
            raise ValueError("no n-grams found in {!r}".format(ngramfile))

        self.L = len(key)  # n-gram length, taken from the last line read
        self.N = sum(counts.values())  # total count over all n-grams
        # log10 of each n-gram's relative frequency
        self.ngrams = {
            gram: log10(float(total) / self.N) for gram, total in counts.items()
        }
        # Log probability assigned to n-grams absent from the table.
        self.floor = log10(0.01 / self.N)

    def score(self, text):
        """Return the sum of log probabilities over every n-gram window of ``text``.

        Windows not present in the table contribute ``self.floor``. Text
        shorter than the n-gram length has no windows and scores 0.
        """
        lookup = self.ngrams.get
        floor = self.floor
        size = self.L
        total = 0
        for start in range(len(text) - size + 1):
            # One slice and one dict lookup per window.
            total += lookup(text[start:start + size], floor)
        return total

proability.py ADDED Viewed

	@@ -0,0 +1,125 @@

def notuse(notusing_character):
    """Build the blacklist of separator symbols (periods, semicolons, ...).

    The same symbol may be supplied more than once, so the blacklist is a
    dictionary mapping each symbol to the number of times it was given.
    """
    occurrences = {}
    for symbol in notusing_character:
        occurrences[symbol] = occurrences.get(symbol, 0) + 1
    return occurrences
def read_key(string):
    """Parse a key string such as ``"a=T b=E"`` into a mapping dictionary.

    Args:
        string: Whitespace-separated ``key=value`` pairs.

    Returns:
        Dictionary mapping each key to its value. A later pair overrides an
        earlier one with the same key. An empty string yields ``{}``.

    Raises:
        ValueError: If a pair does not contain exactly one ``=``. The message
            names the offending pair.
    """
    mapping = {}  # named `mapping` so the builtin `dict` is not shadowed
    for pair in string.split():
        key, separator, value = pair.partition('=')
        if not separator or '=' in value:
            raise ValueError(
                "invalid mapping {!r}: expected exactly one '=' (e.g. a=T)".format(pair)
            )
        mapping[key] = value
    return mapping
+# # def count_character(string, blacklist):
+# #     "记录不同字符的概率"
+# #     character_count = {}#存放字符频率的字典
+# #     number1 = 0
+# #     for char in string:
+# #         if (char not in blacklist):#跳过黑名单字符
+# #             if (char in character_count):
+# #                 character_count[char] += 1
+# #             else:
+# #                 character_count[char] = 1
+# #             number1 += 1#计算有效字符的总个数
+# #     character_proability = {}#存放字符概率的字典
+# #     for char, count in character_count.items():
+# #         proability = count / number1
+# #         formatted_proability = "{:.2%}".format(proability)
+# #         character_proability[char] = formatted_proability#计算字符的概率
+#     # return sort_dict(character_proability)
def count_bingary_character(string, blacklist):
    """Compute the relative frequency of each two-character sequence (bigram).

    A bigram is formed from every pair of adjacent characters in which neither
    character is in ``blacklist``. Frequencies are relative to the number of
    such valid bigrams. Previously the denominator also counted pairs rejected
    by the blacklist, which understated every percentage.

    Args:
        string: Text to analyse.
        blacklist: Container of characters that may not take part in a bigram.

    Returns:
        Dictionary mapping each bigram to its frequency formatted as a
        percentage string (``"{:.2%}"``), ordered by ``sort_dict``.
    """
    bigram_counts = {}
    valid_total = 0
    for first, second in zip(string, string[1:]):
        if first in blacklist or second in blacklist:
            continue  # a blacklisted character breaks the pair
        bigram = first + second
        bigram_counts[bigram] = bigram_counts.get(bigram, 0) + 1
        valid_total += 1  # count only the bigrams that were kept

    bigram_percentages = {
        bigram: "{:.2%}".format(count / valid_total)
        for bigram, count in bigram_counts.items()
    }
    return sort_dict(bigram_percentages)
+# def count_ternary_character(string,blacklist):
+#     "记录三元字符的概率"
+#     ternary_character_count = {}#存放三元字符频率的字典
+#     number3=0
+#     # 遍历字符串，提取三元字符并统计频率
+#     for i in range(len(string) - 2):
+#         char1 = string[i]
+#         char2 = string[i + 1]
+#         char3 = string[i + 2]
+#         if (char1 not in blacklist) and (char2 not in blacklist) and (char3 not in blacklist):  # 跳过黑名单字符，若三个字符都不属于黑名单则合成一个三元字符
+#             ternary_character = char1 + char2 +char3
+#             if ternary_character in ternary_character_count:
+#                 ternary_character_count[ternary_character] += 1
+#             else:
+#                 ternary_character_count[ternary_character] = 1
+#         number3+=1#计算有效三元字符的数量
+#     ternary_character_proability = {}#存放三元字符概率的字典
+#     for ternary_character, count in ternary_character_count.items():
+#         proability = count / number3
+#         formatted_proability = "{:.2%}".format(proability)#直接把小数显示为百分数
+#         ternary_character_proability[ternary_character] =formatted_proability#计算三元字符的概率
+#     return sort_dict(ternary_character_proability)
def print_dict(dictionary):
    """Print ``key : value`` for every entry, in the order given by ``sort_dict``."""
    for name, percentage in sort_dict(dictionary).items():
        print(name, ":", percentage)
def sort_dict(percentage_dict):
    """Return a new dict ordered by percentage value, largest first.

    Values are strings such as ``"12.34%"``. Trailing ``%`` characters are
    stripped and the remainder is compared as a float. Entries with equal
    values keep their original relative order.
    """
    def numeric_value(entry):
        # entry is a (key, "NN.NN%") pair
        return float(entry[1].rstrip("%"))

    ranked = sorted(percentage_dict.items(), key=numeric_value, reverse=True)
    return dict(ranked)
+# def read_frequency_file(file_path,frequency_dict):#读入英文字频分布
+#     with open(file_path, 'r') as file:
+#         for line in file:
+#             line = line.strip()
+#             if line:  # 确保不是空行
+#                 letter, frequency = line.split()
+#                 letter = letter.strip()
+#                 frequency = frequency.strip().rstrip('%')
+#                 frequency = float(frequency) / 100.0  # 将百分数转换为浮点数
+#                 formatted_number = "{:.2%}".format(frequency)#直接把小数显示为百分数
+#                 frequency_dict[letter] =formatted_number
+#     return frequency_dict
+# def read_frequency_file1(file_path,frequency_list):#三元组的具体频率没找到只找到概率排序
+#     with open(file_path, 'r') as file:
+#         for line in file:
+#             line = line.strip()
+#             if line:
+#                 frequency_list.append(line)
+#     return frequency_list
+# def print_list(lst):
+#     for index, value in enumerate(lst):
+#         print(f"Index {index}: {value}")

quadgrams.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ pycipher
2	+ gradio