llc1231's picture
download
raw
2.29 kB
"""Decrypt the LoHoSearch dataset CSV.
The encrypted CSV must have three columns: question, answer, canary.
Each question/answer is base64(XOR(plaintext, keystream)), where the keystream is
the SHA-256 digest of that row's canary value, repeated to the length of the data.
Decrypts every row's question and answer and writes them to an output CSV.
"""
import argparse
import base64
import csv
import hashlib
import sys
# Names of the CSV columns that main() decrypts and writes to the output file.
ENCRYPTED_COLUMNS = ["question", "answer"]
def derive_key(password: str, length: int) -> bytes:
    """Derive a ``length``-byte key by repeating SHA-256(password).

    The password is encoded with the default codec (UTF-8) and hashed once;
    the 32-byte digest is then tiled, with the final repetition truncated,
    so the result is exactly ``length`` bytes long.

    Args:
        password: Text to hash into key material.
        length: Number of key bytes required; must be non-negative.

    Returns:
        Exactly ``length`` bytes of key material (``b""`` when ``length``
        is 0).

    Raises:
        ValueError: If ``length`` is negative.
    """
    if length < 0:
        # Floor division and modulo on a negative length would otherwise
        # produce a non-empty key of an unrelated size instead of failing.
        raise ValueError(f"length must be non-negative, got {length}")
    digest = hashlib.sha256(password.encode()).digest()
    repeats, remainder = divmod(length, len(digest))
    return digest * repeats + digest[:remainder]
def decrypt(ciphertext_b64: str, password: str) -> str:
    """Recover the plaintext of one base64-encoded, XOR-encrypted value.

    The input is base64-decoded, XORed byte-for-byte against a key of the
    same length obtained from ``derive_key(password, ...)``, and the result
    is decoded to text with the default codec (UTF-8).

    Args:
        ciphertext_b64: Base64 text of the encrypted bytes.
        password: Password handed to ``derive_key`` to build the key.

    Returns:
        The decrypted text.
    """
    raw = base64.b64decode(ciphertext_b64)
    keystream = derive_key(password, len(raw))
    # XOR is its own inverse, so the encrypting operation also decrypts.
    plain = bytes([cipher_byte ^ key_byte for cipher_byte, key_byte in zip(raw, keystream)])
    return plain.decode()
def read_rows(path: str):
    """Load every data row of a CSV file together with its header names.

    The file is opened as ``utf-8-sig`` so that a leading byte-order mark is
    stripped rather than becoming part of the first column name; files
    without a BOM are read exactly as plain UTF-8.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A ``(rows, fieldnames)`` tuple. ``rows`` is a list holding one dict
        per data row, keyed by column name. ``fieldnames`` is the list of
        header names, or ``[]`` when the file has no header line.
    """
    with open(path, newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        rows = list(reader)
        # fieldnames is None for a completely empty file; normalize to [].
        return rows, (reader.fieldnames or [])
def _raise_csv_field_size_limit() -> None:
    """Lift the csv module's per-field size cap as high as the platform allows."""
    limit = sys.maxsize
    while True:
        try:
            csv.field_size_limit(limit)
        except OverflowError:
            # The limit must fit in a C long, which is narrower than
            # sys.maxsize on some platforms; back off until it is accepted.
            limit //= 2
        else:
            return


def main() -> None:
    """Command-line entry point: decrypt a dataset CSV and write the result.

    Reads the CSV named by the positional ``encrypted`` argument, checks that
    the question, answer and canary columns are present, decrypts the
    question and answer of every row using that row's canary as the
    password, and writes the plaintext rows to ``--output``.

    Raises:
        SystemExit: If a required column is absent from the header, a row
            lacks a value for a required column, or a value cannot be
            decrypted.
    """
    parser = argparse.ArgumentParser(description="Decrypt the LoHoSearch dataset CSV.")
    parser.add_argument("encrypted", help="Encrypted CSV with question, answer, canary columns.")
    parser.add_argument(
        "--output",
        default="LoHoSearch_decrypted.csv",
        help="Path to write the decrypted rows (question, answer).",
    )
    args = parser.parse_args()

    # Encrypted cells can exceed the csv module's default field size limit.
    _raise_csv_field_size_limit()
    enc_rows, enc_fields = read_rows(args.encrypted)

    required_columns = ENCRYPTED_COLUMNS + ["canary"]
    for col in required_columns:
        if col not in enc_fields:
            raise SystemExit(f"Encrypted CSV is missing column: {col}")

    decrypted_rows = []
    for row_number, enc in enumerate(enc_rows, start=1):
        # DictReader fills the trailing columns of a short row with None.
        absent = [col for col in required_columns if enc[col] is None]
        if absent:
            raise SystemExit(
                f"Data row {row_number} has no value for column(s): {', '.join(absent)}"
            )
        canary = enc["canary"]
        plain_row = {}
        for col in ENCRYPTED_COLUMNS:
            try:
                plain_row[col] = decrypt(enc[col], canary)
            except ValueError as err:
                # Invalid base64 and undecodable plaintext both surface as
                # ValueError subclasses; report which cell was at fault.
                raise SystemExit(
                    f"Data row {row_number}: cannot decrypt column {col!r}: {err}"
                ) from err
        decrypted_rows.append(plain_row)

    with open(args.output, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=ENCRYPTED_COLUMNS)
        writer.writeheader()
        writer.writerows(decrypted_rows)
    print(f"Wrote {len(decrypted_rows)} decrypted rows -> {args.output}")


if __name__ == "__main__":
    main()

Xet Storage Details

Size:
2.29 kB
·
Xet hash:
d305d425a57acd112ee6e63b45ac1197c5567e28d43bceaf6d27a6d383a9461e

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.