Buckets:
| """Decrypt the LoHoSearch dataset CSV. | |
| The encrypted CSV must have three columns: question, answer, canary. | |
| Each question/answer is base64(XOR(plaintext, SHA256-derived keystream)). | |
| Decrypts every row's question and answer and writes them to an output CSV. | |
| """ | |
| import argparse | |
| import base64 | |
| import csv | |
| import hashlib | |
| import sys | |
# Columns whose cells are decrypted by main() and written to the output CSV.
ENCRYPTED_COLUMNS = ["question", "answer"]
def derive_key(password: str, length: int) -> bytes:
    """Build a ``length``-byte XOR keystream from ``password``.

    The encoded password is hashed once with SHA-256; the digest is then
    repeated end to end and the last repetition is truncated so that the
    result is exactly ``length`` bytes long.
    """
    digest = hashlib.sha256(password.encode()).digest()
    # Whole copies of the digest, plus however many bytes are left over.
    full_repeats, remainder = divmod(length, len(digest))
    return digest * full_repeats + digest[:remainder]
def decrypt(ciphertext_b64: str, password: str) -> str:
    """Recover the plaintext of one base64-encoded, XOR-encrypted value.

    The input is base64-decoded, XORed byte by byte against a keystream of
    the same length derived from ``password``, and the resulting bytes are
    decoded to ``str``.
    """
    raw = base64.b64decode(ciphertext_b64)
    keystream = derive_key(password, len(raw))
    # XOR is its own inverse, so applying the keystream again yields plaintext.
    plain = bytes(map(int.__xor__, raw, keystream))
    return plain.decode()
def read_rows(path: str):
    """Load an entire UTF-8 CSV file into memory.

    Returns a ``(rows, fieldnames)`` pair. ``rows`` is a list holding one
    dict per data row, keyed by the header names; ``fieldnames`` is the list
    of header names, or an empty list when the file has no header.
    """
    with open(path, newline="", encoding="utf-8") as handle:
        table = csv.DictReader(handle)
        rows = list(table)
        header = table.fieldnames
    if not header:
        header = []
    return rows, header
def _raise_csv_field_limit() -> None:
    """Lift the csv module's per-field size cap as high as the platform allows."""
    limit = sys.maxsize
    while True:
        try:
            csv.field_size_limit(limit)
            return
        except OverflowError:
            # The limit is stored in a C long, which is narrower than
            # sys.maxsize on some platforms (e.g. Windows); back off until
            # the value fits.
            limit //= 2


def main() -> None:
    """Command-line entry point: decrypt an encrypted CSV into a plain one.

    Reads the CSV named by the positional ``encrypted`` argument, checks that
    it has the ``question``, ``answer`` and ``canary`` columns, decrypts each
    row's question and answer using that row's canary as the password, and
    writes the result to ``--output`` (default ``LoHoSearch_decrypted.csv``).

    Raises:
        SystemExit: If a required column is missing from the header, or a
            row is too short to provide a value for a required column.
    """
    parser = argparse.ArgumentParser(description="Decrypt the LoHoSearch dataset CSV.")
    parser.add_argument("encrypted", help="Encrypted CSV with question, answer, canary columns.")
    parser.add_argument(
        "--output",
        default="LoHoSearch_decrypted.csv",
        help="Path to write the decrypted rows (question, answer).",
    )
    args = parser.parse_args()

    # Encrypted cells can be far larger than the csv module's default cap.
    _raise_csv_field_limit()

    enc_rows, enc_fields = read_rows(args.encrypted)
    required = [*ENCRYPTED_COLUMNS, "canary"]
    for col in required:
        if col not in enc_fields:
            raise SystemExit(f"Encrypted CSV is missing column: {col}")

    decrypted_rows = []
    for index, enc in enumerate(enc_rows, start=1):
        # csv.DictReader fills cells missing from a short row with None,
        # which would otherwise fail deep inside decrypt() with no context.
        absent = [col for col in required if enc[col] is None]
        if absent:
            raise SystemExit(
                f"Encrypted CSV row {index} has no value for: {', '.join(absent)}"
            )
        canary = enc["canary"]
        decrypted_rows.append(
            {col: decrypt(enc[col], canary) for col in ENCRYPTED_COLUMNS}
        )

    with open(args.output, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=ENCRYPTED_COLUMNS)
        writer.writeheader()
        writer.writerows(decrypted_rows)
    print(f"Wrote {len(decrypted_rows)} decrypted rows -> {args.output}")
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
Xet Storage Details
- Size:
- 2.29 kB
- Xet hash:
- d305d425a57acd112ee6e63b45ac1197c5567e28d43bceaf6d27a6d383a9461e
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.