This page includes my solutions to the Cryptopals Crypto Challenges. These are small problems that build upon each other in order to learn about and attack progressively more complex cryptographic constructions. The challenges are divided into 8 sets, made up of 8 challenges each.
1 - Convert hex to base64
The first challenge asks us to convert a hex encoded buffer into a Base64 encoded one. While Python has built-in modules to perform these operations, I’ve implemented them below in order to get familiar with how they work.
Hex decoder
To turn a binary buffer into a printable string, you can encode it as a hexadecimal number. Each byte will become two hex digits (two characters from 0 to f).
hex_digits = "0123456789abcdef" def split_chunks(l, n): ch = [] for x in l: ch.append(x) if len(ch) == n: yield ch ch = [] if ch: yield ch def hex_decode(text): lookup = {y:x for x, y in enumerate(hex_digits)} res = [] for first, second in split_chunks(text, 2): first = lookup[first] second = lookup[second] byte = first << 4 | second res.append(byte) return bytes(res) hex_decode("48656c6c6f")
b'Hello'
Another way decode hex-encoded values is to use a lookup table. Here’s how it looks in practice.
hex_lookup = {} # Construct lookup table i = 0 for x in hex_digits: for y in hex_digits: hex_lookup[x+y] = i i += 1 def hex_decode(text): pairs = split_chunks(text, 2) res = [hex_lookup[x+y] for x, y in pairs] return bytes(res) hex_decode("48656c6c6f")
b'Hello'
Base64 encoder
import string values = string.ascii_uppercase + string.ascii_lowercase + string.digits + "+/" def binary_digits(data): for byte in data: for i in range(8)[::-1]: i = (byte >> i) & 1 yield str(int(i)) def groups(data, n): group = [] for x in data: group.append(x) if len(group) == n: yield "".join(group) group = [] if group: yield "".join(group) def b64_groups(data): digits = binary_digits(data) for num in groups(digits, 6): num = int(num, 2) yield values[num] def base64_encode(data): res = "" for b64 in groups(b64_groups(data), 4): res += b64 for _ in range(4 - len(b64)): res += '=' return res
hex_input = "49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d" expected = "SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t" buffer = hex_decode(hex_input) base64_encode(buffer) base64_encode(buffer) == expected
'SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t'
True
Let’s try to make a base64 decoder as well. The logic should be mostly similar.
def b64_to_binary(text): for c in text: x = values.find(c) if x != -1: for i in range(6)[::-1]: bit = (x >> i) & 1 yield str(int(bit)) def base64_decode(text): res = [] for gr in groups(b64_to_binary(text), 8): if len(gr) != 8: break res.append(int(gr, 2)) return bytes(res) base64_decode("VGVzdCBtZXNzYWdlCg==")
b'Test message\n'
2 - Fixed XOR
This challenge asks us to produce the XOR of two equal-length buffers.
buffer1 = hex_decode("1c0111001f010100061a024b53535009181c") buffer2 = hex_decode("686974207468652062756c6c277320657965") def xor_buffers(buf1, buf2): result = [x ^ y for x, y in zip(buf1, buf2)] return bytes(result) xor_buffers(buffer1, buffer2).hex()
'746865206b696420646f6e277420706c6179'
3 - Single-byte XOR cipher
https://en.wikipedia.org/wiki/Letter_frequency
def single_xor(ciphertext, key): plain = [x ^ key for x in ciphertext] return bytes(plain) single_xor(b'Test message', 23) single_xor(b'Crdc7zrddvpr', 23)
b'Crdc7zrddvpr'
b'Test message'
Since the key is only a single byte, it is possible to iterate through all the options and pick the correct one with your eye. But it is useful to automate this, and this automation will come in handy in more complex scenarios.
We will make our program try all possible keys, and pick the key that results in the most English-looking decryption.
def english_score(data): s = 0 data = data.lower() common = b"etaoin shrdlu"[::-1] for c in data: if chr(c) not in string.printable: return 0 i = common.find(c) if i != -1: s += i return s
ciphertext = hex_decode("1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736") key = max(range(255), key=lambda x: english_score(single_xor(ciphertext, x))) key
88
single_xor(ciphertext, key)
b"Cooking MC's like a pound of bacon"
Challenge 4
import requests URL = 'https://cryptopals.com/static/challenge-data/4.txt' data = requests.get(URL).text.split("\n") data = list(map(hex_decode, data))
candidates = [] for ciphertext in data: for key in range(255): plain = single_xor(ciphertext, key) candidates.append(plain) print(max(candidates, key=english_score))
b'Now that the party is jumping\n'
Challenge 5
data = b"Burning 'em, if you ain't quick and nimble\nI go crazy when I hear a cymbal" key = b"ICE" def repeating_xor(data, key): res = [] for i, c in enumerate(data): k = key[i % len(key)] res.append(c ^ k) return bytes(res) encrypted = repeating_xor(data, key).hex() for gr in groups(encrypted, 25): print(gr)
0b3637272a2b2e63622c2e696 92a23693a2a3c6324202d623d 63343c2a26226324272765272 a282b2f20430a652e2c652a31 24333a653e2b2027630c692b2 0283165286326302e27282f
6 - Break repeating-key XOR
def edit_dist(buf1, buf2): bin1 = binary_digits(buf1) bin2 = binary_digits(buf2) dist = 0 for b1, b2 in zip(bin1, bin2): if b1 != b2: dist += 1 return dist edit_dist(b"this is a test", b"wokka wokka!!!") == 37
True
URL = 'https://cryptopals.com/static/challenge-data/6.txt' text = requests.get(URL).text data = base64_decode(text) len(data)
2876
def keysize_edit_distance(ciphertext, keysize): prev = None diff = 0 n = 0 for i in range(0, len(data), keysize): chunk = data[i:i+keysize] if prev: diff += edit_dist(chunk, prev) / keysize n += 1 prev = chunk diff /= n return diff keysize = min(range(2, 40), key=lambda x: keysize_edit_distance(data, x)) keysize
29
key = [] blocks = [data[i:i+keysize] for i in range(0, len(data), keysize)] for key_i in range(keysize): chunk = b"" for bl in blocks: if key_i < len(bl): chunk += bytes([bl[key_i]]) k = max(range(255), key=lambda x: english_score(single_xor(chunk, x))) key.append(k)
If we’re lucky, we should have the correct key now. Let’s see if the key was human-readable.
bytes(key).decode('ascii')
'Terminator X: Bring the noise'
Looks like it was. Just to be sure we got the right answer, let’s try to decode the data.
plaintext = repeating_xor(data, bytes(key)).decode('ascii') print(plaintext[:150])
I'm back and I'm ringin' the bell A rockin' on the mike while the fly girls yell In ecstasy in the back of me Well that's my DJ Deshay cuttin' all
7 - AES in ECB mode
URL = 'https://cryptopals.com/static/challenge-data/7.txt' ciphertext = requests.get(URL).text ciphertext = base64_decode(ciphertext)
from Crypto.Cipher import AES def aes_ecb_encrypt(key, block): aes = AES.new(key, AES.MODE_ECB) return aes.encrypt(block) def aes_ecb_decrypt(key, block): aes = AES.new(key, AES.MODE_ECB) return aes.decrypt(block) for i in range(0, len(ciphertext), 16): chunk = ciphertext[i:i+16] chunk = aes_ecb_decrypt(b"YELLOW SUBMARINE", chunk) chunk = chunk.decode('ascii') print(chunk, end="") if i > 150: break
I'm back and I'm ringin' the bell A rockin' on the mike while the fly girls yell In ecstasy in the back of me Well that's my DJ Deshay cuttin' all them Z's Hittin' hard and
8 - Detect AES in ECB mode
URL = 'https://cryptopals.com/static/challenge-data/8.txt' ciphertexts = requests.get(URL).text.split("\n") len(ciphertexts)
205
for num, ciphertext in enumerate(ciphertexts): data = hex_decode(ciphertext) chunks = [data[i:i+16] for i in range(0, len(data), 16)] for i, ch in enumerate(chunks): count = chunks.count(ch) if count != 1: print(num, i, count)
132 1 4 132 3 4 132 5 4 132 7 4
9 - Implement PKCS#7 padding
def pkcs7(data, block_size): for block in split_chunks(data, block_size): last = len(block) diff = block_size - len(block) while len(block) != block_size: block.append(diff) yield from block if last == block_size: yield from [block_size] * block_size bytes(pkcs7(b"YELLOW SUBMARINE", 20))
b'YELLOW SUBMARINE\x04\x04\x04\x04'
10 - Implement CBC mode
15 - PKCS#7 padding validation
def pkcs7_unpad(data): padbyte = data[-1] if padbyte > len(data): return None padding = data[padbyte:] if padding.count(padbyte) != padbyte: return None return data[:-padbyte]
valid = [ b"ICE ICE BABY\x04\x04\x04\x04", b"NICE ICE BABY\x03\x03\x03", ] invalid = [ b"ICE ICE BABY\x05\x05\x05\x05", b"ICE ICE BABY\x01\x02\x03\x04", ] for buf in valid + invalid: print(str(buf).ljust(31), "=>", pkcs7_unpad(buf))
b'ICE ICE BABY\x04\x04\x04\x04' => b'ICE ICE BABY' b'NICE ICE BABY\x03\x03\x03' => b'NICE ICE BABY' b'ICE ICE BABY\x05\x05\x05\x05' => None b'ICE ICE BABY\x01\x02\x03\x04' => None