TL;DR
Output.bin is a huge raw binary that is actually little-endian int16 samples. It perfectly split into blocks of 200000 bytes = 100000 samples
After recovering the initial xorshift state, simulate it to get mask_bit and decode plain_bit = a3 XOR mask_bit, then pack bits into bytes to obtain the flag.
Analysis
Output format (tone blocks)
The file is little-endian int16 samples split into fixed-size blocks:
- 1 block = 200000 bytes = 100000 samples
From the block generator (sub_1B0D0), each block is essentially:
where and if , else . The key detail is the denominator: the tone is perfectly periodic with period 20, and .
1. Extracting from one block
Let be one block, and for define:
Since only matters, compress it to 20 sums
- for
Then
- for the chosen , the phase is only or , so it’s a sign flip:
iff
2. PRNG embedding
The PRNG is standard xorshift128 (Marsaglia)
t = x ^ (x<<11);(x,y,z,w) = (y,z,w, w ^ (w>>19) ^ t ^ (t>>8));Per block the program consumes the xorshift stream like
- Run xorshift 3 times, take
w&1each time, and assemblea2as 3 bits (MSBLSB). - Run xorshift 1 more time, take
mask_bit = w&1. - Output phase bit is
a3 = mask_bit XOR plain_bit.
The audio noise generator uses a different RNG state, so it does not advance the xorshift stream used for a2/mask_bit.
3. Recovering the 128-bit initial state (GF(2))
xorshift128 is linear over GF(2). Therefore every observed output bit w&1 after some number of steps is a linear combination of the initial 128-bit state.
From each block we learn 3 output bits via a2, so across enough blocks we collect at least 128 independent equations and solve for the initial state with Gaussian elimination over GF(2).
In practice:
- Build the 128128 transition matrix of one xorshift step over GF(2) by stepping each basis state bit once.
- For every extracted
a2bit, add an equationrow · state0 = bit. - Solve the resulting 128-bit system.
Exploit
- Read output.bin as
int16and split into 100000-sample blocks. - For each block, compute the 20-sample folded sums and extract
(a2,a3)by checking . - Build the GF(2) linear system from all
a2bits and recover the initial xorshift128 state. - Simulate xorshift128:
- verify
a2matches for every block (sanity check) - compute
plain_bit = a3 XOR mask_bit
- verify
MASK128 = (1 << 128) - 1
def _xorshift128_step_words(x: int, y: int, z: int, w: int) -> tuple[int, int, int, int]: t = (x ^ ((x << 11) & 0xFFFFFFFF)) & 0xFFFFFFFF x, y, z = y, z, w w = (w ^ (w >> 19) ^ t ^ (t >> 8)) & 0xFFFFFFFF return x, y, z, w
@dataclass(frozen=True)class XorShift128Matrix: rows: list[int] # 128 rows, each 128-bit int cols: list[int] # 128 cols, each 128-bit int
def build_xorshift128_matrix() -> XorShift128Matrix: cols: list[int] = [] for bit in range(128): x = y = z = w = 0 if bit < 32: x = 1 << bit elif bit < 64: y = 1 << (bit - 32) elif bit < 96: z = 1 << (bit - 64) else: w = 1 << (bit - 96)
nx, ny, nz, nw = _xorshift128_step_words(x, y, z, w) col = (nx & 0xFFFFFFFF) | ((ny & 0xFFFFFFFF) << 32) | ((nz & 0xFFFFFFFF) << 64) | ((nw & 0xFFFFFFFF) << 96) cols.append(col)
rows = [0] * 128 for j, col in enumerate(cols): tmp = col while tmp: lsb = tmp & -tmp i = lsb.bit_length() - 1 rows[i] |= 1 << j tmp ^= lsb
return XorShift128Matrix(rows=rows, cols=cols)
def row_mul(row: int, mat_rows: list[int]) -> int: out = 0 tmp = row while tmp: lsb = tmp & -tmp idx = lsb.bit_length() - 1 out ^= mat_rows[idx] tmp ^= lsb return out
def vec_mul(mat_cols: list[int], vec: int) -> int: out = 0 tmp = vec while tmp: lsb = tmp & -tmp idx = lsb.bit_length() - 1 out ^= mat_cols[idx] tmp ^= lsb return out
def mat_square_rows(mat_rows: list[int]) -> list[int]: return [row_mul(r, mat_rows) for r in mat_rows]
def mat_square_cols(mat_cols: list[int]) -> list[int]: return [vec_mul(mat_cols, c) for c in mat_cols]
def precompute_pows_rows(base_rows: list[int], max_delta: int) -> list[list[int]]: bits = max(1, max_delta.bit_length()) pows = [base_rows] for _ in range(1, bits): pows.append(mat_square_rows(pows[-1])) return pows
def precompute_pows_cols(base_cols: list[int], max_delta: int) -> list[list[int]]: bits = max(1, max_delta.bit_length()) pows = [base_cols] for _ in range(1, bits): pows.append(mat_square_cols(pows[-1])) return pows
def apply_pows_row(row: int, delta: int, pows_rows: list[list[int]]) -> int: bit = 0 tmp = delta out = row while tmp: if tmp & 1: out = row_mul(out, pows_rows[bit]) tmp >>= 1 bit += 1 return out
def apply_pows_vec(vec: int, delta: int, pows_cols: list[list[int]]) -> int: bit = 0 tmp = delta out = vec while tmp: if tmp & 1: out = vec_mul(pows_cols[bit], out) tmp >>= 1 bit += 1 return out
def extract_a2_a3(path: str) -> tuple[np.ndarray, np.ndarray]: seg_samples = 100_000 samples = np.fromfile(path, dtype="<i2") if samples.size % seg_samples != 0: raise ValueError(f"unexpected file size: {samples.size} samples not divisible by {seg_samples}") num_segments = samples.size // seg_samples
segs = samples.reshape(num_segments, seg_samples) sums20 = segs.reshape(num_segments, seg_samples // 20, 20).sum(axis=1, dtype=np.int64)
r = np.arange(20, dtype=np.float64)[:, None] f = np.arange(1, 9, dtype=np.float64)[None, :] weights = np.exp(-1j * 2 * np.pi * r * f / 20.0) # (20, 8)
coefs = sums20 @ weights # (segments, 8) mags = np.abs(coefs) idx = np.argmax(mags, axis=1).astype(np.int64) # 0..7 => a2 chosen = coefs[np.arange(num_segments), idx] a3 = (chosen.real < 0).astype(np.int8) return idx, a3
def build_a2_observations( a2: np.ndarray, *, preamble_segments: int = 8, noise_steps: int = 0,) -> list[tuple[int, int]]: obs: list[tuple[int, int]] = [] step = 0 for seg_idx, v in enumerate(a2.tolist()): bits = [(v >> 2) & 1, (v >> 1) & 1, v & 1] for b in bits: step += 1 obs.append((step, int(b))) step += 1 step += noise_steps return obs
def solve_state_from_observations( obs: list[tuple[int, int]], *, matrix: XorShift128Matrix, preamble_segments: int = 8,) -> int: if not obs: raise ValueError("no observations")
max_delta = obs[0][0] for (s0, _), (s1, _) in zip(obs, obs[1:]): max_delta = max(max_delta, s1 - s0)
pows_rows = precompute_pows_rows(matrix.rows, max_delta)
equations: list[tuple[int, int]] = [] current_step = 0 row = 1 << 96 # output is LSB of w after each step for step, bit in obs: delta = step - current_step row = apply_pows_row(row, delta, pows_rows) current_step = step equations.append((row, bit))
piv = [0] * 128 # store augmented rows (row | rhs<<128) keyed by pivot bit index for row, rhs in equations: r = row & MASK128 b = rhs & 1 while r: i = r.bit_length() - 1 if piv[i]: r ^= piv[i] & MASK128 b ^= (piv[i] >> 128) & 1 else: piv[i] = r | (b << 128) break else: if b: raise ValueError("inconsistent system (bad extraction or wrong step schedule)")
if any(p == 0 for p in piv): missing = sum(1 for p in piv if p == 0) raise ValueError(f"rank deficient: missing {missing} pivots")
sol = 0 for i in range(128): row_aug = piv[i] row = row_aug & MASK128 rhs = (row_aug >> 128) & 1 parity = (row & sol).bit_count() & 1 val = rhs ^ parity if val: sol |= 1 << i
return sol
def decode_plain_bits( a2: np.ndarray, a3: np.ndarray, *, state0: int, matrix: XorShift128Matrix, preamble_segments: int = 8, noise_steps: int = 0,) -> list[int]: num_segments = a2.size if a3.size != num_segments: raise ValueError("a2/a3 length mismatch") max_delta = int(noise_steps) pows_cols = precompute_pows_cols(matrix.cols, max_delta)
state = state0 & MASK128 out_bits: list[int] = [] for seg_idx in range(num_segments): bits = [] for _ in range(3): state = vec_mul(matrix.cols, state) bits.append((state >> 96) & 1) pred_a2 = (bits[0] << 2) | (bits[1] << 1) | bits[2] if pred_a2 != int(a2[seg_idx]): raise ValueError(f"a2 mismatch at segment {seg_idx}: got {pred_a2}, expected {int(a2[seg_idx])}")
state = vec_mul(matrix.cols, state) mask_bit = (state >> 96) & 1 out_bits.append(int(mask_bit ^ int(a3[seg_idx])))
state = apply_pows_vec(state, int(noise_steps), pows_cols)
return out_bits
def bits_to_bytes(bits: list[int], *, msb_first: bool) -> bytes: if len(bits) % 8 != 0: raise ValueError("bit length not divisible by 8") out = bytearray() for i in range(0, len(bits), 8): chunk = bits[i : i + 8] v = 0 if msb_first: for b in chunk: v = (v << 1) | (b & 1) else: for j, b in enumerate(chunk): v |= (b & 1) << j out.append(v) return bytes(out)