Logo
Overview
[SECCON CTF 14] gyokuto writeup

[SECCON CTF 14] gyokuto writeup

December 14, 2025
5 min read

✨ 바이너리 공짜 다운로드 ✨

TL;DR

Output.bin is a huge raw binary that is actually little-endian int16 samples. It perfectly split into blocks of 200000 bytes = 100000 samples

After recovering the initial xorshift state, simulate it to get mask_bit and decode plain_bit = a3 XOR mask_bit, then pack bits into bytes to obtain the flag.

Analysis

Output format (tone blocks)

The file is little-endian int16 samples split into fixed-size blocks:

  • 1 block = 200000 bytes = 100000 samples

From the block generator (sub_1B0D0), each block is essentially:

x[t]=env(t)0.125cos ⁣(ϕ+2π(a2+1)t20)+noise(t).x[t] = \mathrm{env}(t)\cdot 0.125 \cos\!\left(\phi + 2\pi (a_2+1)\frac{t}{20}\right) + \mathrm{noise}(t).

where t=0..99999t=0..99999 and ϕ=0\phi = 0 if a3=0a_3=0, else ϕ=π\phi=\pi. The key detail is the denominator: the tone is perfectly periodic with period 20, and 100000=500020100000 = 5000\cdot 20.

1. Extracting (a2,a3)(a_2, a_3) from one block

Let x[t]x[t] be one block, and for f{1..8}f\in\{1..8\} define:

cf=t=099999x[t]ej2πft/20.c_f = \sum_{t=0}^{99999} x[t] \cdot e^{-j 2\pi f t/20}.

Since only tmod20t\bmod 20 matters, compress it to 20 sums

  • s[r]=tr (mod 20)x[t]s[r] = \sum_{t\equiv r\ (\mathrm{mod}\ 20)} x[t] for r=0..19r=0..19
  • cf=r=019s[r]ej2πfr/20c_f = \sum_{r=0}^{19} s[r]\cdot e^{-j 2\pi f r/20}

Then

  • a2=argmaxf{1..8}cf1a_2 = \arg\max_{f\in\{1..8\}} |c_f| - 1
  • for the chosen f=a2+1f=a_2+1, the phase is only 00 or π\pi, so it’s a sign flip:
    a3=1a_3 = 1 iff (cf)<0\Re(c_f) < 0

2. PRNG embedding

The PRNG is standard xorshift128 (Marsaglia)

t = x ^ (x<<11);
(x,y,z,w) = (y,z,w, w ^ (w>>19) ^ t ^ (t>>8));

Per block the program consumes the xorshift stream like

  1. Run xorshift 3 times, take w&1 each time, and assemble a2 as 3 bits (MSB\rightarrowLSB).
  2. Run xorshift 1 more time, take mask_bit = w&1.
  3. Output phase bit is a3 = mask_bit XOR plain_bit.

The audio noise generator uses a different RNG state, so it does not advance the xorshift stream used for a2/mask_bit.

3. Recovering the 128-bit initial state (GF(2))

xorshift128 is linear over GF(2). Therefore every observed output bit w&1 after some number of steps is a linear combination of the initial 128-bit state.

From each block we learn 3 output bits via a2, so across enough blocks we collect at least 128 independent equations and solve for the initial state with Gaussian elimination over GF(2).

In practice:

  • Build the 128×\times128 transition matrix of one xorshift step over GF(2) by stepping each basis state bit once.
  • For every extracted a2 bit, add an equation row · state0 = bit.
  • Solve the resulting 128-bit system.

Exploit

  1. Read output.bin as int16 and split into 100000-sample blocks.
  2. For each block, compute the 20-sample folded sums s[r]s[r] and extract (a2,a3) by checking f=1..8f=1..8.
  3. Build the GF(2) linear system from all a2 bits and recover the initial xorshift128 state.
  4. Simulate xorshift128:
    • verify a2 matches for every block (sanity check)
    • compute plain_bit = a3 XOR mask_bit
MASK128 = (1 << 128) - 1
def _xorshift128_step_words(x: int, y: int, z: int, w: int) -> tuple[int, int, int, int]:
t = (x ^ ((x << 11) & 0xFFFFFFFF)) & 0xFFFFFFFF
x, y, z = y, z, w
w = (w ^ (w >> 19) ^ t ^ (t >> 8)) & 0xFFFFFFFF
return x, y, z, w
@dataclass(frozen=True)
class XorShift128Matrix:
rows: list[int] # 128 rows, each 128-bit int
cols: list[int] # 128 cols, each 128-bit int
def build_xorshift128_matrix() -> XorShift128Matrix:
cols: list[int] = []
for bit in range(128):
x = y = z = w = 0
if bit < 32:
x = 1 << bit
elif bit < 64:
y = 1 << (bit - 32)
elif bit < 96:
z = 1 << (bit - 64)
else:
w = 1 << (bit - 96)
nx, ny, nz, nw = _xorshift128_step_words(x, y, z, w)
col = (nx & 0xFFFFFFFF) | ((ny & 0xFFFFFFFF) << 32) | ((nz & 0xFFFFFFFF) << 64) | ((nw & 0xFFFFFFFF) << 96)
cols.append(col)
rows = [0] * 128
for j, col in enumerate(cols):
tmp = col
while tmp:
lsb = tmp & -tmp
i = lsb.bit_length() - 1
rows[i] |= 1 << j
tmp ^= lsb
return XorShift128Matrix(rows=rows, cols=cols)
def row_mul(row: int, mat_rows: list[int]) -> int:
out = 0
tmp = row
while tmp:
lsb = tmp & -tmp
idx = lsb.bit_length() - 1
out ^= mat_rows[idx]
tmp ^= lsb
return out
def vec_mul(mat_cols: list[int], vec: int) -> int:
out = 0
tmp = vec
while tmp:
lsb = tmp & -tmp
idx = lsb.bit_length() - 1
out ^= mat_cols[idx]
tmp ^= lsb
return out
def mat_square_rows(mat_rows: list[int]) -> list[int]:
return [row_mul(r, mat_rows) for r in mat_rows]
def mat_square_cols(mat_cols: list[int]) -> list[int]:
return [vec_mul(mat_cols, c) for c in mat_cols]
def precompute_pows_rows(base_rows: list[int], max_delta: int) -> list[list[int]]:
bits = max(1, max_delta.bit_length())
pows = [base_rows]
for _ in range(1, bits):
pows.append(mat_square_rows(pows[-1]))
return pows
def precompute_pows_cols(base_cols: list[int], max_delta: int) -> list[list[int]]:
bits = max(1, max_delta.bit_length())
pows = [base_cols]
for _ in range(1, bits):
pows.append(mat_square_cols(pows[-1]))
return pows
def apply_pows_row(row: int, delta: int, pows_rows: list[list[int]]) -> int:
bit = 0
tmp = delta
out = row
while tmp:
if tmp & 1:
out = row_mul(out, pows_rows[bit])
tmp >>= 1
bit += 1
return out
def apply_pows_vec(vec: int, delta: int, pows_cols: list[list[int]]) -> int:
bit = 0
tmp = delta
out = vec
while tmp:
if tmp & 1:
out = vec_mul(pows_cols[bit], out)
tmp >>= 1
bit += 1
return out
def extract_a2_a3(path: str) -> tuple[np.ndarray, np.ndarray]:
seg_samples = 100_000
samples = np.fromfile(path, dtype="<i2")
if samples.size % seg_samples != 0:
raise ValueError(f"unexpected file size: {samples.size} samples not divisible by {seg_samples}")
num_segments = samples.size // seg_samples
segs = samples.reshape(num_segments, seg_samples)
sums20 = segs.reshape(num_segments, seg_samples // 20, 20).sum(axis=1, dtype=np.int64)
r = np.arange(20, dtype=np.float64)[:, None]
f = np.arange(1, 9, dtype=np.float64)[None, :]
weights = np.exp(-1j * 2 * np.pi * r * f / 20.0) # (20, 8)
coefs = sums20 @ weights # (segments, 8)
mags = np.abs(coefs)
idx = np.argmax(mags, axis=1).astype(np.int64) # 0..7 => a2
chosen = coefs[np.arange(num_segments), idx]
a3 = (chosen.real < 0).astype(np.int8)
return idx, a3
def build_a2_observations(
a2: np.ndarray,
*,
preamble_segments: int = 8,
noise_steps: int = 0,
) -> list[tuple[int, int]]:
obs: list[tuple[int, int]] = []
step = 0
for seg_idx, v in enumerate(a2.tolist()):
bits = [(v >> 2) & 1, (v >> 1) & 1, v & 1]
for b in bits:
step += 1
obs.append((step, int(b)))
step += 1
step += noise_steps
return obs
def solve_state_from_observations(
obs: list[tuple[int, int]],
*,
matrix: XorShift128Matrix,
preamble_segments: int = 8,
) -> int:
if not obs:
raise ValueError("no observations")
max_delta = obs[0][0]
for (s0, _), (s1, _) in zip(obs, obs[1:]):
max_delta = max(max_delta, s1 - s0)
pows_rows = precompute_pows_rows(matrix.rows, max_delta)
equations: list[tuple[int, int]] = []
current_step = 0
row = 1 << 96 # output is LSB of w after each step
for step, bit in obs:
delta = step - current_step
row = apply_pows_row(row, delta, pows_rows)
current_step = step
equations.append((row, bit))
piv = [0] * 128 # store augmented rows (row | rhs<<128) keyed by pivot bit index
for row, rhs in equations:
r = row & MASK128
b = rhs & 1
while r:
i = r.bit_length() - 1
if piv[i]:
r ^= piv[i] & MASK128
b ^= (piv[i] >> 128) & 1
else:
piv[i] = r | (b << 128)
break
else:
if b:
raise ValueError("inconsistent system (bad extraction or wrong step schedule)")
if any(p == 0 for p in piv):
missing = sum(1 for p in piv if p == 0)
raise ValueError(f"rank deficient: missing {missing} pivots")
sol = 0
for i in range(128):
row_aug = piv[i]
row = row_aug & MASK128
rhs = (row_aug >> 128) & 1
parity = (row & sol).bit_count() & 1
val = rhs ^ parity
if val:
sol |= 1 << i
return sol
def decode_plain_bits(
a2: np.ndarray,
a3: np.ndarray,
*,
state0: int,
matrix: XorShift128Matrix,
preamble_segments: int = 8,
noise_steps: int = 0,
) -> list[int]:
num_segments = a2.size
if a3.size != num_segments:
raise ValueError("a2/a3 length mismatch")
max_delta = int(noise_steps)
pows_cols = precompute_pows_cols(matrix.cols, max_delta)
state = state0 & MASK128
out_bits: list[int] = []
for seg_idx in range(num_segments):
bits = []
for _ in range(3):
state = vec_mul(matrix.cols, state)
bits.append((state >> 96) & 1)
pred_a2 = (bits[0] << 2) | (bits[1] << 1) | bits[2]
if pred_a2 != int(a2[seg_idx]):
raise ValueError(f"a2 mismatch at segment {seg_idx}: got {pred_a2}, expected {int(a2[seg_idx])}")
state = vec_mul(matrix.cols, state)
mask_bit = (state >> 96) & 1
out_bits.append(int(mask_bit ^ int(a3[seg_idx])))
state = apply_pows_vec(state, int(noise_steps), pows_cols)
return out_bits
def bits_to_bytes(bits: list[int], *, msb_first: bool) -> bytes:
if len(bits) % 8 != 0:
raise ValueError("bit length not divisible by 8")
out = bytearray()
for i in range(0, len(bits), 8):
chunk = bits[i : i + 8]
v = 0
if msb_first:
for b in chunk:
v = (v << 1) | (b & 1)
else:
for j, b in enumerate(chunk):
v |= (b & 1) << j
out.append(v)
return bytes(out)

flag