# ============================================================
# CELL 3 — LeakyReLU v4: Pre-dispatch threshold + vectorized
# ============================================================
# Triton-JIT-compiled kernel; going by its name, the forward pass of LeakyReLU.
# Only the signature is visible in this excerpt (the body is elided), so the
# notes below describe the parameters and nothing about the computation.
# NOTE(review): parameter roles are inferred from their names only — confirm
# against the kernel body and the launch site.
@triton.jit
def leaky_relu_fwd_kernel(
x_ptr, out_ptr,  # presumably input / output buffer pointers — TODO confirm
negative_slope,  # presumably the factor applied to negative inputs — TODO confirm
n_elements,  # presumably the total number of elements to process — TODO confirm
BLOCK_SIZE: tl.constexpr,  # annotated tl.constexpr: must be a compile-time constant
):
# function implementation