Coverage for src/flag_gems/ops/randn_like.py: 100%
22 statements
« prev ^ index » next coverage.py v7.6.9, created at 2026-03-28 12:23 +0800
1import logging
3import torch
4import triton
6from flag_gems.ops.randn import randn_kernel
7from flag_gems.runtime import torch_device_fn
8from flag_gems.utils.random_utils import philox_backend_seed_offset
# Package-convention module logger (lazy %-style args are used at call sites).
logger = logging.getLogger(__name__)

# Unroll factor shared by the launch-grid and philox-offset arithmetic in
# randn_like; presumably each Triton program instance produces UNROLL values
# per element slot — TODO confirm against randn_kernel's body.
UNROLL = 4
def randn_like(
    x, *, dtype=None, layout=None, device=None, pin_memory=None, memory_format=None
):
    """Return a tensor shaped like ``x`` filled with standard-normal samples.

    Mirrors the ``torch.randn_like`` signature; ``layout``, ``pin_memory``
    and ``memory_format`` are accepted for API compatibility but are not
    referenced in this implementation. When ``device`` / ``dtype`` are not
    supplied, they default to ``x.device.index`` and ``x.dtype``.
    """
    logger.debug("GEMS RANDN_LIKE")
    target_device = x.device.index if device is None else device
    target_dtype = x.dtype if dtype is None else dtype
    out = torch.empty_like(x, device=target_device, dtype=target_dtype)
    numel = x.numel()

    def grid(meta):
        # One program instance covers BLOCK * UNROLL output elements.
        return (triton.cdiv(numel, meta["BLOCK"] * UNROLL),)

    # (TODO) Using Triton autotuner makes kernel parameters opaque to the caller,
    # hence we cannot obtain the per thread offset as in Pytorch.
    increment = triton.cdiv(numel, UNROLL)
    philox_seed, philox_offset = philox_backend_seed_offset(increment)
    with torch_device_fn.device(x.device):
        randn_kernel[grid](out, numel, philox_seed, philox_offset)
    return out