Coverage for src/flag_gems/runtime/backend/_mthreads/ops/randn_like.py: 0%

22 statements  

coverage.py v7.6.9, created at 2026-03-11 02:28 +0800

import logging

import torch
import triton

from flag_gems.runtime import torch_device_fn
from flag_gems.utils.random_utils import philox_backend_seed_offset

from .randn import randn_kernel

logger = logging.getLogger(
    f'flag_gems.runtime.backend._mthreads.ops.{__name__.split(".")[-1]}'
)
# Each Philox state yields UNROLL random values, so one program instance
# covers BLOCK * UNROLL output elements.
UNROLL = 4


def randn_like(
    x, *, dtype=None, layout=None, device=None, pin_memory=None, memory_format=None
):
    logger.debug("GEMS_MTHREADS RANDN_LIKE")
    # Default to the input tensor's device and dtype.
    if device is None:
        device = x.device.index
    if dtype is None:
        dtype = x.dtype
    out = torch.empty_like(x, device=device, dtype=dtype)
    N = x.numel()
    # Launch one program per BLOCK * UNROLL elements of the output.
    grid_fn = lambda meta: (triton.cdiv(N, meta["BLOCK"] * UNROLL),)
    # TODO: Using the Triton autotuner makes kernel parameters opaque to the
    # caller, so we cannot obtain the per-thread offset as in PyTorch.
    # Reserve enough Philox states to cover all N outputs.
    increment = triton.cdiv(N, UNROLL)
    philox_seed, philox_offset = philox_backend_seed_offset(increment)
    with torch_device_fn.device(x.device):
        randn_kernel[grid_fn](out, N, philox_seed, philox_offset)
    return out
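
A minimal sketch of exercising this entry point, assuming flag_gems is installed with the mthreads backend and a compatible device is visible. flag_gems.use_gems() is the library's patching context manager; the "musa" device string is an assumption about the torch_musa integration.

import torch

import flag_gems

# Hypothetical device string; adjust to however torch exposes the MTHREADS GPU.
x = torch.empty(1024, 1024, device="musa", dtype=torch.float16)
with flag_gems.use_gems():
    # torch.randn_like is patched to dispatch to the backend's randn_like;
    # the output preserves x's shape, dtype, and device.
    y = torch.randn_like(x)
assert y.shape == x.shape and y.dtype == x.dtype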