Coverage for src/flag_gems/runtime/backend/_cambricon/ops/randn_like.py: 0%
22 statements
« prev ^ index » next — coverage.py v7.6.9, created at 2026-03-07 22:33 +0800
1import logging
3import torch
4import triton
6from flag_gems.runtime import torch_device_fn
7from flag_gems.utils.random_utils import philox_backend_seed_offset
9from ..utils import TOTAL_CORE_NUM
10from .randn import randn_kernel
# Module logger nested under the package-wide "flag_gems" logger; lstrip(".")
# guards against a leading dot in __name__ when imported relatively.
logger = logging.getLogger("flag_gems").getChild(__name__.lstrip("."))
# Elements each kernel program consumes per BLOCK step — used only to size
# the launch grid below (see the grid lambda in randn_like).
UNROLL = 4
def randn_like(
    x, *, dtype=None, layout=None, device=None, pin_memory=None, memory_format=None
):
    """Return a tensor of normal random samples with the same shape as ``x``.

    Mirrors the ``torch.randn_like`` signature; ``layout``, ``pin_memory``
    and ``memory_format`` are accepted for compatibility but are not
    forwarded to the allocation (matching the backend convention).

    Args:
        x: reference tensor supplying shape, and default device/dtype.
        dtype: output dtype; falls back to ``x.dtype`` when ``None``.
        device: output device; falls back to ``x.device.index`` when ``None``.

    Returns:
        A new tensor shaped like ``x`` filled by the Philox-based randn kernel.
    """
    logger.debug("GEMS_CAMBRICON RANDN_LIKE")
    # Fall back to the source tensor's device / dtype when unspecified.
    device = x.device.index if device is None else device
    dtype = x.dtype if dtype is None else dtype
    out = torch.empty_like(x, device=device, dtype=dtype)
    N = x.numel()

    def grid(meta):
        # Each program covers BLOCK * UNROLL elements per step; cap the
        # launch at TOTAL_CORE_NUM so programs map onto physical cores.
        return (min(triton.cdiv(N, meta["BLOCK"] * UNROLL), TOTAL_CORE_NUM),)

    # (TODO) Using Triton autotuner makes kernel parameters opaque to the caller,
    # hence we cannot obtain the per thread offset as in Pytorch.
    philox_seed, philox_offset = philox_backend_seed_offset(N)
    with torch_device_fn.device(x.device):
        randn_kernel[grid](
            out, N, philox_seed, philox_offset, num_stages=3, num_warps=1
        )
    return out