Coverage for src/flag_gems/ops/alias_copy.py: 45% (49 statements)


# Generated by KernelGen: https://github.com/flagos-ai/KernelGen

import logging

import torch
import triton
import triton.language as tl

from flag_gems.runtime import torch_device_fn

logger = logging.getLogger(__name__)


@triton.jit
def _alias_copy_kernel(src_ptr, dst_ptr, n_elements, BLOCK_SIZE: tl.constexpr):
    # Each program instance copies one BLOCK_SIZE-wide chunk of the flat buffer.
    pid = tl.program_id(axis=0)
    block_start = pid * BLOCK_SIZE
    offsets = block_start + tl.arange(0, BLOCK_SIZE)
    mask = offsets < n_elements  # guard out-of-range lanes in the last block
    vals = tl.load(src_ptr + offsets, mask=mask)
    tl.store(dst_ptr + offsets, vals, mask=mask)
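
# Launch sketch: with n_elements = 10_000 and BLOCK_SIZE = 1024, the wrappers
# below launch triton.cdiv(10_000, 1024) == 10 program instances; the `mask`
# in the kernel keeps the final, partially filled block in bounds.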



def alias_copy(x: torch.Tensor):
    """
    Wrapper for aten::alias_copy.
    Creates and returns a copy of `x` with identical content.
    """
    logger.debug("GEMS ALIAS_COPY")
    out = torch.empty_like(x)
    n_elements = out.numel()
    if n_elements == 0:
        return out
    # Ensure contiguous memory for an efficient flat linear copy;
    # .contiguous() is a no-op when the tensor is already contiguous.
    src = x.contiguous()
    if not out.is_contiguous():
        out = out.contiguous()
    if src.dtype != out.dtype:
        raise RuntimeError("alias_copy: dtype mismatch between input and output.")
    grid = lambda meta: (triton.cdiv(n_elements, meta["BLOCK_SIZE"]),)
    with torch_device_fn.device(x.device):
        _alias_copy_kernel[grid](src, out, n_elements, BLOCK_SIZE=1024)
    return out
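

# A minimal sanity check for `alias_copy` (illustrative only; assumes a
# Triton-capable device such as "cuda" is available at runtime):
#
#     x = torch.randn(5, 3, device="cuda")
#     y = alias_copy(x)
#     assert y.data_ptr() != x.data_ptr()  # fresh storage, not a view
#     assert torch.equal(y, x)             # identical contents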



def alias_copy_out(x: torch.Tensor, out: torch.Tensor):
    """
    Wrapper for aten::alias_copy.out.
    Copies `x` into `out` and returns `out`.
    """
    logger.debug("GEMS ALIAS_COPY_OUT")
    if x.dtype != out.dtype:
        raise RuntimeError("alias_copy_out: dtype of input and output must match.")
    if x.numel() != out.numel():
        raise RuntimeError(
            "alias_copy_out: input and output must have the same number of elements."
        )
    if x.device != out.device:
        raise RuntimeError(
            "alias_copy_out: input and output must be on the same device."
        )
    if not out.is_contiguous():
        raise RuntimeError("alias_copy_out: output tensor must be contiguous.")
    src = x.contiguous()  # no-op if `x` is already contiguous
    n_elements = out.numel()
    if n_elements == 0:
        return out
    grid = lambda meta: (triton.cdiv(n_elements, meta["BLOCK_SIZE"]),)
    with torch_device_fn.device(x.device):
        _alias_copy_kernel[grid](src, out, n_elements, BLOCK_SIZE=1024)
    return out
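

# Example usage of `alias_copy_out` (a sketch; the device string is an
# assumption and should match the deployment target):
#
#     x = torch.arange(6, device="cuda", dtype=torch.float32)
#     out = torch.empty_like(x)
#     alias_copy_out(x, out)
#     assert torch.equal(out, x)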