Coverage for src/flag_gems/ops/flip.py: 97%

1import logging

3import torch

4import triton

6from flag_gems.utils import pointwise_dynamic

7from flag_gems.utils.tensor_wrapper import StridedBuffer

9logger = logging.getLogger(__name__)

# Element-wise identity: the result for each element is the input element
# itself. `flip` below feeds this op a reversed-stride view of its input, so
# the "copy" is what materializes the flipped data into the output tensor.
# NOTE(review): `pointwise_dynamic` is a project helper; presumably
# `is_tensor=[True]` declares the single argument as a tensor and
# `promotion_methods=[(0, "DEFAULT")]` derives the output dtype from
# argument 0 -- confirm against flag_gems.utils.
@pointwise_dynamic(is_tensor=[True], promotion_methods=[(0, "DEFAULT")])
@triton.jit
def copy_func(x):
    # Pass the value through unchanged.
    return x

def flip(A: torch.Tensor, dims) -> torch.Tensor:
    """Return a new tensor equal to ``A`` reversed along every dim in ``dims``.

    The reversal is expressed as a view of ``A`` whose strides are negated
    along the flipped dimensions; that view is then copied element-wise into
    a freshly allocated output with ``copy_func``.

    Args:
        A: Input tensor.
        dims: Iterable of integer dimension indices to reverse. Negative
            indices count from the last dimension. A 0-dim tensor accepts
            ``0`` and ``-1``, matching ``torch.flip``.

    Returns:
        A new tensor shaped like ``A``. When nothing actually needs to be
        reversed (no dims given, only size-1 or stride-0 dims selected, or
        ``A`` has at most one element) this is simply ``A.clone()``.

    Raises:
        AssertionError: If a dim is out of range or listed more than once.
    """
    logger.debug("GEMS FLIP")

    # A 0-dim tensor is validated as if it had one dimension, so that dims 0
    # and -1 are accepted for scalars instead of every dim being rejected.
    rank = max(A.dim(), 1)
    flip_dims_b = [False] * rank
    for dim in dims:
        assert (
            -rank <= dim < rank
        ), "Dimension out of range (expected to be in range of [{}, {}], but got {})".format(
            -rank, rank - 1, dim
        )
        # Negative dims index the same slot as their positive counterpart,
        # so e.g. (-1, ndim - 1) is correctly reported as a duplicate.
        assert not flip_dims_b[
            dim
        ], "dim {} appears multiple times in the list of dims".format(dim)
        flip_dims_b[dim] = True

    strides = list(A.stride())
    n = 0  # number of dims that really get reversed
    offset = 0  # start position of the flipped view, accumulated from strides
    # For a 0-dim tensor shape/stride are empty, so this loop does not run.
    for i, (selected, size, stride) in enumerate(
        zip(flip_dims_b, A.shape, A.stride())
    ):
        # Reversing a size-1 or stride-0 (broadcast) dim changes nothing.
        if selected and size > 1 and stride != 0:
            # Start at the last element along this dim and walk backwards.
            offset += stride * (size - 1)
            strides[i] = -stride
            n += 1

    if n == 0 or A.numel() <= 1:
        return A.clone()

    out = torch.empty_like(A)
    # a flipped view of A
    flipped_A = StridedBuffer(A, strides=strides, offset=offset)

    # TODO: flip op can have a custom task simplification method, but we skip it now and just use A's rank.
    overload = copy_func.instantiate(A.ndim)
    overload(flipped_A, out0=out)
    return out