Coverage for src/flag_gems/runtime/backend/_kunlunxin/ops/index_select.py

1import logging

3import torch

4import triton

5import triton.language as tl

7from flag_gems import runtime

8from flag_gems.utils import dim_compress, libentry

9from flag_gems.utils import triton_lang_extension as tle

11logger = logging.getLogger("flag_gems").getChild(__name__.lstrip("."))

@libentry()
@triton.heuristics(runtime.get_heuristic_config("index_select"))
@triton.jit
def index_select_kernel(
    inp,
    out,
    M: tl.constexpr,
    N: tl.constexpr,
    index,
    index_len: tl.constexpr,
    BLOCK_M: tl.constexpr,
    BLOCK_N: tl.constexpr,
):
    """Gather selected columns of ``inp`` into ``out``, one 2-D tile per program.

    ``inp`` is addressed as ``row * N + column`` and ``out`` as
    ``row * index_len + position``. For every row below ``M`` and every
    position below ``index_len`` the kernel copies
    ``inp[row, index[position]]`` to ``out[row, position]``.

    Grid axis 0 tiles the rows in steps of ``BLOCK_M``; grid axis 1 tiles the
    positions of ``index`` in steps of ``BLOCK_N``. Both block sizes are
    supplied by the "index_select" heuristic config.
    """
    row_block = tle.program_id(axis=0)
    col_block = tle.program_id(axis=1)

    # Rows are kept as a (BLOCK_M, 1) column so they broadcast against the
    # (BLOCK_N,) vector of index positions.
    rows = row_block * BLOCK_M + tl.arange(0, BLOCK_M)[:, None]
    cols = col_block * BLOCK_N + tl.arange(0, BLOCK_N)
    row_in_bounds = rows < M

    # An element is written only when both its row and its position are valid.
    store_mask = row_in_bounds and (cols < index_len)

    # Positions past the end of `index` read as 0, so the gather below still
    # targets column 0 of a valid row; those lanes are dropped by store_mask.
    picked = tl.load(index + cols, mask=(cols < index_len), other=0)
    src = rows * N + picked[None, :]
    dst = rows * index_len + cols[None, :]

    values = tl.load(inp + src, mask=row_in_bounds, other=0.0)
    tl.store(out + dst, values, mask=store_mask)

def index_select(inp, dim, index):
    """Select slices of ``inp`` along ``dim`` at the positions in ``index``.

    Args:
        inp: Source tensor.
        dim: Dimension to select along; negative values count from the end.
        index: 0-d or 1-d tensor of positions, each in ``[0, inp.size(dim))``.
            A 0-d index is treated as a one-element 1-d index.

    Returns:
        A tensor shaped like ``inp`` except that ``dim`` has length
        ``index.numel()``. When ``dim`` is not the last dimension, the result
        is a permuted view of the buffer the kernel wrote.

    Raises:
        AssertionError: If ``dim`` is out of range, ``index`` has more than
            one dimension, or any index value lies outside
            ``[0, inp.size(dim))``.
    """
    logger.debug("GEMS INDEX SELECT")
    assert dim >= -inp.ndim and dim < inp.ndim, "Invalid dim"
    assert index.ndim <= 1, "Index should have dimension 1 or 0"
    # The bounds check must reduce to a single bool: asserting on a generator
    # expression always passes because a generator object is truthy. The
    # kernel uses the index values directly as memory offsets, so an
    # out-of-range value would read outside `inp`.
    # NOTE: reading the reduced value back forces a device synchronisation.
    assert bool(
        ((index >= 0) & (index < inp.size(dim))).all()
    ), "Index out of range"

    if index.ndim == 0:
        index = index.unsqueeze(0)
    # The kernel reads `index` with unit stride from its base pointer, so a
    # strided view has to be materialised first (no-op when already contiguous).
    index = index.contiguous()

    dim = dim % inp.ndim
    inp_shape = list(inp.shape)
    index_len = index.numel()

    # with dim_compress
    # presumably moves `dim` to the last axis and returns a contiguous tensor,
    # which is what the kernel's row * N + column addressing relies on --
    # confirm against flag_gems.utils.dim_compress.
    inp = dim_compress(inp, dim)
    N = inp_shape[dim]
    out_shape = list(inp.shape)
    out_shape[-1] = index_len
    out = torch.empty(out_shape, dtype=inp.dtype, device=inp.device)

    # Nothing to copy for an empty result; skipping the launch also avoids
    # dividing by N == 0 when the selected dimension itself is empty.
    if out.numel() > 0:
        M = inp.numel() // N

        def grid(meta):
            return (
                triton.cdiv(M, meta["BLOCK_M"]),
                triton.cdiv(index_len, meta["BLOCK_N"]),
            )

        index_select_kernel[grid](inp, out, M, N, index, index_len)

    if dim == out.ndim - 1:
        return out

    # The selected axis sits last in `out`; move it back to position `dim`.
    order = list(range(out.ndim - 1))
    order.insert(dim, out.ndim - 1)
    return out.permute(order)

Coverage for src/flag_gems/runtime/backend/_kunlunxin/ops/index_select.py: 0%

46 statements