Coverage for src/flag_gems/runtime/backend/_ascend/fla/utils.py: 0%
13 statements
« prev ^ index » next coverage.py v7.6.9, created at 2026-03-27 02:51 +0800
« prev ^ index » next coverage.py v7.6.9, created at 2026-03-27 02:51 +0800
1# SPDX-License-Identifier: Apache-2.0
2# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3# SPDX-FileCopyrightText: Songlin Yang, Yu Zhang
4#
5# This file contains code copied from the flash-linear-attention project.
6# The original source code was licensed under the MIT license and included
7# the following copyright notice:
8# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
9# ruff: noqa: E501
10import torch
11import triton
12import triton.language as tl
def prepare_lens(cu_seqlens: torch.LongTensor) -> torch.LongTensor:
    """Recover per-sequence lengths from cumulative sequence offsets.

    ``cu_seqlens`` holds monotonically increasing boundary offsets
    (``[0, end_0, end_1, ...]``); the first difference of consecutive
    entries is each sequence's length.
    """
    return torch.diff(cu_seqlens)
def prepare_chunk_indices(
    cu_seqlens: torch.LongTensor, chunk_size: int
) -> torch.LongTensor:
    """Build a ``(num_chunks, 2)`` table of ``(sequence_id, chunk_id)`` pairs.

    For every sequence described by ``cu_seqlens``, one row is emitted per
    ``chunk_size``-sized chunk covering that sequence: column 0 is the
    sequence's position in the batch, column 1 is the chunk's position
    inside the sequence. The result matches ``cu_seqlens``'s dtype/device.
    """
    # Per-sequence lengths, then ceiling-divide to count chunks per sequence.
    seq_lens = cu_seqlens[1:] - cu_seqlens[:-1]
    chunks_per_seq = (seq_lens + chunk_size - 1) // chunk_size
    # Within-sequence chunk counters: 0..k-1 for each sequence, concatenated.
    chunk_ids = torch.cat([torch.arange(k) for k in chunks_per_seq.tolist()])
    # chunk_id == 0 marks the start of a new sequence; a running count of
    # those starts (minus one) yields the owning sequence's index.
    seq_ids = chunk_ids.eq(0).cumsum(0) - 1
    return torch.stack([seq_ids, chunk_ids], 1).to(cu_seqlens)
def prepare_chunk_offsets(
    cu_seqlens: torch.LongTensor, chunk_size: int
) -> torch.LongTensor:
    """Cumulative chunk offsets across the batch.

    Entry ``i`` is the total number of ``chunk_size``-sized chunks consumed
    by sequences ``0..i-1`` (entry 0 is always 0), i.e. a chunk-level
    analogue of ``cu_seqlens`` itself.
    """
    # Ceiling-divided chunk count for each sequence.
    seq_lens = cu_seqlens[1:] - cu_seqlens[:-1]
    chunk_counts = (seq_lens + chunk_size - 1) // chunk_size
    # Prefix-sum with a leading zero, kept on cu_seqlens' dtype/device.
    leading_zero = cu_seqlens.new_tensor([0])
    return torch.cat([leading_zero, chunk_counts]).cumsum(-1)
@triton.jit
def safe_exp(x):
    # Overflow-safe exponential: any element of x greater than 0 is replaced
    # with -inf before exponentiation, so exp never sees a positive argument
    # and the result is always in [0, 1] (exp(-inf) == 0, exp(0) == 1).
    # NOTE(review): this silently zeroes positive inputs rather than clamping
    # them — presumably callers pass log-space values expected to be <= 0;
    # confirm at call sites.
    return tl.exp(tl.where(x <= 0, x, float("-inf")))