Coverage for src/flag_gems/fused/FLA/index.py: 69%

13 statements  

« prev     ^ index     » next       coverage.py v7.6.9, created at 2026-03-28 12:23 +0800

1# This file contains code copied from the flash-linear-attention project. 

2# The original source code was licensed under the MIT license and included 

3# the following copyright notice: 

4# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang 

5# ruff: noqa: E501 

6import torch 

7import triton 

8 

9from flag_gems.fused.FLA.utils import tensor_cache 

10 

11 

@tensor_cache
def prepare_lens(cu_seqlens: torch.LongTensor) -> torch.LongTensor:
    """Recover the individual sequence lengths from cumulative offsets.

    Args:
        cu_seqlens: 1-D tensor of cumulative sequence offsets, e.g.
            ``[0, len0, len0+len1, ...]``.

    Returns:
        1-D tensor of per-sequence lengths (one element shorter than the input).
    """
    # First-order difference of the offsets gives each sequence's length.
    return torch.diff(cu_seqlens)

15 

16 

@tensor_cache
def prepare_chunk_indices(
    cu_seqlens: torch.LongTensor, chunk_size: int
) -> torch.LongTensor:
    """Build (sequence index, chunk index) pairs for every chunk.

    Args:
        cu_seqlens: 1-D tensor of cumulative sequence offsets.
        chunk_size: number of tokens per chunk.

    Returns:
        A ``(total_chunks, 2)`` tensor where column 0 is the 0-based index of
        the sequence a chunk belongs to and column 1 is the chunk's position
        within that sequence, on the same device/dtype as ``cu_seqlens``.
    """
    # Number of chunks needed for each sequence (ceil division).
    chunks_per_seq = triton.cdiv(prepare_lens(cu_seqlens), chunk_size)
    # Concatenate 0..n-1 ranges, one range per sequence.
    ranges = [torch.arange(count) for count in chunks_per_seq.tolist()]
    chunk_ids = torch.cat(ranges)
    # Each zero marks the start of a new sequence; the running count of
    # zeros (minus one) is therefore the sequence index of each chunk.
    seq_ids = chunk_ids.eq(0).cumsum(0) - 1
    return torch.stack([seq_ids, chunk_ids], 1).to(cu_seqlens)

28 

29 

@tensor_cache
def prepare_chunk_offsets(
    cu_seqlens: torch.LongTensor, chunk_size: int
) -> torch.LongTensor:
    """Compute cumulative chunk offsets across all sequences.

    Args:
        cu_seqlens: 1-D tensor of cumulative sequence offsets.
        chunk_size: number of tokens per chunk.

    Returns:
        1-D tensor ``[0, c0, c0+c1, ...]`` where ``ci`` is the chunk count of
        sequence ``i`` — i.e. the chunk-level analogue of ``cu_seqlens``.
    """
    # Chunk count per sequence, prefixed with a zero so the running sum
    # starts at offset 0 (matching the shape convention of cu_seqlens).
    chunk_counts = triton.cdiv(prepare_lens(cu_seqlens), chunk_size)
    leading_zero = cu_seqlens.new_tensor([0])
    return torch.cat([leading_zero, chunk_counts]).cumsum(-1)