Coverage for src/flag_gems/fused/FLA/index.py: 69%
13 statements
« prev ^ index » next coverage.py v7.6.9, created at 2026-03-07 22:33 +0800
1# This file contains code copied from the flash-linear-attention project.
2# The original source code was licensed under the MIT license and included
3# the following copyright notice:
4# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
5# ruff: noqa: E501
6import torch
7import triton
9from flag_gems.fused.FLA.utils import tensor_cache
@tensor_cache
def prepare_lens(cu_seqlens: torch.LongTensor) -> torch.LongTensor:
    """Return the per-sequence lengths encoded by cumulative offsets.

    Args:
        cu_seqlens: 1-D tensor of cumulative sequence boundaries
            (e.g. ``[0, l0, l0+l1, ...]``).

    Returns:
        1-D tensor of consecutive differences, i.e. each sequence's length.
    """
    # torch.diff(x) is exactly x[1:] - x[:-1] for a 1-D tensor.
    return torch.diff(cu_seqlens)
@tensor_cache
def prepare_chunk_indices(
    cu_seqlens: torch.LongTensor, chunk_size: int
) -> torch.LongTensor:
    """Map every chunk to its (sequence index, chunk-within-sequence index).

    Args:
        cu_seqlens: 1-D tensor of cumulative sequence boundaries.
        chunk_size: number of positions per chunk; each sequence
            contributes ``ceil(len / chunk_size)`` chunks.

    Returns:
        Tensor of shape ``(total_chunks, 2)`` — column 0 is the owning
        sequence's index, column 1 the chunk's position inside that
        sequence — cast to the dtype/device of ``cu_seqlens``.
    """
    chunks_per_seq = triton.cdiv(prepare_lens(cu_seqlens), chunk_size).tolist()
    # Concatenated ramps [0..n_i) — one ramp per sequence.
    within_seq = torch.cat([torch.arange(count) for count in chunks_per_seq])
    # Each ramp restarts at 0, so counting zeros (minus one) yields the
    # zero-based index of the sequence each chunk belongs to.
    seq_idx = within_seq.eq(0).cumsum(0) - 1
    return torch.stack((seq_idx, within_seq), 1).to(cu_seqlens)
@tensor_cache
def prepare_chunk_offsets(
    cu_seqlens: torch.LongTensor, chunk_size: int
) -> torch.LongTensor:
    """Return cumulative chunk offsets, one boundary per sequence plus a leading 0.

    Args:
        cu_seqlens: 1-D tensor of cumulative sequence boundaries.
        chunk_size: number of positions per chunk.

    Returns:
        1-D tensor ``[0, c0, c0+c1, ...]`` where ``c_i`` is the chunk
        count of sequence ``i`` — the chunk-level analogue of ``cu_seqlens``.
    """
    chunk_counts = triton.cdiv(prepare_lens(cu_seqlens), chunk_size)
    # new_zeros keeps the dtype/device of cu_seqlens for the leading 0.
    leading_zero = cu_seqlens.new_zeros(1)
    return torch.cat((leading_zero, chunk_counts)).cumsum(-1)