Coverage for src/flag_gems/ops/addcmul.py: 67%

1import logging

3import torch

4import triton

6from flag_gems.utils import pointwise_dynamic

# Module-level logger; `addcmul` emits a debug record through it on every call.
logger = logging.getLogger(__name__)

# NOTE(review): `is_tensor` presumably flags x, t1 and t2 as tensor operands and
# `value` as a scalar, and `promotion_methods` presumably derives the output
# dtype from operands 0-2 -- confirm against `pointwise_dynamic`.
@pointwise_dynamic(
    is_tensor=[True, True, True, False],
    promotion_methods=[(0, 1, 2, "DEFAULT")],
)
@triton.jit
def addcmul_forward(x, t1, t2, value):
    # Scalar expression for addcmul: x + value * t1 * t2.
    # The product keeps the original left-to-right order, (value * t1) * t2.
    scaled_product = value * t1 * t2
    return x + scaled_product

def addcmul(inp, tensor1, tensor2, *, value=1.0, out=None):
    """Evaluate ``addcmul_forward`` on the inputs, optionally into ``out``.

    Args:
        inp: First operand forwarded to ``addcmul_forward``.
        tensor1: Second operand forwarded to ``addcmul_forward``.
        tensor2: Third operand forwarded to ``addcmul_forward``.
        value: Keyword-only multiplier forwarded as the fourth argument.
        out: Optional destination tensor. When given, it is resized in place
            if its shape differs from the broadcast shape of the three
            operands, and is then passed to the kernel as ``out0``.

    Returns:
        ``out`` when it was supplied, otherwise whatever
        ``addcmul_forward`` returns.
    """
    logger.debug("GEMS ADDCMUL FORWARD")

    # No destination supplied: hand the call straight to the kernel wrapper.
    if out is None:
        return addcmul_forward(inp, tensor1, tensor2, value)

    # Bring the destination to the broadcast shape before writing into it.
    target_shape = torch.broadcast_shapes(inp.shape, tensor1.shape, tensor2.shape)
    if tuple(out.shape) != tuple(target_shape):
        out.resize_(target_shape)

    addcmul_forward(inp, tensor1, tensor2, value, out0=out)
    return out