import numpy as np
from numpy.typing import ArrayLike
from typing import Any, Callable
from abc import ABC, abstractmethod
from functools import partial
from .backend import BackendInterface, NumpyBackend, CupyBackend, NumbaBackend


class Tensor(object):
    __slots__ = ['_backend', 'data', 'gradient', 'requireGradient', 'gradientFunc', 'batched']

    __backend__ = NumpyBackend()

    def __init__(self, data: Any,
                 gradient: Any = None,
                 gradientFunc: Callable = None,
                 requireGradient: bool = False,
                 batched: bool = True) -> None:

        self._backend = Tensor.__backend__

        #if isinstance(data, (list | np.ndarray)):
        #    data = self._backend.array(data)
        #elif isinstance(data, (int, float)):
        #    data = self._backend.array([data])
        #elif isinstance(data, self.__class__):
        #    gradient = data.gradient if gradient is None else gradient
        #    gradientFunc = data.gradientFunc if gradientFunc is None else gradientFunc
        #    requireGradient = data.requireGradient if requireGradient is False else requireGradient
        #    data = data.data
        #if len(data.shape) == 1:
        #    data = self._backend.reshape(data, (1, *data.shape))

        #if gradient is None and requireGradient:
        #    # If gradient is not provided and it's required, initialize it as None
        #    gradient = self._backend.zeros_like(data)
        #elif isinstance(gradient, (list, int, float)):
        #    gradient = self._backend.array(gradient)

        # Checking if the shapes are the same
        #if gradient is not None:
        #    assert data.shape == gradient.shape, "value and gradient must have the same shape"

        self.data = data
        self.gradient = gradient
        self.requireGradient = requireGradient
        self.gradientFunc = gradientFunc
        self.batched = batched

    def zeroGradient(self) -> None:
        """In-place operation for nulling the gradient"""
        if self.requireGradient:
            self.gradient = self._backend.zeros_like(self.data)
        else:
            raise AttributeError("this tensor is not differentiable")

    def backward(self, gradient=None):
        """
        Compute the gradients recursively by applying the chain rule.
        """
        if not self.requireGradient:
            return

        if gradient is None:
            gradient = self._backend.ones_like(self.data)

        # If gradientFunc is not set, this is a leaf tensor and the starting
        # point of backpropagation, so there is nothing further to call.
        if self.gradientFunc is None:
            return

        # Accumulate gradients instead of overwriting.
        if self.gradient is None:
            self.gradient = gradient
        else:
            self.gradient += gradient
        # Propagate through the operation that produced this tensor.
        self.gradientFunc(self.gradient)

    def __repr__(self) -> str:
        """String representation."""
        dataTitle = 'data:\n'
        gradientTitle = 'gradient:\n'
        dataStr = str(self.data)
        gradientStr = str(self.gradient)
        if self.requireGradient is True:
            return dataTitle + dataStr + '\n' + gradientTitle + gradientStr
        else:
            return dataTitle + dataStr

    def copy(self) -> 'Tensor':
        data = self._backend.copy(self.data)
        gradient = self._backend.copy(self.gradient) if self.gradient is not None else None
        return self.__class__(data, gradient, gradientFunc=self.gradientFunc, requireGradient=self.requireGradient)

    @property
    def strides(self) -> tuple:
        return self.data.strides

    def __len__(self) -> int:
        """Return the length of the value."""
        return len(self.data)

    @property
    def shape(self) -> tuple:
        """Return the shape of the value."""
        return self.data.shape

    @property
    def ndim(self) -> int:
        """Return the ndim of the value."""
        return self.data.ndim

    def reshape(self, newshape) -> 'Tensor':
        return reshapeForward(self, newshape)

    def transpose(self) -> 'Tensor':
        return transposeForward(self)

    def T(self) -> 'Tensor':
        return transposeForward(self)

    def tolist(self) -> tuple[list, list] | list:
        if self.requireGradient is True:
            return self.data.tolist(), self.gradient.tolist()
        else:
            return self.data.tolist()

    @classmethod
    def setBackend(cls, backend: BackendInterface) -> None:
        if isinstance(backend, BackendInterface):
            cls.__backend__ = backend
        else:
            raise TypeError(f"{backend} is not a backend")

    def __getitem__(self, index):
        """Get an item by index."""
        if self.requireGradient and self.gradient is not None:
            return self.__class__(data=self.data[index], gradient=self.gradient[index], requireGradient=True, gradientFunc=self.gradientFunc)
        elif self.requireGradient:
            return self.__class__(data=self.data[index], requireGradient=True, gradientFunc=self.gradientFunc)
        else:
            return self.__class__(data=self.data[index], requireGradient=False)

    def __setitem__(self, index, value) -> None:
        """Set the value of an item by index."""
        if isinstance(value, self.__class__):
            self.data[index] = value.data
            if self.requireGradient and self.gradient is not None and value.gradient is not None:
                self.gradient[index] = value.gradient
        else:
            self.data[index] = value
            if self.gradient is not None:
                self.gradient[index] = 0

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        if method == '__call__':
            operation = ufuncMap.get(ufunc)
            if operation is not None:
                return operation()(*inputs, **kwargs)
        raise NotImplementedError(f'{ufunc} is not implemented yet')

    def __array_function__(self, func, types, args, kwargs):
        operation = funcMap.get(func)
        if operation is not None:
            return operation()(*args, **kwargs)
        raise NotImplementedError(f'{func} is not implemented yet')
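
    # ufuncMap and funcMap (defined further down in this module) are assumed
    # to map NumPy ufuncs/functions to their Tensor-aware counterparts, e.g.
    # np.add to the operation wrapping addForward; this is what lets plain
    # NumPy calls like np.add(tensor, other) route through the autograd
    # machinery above.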

    def __add__(self, other: ArrayLike) -> 'Tensor':
        return addForward(self, other)

    def __radd__(self, other: ArrayLike) -> 'Tensor':
        return addForward(other, self)

    def __iadd__(self, other: ArrayLike) -> 'Tensor':
        result = addForward(self, other)
        self.data = result.data
        self.gradient = result.gradient
        self.requireGradient = result.requireGradient
        return self

    def __sub__(self, other: ArrayLike) -> 'Tensor':
        return subtractForward(self, other)

    def __rsub__(self, other: ArrayLike) -> 'Tensor':
        return subtractForward(other, self)

    def __isub__(self, other: ArrayLike) -> 'Tensor':
        result = subtractForward(self, other)
        self.data = result.data
        self.gradient = result.gradient
        self.requireGradient = result.requireGradient
        return self

    def __mul__(self, other: ArrayLike) -> 'Tensor':
        return multiplyForward(self, other)

    def __rmul__(self, other: ArrayLike) -> 'Tensor':
        return multiplyForward(other, self)

    def __imul__(self, other: ArrayLike) -> 'Tensor':
        result = multiplyForward(self, other)
        self.data = result.data
        self.gradient = result.gradient
        self.requireGradient = result.requireGradient
        return self

    def __truediv__(self, other: ArrayLike) -> 'Tensor':
        return divideForward(self, other)

    def __rtruediv__(self, other: ArrayLike) -> 'Tensor':
        return divideForward(other, self)

    def __itruediv__(self, other: ArrayLike) -> 'Tensor':
        result = divideForward(self, other)
        self.data = result.data
        self.gradient = result.gradient
        self.requireGradient = result.requireGradient
        return self

    def __matmul__(self, other: ArrayLike) -> 'Tensor':
        return matmulForward(self, other)

    def __rmatmul__(self, other: ArrayLike) -> 'Tensor':
        return matmulForward(other, self)

    def __imatmul__(self, other: ArrayLike) -> 'Tensor':
        result = matmulForward(self, other)
        self.data = result.data
        self.gradient = result.gradient
        self.requireGradient = result.requireGradient
        return self

    def __pow__(self, other: ArrayLike) -> 'Tensor':
        return powerForward(self, other)

    def __rpow__(self, other: ArrayLike) -> 'Tensor':
        return powerForward(other, self)

    def __ipow__(self, other: ArrayLike) -> 'Tensor':
        result = powerForward(self, other)
        self.data = result.data
        self.gradient = result.gradient
        self.requireGradient = result.requireGradient
        return self

    def __abs__(self) -> 'Tensor':
        return absForward(self)

    def __pos__(self) -> 'Tensor':
        return positiveForward(self)

    def __neg__(self) -> 'Tensor':
        return negativeForward(self)

    def __eq__(self, other) -> 'Tensor':
        """Equality comparison."""
        return equalForward(self, other)

    def __gt__(self, other) -> 'Tensor':
        """Greater than comparison."""
        return greaterForward(self, other)

    def __ge__(self, other) -> 'Tensor':
        """Greater than or equal to comparison."""
        return greaterEqualForward(self, other)

    def __lt__(self, other) -> 'Tensor':
        """Less than comparison."""
        return lessForward(self, other)

    def __le__(self, other) -> 'Tensor':
        """Less than or equal to comparison."""
        return lessEqualForward(self, other)

    def sum(self, axis=None, dtype=None, keepdims=False) -> 'Tensor':
        return sumForward(self, axis, dtype, keepdims)

    def prod(self, axis=None, dtype=None, keepdims=False) -> 'Tensor':
        return prodForward(self, axis, dtype, keepdims)

    def max(self, axis=None, keepdims=False) -> 'Tensor':
        return maxForward(self, axis, keepdims)

    def min(self, axis=None, keepdims=False) -> 'Tensor':
        return minForward(self, axis, keepdims)

    def mean(self, axis=None, keepdims=False) -> 'Tensor':
        return meanForward(self, axis, keepdims)

    def var(self, axis=None, ddof=0, keepdims=False) -> 'Tensor':
        return varForward(self, axis, ddof, keepdims)

    def std(self, axis=None, keepdims=False) -> 'Tensor':
        return stdForward(self, axis, keepdims)
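

# A minimal usage sketch (illustrative, not part of the library API; the
# helper name _demoAutograd is hypothetical). It assumes the default
# NumpyBackend and the forward/backward helpers defined below.
def _demoAutograd() -> None:
    x = Tensor(np.array([[2.0, 3.0]]), requireGradient=True)
    y = Tensor(np.array([[4.0, 5.0]]), requireGradient=True)
    x.zeroGradient()
    y.zeroGradient()
    z = x * y     # routes through multiplyForward via __mul__
    z.backward()  # seeds with ones_like and applies the chain rule
    # x.gradient now equals y.data, and y.gradient equals x.data
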

def checkTensor(tensor: Tensor) -> Tensor:
    if isinstance(tensor, Tensor):
        return tensor
    return Tensor(tensor)


def getbroadcastAxid(data, gradient) -> np.ndarray | None:
    # Store old shapes
    tensorShape = np.array(data.shape)

    # Get new shape
    gradientShape = np.array(gradient.shape)

    # Prepend ones to the shape of the smaller array
    if len(tensorShape) < len(gradientShape):
        tensorShape = np.pad(tensorShape, (len(gradientShape) - len(tensorShape), 0), mode='constant', constant_values=1)
    elif len(tensorShape) > len(gradientShape):
        gradientShape = np.pad(gradientShape, (len(tensorShape) - len(gradientShape), 0), mode='constant', constant_values=1)

    # Find broadcasted axes
    tensorBroadcastAxis = np.where(tensorShape != gradientShape)[0]

    # Return None if no axes were broadcast
    if tensorBroadcastAxis.size == 0:
        tensorBroadcastAxis = None

    return tensorBroadcastAxis
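
# Illustrative example (assumes NumPy shape semantics): for data of shape
# (3, 1) and a gradient of shape (3, 4), the padded shapes differ on axis 1,
# so the caller sums the gradient over that axis to collapse it back to the
# data's shape.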


def addForward(tensor1: Tensor, tensor2: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor1, Tensor):
        tensor1 = Tensor(tensor1)
    if not isinstance(tensor2, Tensor):
        tensor2 = Tensor(tensor2)

    data = np.add(tensor1.data, tensor2.data, *args, **kwargs)

    requireGradient = tensor1.requireGradient or tensor2.requireGradient
    if requireGradient:
        gradfunc = partial(addBackward, tensor1, tensor2)
        return Tensor(data, requireGradient=requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=requireGradient, gradientFunc=None)


def addBackward(tensor1: Tensor, tensor2: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor1 and tensor1.requireGradient:
        gradientForTensor1 = np.copy(gradient)

        tensorBroadcastAxis = getbroadcastAxid(tensor1, gradientForTensor1)
        if tensorBroadcastAxis is not None:
            gradientForTensor1 = np.sum(gradientForTensor1, axis=tuple(tensorBroadcastAxis), keepdims=True)

        tensor1.gradient = np.add(tensor1.gradient, gradientForTensor1)
        if tensor1.gradientFunc:
            tensor1.gradientFunc(tensor1.gradient)

    if tensor2 and tensor2.requireGradient:
        gradientForTensor2 = np.copy(gradient)

        tensorBroadcastAxis = getbroadcastAxid(tensor2, gradientForTensor2)
        if tensorBroadcastAxis is not None:
            gradientForTensor2 = np.sum(gradientForTensor2, axis=tuple(tensorBroadcastAxis), keepdims=True)

        tensor2.gradient = np.add(tensor2.gradient, gradientForTensor2)
        if tensor2.gradientFunc:
            tensor2.gradientFunc(tensor2.gradient)
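
# Every binary operation below follows the same wiring as addForward and
# addBackward: the forward helper computes the result and, when either
# operand requires gradients, attaches a partial over the matching backward
# helper; the backward helper reduces the upstream gradient over broadcast
# axes, accumulates it, and recurses through each operand's gradientFunc.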


def subtractForward(tensor1: Tensor, tensor2: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor1, Tensor):
        tensor1 = Tensor(tensor1)
    if not isinstance(tensor2, Tensor):
        tensor2 = Tensor(tensor2)

    data = np.subtract(tensor1.data, tensor2.data, *args, **kwargs)

    requireGradient = tensor1.requireGradient or tensor2.requireGradient
    if requireGradient:
        gradfunc = partial(subtractBackward, tensor1, tensor2)
        return Tensor(data, requireGradient=requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=requireGradient, gradientFunc=None)


def subtractBackward(tensor1: Tensor, tensor2: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor1 and tensor1.requireGradient:
        gradientForTensor1 = np.copy(gradient)

        tensorBroadcastAxis = getbroadcastAxid(tensor1, gradientForTensor1)
        if tensorBroadcastAxis is not None:
            gradientForTensor1 = np.sum(gradientForTensor1, axis=tuple(tensorBroadcastAxis), keepdims=True)

        tensor1.gradient = np.add(tensor1.gradient, gradientForTensor1)
        if tensor1.gradientFunc:
            tensor1.gradientFunc(tensor1.gradient)

    if tensor2 and tensor2.requireGradient:
        gradientForTensor2 = np.copy(gradient)

        tensorBroadcastAxis = getbroadcastAxid(tensor2, gradientForTensor2)
        if tensorBroadcastAxis is not None:
            gradientForTensor2 = np.sum(gradientForTensor2, axis=tuple(tensorBroadcastAxis), keepdims=True)

        tensor2.gradient = np.subtract(tensor2.gradient, gradientForTensor2)
        if tensor2.gradientFunc:
            tensor2.gradientFunc(tensor2.gradient)


def multiplyForward(tensor1: Tensor, tensor2: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor1, Tensor):
        tensor1 = Tensor(tensor1)
    if not isinstance(tensor2, Tensor):
        tensor2 = Tensor(tensor2)

    data = np.multiply(tensor1.data, tensor2.data, *args, **kwargs)

    requireGradient = tensor1.requireGradient or tensor2.requireGradient
    if requireGradient:
        gradfunc = partial(multiplyBackward, tensor1, tensor2)
        return Tensor(data, requireGradient=requireGradient, gradientFunc=gradfunc)
    return Tensor(data, requireGradient=requireGradient, gradientFunc=None)


def multiplyBackward(tensor1: Tensor, tensor2: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor1 and tensor1.requireGradient:
        # d(t1 * t2) / d(t1) = t2: scale the upstream gradient first, then
        # reduce it over any broadcast axes.
        gradientForTensor1 = np.multiply(tensor2.data, gradient)

        tensorBroadcastAxis = getbroadcastAxid(tensor1, gradientForTensor1)
        if tensorBroadcastAxis is not None:
            gradientForTensor1 = np.sum(gradientForTensor1, axis=tuple(tensorBroadcastAxis), keepdims=True)

        tensor1.gradient = np.add(tensor1.gradient, gradientForTensor1)
        if tensor1.gradientFunc:
            tensor1.gradientFunc(tensor1.gradient)

    if tensor2 and tensor2.requireGradient:
        gradientForTensor2 = np.multiply(tensor1.data, gradient)

        tensorBroadcastAxis = getbroadcastAxid(tensor2, gradientForTensor2)
        if tensorBroadcastAxis is not None:
            gradientForTensor2 = np.sum(gradientForTensor2, axis=tuple(tensorBroadcastAxis), keepdims=True)

        tensor2.gradient = np.add(tensor2.gradient, gradientForTensor2)
        if tensor2.gradientFunc:
            tensor2.gradientFunc(tensor2.gradient)


def divideForward(tensor1: Tensor, tensor2: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor1, Tensor):
        tensor1 = Tensor(tensor1)
    if not isinstance(tensor2, Tensor):
        tensor2 = Tensor(tensor2)
    data = np.divide(tensor1.data, tensor2.data, *args, **kwargs)

    requireGradient = tensor1.requireGradient or tensor2.requireGradient
    if requireGradient:
        gradfunc = partial(divideBackward, tensor1, tensor2)
        return Tensor(data, requireGradient=requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=requireGradient, gradientFunc=None)


def divideBackward(tensor1: Tensor, tensor2: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor1 and tensor1.requireGradient:
        # d(t1 / t2) / d(t1) = 1 / t2
        gradientForTensor1 = np.divide(gradient, tensor2.data)

        tensorBroadcastAxis = getbroadcastAxid(tensor1, gradientForTensor1)
        if tensorBroadcastAxis is not None:
            gradientForTensor1 = np.sum(gradientForTensor1, axis=tuple(tensorBroadcastAxis), keepdims=True)

        tensor1.gradient = np.add(tensor1.gradient, gradientForTensor1)
        if tensor1.gradientFunc:
            tensor1.gradientFunc(tensor1.gradient)

    if tensor2 and tensor2.requireGradient:
        # d(t1 / t2) / d(t2) = -t1 / t2**2
        gradientForTensor2 = np.divide(np.multiply(tensor1.data, gradient), np.power(tensor2.data, 2))

        tensorBroadcastAxis = getbroadcastAxid(tensor2, gradientForTensor2)
        if tensorBroadcastAxis is not None:
            gradientForTensor2 = np.sum(gradientForTensor2, axis=tuple(tensorBroadcastAxis), keepdims=True)

        tensor2.gradient = np.subtract(tensor2.gradient, gradientForTensor2)
        if tensor2.gradientFunc:
            tensor2.gradientFunc(tensor2.gradient)


def matmulForward(tensor1: Tensor, tensor2: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor1, Tensor):
        tensor1 = Tensor(tensor1)
    if not isinstance(tensor2, Tensor):
        tensor2 = Tensor(tensor2)

    data = np.matmul(tensor1.data, tensor2.data, *args, **kwargs)

    requireGradient = tensor1.requireGradient or tensor2.requireGradient
    if requireGradient:
        gradfunc = partial(matmulBackward, tensor1, tensor2)
        return Tensor(data, requireGradient=requireGradient, gradientFunc=gradfunc)
    return Tensor(data, requireGradient=requireGradient, gradientFunc=None)


def matmulBackward(tensor1: Tensor, tensor2: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor1 and tensor1.requireGradient:
        # dL/d(t1) = gradient @ t2^T; batched inputs swap the last two axes
        if tensor1.data.ndim > 2 or tensor2.data.ndim > 2:
            tensor1.gradient = np.add(tensor1.gradient, np.matmul(gradient, np.swapaxes(tensor2.data, -1, -2)))
        else:
            tensor1.gradient = np.add(tensor1.gradient, np.matmul(gradient, np.transpose(tensor2.data)))
        if tensor1.gradientFunc:
            tensor1.gradientFunc(tensor1.gradient)

    if tensor2 and tensor2.requireGradient:
        # dL/d(t2) = t1^T @ gradient
        if tensor1.data.ndim > 2 or tensor2.data.ndim > 2:
            tensor2.gradient = np.add(tensor2.gradient, np.matmul(np.swapaxes(tensor1.data, -1, -2), gradient))
        else:
            tensor2.gradient = np.add(tensor2.gradient, np.matmul(np.transpose(tensor1.data), gradient))
        if tensor2.gradientFunc:
            tensor2.gradientFunc(tensor2.gradient)
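
# Shape sketch (illustrative): for tensor1 of shape (m, k) and tensor2 of
# shape (k, n), the upstream gradient has shape (m, n); gradient @ tensor2.T
# then has tensor1's shape and tensor1.T @ gradient has tensor2's shape.
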
def dotForward(tensor1: Tensor, tensor2: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor1, Tensor):
        tensor1 = Tensor(tensor1)
    if not isinstance(tensor2, Tensor):
        tensor2 = Tensor(tensor2)

    data = np.dot(tensor1.data, tensor2.data, *args, **kwargs)

    requireGradient = tensor1.requireGradient or tensor2.requireGradient
    if requireGradient:
        gradfunc = partial(dotBackward, tensor1, tensor2)
        return Tensor(data, requireGradient=requireGradient, gradientFunc=gradfunc)
    return Tensor(data, requireGradient=requireGradient, gradientFunc=None)


def dotBackward(tensor1: Tensor, tensor2: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor1 and tensor1.requireGradient:
        # d(t1 . t2) / d(t1) = t2 scaled by the upstream gradient
        gradientForTensor1 = np.multiply(tensor2.data, gradient)

        tensorBroadcastAxis = getbroadcastAxid(tensor1, gradientForTensor1)
        if tensorBroadcastAxis is not None:
            gradientForTensor1 = np.sum(gradientForTensor1, axis=tuple(tensorBroadcastAxis), keepdims=True)

        tensor1.gradient = np.add(tensor1.gradient, gradientForTensor1)
        if tensor1.gradientFunc:
            tensor1.gradientFunc(tensor1.gradient)

    if tensor2 and tensor2.requireGradient:
        # d(t1 . t2) / d(t2) = t1 scaled by the upstream gradient
        gradientForTensor2 = np.multiply(tensor1.data, gradient)

        tensorBroadcastAxis = getbroadcastAxid(tensor2, gradientForTensor2)
        if tensorBroadcastAxis is not None:
            gradientForTensor2 = np.sum(gradientForTensor2, axis=tuple(tensorBroadcastAxis), keepdims=True)

        tensor2.gradient = np.add(tensor2.gradient, gradientForTensor2)
        if tensor2.gradientFunc:
            tensor2.gradientFunc(tensor2.gradient)


def powerForward(tensor1: Tensor, tensor2: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor1, Tensor):
        tensor1 = Tensor(tensor1)
    if not isinstance(tensor2, Tensor):
        tensor2 = Tensor(tensor2)

    data = np.power(tensor1.data, tensor2.data, *args, **kwargs)

    requireGradient = tensor1.requireGradient or tensor2.requireGradient
    if requireGradient:
        gradfunc = partial(powerBackward, tensor1, tensor2)
        return Tensor(data, requireGradient=requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=requireGradient, gradientFunc=None)


def powerBackward(tensor1: Tensor, tensor2: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor1 and tensor1.requireGradient:
        # d(t1 ** t2) / d(t1) = t2 * t1 ** (t2 - 1)
        gradientForTensor1 = np.multiply(np.multiply(tensor2.data, np.power(tensor1.data, np.subtract(tensor2.data, 1))), gradient)

        tensorBroadcastAxis = getbroadcastAxid(tensor1, gradientForTensor1)
        if tensorBroadcastAxis is not None:
            gradientForTensor1 = np.sum(gradientForTensor1, axis=tuple(tensorBroadcastAxis), keepdims=True)

        tensor1.gradient = np.add(tensor1.gradient, gradientForTensor1)
        if tensor1.gradientFunc:
            tensor1.gradientFunc(tensor1.gradient)

    if tensor2 and tensor2.requireGradient:
        # d(t1 ** t2) / d(t2) = ln(t1) * t1 ** t2
        gradientForTensor2 = np.multiply(np.multiply(np.log(tensor1.data), np.power(tensor1.data, tensor2.data)), gradient)

        tensorBroadcastAxis = getbroadcastAxid(tensor2, gradientForTensor2)
        if tensorBroadcastAxis is not None:
            gradientForTensor2 = np.sum(gradientForTensor2, axis=tuple(tensorBroadcastAxis), keepdims=True)

        tensor2.gradient = np.add(tensor2.gradient, gradientForTensor2)
        if tensor2.gradientFunc:
            tensor2.gradientFunc(tensor2.gradient)
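
# Caveat (illustrative): the exponent branch above uses np.log(tensor1.data),
# so gradients with respect to the exponent are only well-defined for
# strictly positive bases.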

#
# Single Tensor
#

def squareForward(tensor: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor, Tensor):
        tensor = Tensor(tensor)

    data = np.square(tensor.data, *args, **kwargs)

    if tensor.requireGradient:
        gradfunc = partial(squareBackward, tensor)
        return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=gradfunc)
    return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=None)


def squareBackward(tensor: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor and tensor.requireGradient:
        tensor.gradient = np.add(tensor.gradient, np.multiply(np.multiply(tensor.data, 2.0), gradient))
        if tensor.gradientFunc:
            tensor.gradientFunc(tensor.gradient)

def sqrtForward(tensor: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor, Tensor):
        tensor = Tensor(tensor)

    data = np.sqrt(tensor.data, *args, **kwargs)

    if tensor.requireGradient:
        gradfunc = partial(sqrtBackward, tensor)
        return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=None)

def sqrtBackward(tensor: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor and tensor.requireGradient:
        tensor.gradient = np.add(tensor.gradient, np.divide(gradient, np.multiply(2, np.sqrt(tensor.data))))
        if tensor.gradientFunc:
            tensor.gradientFunc(tensor.gradient)

def logForward(tensor: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor, Tensor):
        tensor = Tensor(tensor)
    data = np.log(tensor.data, *args, **kwargs)

    if tensor.requireGradient:
        gradfunc = partial(logBackward, tensor)
        return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=None)

def logBackward(tensor: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor and tensor.requireGradient:
        tensor.gradient = np.add(tensor.gradient, np.multiply((np.divide(1, tensor.data)), gradient))
        if tensor.gradientFunc:
            tensor.gradientFunc(tensor.gradient)


def expForward(tensor: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor, Tensor):
        tensor = Tensor(tensor)

    data = np.exp(tensor.data, *args, **kwargs)

    if tensor.requireGradient:
        gradfunc = partial(expBackward, tensor)
        return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=None)

def expBackward(tensor: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor and tensor.requireGradient:
        tensor.gradient = np.add(tensor.gradient, np.multiply(np.exp(tensor.data), gradient))
        if tensor.gradientFunc:
            tensor.gradientFunc(tensor.gradient)

def sinForward(tensor: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor, Tensor):
        tensor = Tensor(tensor)
    data = np.sin(tensor.data, *args, **kwargs)

    if tensor.requireGradient:
        gradfunc = partial(sinBackward, tensor)
        return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=None)


def sinBackward(tensor: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor and tensor.requireGradient:
        tensor.gradient = np.add(tensor.gradient, np.multiply(np.cos(tensor.data), gradient))
        if tensor.gradientFunc:
            tensor.gradientFunc(tensor.gradient)


def cosForward(tensor: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor, Tensor):
        tensor = Tensor(tensor)

    data = np.cos(tensor.data, *args, **kwargs)

    if tensor.requireGradient:
        gradfunc = partial(cosBackward, tensor)
        return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=None)

def cosBackward(tensor: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor and tensor.requireGradient:
        tensor.gradient = np.subtract(tensor.gradient, np.multiply(np.sin(tensor.data), gradient))
        if tensor.gradientFunc:
            tensor.gradientFunc(tensor.gradient)


def tanForward(tensor: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor, Tensor):
        tensor = Tensor(tensor)

    data = np.tan(tensor.data, *args, **kwargs)

    if tensor.requireGradient:
        gradfunc = partial(tanBackward, tensor)
        return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=gradfunc)
    return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=None)


def tanBackward(tensor: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor and tensor.requireGradient:
        tensor.gradient = np.add(tensor.gradient, np.multiply((np.divide(1, np.power(np.cos(tensor.data), 2))), gradient))
        if tensor.gradientFunc:
            tensor.gradientFunc(tensor.gradient)

def sinhForward(tensor: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor, Tensor):
        tensor = Tensor(tensor)
    data = np.sinh(tensor.data, *args, **kwargs)

    if tensor.requireGradient:
        gradfunc = partial(sinhBackward, tensor)
        return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=None)


def sinhBackward(tensor: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor and tensor.requireGradient:
        tensor.gradient = np.add(tensor.gradient, np.multiply(np.cosh(tensor.data), gradient))
        if tensor.gradientFunc:
            tensor.gradientFunc(tensor.gradient)

def coshForward(tensor: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor, Tensor):
        tensor = Tensor(tensor)

    data = np.cosh(tensor.data, *args, **kwargs)

    if tensor.requireGradient:
        gradfunc = partial(coshBackward, tensor)
        return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=None)

def coshBackward(tensor: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor and tensor.requireGradient:
        tensor.gradient = np.add(tensor.gradient, np.multiply(np.sinh(tensor.data), gradient))
        if tensor.gradientFunc:
            tensor.gradientFunc(tensor.gradient)


def tanhForward(tensor: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor, Tensor):
        tensor = Tensor(tensor)

    data = np.tanh(tensor.data, *args, **kwargs)
    if tensor.requireGradient:
        gradfunc = partial(tanhBackward, tensor)
        return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=None)


def tanhBackward(tensor: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor and tensor.requireGradient:
        tensor.gradient = np.add(tensor.gradient, np.multiply((np.divide(1, np.power(np.cosh(tensor.data), 2))), gradient))
        if tensor.gradientFunc:
            tensor.gradientFunc(tensor.gradient)


def absForward(tensor: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor, Tensor):
        tensor = Tensor(tensor)
    data = np.abs(tensor.data, *args, **kwargs)

    if tensor.requireGradient:
        gradfunc = partial(absBackward, tensor)
        return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=None)

def absBackward(tensor: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor and tensor.requireGradient:
        tensor.gradient = np.add(tensor.gradient, np.multiply(np.sign(tensor.data), gradient))
        if tensor.gradientFunc:
            tensor.gradientFunc(tensor.gradient)


# Signs
def signForward(tensor: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor, Tensor):
        tensor = Tensor(tensor)

    data = np.sign(tensor.data, *args, **kwargs)

    if tensor.requireGradient:
        gradfunc = partial(signBackward, tensor)
        return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=None)


def signBackward(tensor: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor and tensor.requireGradient:
        tensor.gradient = np.add(tensor.gradient, np.multiply(np.sign(tensor.data), gradient))
        if tensor.gradientFunc:
            tensor.gradientFunc(tensor.gradient)


def positiveForward(tensor: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor, Tensor):
        tensor = Tensor(tensor)

    data = np.positive(tensor.data, *args, **kwargs)

    if tensor.requireGradient:
        gradfunc = partial(positiveBackward, tensor)
        return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=None)


def positiveBackward(tensor: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor and tensor.requireGradient:
        # d(+x)/dx = 1, so the upstream gradient passes through unchanged
        tensor.gradient = np.add(tensor.gradient, gradient)
        if tensor.gradientFunc:
            tensor.gradientFunc(tensor.gradient)


def negativeForward(tensor: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor, Tensor):
        tensor = Tensor(tensor)

    data = np.negative(tensor.data, *args, **kwargs)

    if tensor.requireGradient:
        gradfunc = partial(negativeBackward, tensor)
        return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=tensor.requireGradient, gradientFunc=None)


def negativeBackward(tensor: Tensor, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor and tensor.requireGradient:
        # d(-x)/dx = -1, so the upstream gradient flips sign
        tensor.gradient = np.add(tensor.gradient, np.negative(gradient))
        if tensor.gradientFunc:
            tensor.gradientFunc(tensor.gradient)

def equalForward(tensor1: Tensor, tensor2: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor1, Tensor):
        tensor1 = Tensor(tensor1)
    if not isinstance(tensor2, Tensor):
        tensor2 = Tensor(tensor2)

    data = np.equal(tensor1.data, tensor2.data, *args, **kwargs)

    requireGradient = tensor1.requireGradient or tensor2.requireGradient
    if requireGradient:
        gradfunc = partial(equalBackward, tensor1, tensor2, data)
        return Tensor(data, requireGradient=requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=requireGradient, gradientFunc=None)


def equalBackward(tensor1: Tensor, tensor2: Tensor, bools: np.ndarray, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor1 and tensor1.requireGradient:
        # Mask the upstream gradient with the comparison result, then reduce
        # it over any broadcast axes.
        gradientForTensor1 = np.multiply(bools, gradient)

        tensorBroadcastAxis = getbroadcastAxid(tensor1, gradientForTensor1)
        if tensorBroadcastAxis is not None:
            gradientForTensor1 = np.sum(gradientForTensor1, axis=tuple(tensorBroadcastAxis), keepdims=True)

        tensor1.gradient = np.add(tensor1.gradient, gradientForTensor1)
        if tensor1.gradientFunc:
            tensor1.gradientFunc(tensor1.gradient)

    if tensor2 and tensor2.requireGradient:
        gradientForTensor2 = np.multiply(bools, gradient)

        tensorBroadcastAxis = getbroadcastAxid(tensor2, gradientForTensor2)
        if tensorBroadcastAxis is not None:
            gradientForTensor2 = np.sum(gradientForTensor2, axis=tuple(tensorBroadcastAxis), keepdims=True)

        tensor2.gradient = np.add(tensor2.gradient, gradientForTensor2)
        if tensor2.gradientFunc:
            tensor2.gradientFunc(tensor2.gradient)


def notEqualForward(tensor1: Tensor, tensor2: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor1, Tensor):
        tensor1 = Tensor(tensor1)
    if not isinstance(tensor2, Tensor):
        tensor2 = Tensor(tensor2)

    data = np.not_equal(tensor1.data, tensor2.data, *args, **kwargs)

    requireGradient = tensor1.requireGradient or tensor2.requireGradient
    if requireGradient:
        gradfunc = partial(notEqualBackward, tensor1, tensor2, data)
        return Tensor(data, requireGradient=requireGradient, gradientFunc=gradfunc)
    return Tensor(data, requireGradient=requireGradient, gradientFunc=None)


def notEqualBackward(tensor1: Tensor, tensor2: Tensor, bools: np.ndarray, gradient: np.ndarray, *args, **kwargs) -> None:
    if tensor1 and tensor1.requireGradient:
        gradientForTensor1 = np.multiply(bools, gradient)

        tensorBroadcastAxis = getbroadcastAxid(tensor1, gradientForTensor1)
        if tensorBroadcastAxis is not None:
            gradientForTensor1 = np.sum(gradientForTensor1, axis=tuple(tensorBroadcastAxis), keepdims=True)

        tensor1.gradient = np.add(tensor1.gradient, gradientForTensor1)
        if tensor1.gradientFunc:
            tensor1.gradientFunc(tensor1.gradient)

    if tensor2 and tensor2.requireGradient:
        gradientForTensor2 = np.multiply(bools, gradient)

        tensorBroadcastAxis = getbroadcastAxid(tensor2, gradientForTensor2)
        if tensorBroadcastAxis is not None:
            gradientForTensor2 = np.sum(gradientForTensor2, axis=tuple(tensorBroadcastAxis), keepdims=True)

        tensor2.gradient = np.add(tensor2.gradient, gradientForTensor2)
        if tensor2.gradientFunc:
            tensor2.gradientFunc(tensor2.gradient)


def lessForward(tensor1: Tensor, tensor2: Tensor, *args, **kwargs) -> Tensor:
    if not isinstance(tensor1, Tensor):
        tensor1 = Tensor(tensor1)
    if not isinstance(tensor2, Tensor):
        tensor2 = Tensor(tensor2)

    data = np.less(tensor1.data, tensor2.data, *args, **kwargs)

    requireGradient = tensor1.requireGradient or tensor2.requireGradient
    if requireGradient:
        gradfunc = partial(lessBackward, tensor1, tensor2, data)
        return Tensor(data, requireGradient=requireGradient, gradientFunc=gradfunc)

    return Tensor(data, requireGradient=requireGradient, gradientFunc=None)