import numpy as np
from .layer import Layer
from abc import abstractmethod
from numpy.typing import ArrayLike


class Activation(Layer):
    """
    the main activation function class containing all the methods used for activation function
    it's an abstract class, meaning it should never be used directly, but instead used a base
    """
    __slots__ = ['input', 'activation']

    def __init__(self) -> None:
        super().__init__()

    def forward(self, input: ArrayLike) -> np.ndarray:
        """
        creates the activation and introduces non-linearity to the network
        """
        self.input = input
        self.activation = self._function(self.input)
        return self.activation

    def backward(self, gradient: ArrayLike) -> np.ndarray:
        """
        creates the upstream gradient from input gradient
        """
        return self._derivative() * gradient

    @abstractmethod
    def _function(self, input: ArrayLike) -> np.ndarray:
        """
        abstract method; every concrete activation must implement it
        """
        pass

    @abstractmethod
    def _derivative(self) -> np.ndarray:
        """
        abstract method; every concrete activation must implement it
        """
        pass


class Relu(Activation):
    """
    Rectified Linear Unit (ReLU) activation function.

    ReLU is a commonly used activation function in neural networks, defined as f(x) = max(0, x).
    It performs well in deep learning models because it produces sparse
    activations and mitigates the vanishing gradient problem.
    """
    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        return np.maximum(0.0, input)

    def _derivative(self) -> np.ndarray:
        return np.where(self.input > 0, 1, 0)


class Elu(Activation):
    """
    Exponential Linear Unit (ELU) activation function.
    The alpha parameter scales the negative branch: f(x) = alpha * (exp(x) - 1) for x <= 0, and f(x) = x otherwise.
    """
    __slots__ = ['alpha']

    def __init__(self, alpha: float = 1.0) -> None:
        super().__init__()
        self.alpha = alpha

    def _function(self, input: ArrayLike) -> np.ndarray:
        return np.where(input <= 0., self.alpha * (np.exp(input) - 1), input)

    def _derivative(self) -> np.ndarray:
        return np.where(self.input > 0, 1, self.alpha * np.exp(self.input))


class LeakyRelu(Activation):
    """
    Leaky ReLU activation function.
    The epsilon parameter sets the slope on the negative side.
    """
    __slots__ = ['epsilon']

    def __init__(self, epsilon: float = 1e-1) -> None:
        super().__init__()
        self.epsilon = epsilon

    def _function(self, input: ArrayLike) -> np.ndarray:
        input = np.asarray(input)
        # use np.where instead of in-place masking so the caller's array is not modified
        return np.where(input > 0., input, self.epsilon * input)

    def _derivative(self) -> np.ndarray:
        return np.where(self.input > 0, 1, self.epsilon)


class Tanh(Activation):
    """
    The hyperbolic tangent (tanh) activation function.

    This activation function maps input values to the range (-1, 1). It is commonly used in neural networks due to its
    ability to introduce non-linearity while still being differentiable.
    """
    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        return np.tanh(input)

    def _derivative(self) -> np.ndarray:
        return 1 - np.square(self.activation)


class Sigmoid(Activation):
    """
    Sigmoid activation function class.
    """
    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        return 1 / (1 + np.exp(-input))

    def _derivative(self) -> np.ndarray:
        return (1 - self.activation) * self.activation


class SoftMax(Activation):
    """
    Softmax activation function.

    Softmax function normalizes the output of a neural network to a probability
    distribution over the classes in the output layer. It is commonly used in
    multi-class classification tasks.
    """
    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        # subtract the row-wise maximum for numerical stability
        input = input - np.max(input, axis=1, keepdims=True)
        output = np.exp(input)
        return output / np.sum(output, axis=1, keepdims=True)

    def _derivative(self) -> np.ndarray:
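        # note: this elementwise form is only the diagonal of the softmax Jacobian;
        # the full Jacobian also has off-diagonal terms -a_i * a_j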
        return self.activation * (1 - self.activation)


class SoftPlus(Activation):
    """
    The SoftPlus activation function is defined as f(x) = log(1 + e^x).
    It is used to introduce non-linearity to a neural network's output.
    """
    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        # log(1 + e^x), computed with logaddexp for numerical stability
        return np.logaddexp(0., input)

    def _derivative(self) -> np.ndarray:
        # the derivative of softplus is the sigmoid of the input
        return 1. / (1. + np.exp(-self.input))


class SoftSign(Activation):
    """
    SoftSign activation function.

    It maps the input to the range (-1, 1), which makes it useful in neural
    networks where limiting the range of activations helps avoid saturation.
    """
    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        return input / (np.abs(input) + 1.)

    def _derivative(self) -> np.ndarray:
        output = np.abs(self.input) + 1.
        return 1. / (output ** 2)


class Identity(Activation):
    """
    The identity activation function.

    It simply returns its input without any transformation. It is often used as
    the activation of the output layer when the task is regression, i.e.
    predicting a continuous output value.
    """
    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        return input

    def _derivative(self) -> np.ndarray:
        # return an array of ones so the return type matches the annotation
        return np.ones_like(self.input)
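

# Minimal usage sketch of the forward/backward API above. It assumes the Layer
# base class needs no constructor arguments (as Activation.__init__ suggests) and
# that this module is executed with package context (e.g. via `python -m`),
# because of the relative import of Layer at the top of the file.
if __name__ == '__main__':
    x = np.array([[-2.0, -0.5, 0.0, 0.5, 2.0]])
    upstream = np.ones_like(x)

    relu = Relu()
    out = relu.forward(x)           # f(x) = max(0, x)
    grad = relu.backward(upstream)  # 1 where x > 0, else 0, times the upstream gradient
    print('relu out :', out)
    print('relu grad:', grad)

    sigmoid = Sigmoid()
    out = sigmoid.forward(x)            # f(x) = 1 / (1 + e^-x)
    grad = sigmoid.backward(upstream)   # f(x) * (1 - f(x)) times the upstream gradient
    print('sigmoid out :', out)
    print('sigmoid grad:', grad)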