activation.py

    import numpy as np
    from .layer import Layer
    from abc import abstractmethod
    from numpy.typing import ArrayLike
    
    
    class Activation(Layer):
        """
        the main activation class, containing the methods shared by all activation functions.
        it is an abstract class, meaning it should never be instantiated directly, but instead used as a base
        """
        __slots__ = ['input', 'activation']
    
        def __init__(self) -> None:
            super().__init__()
    
        def forward(self, input: ArrayLike) -> np.ndarray:
            """
            applies the activation function to the input, introducing non-linearity into the network
            """
            self.input = input
            self.activation = self._function(self.input)
            return self.activation
    
        def backward(self, gradient: ArrayLike) -> np.ndarray:
            """
            propagates the incoming gradient through the activation via the chain rule
            """
            return self._derivative() * gradient
    
        @abstractmethod
        def _function(self, input: ArrayLike) -> np.ndarray:
            """
            abstract method, must be implemented by every subclass
            """
            pass
    
        @abstractmethod
        def _derivative(self) -> np.ndarray:
            """
            abstract method, must be implemented by every subclass
            """
            pass
    
    
    class Relu(Activation):
        """
        Rectified Linear Unit (ReLU) activation function.
    
        ReLU is a commonly used activation function in neural networks, defined as f(x) = max(0, x).
        It is known to perform well in deep learning models due to its ability to produce sparse representations
        and avoid the vanishing gradient problem.
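
        Example (an illustrative sketch, assuming a NumPy float array input):

        >>> relu = Relu()
        >>> relu.forward(np.array([-1.0, 0.0, 2.0]))
        array([0., 0., 2.])
        >>> relu.backward(np.array([1.0, 1.0, 1.0]))
        array([0., 0., 1.])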
        """
        __slots__ = []
    
        def __init__(self) -> None:
            super().__init__()
    
        def _function(self, input: ArrayLike) -> np.ndarray:
            return np.maximum(0.0, input)
    
        def _derivative(self) -> np.ndarray:
            return np.where(self.input > 0, 1, 0)
    
    
    class Elu(Activation):
        """
        Exponential Linear Unit (ELU) activation function.

        Defined as f(x) = x for x > 0 and f(x) = alpha * (exp(x) - 1) otherwise;
        the alpha parameter scales the negative saturation value.
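
        Example (an illustrative sketch, using the default alpha of 1.0):

        >>> elu = Elu()
        >>> elu.forward(np.array([-1.0, 2.0]))
        array([-0.63212056,  2.        ])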
        """
        __slots__ = ['alpha']
    
        def __init__(self, alpha: float = 1.0) -> None:
            super().__init__()
            self.alpha = alpha
    
        def _function(self, input: ArrayLike) -> np.ndarray:
            # alpha * (exp(x) - 1) for non-positive inputs, identity otherwise
            return np.where(input <= 0., self.alpha * (np.exp(input) - 1.), input)
    
        def _derivative(self) -> np.ndarray:
            return np.where(self.input > 0, 1, self.alpha * np.exp(self.input))
    
    
    class LeakyRelu(Activation):
        """
        Leaky ReLU activation function.

        Like ReLU, but with a configurable small slope on the negative side,
        which keeps gradients from vanishing entirely for negative inputs.
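
        Example (an illustrative sketch, using the default negative slope of 0.1):

        >>> leaky = LeakyRelu()
        >>> leaky.forward(np.array([-1.0, 2.0]))
        array([-0.1,  2. ])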
        """
        __slots__ = ['epsilon']
    
        def __init__(self, epsilon: float = 1e-1) -> None:
            super().__init__()
            self.epsilon = epsilon
    
        def _function(self, input: ArrayLike) -> np.ndarray:
            # scale the non-positive side without mutating the caller's array
            return np.where(input > 0., input, self.epsilon * input)
    
        def _derivative(self) -> np.ndarray:
            return np.where(self.input > 0, 1, self.epsilon)
    
    
    class Tanh(Activation):
        """
        The hyperbolic tangent (tanh) activation function.
    
        This activation function maps input values to the range (-1, 1). It is commonly used in neural networks due to its
        ability to introduce non-linearity while still being differentiable.
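
        Example (an illustrative sketch):

        >>> tanh = Tanh()
        >>> tanh.forward(np.array([0.0]))
        array([0.])
        >>> tanh.backward(np.array([1.0]))
        array([1.])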
        """
        __slots__ = []
    
        def __init__(self) -> None:
            super().__init__()
    
        def _function(self, input: ArrayLike) -> np.ndarray:
            return np.tanh(input)
    
        def _derivative(self) -> np.ndarray:
            return 1 - np.square(self.activation)
    
    
    class Sigmoid(Activation):
        """
        Sigmoid activation function.

        Maps inputs into the range (0, 1) via f(x) = 1 / (1 + e^(-x)).
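
        Example (an illustrative sketch):

        >>> sigmoid = Sigmoid()
        >>> sigmoid.forward(np.array([0.0]))
        array([0.5])
        >>> sigmoid.backward(np.array([1.0]))
        array([0.25])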
        """
        __slots__ = []
    
        def __init__(self) -> None:
            super().__init__()
    
        def _function(self, input: ArrayLike) -> np.ndarray:
            return 1 / (1 + np.exp(-input))
    
        def _derivative(self) -> np.ndarray:
            return (1 - self.activation) * self.activation
    
    
    class SoftMax(Activation):
        """
        Softmax activation function.
    
        Softmax function normalizes the output of a neural network to a probability
        distribution over the classes in the output layer. It is commonly used in
        multi-class classification tasks.
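
        Example (an illustrative sketch; inputs are assumed to be batched, shape (batch, classes)):

        >>> softmax = SoftMax()
        >>> softmax.forward(np.array([[0.0, 0.0]]))
        array([[0.5, 0.5]])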
        """
        __slots__ = []
    
        def __init__(self) -> None:
            super().__init__()
    
        def _function(self, input: ArrayLike) -> np.ndarray:
            # subtract the row-wise maximum for numerical stability
            input = input - np.max(input, axis=1, keepdims=True)
            output = np.exp(input)
            return output / np.sum(output, axis=1, keepdims=True)
    
        def _derivative(self) -> np.ndarray:
            # element-wise diagonal of the softmax Jacobian; a common
            # simplification that ignores the off-diagonal terms
            return self.activation * (1 - self.activation)
    
    
    class SoftPlus(Activation):
        """
        SoftPlus activation function.

        Defined as f(x) = log(1 + e^x), a smooth approximation of ReLU used to
        introduce non-linearity into a neural network's output.
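
        Example (an illustrative sketch):

        >>> softplus = SoftPlus()
        >>> softplus.forward(np.array([0.0]))
        array([0.69314718])
        """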
        __slots__ = []
    
        def __init__(self) -> None:
            super().__init__()
    
        def _function(self, input: ArrayLike) -> np.ndarray:
            # log(1 + e^x), computed via logaddexp to avoid overflow for large inputs
            return np.logaddexp(0., input)
    
        def _derivative(self) -> np.ndarray:
            # the derivative of softplus is the sigmoid function
            return 1. / (1. + np.exp(-self.input))
    
    
    class SoftSign(Activation):
        """
        SoftSign activation function.

        Maps the input into the range (-1, 1), making it useful in neural
        networks where it is important to limit the range of activations
        to avoid saturation.
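
        Example (an illustrative sketch):

        >>> softsign = SoftSign()
        >>> softsign.forward(np.array([-1.0, 1.0]))
        array([-0.5,  0.5])
        """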
        __slots__ = []
    
        def __init__(self) -> None:
            super().__init__()
    
        def _function(self, input: ArrayLike) -> np.ndarray:
            return input / (np.abs(input) + 1.)
    
        def _derivative(self) -> np.ndarray:
            output = np.abs(self.input) + 1.
            return 1. / (output ** 2)
    
    
    class Identity(Activation):
        """
        The identity activation function.

        Simply returns its input without any transformation. It is often used
        as the activation function of the output layer when the task involves
        regression, i.e., predicting a continuous output value.
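
        Example (an illustrative sketch):

        >>> identity = Identity()
        >>> identity.forward(np.array([3.0]))
        array([3.])
        >>> identity.backward(np.array([2.0]))
        array([2.])
        """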
        __slots__ = []
    
        def __init__(self) -> None:
            super().__init__()
    
        def _function(self, input: ArrayLike) -> np.ndarray:
            return input
    
        def _derivative(self) -> np.ndarray:
            # return an array (rather than the scalar 1) to match the annotated return type
            return np.ones_like(self.input)
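

    # Minimal smoke test (an illustrative sketch, not part of the module's
    # public API): chains forward and backward through a Relu and compares
    # the analytic gradient against a central-difference estimate.
    if __name__ == "__main__":
        rng = np.random.default_rng(0)
        x = rng.normal(size=(4, 3))

        relu = Relu()
        out = relu.forward(x)
        grad = relu.backward(np.ones_like(x))

        # central-difference estimate of the ReLU gradient
        eps = 1e-6
        numeric = (relu._function(x + eps) - relu._function(x - eps)) / (2 * eps)
        print("output shape:", out.shape, "max gradient error:", np.abs(grad - numeric).max())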