from abc import abstractmethod

import numpy as np
from numpy.typing import ArrayLike

from .layer import Layer


class Activation(Layer):
    """
    Base class for activation functions.

    It is an abstract class, meaning it should never be instantiated
    directly, but instead used as a base for concrete activations.
    """

    __slots__ = ['input', 'activation']

    def __init__(self) -> None:
        super().__init__()

    def forward(self, input: ArrayLike) -> np.ndarray:
        """
        Applies the activation function, introducing non-linearity to the network.
        """
        # cache the input as an ndarray so the derivative can be computed later
        self.input = np.asarray(input)
        self.activation = self._function(self.input)
        return self.activation

    def backward(self, gradient: ArrayLike) -> np.ndarray:
        """
        Computes the gradient with respect to the input from the incoming
        gradient (element-wise chain rule).
        """
        return self._derivative() * gradient

    @abstractmethod
    def _function(self, input: ArrayLike) -> np.ndarray:
        """
        Abstract method; must be implemented by each subclass.
        """
        pass

    @abstractmethod
    def _derivative(self) -> np.ndarray:
        """
        Abstract method; must be implemented by each subclass.
        """
        pass


class Relu(Activation):
    """
    Rectified Linear Unit (ReLU) activation function.

    ReLU is a commonly used activation function in neural networks, defined as
    f(x) = max(0, x). It is known to perform well in deep learning models due
    to its ability to produce sparse representations and mitigate the
    vanishing gradient problem.
    """

    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        return np.maximum(0.0, input)

    def _derivative(self) -> np.ndarray:
        return np.where(self.input > 0, 1, 0)


class Elu(Activation):
    """
    Exponential Linear Unit (ELU) activation function.

    Defined as f(x) = x for x > 0 and alpha * (exp(x) - 1) otherwise; it
    accepts a scaling parameter `alpha` for the negative side.
    """

    __slots__ = ['alpha']

    def __init__(self, alpha: float = 1.0) -> None:
        super().__init__()
        self.alpha = alpha

    def _function(self, input: ArrayLike) -> np.ndarray:
        return np.where(input <= 0., self.alpha * (np.exp(input) - 1.), input)

    def _derivative(self) -> np.ndarray:
        return np.where(self.input > 0, 1, self.alpha * np.exp(self.input))


class LeakyRelu(Activation):
    """
    Leaky ReLU activation function.

    The slope on the negative side is configurable via `epsilon`.
    """

    __slots__ = ['epsilon']

    def __init__(self, epsilon: float = 1e-1) -> None:
        super().__init__()
        self.epsilon = epsilon

    def _function(self, input: ArrayLike) -> np.ndarray:
        # avoid mutating the caller's array; scale only the negative part
        return np.where(input > 0., input, input * self.epsilon)

    def _derivative(self) -> np.ndarray:
        return np.where(self.input > 0, 1, self.epsilon)


class Tanh(Activation):
    """
    The hyperbolic tangent (tanh) activation function.

    This activation maps input values to the range (-1, 1). It is commonly
    used in neural networks because it introduces non-linearity while
    remaining differentiable.
    """

    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        return np.tanh(input)

    def _derivative(self) -> np.ndarray:
        return 1 - np.square(self.activation)


class Sigmoid(Activation):
    """
    Sigmoid activation function, defined as f(x) = 1 / (1 + e^-x).
    """

    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        return 1 / (1 + np.exp(-input))

    def _derivative(self) -> np.ndarray:
        return (1 - self.activation) * self.activation


class SoftMax(Activation):
    """
    Softmax activation function.

    Softmax normalizes the output of a neural network into a probability
    distribution over the classes in the output layer. It is commonly used in
    multi-class classification tasks.
    """

    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        # subtract the row-wise maximum for numerical stability
        input = input - np.max(input, axis=1, keepdims=True)
        output = np.exp(input)
        return output / np.sum(output, axis=1, keepdims=True)

    def _derivative(self) -> np.ndarray:
        # diagonal of the softmax Jacobian; cross-class terms are ignored
        return self.activation * (1 - self.activation)


class SoftPlus(Activation):
    """
    The SoftPlus activation function, defined as f(x) = log(1 + e^x).

    This function is used to introduce non-linearity to a neural network's
    output.
    """

    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        return np.log(1. + np.exp(input))

    def _derivative(self) -> np.ndarray:
        # the derivative of softplus is the sigmoid of the input
        output = np.exp(self.input)
        return output / (1. + output)


class SoftSign(Activation):
    """
    SoftSign activation function.

    SoftSign maps the input to the range (-1, 1), making it useful in neural
    networks where it is important to limit the range of activations to avoid
    saturation.
    """

    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        return input / (np.abs(input) + 1.)

    def _derivative(self) -> np.ndarray:
        output = np.abs(self.input) + 1.
        return 1. / (output ** 2)


class Identity(Activation):
    """
    The identity activation function.

    The identity function simply returns its input without any
    transformation. It is often used as the activation function for the
    output layer of a neural network when the task involves regression,
    i.e., predicting a continuous output value.
    """

    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        return input

    def _derivative(self) -> np.ndarray:
        return np.ones_like(self.input)