import numpy as np
from .layer import Layer
from abc import abstractmethod
from numpy.typing import ArrayLike


class Activation(Layer):
    """
    Base class for activation functions.

    It is an abstract class and should never be instantiated directly; concrete
    activations subclass it and implement `_function` and `_derivative`.
    """
    __slots__ = ['input', 'activation']

    def __init__(self) -> None:
        super().__init__()

    def forward(self, input: ArrayLike) -> np.ndarray:
        """
        Applies the activation function, introducing non-linearity to the network.
        """
        # store the input as an array so the derivatives can operate on it
        self.input = np.asarray(input)
        self.activation = self._function(self.input)
        return self.activation

    def backward(self, gradient: ArrayLike) -> np.ndarray:
        """
        Propagates the upstream gradient through the activation by multiplying
        it with the local derivative.
        """
        return self._derivative() * gradient

    @abstractmethod
    def _function(self, input: ArrayLike) -> np.ndarray:
        """
        Abstract method; must be implemented by each subclass.
        """
        pass

    @abstractmethod
    def _derivative(self) -> np.ndarray:
        """
        Abstract method; must be implemented by each subclass.
        """
        pass


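# Illustrative sketch, not part of the original module: a hypothetical custom
# activation showing how the Activation base class above is meant to be
# extended. Only `_function` and `_derivative` need to be provided;
# `forward`/`backward` are inherited. The name `Swish` and its formula
# f(x) = x * sigmoid(x) are assumptions made for this example.
class Swish(Activation):
    __slots__ = []

    def _function(self, input: ArrayLike) -> np.ndarray:
        input = np.asarray(input)
        # x * sigmoid(x), written as x / (1 + e^(-x))
        return input / (1. + np.exp(-input))

    def _derivative(self) -> np.ndarray:
        sig = 1. / (1. + np.exp(-self.input))
        # d/dx [x * sigmoid(x)] = sigmoid(x) + x * sigmoid(x) * (1 - sigmoid(x))
        return sig + self.input * sig * (1. - sig)

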
class Relu(Activation):
    """
    Rectified Linear Unit (ReLU) activation function.

    ReLU is a commonly used activation function in neural networks, defined as
    f(x) = max(0, x). It tends to perform well in deep models because it produces
    sparse representations and mitigates the vanishing-gradient problem.
    """
    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        return np.maximum(0.0, input)

    def _derivative(self) -> np.ndarray:
        return np.where(self.input > 0, 1, 0)


class Elu(Activation):
    """
    Exponential Linear Unit (ELU) activation function.

    Defined as f(x) = x for x > 0 and f(x) = alpha * (exp(x) - 1) otherwise,
    where `alpha` scales the negative saturation value.
    """
    __slots__ = ['alpha']

    def __init__(self, alpha: float = 1.0) -> None:
        super().__init__()
        self.alpha = alpha

    def _function(self, input: ArrayLike) -> np.ndarray:
        return np.where(input <= 0., self.alpha * (np.exp(input) - 1.), input)

    def _derivative(self) -> np.ndarray:
        return np.where(self.input > 0, 1, self.alpha * np.exp(self.input))


class LeakyRelu(Activation):
    """
    Leaky ReLU activation function.

    Like ReLU, but with a small configurable slope `epsilon` on the negative
    side, so gradients do not vanish entirely for negative inputs.
    """
    __slots__ = ['epsilon']

    def __init__(self, epsilon: float = 1e-1) -> None:
        super().__init__()
        self.epsilon = epsilon

    def _function(self, input: ArrayLike) -> np.ndarray:
        input = np.asarray(input)
        # np.where avoids mutating the caller's array in place
        return np.where(input > 0., input, input * self.epsilon)

    def _derivative(self) -> np.ndarray:
        return np.where(self.input > 0, 1, self.epsilon)


class Tanh(Activation):
    """
    The hyperbolic tangent (tanh) activation function.

    This activation function maps input values to the range (-1, 1). It is
    commonly used in neural networks due to its ability to introduce
    non-linearity while still being differentiable.
    """
    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        return np.tanh(input)

    def _derivative(self) -> np.ndarray:
        return 1 - np.square(self.activation)


class Sigmoid(Activation):
    """
    Sigmoid activation function, mapping inputs to the range (0, 1).
    """
    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        return 1 / (1 + np.exp(-input))

    def _derivative(self) -> np.ndarray:
        return (1 - self.activation) * self.activation


class SoftMax(Activation):
    """
    Softmax activation function.

    Softmax normalizes the output of a neural network to a probability
    distribution over the classes in the output layer. It is commonly used in
    multi-class classification tasks. Inputs are expected to be batched, with
    classes along axis 1.
    """
    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        # subtract the per-row maximum for numerical stability
        input = input - np.max(input, axis=1, keepdims=True)
        output = np.exp(input)
        return output / np.sum(output, axis=1, keepdims=True)

    def _derivative(self) -> np.ndarray:
        # element-wise form: only the diagonal of the softmax Jacobian;
        # off-diagonal interaction terms are ignored
        return self.activation * (1 - self.activation)
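

# Hedged sketch, an addition rather than original code: the `_derivative`
# above keeps only the diagonal of the softmax Jacobian. The variant below
# overrides `backward` to propagate through the full Jacobian, using the
# row-wise Jacobian-vector product s * (g - sum(g * s)) for s = softmax(x)
# and upstream gradient g.
class SoftMaxFullJacobian(SoftMax):
    __slots__ = []

    def backward(self, gradient: ArrayLike) -> np.ndarray:
        gradient = np.asarray(gradient)
        inner = np.sum(gradient * self.activation, axis=1, keepdims=True)
        return self.activation * (gradient - inner)
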

class SoftPlus(Activation):
    """
    SoftPlus activation function, defined as log(1 + e^x).

    A smooth approximation of ReLU that introduces non-linearity while keeping
    the output strictly positive.
    """
    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        # logaddexp(0, x) == log(1 + e^x), but does not overflow for large x
        return np.logaddexp(0., input)

    def _derivative(self) -> np.ndarray:
        # d/dx log(1 + e^x) = sigmoid(x)
        return 1. / (1. + np.exp(-self.input))


class SoftSign(Activation):
    """
    SoftSign activation function.

    Maps the input to the range (-1, 1), which is useful when activations
    should be bounded to avoid saturation-related issues.
    """
    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        return input / (np.abs(input) + 1.)

    def _derivative(self) -> np.ndarray:
        output = np.abs(self.input) + 1.
        return 1. / (output ** 2)


class Identity(Activation):
    """
    The identity activation function.

    Returns its input unchanged. It is often used as the activation of the
    output layer in regression tasks, where a continuous value is predicted.
    """
    __slots__ = []

    def __init__(self) -> None:
        super().__init__()

    def _function(self, input: ArrayLike) -> np.ndarray:
        return input

    def _derivative(self) -> np.ndarray:
        return np.ones_like(self.input)
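

# Hedged usage sketch, not part of the original module: a finite-difference
# check that each element-wise activation's `backward` matches the numerical
# gradient of its `forward`. Because of the relative import at the top, run
# it as a module (e.g. `python -m <package>.<module>`); the tolerance and the
# set of activations checked here are arbitrary choices for illustration.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    x = rng.normal(size=(4, 5))
    upstream = np.ones_like(x)
    eps = 1e-6

    for act in (Relu(), Elu(), LeakyRelu(), Tanh(), Sigmoid(),
                SoftPlus(), SoftSign(), Identity()):
        act.forward(x)
        analytic = act.backward(upstream)

        # numerical gradient of sum(forward(x)) with respect to each entry of x
        numeric = np.zeros_like(x)
        for idx in np.ndindex(x.shape):
            bumped = x.copy()
            bumped[idx] += eps
            plus = np.sum(act.forward(bumped))
            bumped[idx] -= 2 * eps
            minus = np.sum(act.forward(bumped))
            numeric[idx] = (plus - minus) / (2 * eps)

        print(act.__class__.__name__, np.allclose(analytic, numeric, atol=1e-4))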