Commit 5419dd21 authored by johannes bilk

stuff about regularization

parent 17066a1e
Part of 1 merge request: !8 removed from SOMs training method one argument, which I just added only to...
import numpy as np
from .layer import Layer
from .weights import Weights


class Hopfield(Layer):
    # stub: the Hopfield layer is only declared here, nothing is implemented yet
    def __init__(self, args):
        super().__init__()
\ No newline at end of file
@@ -9,53 +9,69 @@ def checkDims(input: np.ndarray) -> None:
    assert batchsize > 0 and numFeatures > 0, "All dimensions should be greater than 0"


# removed: the previous Linear layer implementation
class Linear(Layer):
    """
    linear, dense or mlp layer, multiplies a weight matrix and adds a bias
    """
    __slots__ = ['inputSize', 'outputSize', 'input', 'weights', 'bias']

    def __init__(self, inputSize: int, outputSize: int, weights: np.ndarray = None, bias: np.ndarray = None) -> None:
        super().__init__()
        self.inputSize = inputSize
        self.outputSize = outputSize
        self.weights = Weights((inputSize, outputSize), values=weights)
        self.bias = Weights((1, outputSize), values=bias)

    def params(self) -> list[Weights]:
        """
        returns weights and bias in a python list, called by optimizers
        """
        return [self.weights, self.bias]

    def forward(self, input: np.ndarray) -> np.ndarray:
        """
        forward pass of the linear layer
        """
        self.input = input
        checkDims(input)
        output = np.matmul(self.input, self.weights.values)
        if self.bias is not False:
            output += self.bias.values
        return output

    def backward(self, gradient: np.ndarray) -> np.ndarray:
        """
        backward pass of the linear layer
        """
        self.weights.deltas = np.matmul(self.input.T, gradient)
        if self.bias is not False:
            self.bias.deltas = np.sum(gradient, axis=0, keepdims=True)
        return np.matmul(gradient, self.weights.values.T)

    def __str__(self) -> str:
        printString = self.name
        printString += ' input size: ' + str(self.inputSize)
        printString += ' output size: ' + str(self.outputSize)
        return printString


# added: the new abstract Layer base class
from abc import ABC, abstractmethod
import numpy as np
from numpy.typing import ArrayLike
from .weights import Weights


class Layer(ABC):
    """
    this is an abstract class and can only be used indirectly through inherited classes
    """
    __slots__ = ['name', 'mode', 'layerID']
    id = 0

    def __init__(self) -> None:
        self.name = self.__class__.__name__
        self.mode = ''
        self.layerID = Layer.id
        Layer.id += 1

    @property
    def qualifiedName(self) -> tuple:
        return self.__class__.__module__, self.__class__.__name__

    @abstractmethod
    def forward(self, input: ArrayLike) -> np.ndarray:
        """
        it's an abstract method, thus forcing the coder to implement it in daughter classes
        """
        pass

    def __call__(self, *args: ArrayLike) -> np.ndarray:
        """
        this is used to make layers behave more like functions
        """
        return self.forward(*args)

    @abstractmethod
    def backward(self, gradient: ArrayLike) -> np.ndarray:
        """
        it's an abstract method, thus forcing the coder to implement it in daughter classes
        """
        pass

    def train(self) -> None:
        """
        used to put the layer into training mode, meaning it unfreezes the parameters
        """
        self.mode = 'train'

    def eval(self) -> None:
        """
        used to put the layer into evaluation mode, meaning it freezes the parameters
        """
        self.mode = 'eval'

    def __str__(self) -> str:
        """
        used for printing the layer in a human readable manner
        """
        return self.name


class Flatten(Layer):
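With Layer now an abstract base class, a concrete layer only has to implement forward and backward; the name, the layerID, train/eval mode and the function-style __call__ all come from the base class. Below is a minimal sketch of such a subclass; the Scale layer and its factor parameter are invented here purely for illustration and assume the file sits in the same package as the Layer class above.

import numpy as np
from numpy.typing import ArrayLike
from .layer import Layer


class Scale(Layer):
    """illustrative layer: multiplies its input by a fixed factor"""
    def __init__(self, factor: float = 2.0) -> None:
        super().__init__()  # sets name, mode and layerID through the base class
        self.factor = factor

    def forward(self, input: ArrayLike) -> np.ndarray:
        # elementwise scaling of the input
        return np.asarray(input) * self.factor

    def backward(self, gradient: ArrayLike) -> np.ndarray:
        # chain rule: the local derivative is just the constant factor
        return np.asarray(gradient) * self.factor


# usage: __call__ dispatches to forward(), so the layer behaves like a function
# layer = Scale(3.0)
# output = layer(np.ones((2, 4)))
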
import numpy as np
from numpy.typing import ArrayLike
from .layer import Layer


class L1Norm(Layer):
    """
    A basic norming layer, doesn't learn anything or affect
    the learning process beyond norming inputs
    """
    def __init__(self, axis=None, epsilon: float = 1e-8) -> None:
        super().__init__()
        self.axis = axis  # Axis or axes along which to compute the L1 norm
        self.scales = None
        self.epsilon = epsilon  # A small value to avoid division by zero

    def forward(self, input: ArrayLike) -> ArrayLike:
        # Compute L1 norm (sum of absolute values) along the specified axis
        norm = np.abs(input).sum(axis=self.axis, keepdims=True)
        # Compute reciprocal of L1 norm
        norm = 1. / (norm + self.epsilon)
        # Normalize the input
        output = input * norm
        # Compute scale factors (sign of the output)
        self.scales = -np.sign(output)
        # Initialize gradient to zero array of same shape as output
        self.gradient = np.zeros_like(output, dtype=float)
        return output

    def backward(self, gradient: ArrayLike) -> ArrayLike:
        # Add scales to the gradient
        self.gradient += self.scales
        # Add gradient to the input gradient
        gradient[:] += self.gradient
        return gradient


class L2Norm(Layer):
    """
    A basic norming layer, doesn't learn anything or affect
    the learning process beyond norming inputs
    """
    def __init__(self, axis=None, epsilon: float = 1e-8) -> None:
        super().__init__()
        self.axis = axis  # Axis or axes along which to compute the L2 norm
        self.scales = None
        self.epsilon = epsilon  # A small value to avoid division by zero

    def forward(self, input: ArrayLike) -> ArrayLike:
        # Compute L2 norm (square root of sum of squares) along the specified axis
        norm = (input * input).sum(axis=self.axis, keepdims=True)
        # Compute reciprocal of L2 norm
        norm = 1. / np.sqrt(norm + self.epsilon)
        # Normalize the input
        output = input * norm
        # Compute scale factors
        self.scales = (1. - output) * norm
        # Initialize gradient to zero array of same shape as output
        self.gradient = np.zeros_like(output, dtype=float)
        return output

    def backward(self, gradient: ArrayLike) -> ArrayLike:
        # Add scales to the gradient
        self.gradient += self.scales
        # Add gradient to the input gradient
        gradient[:] += self.gradient
        return gradient
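
The forward passes above divide the input by its L1 norm, x / (Σ|x| + ε), or by its L2 norm, x / √(Σx² + ε), along the chosen axis. A small usage sketch, assuming the two classes above live in a norm module of this package:

import numpy as np
from .norm import L1Norm, L2Norm  # assumed module name for the classes above

x = np.array([[1.0, -2.0, 3.0],
              [0.5,  0.5, 1.0]])

l1 = L1Norm(axis=1)  # normalize every row by its sum of absolute values
l2 = L2Norm(axis=1)  # normalize every row by its Euclidean length

y1 = l1(x)  # absolute values of each row now sum to roughly 1
y2 = l2(x)  # each row now has roughly unit Euclidean norm

print(np.abs(y1).sum(axis=1))          # ~ [1. 1.]
print(np.sqrt((y2 * y2).sum(axis=1)))  # ~ [1. 1.]
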
import numpy as np
from numpy.typing import ArrayLike
from abc import abstractmethod
from .layer import Layer
from .module import Module
from .weights import Weights


# removed: the old L1Norm and L2Norm layers, which now live in their own file (see above)
class L1Norm(Layer):
    # Parameters
    # ‾‾‾‾‾‾‾‾‾‾
    # Attributes
    # ‾‾‾‾‾‾‾‾‾‾
    # Methods
    # ‾‾‾‾‾‾‾
    def __init__(self, axis=None, epsilon: float = 1e-8) -> None:
        super().__init__()
        self.axis = axis
        self.scales = None
        self.epsilon = epsilon

    def forward(self, input: ArrayLike) -> ArrayLike:
        # Parameters
        # ‾‾‾‾‾‾‾‾‾‾
        # Returns
        # ‾‾‾‾‾‾‾
        norm = np.abs(input).sum(axis=self.axis, keepdims=True)
        norm = 1. / (norm + self.epsilon)
        output = input * norm
        self.scales = -np.sign(output)
        self.gradient = np.zeros_like(output, dtype=float)
        return output

    def backward(self, gradient: ArrayLike) -> ArrayLike:
        # Parameters
        # ‾‾‾‾‾‾‾‾‾‾
        # Returns
        # ‾‾‾‾‾‾‾
        self.gradient += self.scales
        gradient[:] += self.gradient
        return gradient


class L2Norm(Layer):
    # Parameters
    # ‾‾‾‾‾‾‾‾‾‾
    # Attributes
    # ‾‾‾‾‾‾‾‾‾‾
    # Methods
    # ‾‾‾‾‾‾‾
    def __init__(self, axis=None, epsilon: float = 1e-8) -> None:
        super().__init__()
        self.axis = axis
        self.scales = None
        self.epsilon = epsilon

    def forward(self, input: ArrayLike) -> ArrayLike:
        # Parameters
        # ‾‾‾‾‾‾‾‾‾‾
        # Returns
        # ‾‾‾‾‾‾‾
        norm = (input * input).sum(axis=self.axis, keepdims=True)
        norm = 1. / np.sqrt(norm + self.epsilon)
        output = input * norm
        self.scales = (1. - output) * norm
        self.gradient = np.zeros_like(output, dtype=float)
        return output

    def backward(self, gradient: ArrayLike) -> ArrayLike:
        # Parameters
        # ‾‾‾‾‾‾‾‾‾‾
        # Returns
        # ‾‾‾‾‾‾‾
        self.gradient += self.scales
        gradient[:] += self.gradient
        return gradient


# added: the new regularization layers
class Regularization(Layer):
    def __init__(self, layers: list | Module, Lambda: float) -> None:
        self.name = self.__class__.__name__
        self.Lambda = Lambda
        self.params = []
        for layer in layers:
            try:
                params = layer.params()
                for param in params:
                    self.params.append(param)
            except AttributeError:
                # 'params' method not found in the layer, skip it
                continue

    def forward(self, input: np.ndarray) -> np.ndarray:
        return input

    @abstractmethod
    def backward(self, gradient: np.ndarray) -> np.ndarray:
        pass


class L1Regularization(Regularization):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def backward(self, gradient: np.ndarray) -> np.ndarray:
        # Compute regularization gradients and add them to the existing gradients
        for param in self.params:
            gradient += self.Lambda * np.sign(param.values)
        return gradient


class L2Regularization(Regularization):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def backward(self, gradient: np.ndarray) -> np.ndarray:
        # Compute regularization gradients and add them to the existing gradients
        for param in self.params:
            gradient += self.Lambda * 2 * param.values
        return gradient
\ No newline at end of file
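
For reference, the two backward methods implement the usual penalty gradients: an L1 penalty λ·Σ|w| contributes λ·sign(w) per weight, and an L2 penalty λ·Σw² contributes 2λ·w. A hedged usage sketch follows; ToyParam and ToyLayer are stand-ins invented here only to show the interface the Regularization constructor relies on (a params() method returning objects with a values array), and the module name is assumed.

import numpy as np
from .regularization import L1Regularization, L2Regularization  # assumed module name


class ToyParam:
    """stand-in for Weights: only exposes .values, which backward() reads"""
    def __init__(self, values: np.ndarray) -> None:
        self.values = values


class ToyLayer:
    """stand-in layer exposing params(), the only method Regularization calls"""
    def __init__(self) -> None:
        self.weight = ToyParam(np.array([[0.5, -1.0], [2.0, 0.0]]))

    def params(self) -> list:
        return [self.weight]


layer = ToyLayer()
l1 = L1Regularization([layer], Lambda=0.01)
l2 = L2Regularization([layer], Lambda=0.01)

grad = np.zeros((2, 2))
print(l1.backward(grad.copy()))  # adds 0.01 * sign(w):  [[ 0.01 -0.01] [ 0.01  0.  ]]
print(l2.backward(grad.copy()))  # adds 0.01 * 2 * w:    [[ 0.01 -0.02] [ 0.04  0.  ]]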