Skip to content
Snippets Groups Projects
convolution2D.py 6.58 KiB
Newer Older
  • Learn to ignore specific revisions
  • johannes bilk's avatar
    johannes bilk committed
    import numpy as np
    from .layer import Layer
    from .weights import Weights
    
    
    def assignParameter(parameter: int | tuple) -> tuple:
        """
        this checks wether a parameter is a tuple or int and returns a tuple
        """
        if isinstance(parameter, (int, float)):
            if float(parameter).is_integer():
                return (int(parameter), int(parameter))
            else:
                raise ValueError('the parameter should be a whole number')
        return parameter
    
    
    def getWindows(input: np.ndarray, kernelSize: tuple[int, int], outputSize: tuple[int, int], padding: tuple[int, int] = (0,0), stride: tuple[int, int] = (1,1), dilate: tuple[int, int] = (0,0)) -> np.ndarray:
        """
        creates windows of input for convolution and pooling layer
        this function is needed to avoid loops

        input:      tensor of shape (batch, channels, height, width)
        kernelSize: (height, width) of each window
        outputSize: leading dimensions of the returned view, typically
                    (batch, channels, outHeight, outWidth)
        padding:    zeros added around each spatial axis before windowing
        stride:     step between neighbouring windows per spatial axis
        dilate:     number of zeros inserted between neighbouring elements
                    per spatial axis (used by the backward pass to undo
                    striding)

        returns a read-through view of shape (*outputSize, kernelSize[0], kernelSize[1])
        """

        # getting shape parameters
        batchSize, channels, height, width = input.shape

        # dilate the input if necessary: insert `dilate[i]` zeros between
        # neighbouring elements; inserting a single zero (as the previous
        # version did) is only correct for dilate == 1 and silently produced
        # an undersized array (out-of-bounds strided reads) for dilate >= 2
        if dilate[0] != 0:
            input = np.insert(input, np.repeat(np.arange(1, height), dilate[0]), 0, axis=2)
        if dilate[1] != 0:
            input = np.insert(input, np.repeat(np.arange(1, width), dilate[1]), 0, axis=3)

        # pad the input if necessary
        if padding[0] != 0 or padding[1] != 0:
            input = np.pad(input, pad_width=((0,), (0,), (padding[0],), (padding[1],)), mode='constant', constant_values=(0.,))

        # getting the strides of the (dilated, padded) input
        batchStrides, channelStrides, kernelHeightStrides, kernelWidthStrides = input.strides
        striding = (batchStrides, channelStrides, stride[0] * kernelHeightStrides, stride[1] * kernelWidthStrides, kernelHeightStrides, kernelWidthStrides)

        # returning the windows; note that as_strided performs no bounds
        # checking, so outputSize must agree with the padded/dilated size
        return np.lib.stride_tricks.as_strided(input, (*outputSize, kernelSize[0], kernelSize[1]), striding)
    
    
    def checkDims(input: np.ndarray) -> None:
        """
        Checks that the input tensor has the correct shape.

        The tensor must be 4-dimensional, (batch, channels, height, width),
        with every dimension non-empty. Raises ValueError on violation.
        Previously this used `assert`, which is stripped when Python runs
        with -O and therefore silently disabled the validation.
        """

        # Check that the input tensor has 4 dimensions.
        if input.ndim != 4:
            raise ValueError(f"Input tensor should have 4 dimensions, got {input.ndim}")

        # Get the size of each dimension.
        batchsize, channels, height, width = input.shape

        # Check that all dimensions have a size greater than 0.
        if not (batchsize > 0 and channels > 0 and height > 0 and width > 0):
            raise ValueError("All dimensions should be greater than 0")
    
    
    class Convolution2D(Layer):
        """
        An implementation of the convolutional layer. The input is convolved
        with outChannels different filters and each filter spans all
        inChannels channels of the input.
        """
        __slots__ = ['inChannels', 'outChannels', 'kernelSize', 'stride', 'padding', 'windows', 'input', 'weights', 'bias']

        def __init__(self, inChannels: int, outChannels: int, kernelSize: tuple = (3,3), padding: tuple = (0,0), stride: tuple = (1,1), weights: Weights = None, bias: Weights = None) -> None:
            super().__init__()
            self.inChannels = inChannels
            self.outChannels = outChannels

            # spatial hyper parameters, normalized to (height, width) tuples
            self.kernelSize = assignParameter(kernelSize)
            self.stride = assignParameter(stride)
            self.padding = assignParameter(padding)

            # learnable parameters
            self.weights = Weights((self.outChannels, self.inChannels, self.kernelSize[0], self.kernelSize[1]), values=weights)
            self.bias = Weights(self.outChannels, init='zeros', values=bias)

        def params(self) -> list[Weights]:
            """
            returns weights and bias in a python list, called by optimizers
            """
            return [self.weights, self.bias]

        def forward(self, input: np.ndarray) -> np.ndarray:
            """
            The forward pass of convolution

            input: (batch, inChannels, height, width)
            returns: (batch, outChannels, outHeight, outWidth)
            """
            self.input = input
            checkDims(input)

            batchSize, channels, height, width = input.shape
            # integer arithmetic instead of float division + int() truncation
            outHeight = (height + 2 * self.padding[0] - self.kernelSize[0]) // self.stride[0] + 1
            outWidth = (width + 2 * self.padding[1] - self.kernelSize[1]) // self.stride[1] + 1
            outputSize = (batchSize, channels, outHeight, outWidth)

            # windows: (batch, inChannels, outHeight, outWidth, kH, kW),
            # kept for the weight-gradient computation in backward()
            self.windows = getWindows(input, self.kernelSize, outputSize, self.padding, self.stride)
            output = np.einsum('bihwkl,oikl->bohw', self.windows, self.weights.values) + self.bias.values[None, :, None, None]
            return output

        def backward(self, gradient: np.ndarray) -> np.ndarray:
            """
            The backward pass of convolution

            gradient: upstream gradient, (batch, outChannels, outHeight, outWidth)
            returns:  gradient w.r.t. the input, same shape as self.input
            """
            # the input gradient is a "full" convolution of the (dilated)
            # upstream gradient with the 180-degree rotated kernel, which
            # requires a padding of kernel - 1 - padding per axis; the old
            # code compared the padding tuple against the int 0 (always
            # False, and `tuple - 1` would have raised anyway), so the
            # unpadded case never received the required kernel - 1 padding
            fullPadding = (self.kernelSize[0] - 1 - self.padding[0], self.kernelSize[1] - 1 - self.padding[1])

            # the channel dimension of the gradient windows is outChannels;
            # sizing it from self.input.shape (as before) breaks whenever
            # inChannels != outChannels
            windowSize = (gradient.shape[0], gradient.shape[1], self.input.shape[2], self.input.shape[3])
            gradientWindows = getWindows(gradient, self.kernelSize, windowSize, padding=fullPadding, stride=(1,1), dilate=(self.stride[0] - 1, self.stride[1] - 1))
            rotatedKernel = np.rot90(self.weights.values, 2, axes=(2, 3))

            # parameter gradients, consumed by optimizers via params()
            self.weights.deltas = np.einsum('bihwkl,bohw->oikl', self.windows, gradient)
            self.bias.deltas = np.sum(gradient, axis=(0, 2, 3))

            # gradient with respect to the layer input
            return np.einsum('bohwkl,oikl->bihw', gradientWindows, rotatedKernel)

        def __str__(self) -> str:
            """
            used for print the layer in a human readable manner
            """
            printString = self.name
            printString += '    input channels: ' + str(self.inChannels)
            printString += '    output channels: ' + str(self.outChannels)
            printString += '    kernel size: ' + str(self.kernelSize)
            printString += '    padding: ' + str(self.padding)
            printString += '    stride: ' + str(self.stride)
            return printString
    
    
    class Unsqueeze(Layer):
        """
        this layer type exists because I was too lazy adding/removing .reshape
        to inputs, depending if there is a convolution or not as the first layer
        it reshapes the input according to user specification
        if no channel information is given, the class assumes 1 channel
        """
        __slots__ = ['inputShape', 'orginialShape']

        def __init__(self, inputShape: tuple[int, int, int]) -> None:
            super().__init__()

            # testing if inputShape provides (channels, height, width);
            # isinstance (instead of an exact type() check) also accepts
            # tuple subclasses such as named tuples
            if not isinstance(inputShape, tuple):
                raise TypeError('input shape should be a tuple')
            if len(inputShape) == 2:
                # only (height, width) given - assume a single channel
                inputShape = (1, *inputShape)
            elif len(inputShape) != 3:
                raise ValueError('input shape not corresponding to (channels, height, width)')

            # class attributes ('orginialShape' spelling kept for compatibility);
            # orginialShape is captured lazily on the first forward pass
            self.inputShape = inputShape
            self.orginialShape = None

        def forward(self, input: np.ndarray) -> np.ndarray:
            """
            Reshapes input into an acceptable shape for convolutions
            """
            # remember the incoming per-sample shape once, so that
            # backward() can restore it for the upstream gradient
            if self.orginialShape is None:
                self.orginialShape = input.shape[1:]
            return input.reshape(-1, *self.inputShape)

        def backward(self, gradient: np.ndarray) -> np.ndarray:
            """
            Reshapes the upstream gradient into original shape
            """
            return gradient.reshape(-1, *self.orginialShape)