import numpy as np from .layer import Layer from .weights import Weights def assignParameter(parameter: int | tuple) -> tuple: """ this checks wether a parameter is a tuple or int and returns a tuple """ if isinstance(parameter, (int, float)): if float(parameter).is_integer(): return (int(parameter), int(parameter)) else: raise ValueError('the parameter should be a whole number') return parameter def getWindows(input: np.ndarray, kernelSize: tuple[int, int], outputSize: tuple[int, int], padding: tuple[int, int] = (0,0), stride: tuple[int, int] = (1,1), dilate: tuple[int, int] = (0,0)) -> np.ndarray: """ creates windows of input for convolution and pooling layer this function is needed to avoid loops """ # getting shape parameters batchSize, channels, height, width = input.shape # dilate the input if necessary if dilate[0] != 0: input = np.insert(input, range(1, height), 0, axis=2) if dilate[1] != 0: input = np.insert(input, range(1, width), 0, axis=3) # pad the input if necessary if padding[0] != 0 or padding[1] != 0: input = np.pad(input, pad_width=((0,), (0,), (padding[0],), (padding[1],)), mode='constant', constant_values=(0.,)) # getting the strides of input batchStrides, channelStrides, kernelHeightStrides, kernelWidthStrides = input.strides striding = (batchStrides, channelStrides, stride[0] * kernelHeightStrides, stride[1] * kernelWidthStrides, kernelHeightStrides, kernelWidthStrides) # returning the windows return np.lib.stride_tricks.as_strided(input, (*outputSize, kernelSize[0], kernelSize[1]), striding) def checkDims(input: np.ndarray) -> None: """ Checks that the input tensor has the correct shape. """ # Check that the input tensor has 4 dimensions. assert input.ndim == 4, f"Input tensor should have 4 dimensions, got {input.ndim}" # Get the size of each dimension. batchsize, channels, height, width = input.shape # Check that all dimensions have a size greater than 0. assert batchsize > 0 and channels > 0 and height > 0 and width > 0, "All dimensions should be greater than 0" class Convolution2D(Layer): """ An implementation of the convolutional layer. We convolve the input with out_channels different filters and each filter spans all channels in the input. """ __slots__ = ['inChannels', 'outChannels', 'kernelSize', 'stride', 'padding', 'windows', 'input', 'weights', 'bias'] def __init__(self, inChannels: int, outChannels: int, kernelSize: tuple = (3,3), padding: tuple = (0,0), stride: tuple = (1,1), weights: Weights = None, bias: Weights = None) -> None: super().__init__() self.inChannels = inChannels self.outChannels = outChannels self.kernelSize = assignParameter(kernelSize) self.stride = assignParameter(stride) self.padding = assignParameter(padding) # learnable parameters self.weights = Weights((self.outChannels, self.inChannels, self.kernelSize[0], self.kernelSize[1]), values=weights) self.bias = Weights(self.outChannels, init='zeros', values=bias) def params(self) -> tuple[Weights, Weights]: """ returns weights and bias in a python list, called by optimizers """ return [self.weights, self.bias] def forward(self, input: np.ndarray) -> np.ndarray: """ The forward pass of convolution """ self.input = input checkDims(input) batchSize, channels, height, width = input.shape outHeight = (height - self.kernelSize[0] + 2 * self.padding[0]) / self.stride[0] + 1 outWidth = (width - self.kernelSize[1] + 2 * self.padding[1]) / self.stride[1] + 1 outputSize = (batchSize, channels, int(outHeight), int(outWidth)) self.windows = getWindows(input, self.kernelSize, outputSize, self.padding, self.stride) output = np.einsum('bihwkl,oikl->bohw', self.windows, self.weights.values) + self.bias.values[None, :, None, None] return output def backward(self, gradient: np.ndarray) -> np.ndarray: """ The backward pass of convolution """ padding = self.kernelSize - 1 if self.padding == 0 else self.padding gradientWindows = getWindows(gradient, self.kernelSize, self.input.shape, padding=padding, stride=(1,1), dilate=(self.stride[0] - 1, self.stride[1] - 1)) rotatedKernel = np.rot90(self.weights.values, 2, axes=(2, 3)) self.weights.deltas = np.einsum('bihwkl,bohw->oikl', self.windows, gradient) self.bias.deltas = np.sum(gradient, axis=(0, 2, 3)) return np.einsum('bohwkl,oikl->bihw', gradientWindows, rotatedKernel) def __str__(self) -> str: """ used for print the layer in a human readable manner """ printString = self.name printString += ' input channels: ' + str(self.inChannels) printString += ' output channels: ' + str(self.outChannels) printString += ' kernel size: ' + str(self.kernelSize) printString += ' padding: ' + str(self.padding) printString += ' stride: ' + str(self.stride) return printString class Unsqueeze(Layer): """ this layer type exists because I was too lazy adding/removing .reshape to inputs, depending if there is a convolution or not as the first layer it reshapes the input according to user specification if no channel information is given, the class assumes 1 channel """ __slots__ = ['inputShape', 'orginialShape'] def __init__(self, inputShape: tuple[int, int, int]) -> None: super().__init__() # testing if inputShape provides (channels, height, width) if type(inputShape) is not tuple: raise TypeError('input shape should be a tuple') if len(inputShape) == 2: inputShape = (1, *inputShape) elif len(inputShape) < 2 or len(inputShape) > 3: raise ValueError('input shape not corrisponding to (channels, height, width)') # class attributes self.inputShape = inputShape self.orginialShape = None def forward(self, input: np.ndarray) -> np.ndarray: """ Reshapes input into an acceptable shape for convolutions """ if self.orginialShape is None: self.orginialShape = input.shape[1:] return input.reshape(-1, *self.inputShape) def backward(self, gradient: np.ndarray) -> np.ndarray: """ Reshapes the upstream gradient into original shape """ return gradient.reshape(-1, *self.orginialShape)