Skip to content
Snippets Groups Projects
pxd.py 25.6 KiB
Newer Older
  • Learn to ignore specific revisions
  • import numpy as np
    from numpy.typing import ArrayLike
    from uproot import TTree
    from ..common import FancyDict
    from concurrent.futures import ThreadPoolExecutor
    from .pxdFilter import FindUnselectedClusters
    
    
    
    class PXD(FancyDict):
        def __init__(self, data: dict = None) -> None:
            self.name = 'pxd'
    
            # list of pxd panels
            self.panels = [[[-0.89 ,  0.36 ,  0.36 , -0.89 , -0.89 ], [ 1.4  ,  1.4  ,  1.4  ,  1.4  ,  1.4  ], [-3.12, -3.12, 5.92, 5.92, -3.12]],      # 00
                              [[ 1.25 ,  0.365,  0.365,  1.25 ,  1.25 ], [ 0.72 ,  1.615,  1.615,  0.72 ,  0.72 ], [-3.12, -3.12, 5.92, 5.92, -3.12]],      # 01
                              [[ 1.4  ,   1.4 ,  1.4  ,  1.4  ,  1.4  ], [-0.36 ,  0.89 ,  0.89 , -0.36 , -0.36 ], [-3.12, -3.12, 5.92, 5.92, -3.12]],      # 02
                              [[ 0.72 ,  1.615,  1.615,  0.72 ,  0.72 ], [-1.25 , -0.365, -0.365, -1.25 , -1.25 ], [-3.12, -3.12, 5.92, 5.92, -3.12]],      # 03
                              [[ 0.89 , -0.36 , -0.36 ,  0.89 ,  0.89 ], [-1.4  , -1.4  , -1.4  , -1.4  , -1.4  ], [-3.12, -3.12, 5.92, 5.92, -3.12]],      # 04
                              [[-1.25 , -0.365, -0.365, -1.25 , -1.25 ], [-0.72 , -1.615, -1.615, -0.72 , -0.72 ], [-3.12, -3.12, 5.92, 5.92, -3.12]],      # 05
                              [[-1.4  , -1.4  , -1.4  , -1.4  , -1.4  ], [ 0.36 , -0.89 , -0.89 ,  0.36 ,  0.36 ], [-3.12, -3.12, 5.92, 5.92, -3.12]],      # 06
                              [[-0.72 , -1.615, -1.615, -0.72 , -0.72 ], [ 1.25 ,  0.365,  0.365,  1.25 ,  1.25 ], [-3.12, -3.12, 5.92, 5.92, -3.12]],      # 07
                              [[-0.89 ,  0.36 ,  0.36 , -0.89 , -0.89 ], [ 2.2  ,  2.2  ,  2.2  ,  2.2  ,  2.2  ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 08
                              [[ 0.345,  1.4  ,  1.4  ,  0.345,  0.345], [ 2.35 ,  1.725,  1.725,  2.35 ,  2.35 ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 09
                              [[ 1.48 ,  2.1  ,  2.1  ,  1.48 ,  1.48 ], [ 1.85 ,  0.78 ,  0.78 ,  1.85 ,  1.85 ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 10
                              [[ 2.2  ,  2.2  ,  2.2  ,  2.2  ,  2.2  ], [ 0.89 , -0.36 , -0.36 ,  0.89 ,  0.89 ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 11
                              [[ 2.35 ,  1.725,  1.725,  2.35 ,  2.35 ], [-0.345, -1.4  , -1.4  , -0.345, -0.345], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 12
                              [[ 1.85 ,  0.78 ,  0.78 ,  1.85 ,  1.85 ], [-1.48 , -2.1  , -2.1  , -1.48 , -1.48 ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 13
                              [[ 0.89 , -0.36 , -0.36 ,  0.89 ,  0.89 ], [-2.2  , -2.2  , -2.2  , -2.2  , -2.2  ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 14
                              [[-0.345, -1.4  , -1.4  , -0.345, -0.345], [-2.35 , -1.725, -1.725, -2.35 , -2.35 ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 15
                              [[-1.48 , -2.1  , -2.1  , -1.48 , -1.48 ], [-1.85 , -0.78 , -0.78 , -1.85 , -1.85 ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 16
                              [[-2.2  , -2.2  , -2.2  , -2.2  , -2.2  ], [-0.89 ,  0.36 ,  0.36 , -0.89 , -0.89 ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 17
                              [[-2.35 , -1.725, -1.725, -2.35 , -2.35 ], [ 0.345,  1.4  ,  1.4  ,  0.345,  0.345], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 18
                              [[-1.85 , -0.78 , -0.78 , -1.85 , -1.85 ], [ 1.48 ,  2.1  ,  2.1  ,  1.48 ,  1.48 ], [-4.28, -4.28, 8.08, 8.08, -4.28]]]      # 19
    
            # these are the branch names for cluster info in the root file
            self.clusters = ['PXDClusters/PXDClusters.m_clsCharge',
                             'PXDClusters/PXDClusters.m_seedCharge',
                             'PXDClusters/PXDClusters.m_clsSize',
                             'PXDClusters/PXDClusters.m_uSize',
                             'PXDClusters/PXDClusters.m_vSize',
                             'PXDClusters/PXDClusters.m_uPosition',
                             'PXDClusters/PXDClusters.m_vPosition',
                             'PXDClusters/PXDClusters.m_sensorID']
    
            # these are the branch names for cluster digits in the root file
            self.digits = ['PXDDigits/PXDDigits.m_uCellID',
                           'PXDDigits/PXDDigits.m_vCellID',
                           'PXDDigits/PXDDigits.m_charge']
    
            # this establishes the relationship between clusters and digits
            # because for some reaseon the branch for digits has a different
            # size than the cluster branch
            self.clusterToDigis = 'PXDClustersToPXDDigits/m_elements/m_elements.m_to'
    
            # these are the branch names for monte carlo data in the root file
            self.mcData = ['MCParticles/MCParticles.m_pdg',
                           'MCParticles/MCParticles.m_momentum_x',
                           'MCParticles/MCParticles.m_momentum_y',
                           'MCParticles/MCParticles.m_momentum_z']
    
            # these two establish the relation ship to an from clusters and monte carlo
            # there more entries than in the cluster data, but there still mc data missing
            # for some cluster files
            self.clusterToMC = 'PXDClustersToMCParticles/m_elements/m_elements.m_to'
            self.mcToCluster = 'PXDClustersToMCParticles/m_elements/m_elements.m_from'
    
            # these are the sensor IDs of the pxd modules/panels from the root file, they are
            # use to identify on which panels a cluster event happened
            self.panelIDs = np.array([ 8480,  8512,  8736,  8768,  8992,  9024,  9248,  9280,
                                  9504,  9536,  9760,  9792, 10016, 10048, 10272, 10304,
                                 16672, 16704, 16928, 16960, 17184, 17216, 17440, 17472,
                                 17696, 17728, 17952, 17984, 18208, 18240, 18464, 18496,
                                 18720, 18752, 18976, 19008, 19232, 19264, 19488, 19520])
    
            # every line in this corresponds to one entry in the array above, this is used
            # to put the projected uv plane in the right position
            self.panelShifts = np.array([[1.3985    ,  0.2652658 ,  3.68255],
                                   [ 1.3985    ,  0.23238491, -0.88255],
                                   [ 0.80146531,  1.17631236,  3.68255],
                                   [ 0.82407264,  1.15370502, -0.88255],
                                   [-0.2582769 ,  1.3985    ,  3.68255],
                                   [-0.2322286 ,  1.3985    , -0.88255],
                                   [-1.17531186,  0.80246583, 3.68255 ],
                                   [-1.15510614,  0.82267151, -0.88255],
                                   [-1.3985    , -0.2645974 ,  3.68255],
                                   [-1.3985    , -0.23012119, -0.88255],
                                   [-0.80591227, -1.17186534,  3.68255],
                                   [-0.82344228, -1.15433536, -0.88255],
                                   [ 0.26975836, -1.3985    ,  3.68255],
                                   [ 0.23326624, -1.3985    , -0.88255],
                                   [ 1.1746111 , -0.80316652,  3.68255],
                                   [ 1.15205703, -0.82572062, -0.88255],
                                   [ 2.2015    ,  0.26959865,  5.01305],
                                   [ 2.2015    ,  0.2524582 , -1.21305],
                                   [ 1.77559093,  1.32758398,  5.01305],
                                   [ 1.78212569,  1.31626522, -1.21305],
                                   [ 0.87798948,  2.03516717,  5.01305],
                                   [ 0.88478563,  2.03124357, -1.21305],
                                   [-0.26129975,  2.2015    ,  5.01305],
                                   [-0.25184137,  2.2015    , -1.21305],
                                   [-1.32416655,  1.77756402,  5.01305],
                                   [-1.31417539,  1.78333226, -1.21305],
                                   [-2.03421133,  0.87964512,  5.01305],
                                   [-2.02960691,  0.88762038, -1.21305],
                                   [-2.2015    , -0.25954151,  5.01305],
                                   [-2.2015    , -0.24969109, -1.21305],
                                   [-1.77636043, -1.32625112,  5.01305],
                                   [-1.78138268, -1.31755219, -1.21305],
                                   [-0.87493138, -2.03693277, 5.01305 ],
                                   [-0.8912978 , -2.02748378, -1.21305],
                                   [ 0.26489725, -2.2015    ,  5.01305],
                                   [ 0.25364439, -2.2015    , -1.21305],
                                   [ 1.3269198 , -1.7759744 ,  5.01305],
                                   [ 1.32258793, -1.77847528, -1.21305],
                                   [ 2.03616649, -0.87625871,  5.01305],
                                   [ 2.02936825, -0.8880338 , -1.21305]])
    
            # every entry here corresponds to the entries in the array above, these are
            # used for rotating the projected uv plane
            self.panelRotations = np.array([ 90,  90, 135, 135, 180, 180, 225, 225, 270, 270, 315, 315, 360,
                                       360, 405, 405,  90,  90, 120, 120, 150, 150, 180, 180, 210, 210,
                                       240, 240, 270, 270, 300, 300, 330, 330, 360, 360, 390, 390, 420,
                                       420])
    
            # the layer and ladder arrays, for finding them from sensor id
            self.panelLayer = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
            self.panelLadder = np.array([1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21])
    
    
            # all transpormaations are stored in a dict, with the sensor id as a keyword
            self.transformation = {}
            self.layersLadders = {}
            for i in range(len(self.panelIDs)):
                self.transformation[str(self.panelIDs[i])] = [self.panelShifts[i], self.panelRotations[i]]
                self.layersLadders[str(self.panelIDs[i])] = [self.panelLayer[i], self.panelLadder[i]]
    
            # parameter for checking if coordinates have been loaded
            self.gotClusters = False
            self.gotCoordinates = False
            self.gotSphericals = False
            self.gotLayers = False
            self.gotDigits = False
            self.gotMatrices = False
            self.gotMCData = False
            self.gotFiltered = False
    
            # this dict stores the data
            self.data = data if data is not None else {}
    
            # inorder to find roi unselected clusters
    
            self.findUnselectedClusters = FindUnselectedClusters()
    
    johannes bilk's avatar
    johannes bilk committed
            self.includeUnSelected = False
    
    
        def getClusters(self, eventTree: TTree, includeUnSelected: bool = False) -> None:
            """
            this uses the array from __init__ to load different branches into the data dict
            """
            self.gotClusters = True
    
    johannes bilk's avatar
    johannes bilk committed
            self.includeUnSelected = includeUnSelected
    
            for branch in self.clusters:
                data = self._getData(eventTree, branch)
                keyword = branch.split('_')[-1]
                self.data[keyword] = data
            self.data['roiSelected'] = np.array([True] * len(self.data['clsCharge']))
    
            clusters = eventTree.arrays('PXDClusters/PXDClusters.m_clsCharge', library='np')['PXDClusters/PXDClusters.m_clsCharge']
            self._getEventNumbers(clusters)
    
            if includeUnSelected:
                unselectedClusters = self.findUnselectedClusters.getClusters(eventTree)
                for key in unselectedClusters:
                    self.data[key] = np.concatenate((self.data[key], unselectedClusters[key]))
    
            self.data['detector'] = np.array(['pxd'] * len(self.data['clsCharge']))
    
        def _getEventNumbers(self, clusters: np.ndarray, offset: int = 0) -> None:
            """
            this generates event numbers from the structure of pxd clusters
            """
            eventNumbers = []
            for i in range(len(clusters)):
                eventNumbers.append(np.array([i]*len(clusters[i])) + offset)
            self.data['eventNumber'] = np.hstack(eventNumbers)
    
        def _getData(self, eventTree: TTree, keyword: str, library: str = 'np') -> np.ndarray:
            """
            a private method for converting branches into something useful, namely
            into numpy arrays, if the keyward library is set to np.
            keyword: str = the full branch name
            library: str = can be 'np' (numpy), 'pd' (pandas) or 'ak' (akward)
                           see uproot documentation for more info
            """
            try:
                data = eventTree.arrays(keyword, library=library)[keyword]
                return np.hstack(data)
            except:
                return KeyError
    
    
        def getDigits(self, eventTree: TTree) -> None:
    
            """
            reorganizes digits, so that they fit to the clusters
            """
            uCellIDs = eventTree.arrays(self.digits[0], library='np')[self.digits[0]]
            vCellIDs = eventTree.arrays(self.digits[1], library='np')[self.digits[1]]
            cellCharges = eventTree.arrays(self.digits[2], library='np')[self.digits[2]]
    
            # this establishes the relation between digits and clusters, it's still
            # shocking to me, that this is necessary, why aren't digits stored in the
            # same way as clusters, than one wouldn't need to jump through hoops just
            # to have the data in a usable und sensible manner
            # root is such a retarded file format
            clusterDigits = eventTree.arrays(self.clusterToDigis, library='np')[self.clusterToDigis]
    
            self.data['uCellIDs'] = []
            self.data['vCellIDs'] = []
            self.data['cellCharges'] = []
            for event in range(len(clusterDigits)):
                for cls in clusterDigits[event]:
                    self.data['uCellIDs'].append(uCellIDs[event][cls])
                    self.data['vCellIDs'].append(vCellIDs[event][cls])
                    self.data['cellCharges'].append(cellCharges[event][cls])
    
            self.data['uCellIDs'] = np.array(self.data['uCellIDs'], dtype=object)
            self.data['vCellIDs'] = np.array(self.data['vCellIDs'], dtype=object)
            self.data['cellCharges'] = np.array(self.data['cellCharges'], dtype=object)
    
    
            if self.includeUnSelected:
    
                unselectedClusters = self.findUnselectedClusters.getDigits(eventTree)
                for key in unselectedClusters:
                    self.data[key] = np.concatenate((self.data[key], unselectedClusters[key]))
    
            self.gotDigits = True
    
        def getMatrices(self, eventTree: TTree, matrixSize: tuple = (9, 9)) -> None:
            """
            Loads the digit branches into arrays and converts them into adc matrices
            """
            popDigits = False
            if self.gotDigits is False:
                self.getDigits(eventTree)
                popDigits = True
    
            uCellIDs = self.data['uCellIDs']
            vCellIDs = self.data['vCellIDs']
            cellCharges = self.data['cellCharges']
    
            indexChunks = np.array_split(range(len(cellCharges)), 4)
    
            with ThreadPoolExecutor(max_workers=None) as executor:
                futures = [executor.submit(self._getMatrices, chunk, uCellIDs, vCellIDs, cellCharges, matrixSize) for chunk in indexChunks]
                results = [future.result() for future in futures]
    
            # Combine the results from all chunks
            self.data['matrix'] = np.concatenate(results).astype('int')
            if popDigits is True:
                self.data.pop('uCellIDs')
                self.data.pop('vCellIDs')
                self.data.pop('cellCharges')
                self.gotDigits = False
            self.gotMatrices = True
    
        @staticmethod
        def _getMatrices(indexChunks: ArrayLike, uCellIDs: ArrayLike, vCellIDs: ArrayLike, cellCharges: ArrayLike, matrixSize: tuple = (9, 9)) -> np.ndarray:
            """
            this takes the ragged/jagged digit arrays and converts them into 9x9 matrices
            it's a rather slow process because of all the looping
            """
            plotRange = np.array(matrixSize) // 2
            events = []
    
            for event in indexChunks:
                # Since uCellIDs, vCellIDs, and cellCharges are now directly associated with clusters,
                # we don't need digitIndices or maxChargeIndex
                digitsU, digitsV, digitsCharge = np.array(uCellIDs[event]), np.array(vCellIDs[event]), np.array(cellCharges[event])
                adcValues = []
    
                # Find the center of the cluster (digit with the max charge)
                uMax, vMax = digitsU[digitsCharge.argmax()], digitsV[digitsCharge.argmax()]
                uPos, vPos = digitsU - uMax + plotRange[0], digitsV - vMax + plotRange[1]
    
                valid_indices = (uPos >= 0) & (uPos < matrixSize[0]) & (vPos >= 0) & (vPos < matrixSize[1])
    
                cacheImg = np.zeros(matrixSize)
                cacheImg[uPos[valid_indices].astype(int), vPos[valid_indices].astype(int)] = digitsCharge[valid_indices]
                adcValues.append(cacheImg)
    
                events.extend(adcValues)
    
            return np.array(events, dtype=object)
    
        def getCoordinates(self, eventTree: TTree) -> None:
            """
            converting the uv coordinates, together with sensor ids, into xyz coordinates
            """
            # checking if cluster parameters have been loaded
            if self.gotClusters is False:
                self.getClusters(eventTree)
    
            # setting a bool for checking if coordinates were calculated
            self.gotCoordinates = True
    
            indexChunnks = np.array_split(range(len(self.data['sensorID'])), 4)
    
            with ThreadPoolExecutor(max_workers=None) as executor:
                futures = [executor.submit(self._getCoordinates, self.data['uPosition'][chunk], self.data['vPosition'][chunk], self.data['sensorID'][chunk]) for chunk in indexChunnks]
    
                xResults, yResults, zResults = [], [], []
                for future in futures:
                    x, y, z = future.result()
                    xResults.append(x)
                    yResults.append(y)
                    zResults.append(z)
    
                self.data['xPosition'] = np.concatenate(xResults)
                self.data['yPosition'] = np.concatenate(yResults)
                self.data['zPosition'] = np.concatenate(zResults)
            self.gotCoordinates = True
    
        def _getCoordinates(self, uPositions: ArrayLike, vPositions: ArrayLike, sensorIDs: ArrayLike) -> tuple[np.ndarray]:
            """
            a private method for transposing/converting 2d uv coords into 3d xyz coordinates
            """
            length = len(sensorIDs)
            xArr, yArr, zArr = np.zeros(length), np.zeros(length), np.zeros(length)
    
            # iterting over the cluster arrays
            for index, (u, v, sensor_id) in enumerate(zip(uPositions, vPositions, sensorIDs)):
                # grabbing the shift vector and rotation angle
                shift, angle = self.transformation[str(sensor_id)]
    
                # setting up rotation matrix
                theta = np.deg2rad(angle)
                rotMatrix = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]])
    
                # projecting uv coordinates into 3d space
                point = np.array([u, 0, v])
    
                # shifting and rotating the projected vector
                shifted = rotMatrix.dot(point) + shift
                xArr[index], yArr[index], zArr[index] = shifted
    
            return xArr, yArr, zArr
    
        def getSphericals(self) -> None:
            """
            Calculate spherical coordinates for each cluster.
            """
            # Checking if coordinates have been loaded
            popCoords = False
            if self.gotCoordinates is False:
                self.getCoordinates()
                popCoords = True
    
            xPosition = self.data['xPosition']
            yPosition = self.data['yPosition']
            zPosition = self.data['zPosition']
    
            r, theta, phi = self._calcSphericals(xPosition, yPosition, zPosition)
    
            self.data['rPosition'] = r
            self.data['thetaPosition'] = theta
            self.data['phiPosition'] = phi
            self.gotSphericals = True
            if popCoords:
                self.data.pop('xPosition')
                self.data.pop('yPosition')
                self.data.pop('zPosition')
                self.gotCoordinates = False
    
        @staticmethod
        def _calcSphericals(xPosition: np.ndarray, yPosition: np.ndarray, zPosition: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
            xSquare = np.square(xPosition)
            ySquare = np.square(yPosition)
            zSquare = np.square(zPosition)
    
            # Avoid division by zero by replacing zeros with a small number
            r = np.sqrt(xSquare + ySquare + zSquare)
            rSafe = np.where(r == 0, 1e-10, r)
    
            theta = np.arccos(zPosition / rSafe)
            phi = np.arctan2(yPosition, xPosition)
    
            return r, theta, phi
    
        def getLayers(self, eventTree: TTree) -> None:
            """
            looks up the corresponding layers and ladders for every cluster
            """
            if self.gotClusters is False:
                self.getClusters(eventTree)
    
            layers, ladders = [], []
            for id in self.data['sensorID']:
                layer, ladder = self.layersLadders[str(id)]
                layers.append(layer)
                ladders.append(ladder)
    
            self.data['layer'] = np.array(layers, dtype=int)
            self.data['ladder'] = np.array(ladders, dtype=int)
            self.gotLayers = True
    
        def getMCData(self, eventTree: TTree) -> None:
            """
            this loads the monte carlo from the root file
            """
    
    johannes bilk's avatar
    johannes bilk committed
            if self.includeUnSelected:
    
                warnings.warn('mc data are not supported on roi unselected data')
    
    
            # the monte carlo data, they are longer than the cluster data
            pdg = eventTree.arrays(self.mcData[0], library='np')[self.mcData[0]]
            momentumX = eventTree.arrays(self.mcData[1], library='np')[self.mcData[1]]
            momentumY = eventTree.arrays(self.mcData[2], library='np')[self.mcData[2]]
            momentumZ = eventTree.arrays(self.mcData[3], library='np')[self.mcData[3]]
    
            # this loads the relation ships to and from clusters and mc data
            # this is the same level of retardedness as with the cluster digits
            clusterToMC = eventTree.arrays(self.clusterToMC, library='np')[self.clusterToMC]
            mcToCluster = eventTree.arrays(self.mcToCluster, library='np')[self.mcToCluster]
    
            # it need the cluster charge as a jagged/ragged array, maybe I could simply
            # use the event numbers, but I am too tired to fix this shitty file format
            clsCharge = eventTree.arrays('PXDClusters/PXDClusters.m_clsCharge', library='np')['PXDClusters/PXDClusters.m_clsCharge']
    
            # reorganizing MC data
            momentumXList = []
            momentumYList = []
            momentumZList = []
            pdgList = []
            clusterNumbersList = []
            for i in range(len(clusterToMC)):
                # _fillMCList fills in the missing spots, because there are not mc data for
                # every cluster, even though there are more entries in this branch than
                # in the cluster branch... as I said, the root format is retarded
                fullClusterReferences = self._fillMCList(mcToCluster[i], clusterToMC[i], len(clsCharge[i]))
                clusterNumbersList.append(fullClusterReferences)
                pdgs, xmom, ymom, zmom = self._getMCData(fullClusterReferences, pdg[i], momentumX[i], momentumY[i], momentumZ[i])
                momentumXList.append(xmom)
                momentumYList.append(ymom)
                momentumZList.append(zmom)
                pdgList.append(pdgs)
    
            self.data['momentumX'] = np.hstack(momentumXList).astype(float)
            self.data['momentumY'] = np.hstack(momentumYList).astype(float)
            self.data['momentumZ'] = np.hstack(momentumZList).astype(float)
            self.data['pdg'] = np.hstack(pdgList).astype(int)
            self.data['clsNumber'] = np.hstack(clusterNumbersList).astype(int)
            self.gotMCData = True
    
    
            if self.includeUnSelected:
                sampleSize = np.sum(self.data['roiSelected'] == False)
                missingMCData = self.findUnselectedClusters.fillMCData({
                        'momentumX': self.data['momentumX'],
                        'momentumY': self.data['momentumY'],
                        'momentumZ': self.data['momentumZ'],
                        'pdg': self.data['pdg'],
                        'clsNumber': self.data['clsNumber']
                    })
                for key in missingMCData:
                    self.data[key] = np.hstack((self.data[key], missingMCData[key][0:sampleSize]))
    
    
    
        @staticmethod
        def _findMissing(lst: list, length: int) -> list:
            """
            a private method for finding missing elements in mc data arrays
            """
            return sorted(set(range(0, length)) - set(lst))
    
        def _fillMCList(self, fromClusters: ArrayLike, toClusters: ArrayLike, length: ArrayLike) -> list:
            """
            a private method for filling MC data arrays where clusters don't have
            any information
            """
            missingIndex = self._findMissing(fromClusters, length)
            testList = [-1] * length
            fillIndex = 0
            for i in range(len(testList)):
                if i in missingIndex:
                    testList[i] = -1
                else:
                    try:
                        testList[i] = int(toClusters[fillIndex])
                    except TypeError:
                        testList[i] = int(toClusters[fillIndex][0])
                    fillIndex += 1
            return testList
    
        @staticmethod
        def _getMCData(toClusters: ArrayLike, pdgs: ArrayLike, xMom: ArrayLike, yMom: ArrayLike, zMom: ArrayLike) -> tuple[np.ndarray]:
            """
            after filling and reorganizing MC data arrays one can finally collect the
            actual MC data, where there's data missing I will with zeros
            """
            pxList, pyList, pzList = [], [], []
            pdgList = []
            for references in toClusters:
                if references == -1:
                    pxList.append(0)
                    pyList.append(0)
                    pzList.append(0)
                    pdgList.append(0)
                else:
                    pxList.append(xMom[references])
                    pyList.append(yMom[references])
                    pzList.append(zMom[references])
                    pdgList.append(pdgs[references])
            return np.array(pdgList,dtype=list), np.array(pxList,dtype=list), np.array(pyList,dtype=list), np.array(pzList,dtype=list)