pxd.py

import numpy as np
from numpy.typing import ArrayLike
from uproot import TTree
from ..common import FancyDict
from concurrent.futures import ThreadPoolExecutor
from .pxdFilter import FindUnselectedClusters
import warnings


class PXD(FancyDict):
    def __init__(self, data: dict = None) -> None:
        self.name = 'pxd'

        # list of pxd panels
        self.panels = [[[-0.89 ,  0.36 ,  0.36 , -0.89 , -0.89 ], [ 1.4  ,  1.4  ,  1.4  ,  1.4  ,  1.4  ], [-3.12, -3.12, 5.92, 5.92, -3.12]],      # 00
                          [[ 1.25 ,  0.365,  0.365,  1.25 ,  1.25 ], [ 0.72 ,  1.615,  1.615,  0.72 ,  0.72 ], [-3.12, -3.12, 5.92, 5.92, -3.12]],      # 01
                          [[ 1.4  ,   1.4 ,  1.4  ,  1.4  ,  1.4  ], [-0.36 ,  0.89 ,  0.89 , -0.36 , -0.36 ], [-3.12, -3.12, 5.92, 5.92, -3.12]],      # 02
                          [[ 0.72 ,  1.615,  1.615,  0.72 ,  0.72 ], [-1.25 , -0.365, -0.365, -1.25 , -1.25 ], [-3.12, -3.12, 5.92, 5.92, -3.12]],      # 03
                          [[ 0.89 , -0.36 , -0.36 ,  0.89 ,  0.89 ], [-1.4  , -1.4  , -1.4  , -1.4  , -1.4  ], [-3.12, -3.12, 5.92, 5.92, -3.12]],      # 04
                          [[-1.25 , -0.365, -0.365, -1.25 , -1.25 ], [-0.72 , -1.615, -1.615, -0.72 , -0.72 ], [-3.12, -3.12, 5.92, 5.92, -3.12]],      # 05
                          [[-1.4  , -1.4  , -1.4  , -1.4  , -1.4  ], [ 0.36 , -0.89 , -0.89 ,  0.36 ,  0.36 ], [-3.12, -3.12, 5.92, 5.92, -3.12]],      # 06
                          [[-0.72 , -1.615, -1.615, -0.72 , -0.72 ], [ 1.25 ,  0.365,  0.365,  1.25 ,  1.25 ], [-3.12, -3.12, 5.92, 5.92, -3.12]],      # 07
                          [[-0.89 ,  0.36 ,  0.36 , -0.89 , -0.89 ], [ 2.2  ,  2.2  ,  2.2  ,  2.2  ,  2.2  ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 08
                          [[ 0.345,  1.4  ,  1.4  ,  0.345,  0.345], [ 2.35 ,  1.725,  1.725,  2.35 ,  2.35 ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 09
                          [[ 1.48 ,  2.1  ,  2.1  ,  1.48 ,  1.48 ], [ 1.85 ,  0.78 ,  0.78 ,  1.85 ,  1.85 ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 10
                          [[ 2.2  ,  2.2  ,  2.2  ,  2.2  ,  2.2  ], [ 0.89 , -0.36 , -0.36 ,  0.89 ,  0.89 ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 11
                          [[ 2.35 ,  1.725,  1.725,  2.35 ,  2.35 ], [-0.345, -1.4  , -1.4  , -0.345, -0.345], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 12
                          [[ 1.85 ,  0.78 ,  0.78 ,  1.85 ,  1.85 ], [-1.48 , -2.1  , -2.1  , -1.48 , -1.48 ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 13
                          [[ 0.89 , -0.36 , -0.36 ,  0.89 ,  0.89 ], [-2.2  , -2.2  , -2.2  , -2.2  , -2.2  ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 14
                          [[-0.345, -1.4  , -1.4  , -0.345, -0.345], [-2.35 , -1.725, -1.725, -2.35 , -2.35 ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 15
                          [[-1.48 , -2.1  , -2.1  , -1.48 , -1.48 ], [-1.85 , -0.78 , -0.78 , -1.85 , -1.85 ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 16
                          [[-2.2  , -2.2  , -2.2  , -2.2  , -2.2  ], [-0.89 ,  0.36 ,  0.36 , -0.89 , -0.89 ], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 17
                          [[-2.35 , -1.725, -1.725, -2.35 , -2.35 ], [ 0.345,  1.4  ,  1.4  ,  0.345,  0.345], [-4.28, -4.28, 8.08, 8.08, -4.28]],      # 18
                          [[-1.85 , -0.78 , -0.78 , -1.85 , -1.85 ], [ 1.48 ,  2.1  ,  2.1  ,  1.48 ,  1.48 ], [-4.28, -4.28, 8.08, 8.08, -4.28]]]      # 19

        # these are the branch names for cluster info in the root file
        self.clusters = ['PXDClusters/PXDClusters.m_clsCharge',
                         'PXDClusters/PXDClusters.m_seedCharge',
                         'PXDClusters/PXDClusters.m_clsSize',
                         'PXDClusters/PXDClusters.m_uSize',
                         'PXDClusters/PXDClusters.m_vSize',
                         'PXDClusters/PXDClusters.m_uPosition',
                         'PXDClusters/PXDClusters.m_vPosition',
                         'PXDClusters/PXDClusters.m_sensorID']

        # these are the branch names for cluster digits in the root file
        self.digits = ['PXDDigits/PXDDigits.m_uCellID',
                       'PXDDigits/PXDDigits.m_vCellID',
                       'PXDDigits/PXDDigits.m_charge']

        # this establishes the relationship between clusters and digits
        # because for some reaseon the branch for digits has a different
        # size than the cluster branch
        self.clusterToDigis = 'PXDClustersToPXDDigits/m_elements/m_elements.m_to'

        # these are the branch names for monte carlo data in the root file
        self.mcData = ['MCParticles/MCParticles.m_pdg',
                       'MCParticles/MCParticles.m_momentum_x',
                       'MCParticles/MCParticles.m_momentum_y',
                       'MCParticles/MCParticles.m_momentum_z']

        # these two establish the relation ship to an from clusters and monte carlo
        # there more entries than in the cluster data, but there still mc data missing
        # for some cluster files
        self.clusterToMC = 'PXDClustersToMCParticles/m_elements/m_elements.m_to'
        self.mcToCluster = 'PXDClustersToMCParticles/m_elements/m_elements.m_from'

        # these are the sensor IDs of the pxd modules/panels from the root file, they are
        # use to identify on which panels a cluster event happened
        self.panelIDs = np.array([ 8480,  8512,  8736,  8768,  8992,  9024,  9248,  9280,
                              9504,  9536,  9760,  9792, 10016, 10048, 10272, 10304,
                             16672, 16704, 16928, 16960, 17184, 17216, 17440, 17472,
                             17696, 17728, 17952, 17984, 18208, 18240, 18464, 18496,
                             18720, 18752, 18976, 19008, 19232, 19264, 19488, 19520])

        # every line in this corresponds to one entry in the array above, this is used
        # to put the projected uv plane in the right position
        self.panelShifts = np.array([[1.3985    ,  0.2652658 ,  3.68255],
                               [ 1.3985    ,  0.23238491, -0.88255],
                               [ 0.80146531,  1.17631236,  3.68255],
                               [ 0.82407264,  1.15370502, -0.88255],
                               [-0.2582769 ,  1.3985    ,  3.68255],
                               [-0.2322286 ,  1.3985    , -0.88255],
                               [-1.17531186,  0.80246583, 3.68255 ],
                               [-1.15510614,  0.82267151, -0.88255],
                               [-1.3985    , -0.2645974 ,  3.68255],
                               [-1.3985    , -0.23012119, -0.88255],
                               [-0.80591227, -1.17186534,  3.68255],
                               [-0.82344228, -1.15433536, -0.88255],
                               [ 0.26975836, -1.3985    ,  3.68255],
                               [ 0.23326624, -1.3985    , -0.88255],
                               [ 1.1746111 , -0.80316652,  3.68255],
                               [ 1.15205703, -0.82572062, -0.88255],
                               [ 2.2015    ,  0.26959865,  5.01305],
                               [ 2.2015    ,  0.2524582 , -1.21305],
                               [ 1.77559093,  1.32758398,  5.01305],
                               [ 1.78212569,  1.31626522, -1.21305],
                               [ 0.87798948,  2.03516717,  5.01305],
                               [ 0.88478563,  2.03124357, -1.21305],
                               [-0.26129975,  2.2015    ,  5.01305],
                               [-0.25184137,  2.2015    , -1.21305],
                               [-1.32416655,  1.77756402,  5.01305],
                               [-1.31417539,  1.78333226, -1.21305],
                               [-2.03421133,  0.87964512,  5.01305],
                               [-2.02960691,  0.88762038, -1.21305],
                               [-2.2015    , -0.25954151,  5.01305],
                               [-2.2015    , -0.24969109, -1.21305],
                               [-1.77636043, -1.32625112,  5.01305],
                               [-1.78138268, -1.31755219, -1.21305],
                               [-0.87493138, -2.03693277, 5.01305 ],
                               [-0.8912978 , -2.02748378, -1.21305],
                               [ 0.26489725, -2.2015    ,  5.01305],
                               [ 0.25364439, -2.2015    , -1.21305],
                               [ 1.3269198 , -1.7759744 ,  5.01305],
                               [ 1.32258793, -1.77847528, -1.21305],
                               [ 2.03616649, -0.87625871,  5.01305],
                               [ 2.02936825, -0.8880338 , -1.21305]])

        # every entry here corresponds to the entries in the array above, these are
        # used for rotating the projected uv plane
        self.panelRotations = np.array([ 90,  90, 135, 135, 180, 180, 225, 225, 270, 270, 315, 315, 360,
                                   360, 405, 405,  90,  90, 120, 120, 150, 150, 180, 180, 210, 210,
                                   240, 240, 270, 270, 300, 300, 330, 330, 360, 360, 390, 390, 420,
                                   420])

        # the layer and ladder arrays, for finding them from sensor id
        self.panelLayer = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
        self.panelLadder = np.array([1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21])


        # all transpormaations are stored in a dict, with the sensor id as a keyword
        self.transformation = {}
        self.layersLadders = {}
        for i in range(len(self.panelIDs)):
            self.transformation[str(self.panelIDs[i])] = [self.panelShifts[i], self.panelRotations[i]]
            self.layersLadders[str(self.panelIDs[i])] = [self.panelLayer[i], self.panelLadder[i]]

        # parameter for checking if coordinates have been loaded
        self.gotClusters = False
        self.gotCoordinates = False
        self.gotSphericals = False
        self.gotLayers = False
        self.gotDigits = False
        self.gotMatrices = False
        self.gotMCData = False
        self.gotFiltered = False

        # this dict stores the data
        self.data = data if data is not None else {}

        # inorder to find roi unselected clusters
        self.findUnselectedClusters = FindUnselectedClusters()
        self.includeUnSelected = False

    def getClusters(self, eventTree: TTree, fileName: str = None) -> None:
        """
        this uses the array from __init__ to load different branches into the data dict
        """
        self.gotClusters = True
        for branch in self.clusters:
            data = self._getData(eventTree, branch)
            keyword = branch.split('_')[-1]
            self.set(keyword, data)
            #self.data[keyword] = data

        self.set('roiSelected', np.array([True] * len(data)))
        self.set('fileName', np.array([fileName] * len(data)))
        #self.data['roiSelected'] = np.array([True] * self.numClusters)
        #self.data['fileName'] = np.array([fileName] * self.numClusters)

        clusters = eventTree.arrays('PXDClusters/PXDClusters.m_clsCharge', library='np')['PXDClusters/PXDClusters.m_clsCharge']
        self._getEventNumbers(clusters)

        if self.includeUnSelected:
            unselectedClusters = self.findUnselectedClusters.getClusters(eventTree)
            self.extend(unselectedClusters)
            #for key in unselectedClusters:
            #    self.data[key] = np.concatenate((self.data[key], unselectedClusters[key]))

        self.set('detector', np.array(['pxd'] * len(data)))
        #self.data['detector'] = np.array(['pxd'] * self.numClusters)

    def _getEventNumbers(self, clusters: np.ndarray, offset: int = 0) -> None:
        """
        this generates event numbers from the structure of pxd clusters
        """
        eventNumbers = []
        for i in range(len(clusters)):
            eventNumbers.append(np.array([i]*len(clusters[i])) + offset)
        self.set('eventNumber', np.concatenate(eventNumbers))
        #self.data['eventNumber'] = np.hstack(eventNumbers)

    def _getData(self, eventTree: TTree, keyword: str, library: str = 'np') -> np.ndarray:
        """
        a private method for converting branches into something useful, namely
        into numpy arrays, if the keyward library is set to np.
        keyword: str = the full branch name
        library: str = can be 'np' (numpy), 'pd' (pandas) or 'ak' (akward)
                       see uproot documentation for more info
        """
        try:
            data = eventTree.arrays(keyword, library=library)[keyword]
            return np.hstack(data)
        except:
            return KeyError

    def getDigits(self, eventTree: TTree) -> None:
        """
        reorganizes digits, so that they fit to the clusters
        """
        uCellIDs = eventTree.arrays(self.digits[0], library='np')[self.digits[0]]
        vCellIDs = eventTree.arrays(self.digits[1], library='np')[self.digits[1]]
        cellCharges = eventTree.arrays(self.digits[2], library='np')[self.digits[2]]

        # this establishes the relation between digits and clusters, it's still
        # shocking to me, that this is necessary, why aren't digits stored in the
        # same way as clusters, than one wouldn't need to jump through hoops just
        # to have the data in a usable und sensible manner
        # root is such a retarded file format
        clusterDigits = eventTree.arrays(self.clusterToDigis, library='np')[self.clusterToDigis]

        uCellIDsTemp = []
        vCellIDsTemp = []
        cellChargesTemp = []
        for event in range(len(clusterDigits)):
            for cls in clusterDigits[event]:
                uCellIDsTemp.append(uCellIDs[event][cls])
                vCellIDsTemp.append(vCellIDs[event][cls])
                cellChargesTemp.append(cellCharges[event][cls])

        self.set('uCellIDs', np.array(uCellIDsTemp, dtype=object))
        self.set('vCellIDs', np.array(vCellIDsTemp, dtype=object))
        self.set('cellCharges', np.array(cellChargesTemp, dtype=object))

        #self.data['uCellIDs'] = np.array(uCellIDsTemp, dtype=object)
        #self.data['vCellIDs'] = np.array(vCellIDsTemp, dtype=object)
        #self.data['cellCharges'] = np.array(cellChargesTemp, dtype=object)

        if self.includeUnSelected:
            unselectedClusters = self.findUnselectedClusters.getDigits(eventTree)
            self.extend(unselectedClusters)

        self.gotDigits = True

    def getMatrices(self, eventTree: TTree, matrixSize: tuple = (9, 9)) -> None:
        """
        Loads the digit branches into arrays and converts them into adc matrices
        """
        popDigits = False
        if self.gotDigits is False:
            self.getDigits(eventTree)
            popDigits = True

        uCellIDs = self.data['uCellIDs']
        vCellIDs = self.data['vCellIDs']
        cellCharges = self.data['cellCharges']

        indexChunks = np.array_split(range(len(cellCharges)), 4)

        with ThreadPoolExecutor(max_workers=None) as executor:
            futures = [executor.submit(self._getMatrices, chunk, uCellIDs, vCellIDs, cellCharges, matrixSize) for chunk in indexChunks]
            results = [future.result() for future in futures]

        # Combine the results from all chunks
        self.set('matrix', np.concatenate(results).astype('int'))
        #self.data['matrix'] = np.concatenate(results).astype('int')
        if popDigits is True:
            self.data.pop('uCellIDs')
            self.data.pop('vCellIDs')
            self.data.pop('cellCharges')
            self.gotDigits = False
        self.gotMatrices = True

    @staticmethod
    def _getMatrices(indexChunks: ArrayLike, uCellIDs: ArrayLike, vCellIDs: ArrayLike, cellCharges: ArrayLike, matrixSize: tuple = (9, 9)) -> np.ndarray:
        """
        this takes the ragged/jagged digit arrays and converts them into 9x9 matrices
        it's a rather slow process because of all the looping
        """
        plotRange = np.array(matrixSize) // 2
        events = []

        for event in indexChunks:
            # Since uCellIDs, vCellIDs, and cellCharges are now directly associated with clusters,
            # we don't need digitIndices or maxChargeIndex
            digitsU, digitsV, digitsCharge = np.array(uCellIDs[event]), np.array(vCellIDs[event]), np.array(cellCharges[event])
            adcValues = []

            # Find the center of the cluster (digit with the max charge)
            uMax, vMax = digitsU[digitsCharge.argmax()], digitsV[digitsCharge.argmax()]
            uPos, vPos = digitsU - uMax + plotRange[0], digitsV - vMax + plotRange[1]

            valid_indices = (uPos >= 0) & (uPos < matrixSize[0]) & (vPos >= 0) & (vPos < matrixSize[1])

            cacheImg = np.zeros(matrixSize)
            cacheImg[uPos[valid_indices].astype(int), vPos[valid_indices].astype(int)] = digitsCharge[valid_indices]
            adcValues.append(cacheImg)

            events.extend(adcValues)

        return np.array(events, dtype=object)

    def getCoordinates(self, eventTree: TTree) -> None:
        """
        converting the uv coordinates, together with sensor ids, into xyz coordinates
        """
        # checking if cluster parameters have been loaded
        if self.gotClusters is False:
            self.getClusters(eventTree)

        #setting up index chunks for multi threading
        indexChunnks = np.array_split(range(len(self.data['sensorID'])), 4)

        with ThreadPoolExecutor(max_workers=None) as executor:
            futures = [executor.submit(self._getCoordinates, self.data['uPosition'][chunk], self.data['vPosition'][chunk], self.data['sensorID'][chunk]) for chunk in indexChunnks]

            xResults, yResults, zResults = [], [], []
            for future in futures:
                x, y, z = future.result()
                xResults.append(x)
                yResults.append(y)
                zResults.append(z)

            self.set('xPosition', np.concatenate(xResults))
            self.set('yPosition', np.concatenate(yResults))
            self.set('zPosition', np.concatenate(zResults))
            #self.data['xPosition'] = np.concatenate(xResults)
            #self.data['yPosition'] = np.concatenate(yResults)
            #self.data['zPosition'] = np.concatenate(zResults)

        # setting a bool for checking if coordinates were calculated
        self.gotCoordinates = True

    def _getCoordinates(self, uPositions: ArrayLike, vPositions: ArrayLike, sensorIDs: ArrayLike) -> tuple[np.ndarray]:
        """
        a private method for transposing/converting 2d uv coords into 3d xyz coordinates
        """
        length = len(sensorIDs)
        xArr, yArr, zArr = np.zeros(length), np.zeros(length), np.zeros(length)

        # iterting over the cluster arrays
        for index, (u, v, sensor_id) in enumerate(zip(uPositions, vPositions, sensorIDs)):
            # grabbing the shift vector and rotation angle
            shift, angle = self.transformation[str(sensor_id)]

            # setting up rotation matrix
            theta = np.deg2rad(angle)
            rotMatrix = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]])

            # projecting uv coordinates into 3d space
            point = np.array([u, 0, v])

            # shifting and rotating the projected vector
            shifted = rotMatrix.dot(point) + shift
            xArr[index], yArr[index], zArr[index] = shifted

        return xArr, yArr, zArr

    def getSphericals(self, eventTree: TTree) -> None:
        """
        Calculate spherical coordinates for each cluster.
        """
        # Checking if coordinates have been loaded
        popCoords = False
        if self.gotCoordinates is False:
            self.getCoordinates(eventTree)
            popCoords = True

        xPosition = self.data['xPosition']
        yPosition = self.data['yPosition']
        zPosition = self.data['zPosition']

        r, theta, phi = self._calcSphericals(xPosition, yPosition, zPosition)

        self.set('rPosition', r)
        self.set('thetaPosition', theta)
        self.set('phiPosition', phi)
        #self.data['rPosition'] = r
        #self.data['thetaPosition'] = theta
        #self.data['phiPosition'] = phi
        self.gotSphericals = True
        if popCoords:
            self.data.pop('xPosition')
            self.data.pop('yPosition')
            self.data.pop('zPosition')
            self.gotCoordinates = False

    @staticmethod
    def _calcSphericals(xPosition: np.ndarray, yPosition: np.ndarray, zPosition: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        xSquare = np.square(xPosition)
        ySquare = np.square(yPosition)
        zSquare = np.square(zPosition)

        # Avoid division by zero by replacing zeros with a small number
        r = np.sqrt(xSquare + ySquare + zSquare)
        rSafe = np.where(r == 0, 1e-10, r)

        theta = np.arccos(zPosition / rSafe)
        phi = np.arctan2(yPosition, xPosition)

        return r, theta, phi

    def getLayers(self, eventTree: TTree) -> None:
        """
        looks up the corresponding layers and ladders for every cluster
        """
        if self.gotClusters is False:
            self.getClusters(eventTree)

        layers, ladders = [], []
        for id in self.data['sensorID']:
            layer, ladder = self.layersLadders[str(id)]
            layers.append(layer)
            ladders.append(ladder)

        self.set('layer', np.array(layers, dtype=int))
        self.set('ladder', np.array(ladders, dtype=int))
        #self.data['layer'] = np.array(layers, dtype=int)
        #self.data['ladder'] = np.array(ladders, dtype=int)
        self.gotLayers = True

    def getMCData(self, eventTree: TTree) -> None:
        """
        this loads the monte carlo from the root file
        """
        if self.gotClusters is False:
            self.getClusters(eventTree)

        if self.includeUnSelected:
            warnings.warn('mc data are not supported on roi unselected data')

        # the monte carlo data, they are longer than the cluster data
        pdg = eventTree.arrays(self.mcData[0], library='np')[self.mcData[0]]
        momentumX = eventTree.arrays(self.mcData[1], library='np')[self.mcData[1]]
        momentumY = eventTree.arrays(self.mcData[2], library='np')[self.mcData[2]]
        momentumZ = eventTree.arrays(self.mcData[3], library='np')[self.mcData[3]]

        # this loads the relation ships to and from clusters and mc data
        # this is the same level of retardedness as with the cluster digits
        clusterToMC = eventTree.arrays(self.clusterToMC, library='np')[self.clusterToMC]
        mcToCluster = eventTree.arrays(self.mcToCluster, library='np')[self.mcToCluster]

        # it need the cluster charge as a jagged/ragged array, maybe I could simply
        # use the event numbers, but I am too tired to fix this shitty file format
        clsCharge = eventTree.arrays('PXDClusters/PXDClusters.m_clsCharge', library='np')['PXDClusters/PXDClusters.m_clsCharge']

        # reorganizing MC data
        momentumXList = []
        momentumYList = []
        momentumZList = []
        pdgList = []
        clusterNumbersList = []
        for i in range(len(clusterToMC)):
            # _fillMCList fills in the missing spots, because there are not mc data for
            # every cluster, even though there are more entries in this branch than
            # in the cluster branch... as I said, the root format is retarded
            fullClusterReferences = self._fillMCList(mcToCluster[i], clusterToMC[i], len(clsCharge[i]))
            clusterNumbersList.append(fullClusterReferences)
            pdgs, xmom, ymom, zmom = self._getMCData(fullClusterReferences, pdg[i], momentumX[i], momentumY[i], momentumZ[i])
            momentumXList.append(xmom)
            momentumYList.append(ymom)
            momentumZList.append(zmom)
            pdgList.append(pdgs)

        self.set('momentumX', np.hstack(momentumXList).astype(float))
        self.set('momentumY', np.hstack(momentumYList).astype(float))
        self.set('momentumZ', np.hstack(momentumZList).astype(float))
        self.set('pdg', np.hstack(pdgList).astype(int))
        self.set('clsNumber', np.hstack(clusterNumbersList).astype(int))
        #self.data['momentumX'] = np.hstack(momentumXList).astype(float)
        #self.data['momentumY'] = np.hstack(momentumYList).astype(float)
        #self.data['momentumZ'] = np.hstack(momentumZList).astype(float)
        #self.data['pdg'] = np.hstack(pdgList).astype(int)
        #self.data['clsNumber'] = np.hstack(clusterNumbersList).astype(int)
        self.gotMCData = True

        if self.includeUnSelected:
            sampleSize = np.sum(self.data['roiSelected'] == False)
            missingMCData = self.findUnselectedClusters.fillMCData({
                    'momentumX': self.data['momentumX'],
                    'momentumY': self.data['momentumY'],
                    'momentumZ': self.data['momentumZ'],
                    'pdg': self.data['pdg'],
                    'clsNumber': self.data['clsNumber']
                })
            self.extend(missingMCData)
            #for key in missingMCData:
            #    self.data[key] = np.hstack((self.data[key], missingMCData[key][0:sampleSize]))


    @staticmethod
    def _findMissing(lst: list, length: int) -> list:
        """
        a private method for finding missing elements in mc data arrays
        """
        return sorted(set(range(0, length)) - set(lst))

    def _fillMCList(self, fromClusters: ArrayLike, toClusters: ArrayLike, length: ArrayLike) -> list:
        """
        a private method for filling MC data arrays where clusters don't have
        any information
        """
        missingIndex = self._findMissing(fromClusters, length)
        testList = [-1] * length
        fillIndex = 0
        for i in range(len(testList)):
            if i in missingIndex:
                testList[i] = -1
            else:
                try:
                    testList[i] = int(toClusters[fillIndex])
                except TypeError:
                    testList[i] = int(toClusters[fillIndex][0])
                fillIndex += 1
        return testList

    @staticmethod
    def _getMCData(toClusters: ArrayLike, pdgs: ArrayLike, xMom: ArrayLike, yMom: ArrayLike, zMom: ArrayLike) -> tuple[np.ndarray]:
        """
        after filling and reorganizing MC data arrays one can finally collect the
        actual MC data, where there's data missing I will with zeros
        """
        pxList, pyList, pzList = [], [], []
        pdgList = []
        for references in toClusters:
            if references == -1:
                pxList.append(0)
                pyList.append(0)
                pzList.append(0)
                pdgList.append(0)
            else:
                pxList.append(xMom[references])
                pyList.append(yMom[references])
                pzList.append(zMom[references])
                pdgList.append(pdgs[references])
        return np.array(pdgList,dtype=list), np.array(pxList,dtype=list), np.array(pyList,dtype=list), np.array(pzList,dtype=list)