import numpy as np from numpy.typing import ArrayLike from uproot import TTree from ..common import FancyDict from concurrent.futures import ProcessPoolExecutor from .pxdFilter import FindUnselectedClusters import warnings class PXD(FancyDict): def __init__(self, data: dict = None) -> None: self.name = 'pxd' # list of pxd panels self.panels = [[[-0.89 , 0.36 , 0.36 , -0.89 , -0.89 ], [ 1.4 , 1.4 , 1.4 , 1.4 , 1.4 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 00 [[ 1.25 , 0.365, 0.365, 1.25 , 1.25 ], [ 0.72 , 1.615, 1.615, 0.72 , 0.72 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 01 [[ 1.4 , 1.4 , 1.4 , 1.4 , 1.4 ], [-0.36 , 0.89 , 0.89 , -0.36 , -0.36 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 02 [[ 0.72 , 1.615, 1.615, 0.72 , 0.72 ], [-1.25 , -0.365, -0.365, -1.25 , -1.25 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 03 [[ 0.89 , -0.36 , -0.36 , 0.89 , 0.89 ], [-1.4 , -1.4 , -1.4 , -1.4 , -1.4 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 04 [[-1.25 , -0.365, -0.365, -1.25 , -1.25 ], [-0.72 , -1.615, -1.615, -0.72 , -0.72 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 05 [[-1.4 , -1.4 , -1.4 , -1.4 , -1.4 ], [ 0.36 , -0.89 , -0.89 , 0.36 , 0.36 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 06 [[-0.72 , -1.615, -1.615, -0.72 , -0.72 ], [ 1.25 , 0.365, 0.365, 1.25 , 1.25 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 07 [[-0.89 , 0.36 , 0.36 , -0.89 , -0.89 ], [ 2.2 , 2.2 , 2.2 , 2.2 , 2.2 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 08 [[ 0.345, 1.4 , 1.4 , 0.345, 0.345], [ 2.35 , 1.725, 1.725, 2.35 , 2.35 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 09 [[ 1.48 , 2.1 , 2.1 , 1.48 , 1.48 ], [ 1.85 , 0.78 , 0.78 , 1.85 , 1.85 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 10 [[ 2.2 , 2.2 , 2.2 , 2.2 , 2.2 ], [ 0.89 , -0.36 , -0.36 , 0.89 , 0.89 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 11 [[ 2.35 , 1.725, 1.725, 2.35 , 2.35 ], [-0.345, -1.4 , -1.4 , -0.345, -0.345], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 12 [[ 1.85 , 0.78 , 0.78 , 1.85 , 1.85 ], [-1.48 , -2.1 , -2.1 , -1.48 , -1.48 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 13 [[ 0.89 , -0.36 , -0.36 , 0.89 , 0.89 ], [-2.2 , -2.2 , -2.2 , -2.2 , -2.2 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 14 [[-0.345, -1.4 , -1.4 , -0.345, -0.345], [-2.35 , -1.725, -1.725, -2.35 , -2.35 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 15 [[-1.48 , -2.1 , -2.1 , -1.48 , -1.48 ], [-1.85 , -0.78 , -0.78 , -1.85 , -1.85 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 16 [[-2.2 , -2.2 , -2.2 , -2.2 , -2.2 ], [-0.89 , 0.36 , 0.36 , -0.89 , -0.89 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 17 [[-2.35 , -1.725, -1.725, -2.35 , -2.35 ], [ 0.345, 1.4 , 1.4 , 0.345, 0.345], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 18 [[-1.85 , -0.78 , -0.78 , -1.85 , -1.85 ], [ 1.48 , 2.1 , 2.1 , 1.48 , 1.48 ], [-4.28, -4.28, 8.08, 8.08, -4.28]]] # 19 # these are the branch names for cluster info in the root file self.clusters = ['PXDClusters/PXDClusters.m_clsCharge', 'PXDClusters/PXDClusters.m_seedCharge', 'PXDClusters/PXDClusters.m_clsSize', 'PXDClusters/PXDClusters.m_uSize', 'PXDClusters/PXDClusters.m_vSize', 'PXDClusters/PXDClusters.m_uPosition', 'PXDClusters/PXDClusters.m_vPosition', 'PXDClusters/PXDClusters.m_sensorID'] # these are the branch names for cluster digits in the root file self.digits = ['PXDDigits/PXDDigits.m_uCellID', 'PXDDigits/PXDDigits.m_vCellID', 'PXDDigits/PXDDigits.m_charge'] # this establishes the relationship between clusters and digits # because for some reaseon the branch for digits has a different # size than the cluster branch self.clusterToDigis = 'PXDClustersToPXDDigits/m_elements/m_elements.m_to' # these are the branch names for monte carlo data in the root file self.mcData = ['MCParticles/MCParticles.m_pdg', 'MCParticles/MCParticles.m_momentum_x', 'MCParticles/MCParticles.m_momentum_y', 'MCParticles/MCParticles.m_momentum_z'] # these two establish the relation ship to an from clusters and monte carlo # there more entries than in the cluster data, but there still mc data missing # for some cluster files self.clusterToMC = 'PXDClustersToMCParticles/m_elements/m_elements.m_to' self.mcToCluster = 'PXDClustersToMCParticles/m_elements/m_elements.m_from' # these are the sensor IDs of the pxd modules/panels from the root file, they are # use to identify on which panels a cluster event happened self.panelIDs = np.array([ 8480, 8512, 8736, 8768, 8992, 9024, 9248, 9280, 9504, 9536, 9760, 9792, 10016, 10048, 10272, 10304, 16672, 16704, 16928, 16960, 17184, 17216, 17440, 17472, 17696, 17728, 17952, 17984, 18208, 18240, 18464, 18496, 18720, 18752, 18976, 19008, 19232, 19264, 19488, 19520]) # every line in this corresponds to one entry in the array above, this is used # to put the projected uv plane in the right position self.panelShifts = np.array([[1.3985 , 0.2652658 , 3.68255], [ 1.3985 , 0.23238491, -0.88255], [ 0.80146531, 1.17631236, 3.68255], [ 0.82407264, 1.15370502, -0.88255], [-0.2582769 , 1.3985 , 3.68255], [-0.2322286 , 1.3985 , -0.88255], [-1.17531186, 0.80246583, 3.68255 ], [-1.15510614, 0.82267151, -0.88255], [-1.3985 , -0.2645974 , 3.68255], [-1.3985 , -0.23012119, -0.88255], [-0.80591227, -1.17186534, 3.68255], [-0.82344228, -1.15433536, -0.88255], [ 0.26975836, -1.3985 , 3.68255], [ 0.23326624, -1.3985 , -0.88255], [ 1.1746111 , -0.80316652, 3.68255], [ 1.15205703, -0.82572062, -0.88255], [ 2.2015 , 0.26959865, 5.01305], [ 2.2015 , 0.2524582 , -1.21305], [ 1.77559093, 1.32758398, 5.01305], [ 1.78212569, 1.31626522, -1.21305], [ 0.87798948, 2.03516717, 5.01305], [ 0.88478563, 2.03124357, -1.21305], [-0.26129975, 2.2015 , 5.01305], [-0.25184137, 2.2015 , -1.21305], [-1.32416655, 1.77756402, 5.01305], [-1.31417539, 1.78333226, -1.21305], [-2.03421133, 0.87964512, 5.01305], [-2.02960691, 0.88762038, -1.21305], [-2.2015 , -0.25954151, 5.01305], [-2.2015 , -0.24969109, -1.21305], [-1.77636043, -1.32625112, 5.01305], [-1.78138268, -1.31755219, -1.21305], [-0.87493138, -2.03693277, 5.01305 ], [-0.8912978 , -2.02748378, -1.21305], [ 0.26489725, -2.2015 , 5.01305], [ 0.25364439, -2.2015 , -1.21305], [ 1.3269198 , -1.7759744 , 5.01305], [ 1.32258793, -1.77847528, -1.21305], [ 2.03616649, -0.87625871, 5.01305], [ 2.02936825, -0.8880338 , -1.21305]]) # every entry here corresponds to the entries in the array above, these are # used for rotating the projected uv plane self.panelRotations = np.array([ 90, 90, 135, 135, 180, 180, 225, 225, 270, 270, 315, 315, 360, 360, 405, 405, 90, 90, 120, 120, 150, 150, 180, 180, 210, 210, 240, 240, 270, 270, 300, 300, 330, 330, 360, 360, 390, 390, 420, 420]) # the layer and ladder arrays, for finding them from sensor id self.panelLayer = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]) self.panelLadder = np.array([1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21]) # all transpormaations are stored in a dict, with the sensor id as a keyword self.transformation = {} self.layersLadders = {} for i in range(len(self.panelIDs)): self.transformation[str(self.panelIDs[i])] = [self.panelShifts[i], self.panelRotations[i]] self.layersLadders[str(self.panelIDs[i])] = [self.panelLayer[i], self.panelLadder[i]] # parameter for checking if coordinates have been loaded self.gotClusters = False self.gotCoordinates = False self.gotSphericals = False self.gotLayers = False self.gotDigits = False self.gotMatrices = False self.gotMCData = False self.gotFiltered = False # this dict stores the data self.data = data if data is not None else {} # inorder to find roi unselected clusters self.findUnselectedClusters = FindUnselectedClusters() self.includeUnSelected = False def getClusters(self, eventTree: TTree, fileName: str = None) -> None: """ this uses the array from __init__ to load different branches into the data dict """ self.gotClusters = True for branch in self.clusters: data = self._getData(eventTree, branch) keyword = branch.split('_')[-1] self.set(keyword, data) #self.data[keyword] = data self.set('roiSelected', np.array([True] * len(data))) self.set('fileName', np.array([fileName] * len(data))) clusters = eventTree.arrays('PXDClusters/PXDClusters.m_clsCharge', library='np')['PXDClusters/PXDClusters.m_clsCharge'] self._getEventNumbers(clusters) if self.includeUnSelected: unselectedClusters = self.findUnselectedClusters.getClusters(eventTree, fileName) self.extend(unselectedClusters) self.set('detector', np.array(['pxd'] * len(data))) def _getEventNumbers(self, clusters: np.ndarray, offset: int = 0) -> None: """ this generates event numbers from the structure of pxd clusters """ eventNumbers = [] for i in range(len(clusters)): eventNumbers.append(np.array([i]*len(clusters[i])) + offset) self.set('eventNumber', np.concatenate(eventNumbers)) def _getData(self, eventTree: TTree, keyword: str, library: str = 'np') -> np.ndarray: """ a private method for converting branches into something useful, namely into numpy arrays, if the keyward library is set to np. keyword: str = the full branch name library: str = can be 'np' (numpy), 'pd' (pandas) or 'ak' (akward) see uproot documentation for more info """ try: data = eventTree.arrays(keyword, library=library)[keyword] return np.hstack(data) except: return KeyError def getDigits(self, eventTree: TTree) -> None: """ reorganizes digits, so that they fit to the clusters """ digits = eventTree.arrays(self.digits, library='np') uCellIDs = digits[self.digits[0]] vCellIDs = digits[self.digits[1]] cellCharges = digits[self.digits[2]] # this establishes the relation between digits and clusters, it's still # shocking to me, that this is necessary, why aren't digits stored in the # same way as clusters, than one wouldn't need to jump through hoops just # to have the data in a usable und sensible manner # root is such a retarded file format clusterDigits = eventTree.arrays(self.clusterToDigis, library='np')[self.clusterToDigis] uCellIDsTemp = [] vCellIDsTemp = [] cellChargesTemp = [] for event in range(len(clusterDigits)): for cls in clusterDigits[event]: uCellIDsTemp.append(uCellIDs[event][cls]) vCellIDsTemp.append(vCellIDs[event][cls]) cellChargesTemp.append(cellCharges[event][cls]) self.set('uCellIDs', np.array(uCellIDsTemp, dtype=object)) self.set('vCellIDs', np.array(vCellIDsTemp, dtype=object)) self.set('cellCharges', np.array(cellChargesTemp, dtype=object)) if self.includeUnSelected: unselectedClusters = self.findUnselectedClusters.getDigits(eventTree) self.extend(unselectedClusters) self.gotDigits = True def getMatrices(self, eventTree: TTree, matrixSize: tuple = (9, 9)) -> None: """ Loads the digit branches into arrays and converts them into adc matrices """ popDigits = False if self.gotDigits is False: self.getDigits(eventTree) popDigits = True uCellIDs = self.data['uCellIDs'] vCellIDs = self.data['vCellIDs'] cellCharges = self.data['cellCharges'] indexChunks = np.array_split(range(len(cellCharges)), 4) with ProcessPoolExecutor(max_workers=4) as executor: # Automatically uses as many workers as there are CPUs futures = [executor.submit(self._getMatrices, chunk, uCellIDs, vCellIDs, cellCharges, matrixSize) for chunk in indexChunks] results = [future.result() for future in futures] # Combine the results from all chunks self.set('matrix', np.concatenate(results).astype('int')) if popDigits is True: self.data.pop('uCellIDs') self.data.pop('vCellIDs') self.data.pop('cellCharges') self.gotDigits = False self.gotMatrices = True @staticmethod def _getMatrices(indexChunks: ArrayLike, uCellIDs: ArrayLike, vCellIDs: ArrayLike, cellCharges: ArrayLike, matrixSize: tuple = (9, 9)) -> np.ndarray: """ this takes the ragged/jagged digit arrays and converts them into 9x9 matrices it's a rather slow process because of all the looping """ plotRange = np.array(matrixSize) // 2 numEvents = len(indexChunks) events = np.zeros((numEvents, *matrixSize), dtype=cellCharges.dtype) for i, event in enumerate(indexChunks): # Since uCellIDs, vCellIDs, and cellCharges are now directly associated with clusters, digitsU, digitsV, digitsCharge = np.array(uCellIDs[event]), np.array(vCellIDs[event]), np.array(cellCharges[event]) # Find the center of the cluster (digit with the max charge) uMax, vMax = digitsU[digitsCharge.argmax()], digitsV[digitsCharge.argmax()] uPos, vPos = digitsU - uMax + plotRange[0], digitsV - vMax + plotRange[1] valid_indices = (uPos >= 0) & (uPos < matrixSize[0]) & (vPos >= 0) & (vPos < matrixSize[1]) # In-place operation to populate the matrix for the current event events[i, uPos[valid_indices].astype(int), vPos[valid_indices].astype(int)] = digitsCharge[valid_indices] return np.array(events, dtype=object) def getCoordinates(self, eventTree: TTree) -> None: """ converting the uv coordinates, together with sensor ids, into xyz coordinates """ # checking if cluster parameters have been loaded if self.gotClusters is False: self.getClusters(eventTree) #setting up index chunks for multi threading indexChunnks = np.array_split(range(len(self.data['sensorID'])), 4) # Initialize result lists xResults, yResults, zResults = [], [], [] with ProcessPoolExecutor(max_workers=4) as executor: futures = [executor.submit(self._getCoordinates, self.data['uPosition'][chunk], self.data['vPosition'][chunk], self.data['sensorID'][chunk]) for chunk in indexChunnks] for future in futures: x, y, z = future.result() xResults.append(x) yResults.append(y) zResults.append(z) self.set('xPosition', np.concatenate(xResults)) self.set('yPosition', np.concatenate(yResults)) self.set('zPosition', np.concatenate(zResults)) # setting a bool for checking if coordinates were calculated self.gotCoordinates = True def _getCoordinates(self, uPositions: ArrayLike, vPositions: ArrayLike, sensorIDs: ArrayLike) -> tuple[np.ndarray]: """ a private method for transposing/converting 2d uv coords into 3d xyz coordinates """ length = len(sensorIDs) xArr, yArr, zArr = np.zeros(length), np.zeros(length), np.zeros(length) # iterting over the cluster arrays for index, (u, v, sensor_id) in enumerate(zip(uPositions, vPositions, sensorIDs)): # grabbing the shift vector and rotation angle shift, angle = self.transformation[str(sensor_id)] # setting up rotation matrix theta = np.deg2rad(angle) rotMatrix = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]]) # projecting uv coordinates into 3d space point = np.array([u, 0, v]) # shifting and rotating the projected vector shifted = rotMatrix.dot(point) + shift xArr[index], yArr[index], zArr[index] = shifted return xArr, yArr, zArr def getSphericals(self, eventTree: TTree) -> None: """ Calculate spherical coordinates for each cluster. """ # Checking if coordinates have been loaded popCoords = False if self.gotCoordinates is False: self.getCoordinates(eventTree) popCoords = True xPosition = self.data['xPosition'] yPosition = self.data['yPosition'] zPosition = self.data['zPosition'] r, theta, phi = self._calcSphericals(xPosition, yPosition, zPosition) self.set('rPosition', r) self.set('thetaPosition', theta) self.set('phiPosition', phi) self.gotSphericals = True if popCoords: self.data.pop('xPosition') self.data.pop('yPosition') self.data.pop('zPosition') self.gotCoordinates = False @staticmethod def _calcSphericals(xPosition: np.ndarray, yPosition: np.ndarray, zPosition: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]: xSquare = np.square(xPosition) ySquare = np.square(yPosition) zSquare = np.square(zPosition) # Avoid division by zero by replacing zeros with a small number r = np.sqrt(xSquare + ySquare + zSquare) rSafe = np.where(r == 0, 1e-10, r) theta = np.arccos(zPosition / rSafe) phi = np.arctan2(yPosition, xPosition) return r, theta, phi def getLayers(self, eventTree: TTree) -> None: """ looks up the corresponding layers and ladders for every cluster """ if self.gotClusters is False: self.getClusters(eventTree) n = len(self.data['sensorID']) layers = np.empty(n, dtype=int) ladders = np.empty(n, dtype=int) for i, id in enumerate(self.data['sensorID']): layers[i], ladders[i] = self.layersLadders[str(id)] self.set('layer', np.array(layers, dtype=int)) self.set('ladder', np.array(ladders, dtype=int)) self.gotLayers = True def getMCData(self, eventTree: TTree) -> None: """ this loads the monte carlo from the root file """ if self.gotClusters is False: self.getClusters(eventTree) if self.includeUnSelected: warnings.warn('mc data are not supported on roi unselected data') # the monte carlo data, they are longer than the cluster data mcData = eventTree.arrays(self.mcData, library='np') pdg = mcData[self.mcData[0]] momentumX = mcData[self.mcData[1]] momentumY = mcData[self.mcData[2]] momentumZ = mcData[self.mcData[3]] # this loads the relation ships to and from clusters and mc data # this is the same level of retardedness as with the cluster digits clusterToMC = eventTree.arrays(self.clusterToMC, library='np')[self.clusterToMC] mcToCluster = eventTree.arrays(self.mcToCluster, library='np')[self.mcToCluster] # it need the cluster charge as a jagged/ragged array, maybe I could simply # use the event numbers, but I am too tired to fix this shitty file format clsCharge = eventTree.arrays('PXDClusters/PXDClusters.m_clsCharge', library='np')['PXDClusters/PXDClusters.m_clsCharge'] # reorganizing MC data n = len(clusterToMC) momentumXList = np.zeros(n, dtype=object) momentumYList = np.zeros(n, dtype=object) momentumZList = np.zeros(n, dtype=object) pdgList = np.zeros(n, dtype=object) clusterNumbersList = np.zeros(n, dtype=object) for i in range(n): # _fillMCList fills in the missing spots, because there are not mc data for # every cluster, even though there are more entries in this branch than # in the cluster branch... as I said, the root format is retarded fullClusterReferences = self._fillMCList(mcToCluster[i], clusterToMC[i], len(clsCharge[i])) clusterNumbersList[i] = fullClusterReferences pdgs, xmom, ymom, zmom = self._getMCData(fullClusterReferences, pdg[i], momentumX[i], momentumY[i], momentumZ[i]) momentumXList[i] = xmom momentumYList[i] = ymom momentumZList[i] = zmom pdgList[i] = pdgs self.set('momentumX', np.hstack(momentumXList).astype(float)) self.set('momentumY', np.hstack(momentumYList).astype(float)) self.set('momentumZ', np.hstack(momentumZList).astype(float)) self.set('pdg', np.hstack(pdgList).astype(int)) self.set('clsNumber', np.hstack(clusterNumbersList).astype(int)) self.gotMCData = True if self.includeUnSelected: sampleSize = np.sum(self.data['roiSelected'] == False) missingMCData = self.findUnselectedClusters.fillMCData({ 'momentumX': self.data['momentumX'], 'momentumY': self.data['momentumY'], 'momentumZ': self.data['momentumZ'], 'pdg': self.data['pdg'], 'clsNumber': self.data['clsNumber'] }, sampleSize) self.extend(missingMCData) @staticmethod def _findMissing(lst: list, length: int) -> list: """ a private method for finding missing elements in mc data arrays """ return sorted(set(range(0, length)) - set(lst)) def _fillMCList(self, fromClusters: ArrayLike, toClusters: ArrayLike, length: ArrayLike) -> list: """ a private method for filling MC data arrays where clusters don't have any information """ missingIndex = self._findMissing(fromClusters, length) testList = [-1] * length fillIndex = 0 for i in range(len(testList)): if i in missingIndex: testList[i] = -1 else: try: testList[i] = int(toClusters[fillIndex]) except TypeError: testList[i] = int(toClusters[fillIndex][0]) fillIndex += 1 return testList @staticmethod def _getMCData(toClusters: ArrayLike, pdgs: ArrayLike, xMom: ArrayLike, yMom: ArrayLike, zMom: ArrayLike) -> tuple[np.ndarray]: """ after filling and reorganizing MC data arrays one can finally collect the actual MC data, where there's data missing I will with zeros """ n = len(toClusters) pxList = np.zeros(n) pyList = np.zeros(n) pzList = np.zeros(n) pdgList = np.zeros(n, dtype=int) for i, references in enumerate(toClusters): if references == -1: continue # Arrays were initialized to zero else: pxList[i] = xMom[references] pyList[i] = yMom[references] pzList[i] = zMom[references] pdgList[i] = pdgs[references] return pdgList, pxList, pyList, pzList