import numpy as np from numpy.typing import ArrayLike from uproot import TTree from ..common import FancyDict from concurrent.futures import ThreadPoolExecutor from .pxdFilter import FindUnselectedClusters import warnings class PXD(FancyDict): def __init__(self, data: dict = None) -> None: self.name = 'pxd' # list of pxd panels self.panels = [[[-0.89 , 0.36 , 0.36 , -0.89 , -0.89 ], [ 1.4 , 1.4 , 1.4 , 1.4 , 1.4 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 00 [[ 1.25 , 0.365, 0.365, 1.25 , 1.25 ], [ 0.72 , 1.615, 1.615, 0.72 , 0.72 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 01 [[ 1.4 , 1.4 , 1.4 , 1.4 , 1.4 ], [-0.36 , 0.89 , 0.89 , -0.36 , -0.36 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 02 [[ 0.72 , 1.615, 1.615, 0.72 , 0.72 ], [-1.25 , -0.365, -0.365, -1.25 , -1.25 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 03 [[ 0.89 , -0.36 , -0.36 , 0.89 , 0.89 ], [-1.4 , -1.4 , -1.4 , -1.4 , -1.4 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 04 [[-1.25 , -0.365, -0.365, -1.25 , -1.25 ], [-0.72 , -1.615, -1.615, -0.72 , -0.72 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 05 [[-1.4 , -1.4 , -1.4 , -1.4 , -1.4 ], [ 0.36 , -0.89 , -0.89 , 0.36 , 0.36 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 06 [[-0.72 , -1.615, -1.615, -0.72 , -0.72 ], [ 1.25 , 0.365, 0.365, 1.25 , 1.25 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 07 [[-0.89 , 0.36 , 0.36 , -0.89 , -0.89 ], [ 2.2 , 2.2 , 2.2 , 2.2 , 2.2 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 08 [[ 0.345, 1.4 , 1.4 , 0.345, 0.345], [ 2.35 , 1.725, 1.725, 2.35 , 2.35 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 09 [[ 1.48 , 2.1 , 2.1 , 1.48 , 1.48 ], [ 1.85 , 0.78 , 0.78 , 1.85 , 1.85 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 10 [[ 2.2 , 2.2 , 2.2 , 2.2 , 2.2 ], [ 0.89 , -0.36 , -0.36 , 0.89 , 0.89 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 11 [[ 2.35 , 1.725, 1.725, 2.35 , 2.35 ], [-0.345, -1.4 , -1.4 , -0.345, -0.345], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 12 [[ 1.85 , 0.78 , 0.78 , 1.85 , 1.85 ], [-1.48 , -2.1 , -2.1 , -1.48 , -1.48 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 13 [[ 0.89 , -0.36 , -0.36 , 0.89 , 0.89 ], [-2.2 , -2.2 , -2.2 , -2.2 , -2.2 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 14 [[-0.345, -1.4 , -1.4 , -0.345, -0.345], [-2.35 , -1.725, -1.725, -2.35 , -2.35 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 15 [[-1.48 , -2.1 , -2.1 , -1.48 , -1.48 ], [-1.85 , -0.78 , -0.78 , -1.85 , -1.85 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 16 [[-2.2 , -2.2 , -2.2 , -2.2 , -2.2 ], [-0.89 , 0.36 , 0.36 , -0.89 , -0.89 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 17 [[-2.35 , -1.725, -1.725, -2.35 , -2.35 ], [ 0.345, 1.4 , 1.4 , 0.345, 0.345], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 18 [[-1.85 , -0.78 , -0.78 , -1.85 , -1.85 ], [ 1.48 , 2.1 , 2.1 , 1.48 , 1.48 ], [-4.28, -4.28, 8.08, 8.08, -4.28]]] # 19 # these are the branch names for cluster info in the root file self.clusters = ['PXDClusters/PXDClusters.m_clsCharge', 'PXDClusters/PXDClusters.m_seedCharge', 'PXDClusters/PXDClusters.m_clsSize', 'PXDClusters/PXDClusters.m_uSize', 'PXDClusters/PXDClusters.m_vSize', 'PXDClusters/PXDClusters.m_uPosition', 'PXDClusters/PXDClusters.m_vPosition', 'PXDClusters/PXDClusters.m_sensorID'] # these are the branch names for cluster digits in the root file self.digits = ['PXDDigits/PXDDigits.m_uCellID', 'PXDDigits/PXDDigits.m_vCellID', 'PXDDigits/PXDDigits.m_charge'] # this establishes the relationship between clusters and digits # because for some reaseon the branch for digits has a different # size than the cluster branch self.clusterToDigis = 'PXDClustersToPXDDigits/m_elements/m_elements.m_to' # these are the branch names for monte carlo data in the root file self.mcData = ['MCParticles/MCParticles.m_pdg', 'MCParticles/MCParticles.m_momentum_x', 'MCParticles/MCParticles.m_momentum_y', 'MCParticles/MCParticles.m_momentum_z'] # these two establish the relation ship to an from clusters and monte carlo # there more entries than in the cluster data, but there still mc data missing # for some cluster files self.clusterToMC = 'PXDClustersToMCParticles/m_elements/m_elements.m_to' self.mcToCluster = 'PXDClustersToMCParticles/m_elements/m_elements.m_from' # these are the sensor IDs of the pxd modules/panels from the root file, they are # use to identify on which panels a cluster event happened self.panelIDs = np.array([ 8480, 8512, 8736, 8768, 8992, 9024, 9248, 9280, 9504, 9536, 9760, 9792, 10016, 10048, 10272, 10304, 16672, 16704, 16928, 16960, 17184, 17216, 17440, 17472, 17696, 17728, 17952, 17984, 18208, 18240, 18464, 18496, 18720, 18752, 18976, 19008, 19232, 19264, 19488, 19520]) # every line in this corresponds to one entry in the array above, this is used # to put the projected uv plane in the right position self.panelShifts = np.array([[1.3985 , 0.2652658 , 3.68255], [ 1.3985 , 0.23238491, -0.88255], [ 0.80146531, 1.17631236, 3.68255], [ 0.82407264, 1.15370502, -0.88255], [-0.2582769 , 1.3985 , 3.68255], [-0.2322286 , 1.3985 , -0.88255], [-1.17531186, 0.80246583, 3.68255 ], [-1.15510614, 0.82267151, -0.88255], [-1.3985 , -0.2645974 , 3.68255], [-1.3985 , -0.23012119, -0.88255], [-0.80591227, -1.17186534, 3.68255], [-0.82344228, -1.15433536, -0.88255], [ 0.26975836, -1.3985 , 3.68255], [ 0.23326624, -1.3985 , -0.88255], [ 1.1746111 , -0.80316652, 3.68255], [ 1.15205703, -0.82572062, -0.88255], [ 2.2015 , 0.26959865, 5.01305], [ 2.2015 , 0.2524582 , -1.21305], [ 1.77559093, 1.32758398, 5.01305], [ 1.78212569, 1.31626522, -1.21305], [ 0.87798948, 2.03516717, 5.01305], [ 0.88478563, 2.03124357, -1.21305], [-0.26129975, 2.2015 , 5.01305], [-0.25184137, 2.2015 , -1.21305], [-1.32416655, 1.77756402, 5.01305], [-1.31417539, 1.78333226, -1.21305], [-2.03421133, 0.87964512, 5.01305], [-2.02960691, 0.88762038, -1.21305], [-2.2015 , -0.25954151, 5.01305], [-2.2015 , -0.24969109, -1.21305], [-1.77636043, -1.32625112, 5.01305], [-1.78138268, -1.31755219, -1.21305], [-0.87493138, -2.03693277, 5.01305 ], [-0.8912978 , -2.02748378, -1.21305], [ 0.26489725, -2.2015 , 5.01305], [ 0.25364439, -2.2015 , -1.21305], [ 1.3269198 , -1.7759744 , 5.01305], [ 1.32258793, -1.77847528, -1.21305], [ 2.03616649, -0.87625871, 5.01305], [ 2.02936825, -0.8880338 , -1.21305]]) # every entry here corresponds to the entries in the array above, these are # used for rotating the projected uv plane self.panelRotations = np.array([ 90, 90, 135, 135, 180, 180, 225, 225, 270, 270, 315, 315, 360, 360, 405, 405, 90, 90, 120, 120, 150, 150, 180, 180, 210, 210, 240, 240, 270, 270, 300, 300, 330, 330, 360, 360, 390, 390, 420, 420]) # the layer and ladder arrays, for finding them from sensor id self.panelLayer = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]) self.panelLadder = np.array([1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21]) # all transpormaations are stored in a dict, with the sensor id as a keyword self.transformation = {} self.layersLadders = {} for i in range(len(self.panelIDs)): self.transformation[str(self.panelIDs[i])] = [self.panelShifts[i], self.panelRotations[i]] self.layersLadders[str(self.panelIDs[i])] = [self.panelLayer[i], self.panelLadder[i]] # parameter for checking if coordinates have been loaded self.gotClusters = False self.gotCoordinates = False self.gotSphericals = False self.gotLayers = False self.gotDigits = False self.gotMatrices = False self.gotMCData = False self.gotFiltered = False # this dict stores the data self.data = data if data is not None else {} # inorder to find roi unselected clusters self.findUnselectedClusters = FindUnselectedClusters() self.includeUnSelected = False def getClusters(self, eventTree: TTree, includeUnSelected: bool = False) -> None: """ this uses the array from __init__ to load different branches into the data dict """ self.gotClusters = True self.includeUnSelected = includeUnSelected for branch in self.clusters: data = self._getData(eventTree, branch) keyword = branch.split('_')[-1] self.data[keyword] = data self.data['roiSelected'] = np.array([True] * len(self.data['clsCharge'])) clusters = eventTree.arrays('PXDClusters/PXDClusters.m_clsCharge', library='np')['PXDClusters/PXDClusters.m_clsCharge'] self._getEventNumbers(clusters) if includeUnSelected: unselectedClusters = self.findUnselectedClusters.getClusters(eventTree) for key in unselectedClusters: self.data[key] = np.concatenate((self.data[key], unselectedClusters[key])) self.data['detector'] = np.array(['pxd'] * len(self.data['clsCharge'])) def _getEventNumbers(self, clusters: np.ndarray, offset: int = 0) -> None: """ this generates event numbers from the structure of pxd clusters """ eventNumbers = [] for i in range(len(clusters)): eventNumbers.append(np.array([i]*len(clusters[i])) + offset) self.data['eventNumber'] = np.hstack(eventNumbers) def _getData(self, eventTree: TTree, keyword: str, library: str = 'np') -> np.ndarray: """ a private method for converting branches into something useful, namely into numpy arrays, if the keyward library is set to np. keyword: str = the full branch name library: str = can be 'np' (numpy), 'pd' (pandas) or 'ak' (akward) see uproot documentation for more info """ try: data = eventTree.arrays(keyword, library=library)[keyword] return np.hstack(data) except: return KeyError def getDigits(self, eventTree: TTree) -> None: """ reorganizes digits, so that they fit to the clusters """ uCellIDs = eventTree.arrays(self.digits[0], library='np')[self.digits[0]] vCellIDs = eventTree.arrays(self.digits[1], library='np')[self.digits[1]] cellCharges = eventTree.arrays(self.digits[2], library='np')[self.digits[2]] # this establishes the relation between digits and clusters, it's still # shocking to me, that this is necessary, why aren't digits stored in the # same way as clusters, than one wouldn't need to jump through hoops just # to have the data in a usable und sensible manner # root is such a retarded file format clusterDigits = eventTree.arrays(self.clusterToDigis, library='np')[self.clusterToDigis] self.data['uCellIDs'] = [] self.data['vCellIDs'] = [] self.data['cellCharges'] = [] for event in range(len(clusterDigits)): for cls in clusterDigits[event]: self.data['uCellIDs'].append(uCellIDs[event][cls]) self.data['vCellIDs'].append(vCellIDs[event][cls]) self.data['cellCharges'].append(cellCharges[event][cls]) self.data['uCellIDs'] = np.array(self.data['uCellIDs'], dtype=object) self.data['vCellIDs'] = np.array(self.data['vCellIDs'], dtype=object) self.data['cellCharges'] = np.array(self.data['cellCharges'], dtype=object) if self.includeUnSelected: unselectedClusters = self.findUnselectedClusters.getDigits(eventTree) for key in unselectedClusters: self.data[key] = np.concatenate((self.data[key], unselectedClusters[key])) self.gotDigits = True def getMatrices(self, eventTree: TTree, matrixSize: tuple = (9, 9)) -> None: """ Loads the digit branches into arrays and converts them into adc matrices """ popDigits = False if self.gotDigits is False: self.getDigits(eventTree) popDigits = True uCellIDs = self.data['uCellIDs'] vCellIDs = self.data['vCellIDs'] cellCharges = self.data['cellCharges'] indexChunks = np.array_split(range(len(cellCharges)), 4) with ThreadPoolExecutor(max_workers=None) as executor: futures = [executor.submit(self._getMatrices, chunk, uCellIDs, vCellIDs, cellCharges, matrixSize) for chunk in indexChunks] results = [future.result() for future in futures] # Combine the results from all chunks self.data['matrix'] = np.concatenate(results).astype('int') if popDigits is True: self.data.pop('uCellIDs') self.data.pop('vCellIDs') self.data.pop('cellCharges') self.gotDigits = False self.gotMatrices = True @staticmethod def _getMatrices(indexChunks: ArrayLike, uCellIDs: ArrayLike, vCellIDs: ArrayLike, cellCharges: ArrayLike, matrixSize: tuple = (9, 9)) -> np.ndarray: """ this takes the ragged/jagged digit arrays and converts them into 9x9 matrices it's a rather slow process because of all the looping """ plotRange = np.array(matrixSize) // 2 events = [] for event in indexChunks: # Since uCellIDs, vCellIDs, and cellCharges are now directly associated with clusters, # we don't need digitIndices or maxChargeIndex digitsU, digitsV, digitsCharge = np.array(uCellIDs[event]), np.array(vCellIDs[event]), np.array(cellCharges[event]) adcValues = [] # Find the center of the cluster (digit with the max charge) uMax, vMax = digitsU[digitsCharge.argmax()], digitsV[digitsCharge.argmax()] uPos, vPos = digitsU - uMax + plotRange[0], digitsV - vMax + plotRange[1] valid_indices = (uPos >= 0) & (uPos < matrixSize[0]) & (vPos >= 0) & (vPos < matrixSize[1]) cacheImg = np.zeros(matrixSize) cacheImg[uPos[valid_indices].astype(int), vPos[valid_indices].astype(int)] = digitsCharge[valid_indices] adcValues.append(cacheImg) events.extend(adcValues) return np.array(events, dtype=object) def getCoordinates(self, eventTree: TTree) -> None: """ converting the uv coordinates, together with sensor ids, into xyz coordinates """ # checking if cluster parameters have been loaded if self.gotClusters is False: self.getClusters(eventTree) # setting a bool for checking if coordinates were calculated self.gotCoordinates = True indexChunnks = np.array_split(range(len(self.data['sensorID'])), 4) with ThreadPoolExecutor(max_workers=None) as executor: futures = [executor.submit(self._getCoordinates, self.data['uPosition'][chunk], self.data['vPosition'][chunk], self.data['sensorID'][chunk]) for chunk in indexChunnks] xResults, yResults, zResults = [], [], [] for future in futures: x, y, z = future.result() xResults.append(x) yResults.append(y) zResults.append(z) self.data['xPosition'] = np.concatenate(xResults) self.data['yPosition'] = np.concatenate(yResults) self.data['zPosition'] = np.concatenate(zResults) self.gotCoordinates = True def _getCoordinates(self, uPositions: ArrayLike, vPositions: ArrayLike, sensorIDs: ArrayLike) -> tuple[np.ndarray]: """ a private method for transposing/converting 2d uv coords into 3d xyz coordinates """ length = len(sensorIDs) xArr, yArr, zArr = np.zeros(length), np.zeros(length), np.zeros(length) # iterting over the cluster arrays for index, (u, v, sensor_id) in enumerate(zip(uPositions, vPositions, sensorIDs)): # grabbing the shift vector and rotation angle shift, angle = self.transformation[str(sensor_id)] # setting up rotation matrix theta = np.deg2rad(angle) rotMatrix = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]]) # projecting uv coordinates into 3d space point = np.array([u, 0, v]) # shifting and rotating the projected vector shifted = rotMatrix.dot(point) + shift xArr[index], yArr[index], zArr[index] = shifted return xArr, yArr, zArr def getSphericals(self) -> None: """ Calculate spherical coordinates for each cluster. """ # Checking if coordinates have been loaded popCoords = False if self.gotCoordinates is False: self.getCoordinates() popCoords = True xPosition = self.data['xPosition'] yPosition = self.data['yPosition'] zPosition = self.data['zPosition'] r, theta, phi = self._calcSphericals(xPosition, yPosition, zPosition) self.data['rPosition'] = r self.data['thetaPosition'] = theta self.data['phiPosition'] = phi self.gotSphericals = True if popCoords: self.data.pop('xPosition') self.data.pop('yPosition') self.data.pop('zPosition') self.gotCoordinates = False @staticmethod def _calcSphericals(xPosition: np.ndarray, yPosition: np.ndarray, zPosition: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]: xSquare = np.square(xPosition) ySquare = np.square(yPosition) zSquare = np.square(zPosition) # Avoid division by zero by replacing zeros with a small number r = np.sqrt(xSquare + ySquare + zSquare) rSafe = np.where(r == 0, 1e-10, r) theta = np.arccos(zPosition / rSafe) phi = np.arctan2(yPosition, xPosition) return r, theta, phi def getLayers(self, eventTree: TTree) -> None: """ looks up the corresponding layers and ladders for every cluster """ if self.gotClusters is False: self.getClusters(eventTree) layers, ladders = [], [] for id in self.data['sensorID']: layer, ladder = self.layersLadders[str(id)] layers.append(layer) ladders.append(ladder) self.data['layer'] = np.array(layers, dtype=int) self.data['ladder'] = np.array(ladders, dtype=int) self.gotLayers = True def getMCData(self, eventTree: TTree) -> None: """ this loads the monte carlo from the root file """ if self.includeUnSelected: warnings.warn('mc data are not supported on roi unselected data') # the monte carlo data, they are longer than the cluster data pdg = eventTree.arrays(self.mcData[0], library='np')[self.mcData[0]] momentumX = eventTree.arrays(self.mcData[1], library='np')[self.mcData[1]] momentumY = eventTree.arrays(self.mcData[2], library='np')[self.mcData[2]] momentumZ = eventTree.arrays(self.mcData[3], library='np')[self.mcData[3]] # this loads the relation ships to and from clusters and mc data # this is the same level of retardedness as with the cluster digits clusterToMC = eventTree.arrays(self.clusterToMC, library='np')[self.clusterToMC] mcToCluster = eventTree.arrays(self.mcToCluster, library='np')[self.mcToCluster] # it need the cluster charge as a jagged/ragged array, maybe I could simply # use the event numbers, but I am too tired to fix this shitty file format clsCharge = eventTree.arrays('PXDClusters/PXDClusters.m_clsCharge', library='np')['PXDClusters/PXDClusters.m_clsCharge'] # reorganizing MC data momentumXList = [] momentumYList = [] momentumZList = [] pdgList = [] clusterNumbersList = [] for i in range(len(clusterToMC)): # _fillMCList fills in the missing spots, because there are not mc data for # every cluster, even though there are more entries in this branch than # in the cluster branch... as I said, the root format is retarded fullClusterReferences = self._fillMCList(mcToCluster[i], clusterToMC[i], len(clsCharge[i])) clusterNumbersList.append(fullClusterReferences) pdgs, xmom, ymom, zmom = self._getMCData(fullClusterReferences, pdg[i], momentumX[i], momentumY[i], momentumZ[i]) momentumXList.append(xmom) momentumYList.append(ymom) momentumZList.append(zmom) pdgList.append(pdgs) self.data['momentumX'] = np.hstack(momentumXList).astype(float) self.data['momentumY'] = np.hstack(momentumYList).astype(float) self.data['momentumZ'] = np.hstack(momentumZList).astype(float) self.data['pdg'] = np.hstack(pdgList).astype(int) self.data['clsNumber'] = np.hstack(clusterNumbersList).astype(int) self.gotMCData = True if self.includeUnSelected: sampleSize = np.sum(self.data['roiSelected'] == False) missingMCData = self.findUnselectedClusters.fillMCData({ 'momentumX': self.data['momentumX'], 'momentumY': self.data['momentumY'], 'momentumZ': self.data['momentumZ'], 'pdg': self.data['pdg'], 'clsNumber': self.data['clsNumber'] }) for key in missingMCData: self.data[key] = np.hstack((self.data[key], missingMCData[key][0:sampleSize])) @staticmethod def _findMissing(lst: list, length: int) -> list: """ a private method for finding missing elements in mc data arrays """ return sorted(set(range(0, length)) - set(lst)) def _fillMCList(self, fromClusters: ArrayLike, toClusters: ArrayLike, length: ArrayLike) -> list: """ a private method for filling MC data arrays where clusters don't have any information """ missingIndex = self._findMissing(fromClusters, length) testList = [-1] * length fillIndex = 0 for i in range(len(testList)): if i in missingIndex: testList[i] = -1 else: try: testList[i] = int(toClusters[fillIndex]) except TypeError: testList[i] = int(toClusters[fillIndex][0]) fillIndex += 1 return testList @staticmethod def _getMCData(toClusters: ArrayLike, pdgs: ArrayLike, xMom: ArrayLike, yMom: ArrayLike, zMom: ArrayLike) -> tuple[np.ndarray]: """ after filling and reorganizing MC data arrays one can finally collect the actual MC data, where there's data missing I will with zeros """ pxList, pyList, pzList = [], [], [] pdgList = [] for references in toClusters: if references == -1: pxList.append(0) pyList.append(0) pzList.append(0) pdgList.append(0) else: pxList.append(xMom[references]) pyList.append(yMom[references]) pzList.append(zMom[references]) pdgList.append(pdgs[references]) return np.array(pdgList,dtype=list), np.array(pxList,dtype=list), np.array(pyList,dtype=list), np.array(pzList,dtype=list)