Newer
Older
import numpy as np
from numpy.typing import ArrayLike
import uproot as ur

johannes bilk
committed
from typing import Any
class Rootable:
"""
this class uses uproot to load pxd data from root files and converts them into
native python data structures.
it can load the cluster information, uses the digits to generate the adc matrices,
coordinates, layer and ladders and finally also monte carlo data.
"""
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# these are the sensor IDs of the pxd modules/panels from the root file, they are
# used to identify on which panel a cluster event happened
self.panelIDs = np.array([ 8480, 8512, 8736, 8768, 8992, 9024, 9248, 9280,
9504, 9536, 9760, 9792, 10016, 10048, 10272, 10304,
16672, 16704, 16928, 16960, 17184, 17216, 17440, 17472,
17696, 17728, 17952, 17984, 18208, 18240, 18464, 18496,
18720, 18752, 18976, 19008, 19232, 19264, 19488, 19520])
# every line in this corresponds to one entry in the array above, this is used
# to put the projected uv plane in the right position
self.panelShifts = np.array([[1.3985 , 0.2652658 , 3.68255],
[ 1.3985 , 0.23238491, -0.88255],
[ 0.80146531, 1.17631236, 3.68255],
[ 0.82407264, 1.15370502, -0.88255],
[-0.2582769 , 1.3985 , 3.68255],
[-0.2322286 , 1.3985 , -0.88255],
[-1.17531186, 0.80246583, 3.68255 ],
[-1.15510614, 0.82267151, -0.88255],
[-1.3985 , -0.2645974 , 3.68255],
[-1.3985 , -0.23012119, -0.88255],
[-0.80591227, -1.17186534, 3.68255],
[-0.82344228, -1.15433536, -0.88255],
[ 0.26975836, -1.3985 , 3.68255],
[ 0.23326624, -1.3985 , -0.88255],
[ 1.1746111 , -0.80316652, 3.68255],
[ 1.15205703, -0.82572062, -0.88255],
[ 2.2015 , 0.26959865, 5.01305],
[ 2.2015 , 0.2524582 , -1.21305],
[ 1.77559093, 1.32758398, 5.01305],
[ 1.78212569, 1.31626522, -1.21305],
[ 0.87798948, 2.03516717, 5.01305],
[ 0.88478563, 2.03124357, -1.21305],
[-0.26129975, 2.2015 , 5.01305],
[-0.25184137, 2.2015 , -1.21305],
[-1.32416655, 1.77756402, 5.01305],
[-1.31417539, 1.78333226, -1.21305],
[-2.03421133, 0.87964512, 5.01305],
[-2.02960691, 0.88762038, -1.21305],
[-2.2015 , -0.25954151, 5.01305],
[-2.2015 , -0.24969109, -1.21305],
[-1.77636043, -1.32625112, 5.01305],
[-1.78138268, -1.31755219, -1.21305],
[-0.87493138, -2.03693277, 5.01305 ],
[-0.8912978 , -2.02748378, -1.21305],
[ 0.26489725, -2.2015 , 5.01305],
[ 0.25364439, -2.2015 , -1.21305],
[ 1.3269198 , -1.7759744 , 5.01305],
[ 1.32258793, -1.77847528, -1.21305],
[ 2.03616649, -0.87625871, 5.01305],
[ 2.02936825, -0.8880338 , -1.21305]])
# every entry here corresponds to the entries in the array above, these are
# used for rotating the projected uv plane
self.panelRotations = np.array([ 90, 90, 135, 135, 180, 180, 225, 225, 270, 270, 315, 315, 360,
360, 405, 405, 90, 90, 120, 120, 150, 150, 180, 180, 210, 210,
240, 240, 270, 270, 300, 300, 330, 330, 360, 360, 390, 390, 420,
420])
# layer and ladder of every panel, in the same order as self.panelIDs.
# the first 16 panels (sensor IDs 8480..10304) form the inner layer: their
# shifts above sit at a radius of ~1.4 and their rotations advance in 45 degree
# steps; the remaining 24 panels (16672..19520) sit at ~2.2 with 30 degree steps.
# (the previous table marked only 14 panels as layer 1.)
self.panelLayer = np.array([1] * 16 + [2] * 24)
# NOTE(review): ladder number 11 is skipped below (..., 10, 10, 12, 12, ...), so
# the numbering ends at 21 instead of 20 -- presumably a typo. Left unchanged
# because existing selections may depend on these values. TODO confirm.
self.panelLadder = np.array([1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21])
# all transformations are stored in a dict, with the sensor id (as a string) as key;
# the layer/ladder lookup uses the same keys
self.transformation = {}
self.layersLadders = {}
for panelID, shift, rotation, layer, ladder in zip(self.panelIDs, self.panelShifts, self.panelRotations, self.panelLayer, self.panelLadder):
    sensorKey = str(panelID)
    self.transformation[sensorKey] = [shift, rotation]
    self.layersLadders[sensorKey] = [layer, ladder]
# these are the branch names for cluster info in the root file
self.gotClusters = False
self.clusters = ['PXDClusters/PXDClusters.m_clsCharge',
'PXDClusters/PXDClusters.m_seedCharge',
'PXDClusters/PXDClusters.m_clsSize',
'PXDClusters/PXDClusters.m_uSize',
'PXDClusters/PXDClusters.m_vSize',
'PXDClusters/PXDClusters.m_uPosition',
'PXDClusters/PXDClusters.m_vPosition',
'PXDClusters/PXDClusters.m_sensorID']
# these are the branch names for cluster digits in the root file
self.digits = ['PXDDigits/PXDDigits.m_uCellID',
'PXDDigits/PXDDigits.m_vCellID',
'PXDDigits/PXDDigits.m_charge']
# this establishes the relationship between clusters and digits,
# because for some reason the branch for digits has a different
# size than the cluster branch
self.clusterToDigis = 'PXDClustersToPXDDigits/m_elements/m_elements.m_to'
# these are the branch names for monte carlo data in the root file
self.mcData = ['MCParticles/MCParticles.m_pdg',
'MCParticles/MCParticles.m_momentum_x',
'MCParticles/MCParticles.m_momentum_y',
'MCParticles/MCParticles.m_momentum_z']

johannes bilk
committed
# indices for events to be imported
self.eventIndices = None
# these two establish the relationship to and from clusters and monte carlo data;
# there are more entries than in the cluster data, but mc data is still missing
# for some cluster files
self.clusterToMC = 'PXDClustersToMCParticles/m_elements/m_elements.m_to'
self.mcToCluster = 'PXDClustersToMCParticles/m_elements/m_elements.m_from'
# this dict stores the data
self.data = data if data is not None else {}
# list of pxd panels
self.pxdPanels = [[[-0.89 , 0.36 , 0.36 , -0.89 , -0.89 ], [ 1.4 , 1.4 , 1.4 , 1.4 , 1.4 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 00
[[ 1.25 , 0.365, 0.365, 1.25 , 1.25 ], [ 0.72 , 1.615, 1.615, 0.72 , 0.72 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 01
[[ 1.4 , 1.4 , 1.4 , 1.4 , 1.4 ], [-0.36 , 0.89 , 0.89 , -0.36 , -0.36 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 02
[[ 0.72 , 1.615, 1.615, 0.72 , 0.72 ], [-1.25 , -0.365, -0.365, -1.25 , -1.25 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 03
[[ 0.89 , -0.36 , -0.36 , 0.89 , 0.89 ], [-1.4 , -1.4 , -1.4 , -1.4 , -1.4 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 04
[[-1.25 , -0.365, -0.365, -1.25 , -1.25 ], [-0.72 , -1.615, -1.615, -0.72 , -0.72 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 05
[[-1.4 , -1.4 , -1.4 , -1.4 , -1.4 ], [ 0.36 , -0.89 , -0.89 , 0.36 , 0.36 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 06
[[-0.72 , -1.615, -1.615, -0.72 , -0.72 ], [ 1.25 , 0.365, 0.365, 1.25 , 1.25 ], [-3.12, -3.12, 5.92, 5.92, -3.12]], # 07
[[-0.89 , 0.36 , 0.36 , -0.89 , -0.89 ], [ 2.2 , 2.2 , 2.2 , 2.2 , 2.2 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 08
[[ 0.345, 1.4 , 1.4 , 0.345, 0.345], [ 2.35 , 1.725, 1.725, 2.35 , 2.35 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 09
[[ 1.48 , 2.1 , 2.1 , 1.48 , 1.48 ], [ 1.85 , 0.78 , 0.78 , 1.85 , 1.85 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 10
[[ 2.2 , 2.2 , 2.2 , 2.2 , 2.2 ], [ 0.89 , -0.36 , -0.36 , 0.89 , 0.89 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 11
[[ 2.35 , 1.725, 1.725, 2.35 , 2.35 ], [-0.345, -1.4 , -1.4 , -0.345, -0.345], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 12
[[ 1.85 , 0.78 , 0.78 , 1.85 , 1.85 ], [-1.48 , -2.1 , -2.1 , -1.48 , -1.48 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 13
[[ 0.89 , -0.36 , -0.36 , 0.89 , 0.89 ], [-2.2 , -2.2 , -2.2 , -2.2 , -2.2 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 14
[[-0.345, -1.4 , -1.4 , -0.345, -0.345], [-2.35 , -1.725, -1.725, -2.35 , -2.35 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 15
[[-1.48 , -2.1 , -2.1 , -1.48 , -1.48 ], [-1.85 , -0.78 , -0.78 , -1.85 , -1.85 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 16
[[-2.2 , -2.2 , -2.2 , -2.2 , -2.2 ], [-0.89 , 0.36 , 0.36 , -0.89 , -0.89 ], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 17
[[-2.35 , -1.725, -1.725, -2.35 , -2.35 ], [ 0.345, 1.4 , 1.4 , 0.345, 0.345], [-4.28, -4.28, 8.08, 8.08, -4.28]], # 18
[[-1.85 , -0.78 , -0.78 , -1.85 , -1.85 ], [ 1.48 , 2.1 , 2.1 , 1.48 , 1.48 ], [-4.28, -4.28, 8.08, 8.08, -4.28]]] # 19
def __getitem__(self, index: str | int | ArrayLike) -> np.ndarray | dict:
"""
this makes the class subscriptable, one can retrieve one coloumn by using
strings as keywords, or get a row by using integer indices or arrays
"""
if isinstance(index, str):
return self.data[index]
return {key: value[index] for key, value in self.data.items()}

johannes bilk
committed
def __setitem__(self, index: str | int | ArrayLike, value: dict | Any) -> None:
"""
Allows setting the value of a column by using strings as keywords,
setting the value of a row by using integer indices or arrays,
or setting a specific value using a tuple of key and index.
:param index: The column name, row index, or tuple of key and index.
:param value: The value to set.
"""
if isinstance(index, str):
assert len(value) == len(self.data[list(self.data.keys())[0]]), 'value should have same length as data'
self.data[index] = value
elif isinstance(index, tuple) and len(index) == 2 and isinstance(index[0], str) and isinstance(index[1], int):
key, idx = index
assert key in self.data, f"key {key} not found in data"
self.data[key][idx] = value
else:
assert isinstance(value, dict), "value must be a dictionary when setting rows"
assert set(value.keys()) == set(self.data.keys()), "keys of value must match keys of data"
for key in self.data:
self.data[key][index] = value[key]
def where(self, *conditions: str) -> "Rootable":
    """
    Filters the data based on the provided conditions.
    :param conditions: Conditions as strings of the form '<key> <operator> <value>',
                       split on the first two runs of whitespace. Supported operators
                       are ==, !=, <, >, <=, >= (value is converted to float) and
                       'in' (value is a python expression giving a collection).
                       All conditions are combined with a logical and.
    :return: New instance of the class containing only the rows matching all conditions.
    :raises ValueError: if an operator is not supported.
    """
    if not self.data:
        # nothing loaded, so there is nothing to build a mask from
        return self.__class__(data={})
    mask = np.ones(len(next(iter(self.data.values()))), dtype=bool)  # initial mask allowing all elements
    comparisons = {
        '==': np.equal,
        '!=': np.not_equal,
        '<': np.less,
        '>': np.greater,
        '<=': np.less_equal,
        '>=': np.greater_equal,
    }
    # applying the conditions to create the mask
    for condition in conditions:
        key, op, value = condition.split(None, 2)  # split by the first two spaces only
        if op == 'in':
            # NOTE(security): eval executes arbitrary code, never pass condition
            # strings from untrusted sources; ast.literal_eval would be the safe
            # choice if only literal lists/tuples have to be supported
            mask &= np.isin(self.data[key], eval(value))
            continue
        operation = comparisons.get(op)
        if operation is None:
            raise ValueError(f"Invalid operator {op}")
        mask &= operation(self.data[key].astype(float), float(value))
    # applying the mask to every column; the stored data itself is not modified
    filteredData = {key: values[mask] for key, values in self.data.items()}
    return self.__class__(data=filteredData)
def __repr__(self) -> str:
    """
    the representation of an instance is the string form of its data dict
    """
    return f"{self.data!s}"

johannes bilk
committed
def loadData(self, file: str, events: int | None = None, selection: str | None = None) -> None:
    """
    Reads the file off of the hard drive; it automatically creates event numbers.
    file: str = it's the whole file path + .root ending
    events: int = the number of events to import (None for all)
    selection: str = method of event selection ('random' for random selection,
                     anything else takes the first `events` events)
    The opened tree is kept in self.eventTree and the chosen event indices in
    self.eventIndices (None when all events are used), the other loaders read both.
    """
    clusterBranch = 'PXDClusters/PXDClusters.m_clsCharge'
    self.eventTree = ur.open(f'{file}:tree')
    # the branch is read once and reused for counting and for the event numbers
    clusters = self.eventTree.arrays(clusterBranch, library='np')[clusterBranch]
    numEvents = len(clusters)
    # forget the selection of a previous load, otherwise loading all events
    # after a partial load would still use the old indices
    self.eventIndices = None
    if events is not None:
        if selection == 'random':
            self.eventIndices = np.random.permutation(numEvents)[:events]
        else:
            self.eventIndices = np.arange(min(events, numEvents))
        clusters = clusters[self.eventIndices]
    self._getEventNumbers(clusters)
def _getEventNumbers(self, clusters: np.ndarray, offset: int = 0) -> None:
    """
    a private method that numbers the events: every cluster gets the position
    of its event inside `clusters` plus `offset`, the flattened result is
    stored under 'eventNumber' in the data dict
    """
    perEvent = [
        np.array([eventIndex] * len(eventClusters)) + offset
        for eventIndex, eventClusters in enumerate(clusters)
    ]
    self.data['eventNumber'] = self._flatten(perEvent)
def _getData(self, keyword: str, library: str = 'np') -> np.ndarray:
    """
    a private method for converting branches into something useful, namely
    into flat numpy arrays, if the keyword library is set to np.
    keyword: str = the full branch name
    library: str = can be 'np' (numpy), 'pd' (pandas) or 'ak' (awkward)
                   see uproot documentation for more info
    returns the flattened branch, restricted to self.eventIndices if those are set
    raises KeyError if the branch cannot be read from the event tree
    (before, the KeyError class itself was returned and ended up in the data dict)
    """
    try:
        data = self.eventTree.arrays(keyword, library=library)[keyword]
    except Exception as err:
        # whatever went wrong while reading is kept as the cause
        raise KeyError(f'could not read branch {keyword!r} from the event tree') from err
    if self.eventIndices is not None:
        data = data[self.eventIndices]
    return self._flatten(data)
def _flatten(self, structure: ArrayLike, maxDepth: int = None, currentDepth: int = 0) -> np.ndarray:
    """
    a private helper used while loading branches, it flattens ragged
    lists/arrays recursively and returns the result as a numpy array
    structure: the list/array to flatten
    maxDepth: int = how many levels of nesting are removed (None removes all)
    currentDepth: int = bookkeeping for the recursion, leave it at its default
    """
    # whether nested lists/arrays found at this level are still unpacked
    mayDescend = maxDepth is None or currentDepth < maxDepth
    collected = []
    for item in structure:
        if mayDescend and isinstance(item, (list, np.ndarray)):
            collected += list(self._flatten(item, maxDepth, currentDepth + 1))
        else:
            collected.append(item)
    return np.array(collected)
def getClusters(self) -> None:
    """
    loads every branch listed in self.clusters into the data dict, the key
    is the part of the branch name after its last underscore
    """
    self.gotClusters = True
    for branchName in self.clusters:
        _, _, shortName = branchName.rpartition('_')
        self.data[shortName] = self._getData(branchName)
def getMatrices(self, matrixSize: tuple = (9, 9)) -> None:
    """
    loads the digit branches into arrays and converts them into adc matrices,
    the result is stored as an integer array under 'cluster' in the data dict
    matrixSize: tuple = shape of the matrix that is built for every cluster
    """
    # imported locally because the module does not import it at the top
    from concurrent.futures import ThreadPoolExecutor

    def readBranch(name: str) -> np.ndarray:
        # one branch as numpy array, restricted to the selected events if any
        branch = self.eventTree.arrays(name, library='np')[name]
        return branch if self.eventIndices is None else branch[self.eventIndices]

    uCellIDs, vCellIDs, cellCharges = (readBranch(name) for name in self.digits)
    # the relation between clusters and digits is stored in its own branch,
    # because the digits branch is not organized per cluster
    clusterDigits = readBranch(self.clusterToDigis)

    # the events are split into four chunks that are converted in parallel
    indexChunks = np.array_split(range(len(cellCharges)), 4)
    with ThreadPoolExecutor(max_workers=None) as executor:
        futures = [executor.submit(self._getMatrices, chunk, uCellIDs, vCellIDs, cellCharges, clusterDigits, matrixSize) for chunk in indexChunks]
        results = [future.result() for future in futures]

    # chunks without any cluster have no matrix dimensions and would break
    # the concatenation, so they are left out
    results = [chunk for chunk in results if len(chunk) > 0]
    if results:
        self.data['cluster'] = np.concatenate(results).astype('int')
    else:
        self.data['cluster'] = np.zeros((0, *matrixSize), dtype='int')
def _getMatrices(self, indexChunks: ArrayLike, uCellIDs: ArrayLike, vCellIDs: ArrayLike, cellCharges: ArrayLike, clusterDigits: ArrayLike, matrixSize: tuple = (9, 9)) -> np.ndarray:
    """
    this takes the ragged/jagged digit arrays and converts them into matrices
    of shape matrixSize (9x9 by default), centered on the digit with the
    highest charge; digits falling outside of the matrix are dropped.
    it's a rather slow process because of all the looping
    indexChunks: the event indices to process
    uCellIDs, vCellIDs, cellCharges: per event arrays of the digits
    clusterDigits: per event, for every cluster the indices of its digits
    returns an object array of shape (number of clusters, *matrixSize)
    """
    plotRange = np.array(matrixSize) // 2
    events = []
    for event in indexChunks:
        # signed integers, so that subtracting the seed position cannot wrap
        # around if the cell IDs are stored as unsigned values
        digitsU = np.asarray(uCellIDs[event], dtype=np.int64)
        digitsV = np.asarray(vCellIDs[event], dtype=np.int64)
        digitsCharge = np.asarray(cellCharges[event])
        for indices in clusterDigits[event]:
            indices = np.asarray(indices, dtype=np.int64).ravel()
            cacheImg = np.zeros(matrixSize)
            # a cluster without digits stays an empty matrix
            if indices.size > 0:
                charges = digitsCharge[indices]
                seed = indices[charges.argmax()]
                uPos = digitsU[indices] - digitsU[seed] + plotRange[0]
                vPos = digitsV[indices] - digitsV[seed] + plotRange[1]
                valid = (uPos >= 0) & (uPos < matrixSize[0]) & (vPos >= 0) & (vPos < matrixSize[1])
                cacheImg[uPos[valid], vPos[valid]] = charges[valid]
            events.append(cacheImg)
    # the reshape keeps the matrix dimensions even when no cluster was found,
    # so that the chunks can always be concatenated
    return np.array(events, dtype=object).reshape(-1, *matrixSize)
"""
converting the uv coordinates, together with sensor ids, into xyz coordinates
"""
if self.gotClusters is False:
self.getClusters()
indexChunnks = np.array_split(range(len(self.data['sensorID'])), 4)
with ThreadPoolExecutor(max_workers=None) as executor:
futures = [executor.submit(self._getCoordisnates, self.data['uPosition'][chunk], self.data['vPosition'][chunk], self.data['sensorID'][chunk]) for chunk in indexChunnks]
xResults, yResults, zResults = [], [], []
for future in futures:
x, y, z = future.result()
xResults.append(x)
yResults.append(y)
zResults.append(z)
self.data['xPosition'] = np.concatenate(xResults)
self.data['yPosition'] = np.concatenate(yResults)
self.data['zPosition'] = np.concatenate(zResults)
def _getCoordisnates(self, uPositions: ArrayLike, vPositions: ArrayLike, sensorIDs: ArrayLike) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    a private method for converting 2d uv coords into 3d xyz coordinates.
    every uv pair is projected to the point (u, 0, v), rotated around the third
    axis by the angle of its panel and moved by the shift vector of its panel.
    uPositions, vPositions: the cluster positions on the panel
    sensorIDs: the sensor id of every cluster, used as key into self.transformation
    returns the x, y and z coordinates as three arrays
    raises KeyError for a sensor id without transformation
    """
    # shift vector and rotation angle of the panel of every cluster
    transforms = [self.transformation[str(sensorID)] for sensorID in sensorIDs]
    shifts = np.array([shift for shift, _ in transforms], dtype=float).reshape(-1, 3)
    theta = np.deg2rad(np.array([angle for _, angle in transforms], dtype=float))
    u = np.asarray(uPositions, dtype=float)
    v = np.asarray(vPositions, dtype=float)
    # the rotation matrix applied to (u, 0, v) reduces to these three lines,
    # which allows handling all clusters at once instead of one by one
    xArr = np.cos(theta) * u + shifts[:, 0]
    yArr = np.sin(theta) * u + shifts[:, 1]
    zArr = v + shifts[:, 2]
    return xArr, yArr, zArr
def getLayers(self) -> None:
    """
    looks up layer and ladder of every cluster from its sensor id and stores
    them under 'layer' and 'ladder' in the data dict, the cluster branches
    are loaded first if that has not happened yet
    """
    if self.gotClusters is False:
        self.getClusters()
    # one [layer, ladder] entry per cluster
    lookups = [self.layersLadders[str(sensorID)] for sensorID in self.data['sensorID']]
    self.data['layer'] = np.array([entry[0] for entry in lookups])
    self.data['ladder'] = np.array([entry[1] for entry in lookups])
def getMCData(self) -> None:
    """
    this loads the monte carlo data from the root file and stores, for every
    cluster, the momentum components, the pdg code and the referenced index
    under 'momentumX', 'momentumY', 'momentumZ', 'pdg' and 'clsNumber'
    """
    def readBranch(name: str) -> np.ndarray:
        # one branch as numpy array, restricted to the selected events if any
        column = self.eventTree.arrays(name, library='np')[name]
        if self.eventIndices is not None:
            column = column[self.eventIndices]
        return column

    # the monte carlo data, they are longer than the cluster data
    pdg, momentumX, momentumY, momentumZ = [readBranch(name) for name in self.mcData]
    # the relations to and from clusters and mc data
    clusterToMC = readBranch(self.clusterToMC)
    mcToCluster = readBranch(self.mcToCluster)
    # the cluster charge is only needed for the number of clusters per event
    clsCharge = readBranch('PXDClusters/PXDClusters.m_clsCharge')

    # reorganizing MC data, one list entry per event
    collected = {'momentumX': [], 'momentumY': [], 'momentumZ': [], 'pdg': [], 'clsNumber': []}
    for event, relations in enumerate(clusterToMC):
        # _fillMCList fills in the missing spots, because there is no mc data
        # for every cluster
        references = self._fillMCList(mcToCluster[event], relations, len(clsCharge[event]))
        collected['clsNumber'].append(references)
        pdgs, xmom, ymom, zmom = self._getMCData(references, pdg[event], momentumX[event], momentumY[event], momentumZ[event])
        collected['momentumX'].append(xmom)
        collected['momentumY'].append(ymom)
        collected['momentumZ'].append(zmom)
        collected['pdg'].append(pdgs)
    for key, perEvent in collected.items():
        self.data[key] = self._flatten(perEvent)
def _findMissing(self, lst: list, length: int) -> list:
    """
    a private method that returns, in ascending order, all indices below
    `length` that do not occur in `lst`
    """
    present = set(lst)
    return [index for index in range(length) if index not in present]
def _fillMCList(self, fromClusters: ArrayLike, toClusters: ArrayLike, length: int) -> list:
    """
    a private method for filling MC data arrays where clusters don't have
    any information
    fromClusters: the cluster index of every relation
    toClusters: the referenced index (or array of indices) of every relation,
                for arrays the first entry is used
    length: int = the number of clusters of the event
    returns a list with one entry per cluster, -1 where there's no relation
    """
    filled = [-1] * length
    # every relation is paired with its own target, so that duplicate,
    # unsorted or out of range cluster indices cannot shift the assignment
    for clusterIndex, target in zip(fromClusters, toClusters):
        clusterIndex = int(clusterIndex)
        if not 0 <= clusterIndex < length or filled[clusterIndex] != -1:
            # outside of this event or already filled, the first relation wins
            continue
        candidates = np.ravel(target)
        if candidates.size:
            filled[clusterIndex] = int(candidates[0])
    return filled
def _getMCData(self, toClusters: ArrayLike, pdgs: ArrayLike, xMom: ArrayLike, yMom: ArrayLike, zMom: ArrayLike) -> tuple[np.ndarray]:
    """
    collects the actual MC data for the filled and reorganized references,
    a reference of -1 means that data is missing and yields a zero
    returns the pdg codes and the x, y and z momenta as object arrays
    """
    def pick(values: ArrayLike) -> np.ndarray:
        # the referenced entry for every cluster, zero where nothing is referenced
        return np.array([0 if reference == -1 else values[reference] for reference in toClusters], dtype=list)

    return pick(pdgs), pick(xMom), pick(yMom), pick(zMom)
def getStructuredArray(self) -> np.ndarray:
    """
    converts the data dict into a structured numpy array with one field per
    key and one record per row, the field types are taken from the first
    value of every column
    """
    def fieldType(sample):
        # array valued columns keep their dtype and shape, everything else
        # uses the type of the value itself
        if isinstance(sample, np.ndarray):
            return (sample.dtype, sample.shape)
        return type(sample)

    fieldSpecs = [(name, fieldType(column[0])) for name, column in self.data.items()]
    # the records are built row by row, the first column sets the row count
    names = list(self.data)
    rowCount = len(self.data[names[0]])
    rows = [tuple(self.data[name][row] for name in names) for row in range(rowCount)]
    return np.array(rows, dtype=fieldSpecs)