Skip to content
Snippets Groups Projects
Commit a3bfa682 authored by johannes bilk's avatar johannes bilk
Browse files

added __setitem__ method, users can also specify the number of events to be...

added __setitem__ method, users can also specify the number of events to be imported and updated readme
parent d605bdd6
No related branches found
No related tags found
No related merge requests found
......@@ -32,13 +32,23 @@ and load the root file and all the data:
```python
loadFromRoot.loadData('/root-files/slow_pions_2.root')
loadFromRoot.getClusters()
loadFromRoot.getCoordisnate()
loadFromRoot.getCoordisnates()
loadFromRoot.getLayers()
loadFromRoot.getMatrices()
loadFromRoot.getMCData()
```
This commands don't have any return value, but instead work in-place.
One can now specify how many events should be loaded from the root file.
Keep in mind that this is different from 'entries': each event consists of an
irregular number of entries. The user can also choose whether events are
selected randomly:
```python
loadFromRoot.loadData('/root-files/slow_pions_2.root', events = 50)
loadFromRoot.loadData('/root-files/slow_pions_2.root', events = 50, selection = 'random')
```
The 'get' commands don't have any return value, but instead work in-place.
All data is then stored inside the object as a dict:
```python
......@@ -48,7 +58,7 @@ loadFromRoot.data
Here follows a list of keywords contained in the dict:
- cluster data:
- 'eventNumbers'
- 'eventNumber'
- 'clsCharge'
- 'seedCharge'
- 'clsSize'
......@@ -62,7 +72,7 @@ Here follows a list of keywords contained in the dict:
- 'yPosition'
- 'zPosition'
- layers:
- 'layers'
- 'layer'
- 'ladder'
- matrices:
- 'cluster'
......
......@@ -2,6 +2,7 @@ import numpy as np
from numpy.typing import ArrayLike
import uproot as ur
from concurrent.futures import ThreadPoolExecutor
from typing import Any
class Rootable:
......@@ -108,6 +109,9 @@ class Rootable:
'MCParticles/MCParticles.m_momentum_y',
'MCParticles/MCParticles.m_momentum_z']
# indices for events to be imported
self.eventIndices = None
# these two establish the relationship to and from clusters and monte carlo
# there are more entries than in the cluster data, but there is still mc data missing
# for some cluster files
......@@ -148,6 +152,27 @@ class Rootable:
if isinstance(index, str):
return self.data[index]
return {key: value[index] for key, value in self.data.items()}
def __setitem__(self, index: str | int | ArrayLike, value: dict | Any) -> None:
"""
Allows setting the value of a column by using strings as keywords,
setting the value of a row by using integer indices or arrays,
or setting a specific value using a tuple of key and index.
:param index: The column name, row index, or tuple of key and index.
:param value: The value to set.
"""
if isinstance(index, str):
assert len(value) == len(self.data[list(self.data.keys())[0]]), 'value should have same length as data'
self.data[index] = value
elif isinstance(index, tuple) and len(index) == 2 and isinstance(index[0], str) and isinstance(index[1], int):
key, idx = index
assert key in self.data, f"key {key} not found in data"
self.data[key][idx] = value
else:
assert isinstance(value, dict), "value must be a dictionary when setting rows"
assert set(value.keys()) == set(self.data.keys()), "keys of value must match keys of data"
for key in self.data:
self.data[key][index] = value[key]
def where(self, *conditions: str) -> dict:
"""
......@@ -191,25 +216,32 @@ class Rootable:
def __repr__(self) -> str:
    """
    Returns the string form of the stored data dict.
    """
    contents = self.data
    return str(contents)
def loadData(self, file: str, events: int | None = None, selection: str | None = None) -> None:
    """
    Reads the file off of the hard drive; it automatically creates event numbers.
    file: str = it's the whole file path + .root ending
    events: int = the number of events to import (None for all)
    selection: str = method of event selection ('random' for random selection,
                     anything else takes the first events in file order)
    """
    self.eventTree = ur.open(f'{file}:tree')

    # read the cluster charge branch once; it is needed both for counting
    # the events and for generating the event numbers
    branch = 'PXDClusters/PXDClusters.m_clsCharge'
    clusters = self.eventTree.arrays(branch, library='np')[branch]

    if events is None:
        # reset a selection left over from an earlier call, otherwise the
        # stale indices would still be applied by the 'get' methods
        self.eventIndices = None
    else:
        numEvents = len(clusters)
        if selection == 'random':
            self.eventIndices = np.random.permutation(numEvents)[:events]
        else:
            self.eventIndices = np.arange(min(events, numEvents))
        clusters = clusters[self.eventIndices]

    self._getEventNumbers(clusters)
def _getEventNumbers(self, clusters: np.ndarray, offset: int = 0) -> None:
    """
    a private method that gets called on file import
    it generates the event numbers from the jagged array of clusters,
    one event number per cluster entry, and stores them under
    the key 'eventNumber'
    clusters: np.ndarray = jagged array with one sequence of entries per event
    offset: int = value added to every generated event number
    """
    # np.full with an explicit integer dtype keeps events without any
    # entries from producing float64 empty arrays (np.array([]) is float64),
    # which could turn the flattened event numbers into floats
    eventNumbers = [
        np.full(len(entries), i + offset, dtype=np.int_)
        for i, entries in enumerate(clusters)
    ]
    self.data['eventNumber'] = self._flatten(eventNumbers)
def _getData(self, keyword: str, library: str = 'np') -> np.ndarray:
"""
......@@ -220,7 +252,10 @@ class Rootable:
see uproot documentation for more info
"""
try:
data = self.eventTree.arrays(keyword, library=library)[keyword]
if self.eventIndices is not None:
data = self.eventTree.arrays(keyword, library=library)[keyword][self.eventIndices]
else:
data = self.eventTree.arrays(keyword, library=library)[keyword]
return self._flatten(data)
except:
return KeyError
......@@ -257,16 +292,25 @@ class Rootable:
"""
loads the digit branches into arrays and converts them into adc matrices
"""
uCellIDs = self.eventTree.arrays(self.digits[0], library='np')[self.digits[0]]
vCellIDs = self.eventTree.arrays(self.digits[1], library='np')[self.digits[1]]
cellCharges = self.eventTree.arrays(self.digits[2], library='np')[self.digits[2]]
if self.eventIndices is not None:
uCellIDs = self.eventTree.arrays(self.digits[0], library='np')[self.digits[0]][self.eventIndices]
vCellIDs = self.eventTree.arrays(self.digits[1], library='np')[self.digits[1]][self.eventIndices]
cellCharges = self.eventTree.arrays(self.digits[2], library='np')[self.digits[2]][self.eventIndices]
else:
uCellIDs = self.eventTree.arrays(self.digits[0], library='np')[self.digits[0]]
vCellIDs = self.eventTree.arrays(self.digits[1], library='np')[self.digits[1]]
cellCharges = self.eventTree.arrays(self.digits[2], library='np')[self.digits[2]]
# this establishes the relation between digits and clusters; digits are not
# stored in the same way as clusters in the root file, so this extra mapping
# is necessary to get the data into a usable and sensible form
clusterDigits = self.eventTree.arrays(self.clusterToDigis, library='np')[self.clusterToDigis]
if self.eventIndices is not None:
clusterDigits = self.eventTree.arrays(self.clusterToDigis, library='np')[self.clusterToDigis][self.eventIndices]
else:
clusterDigits = self.eventTree.arrays(self.clusterToDigis, library='np')[self.clusterToDigis]
indexChunnks = np.array_split(range(len(cellCharges)), 4)
......@@ -363,7 +407,7 @@ class Rootable:
layer, ladder = self.layersLadders[str(id)]
layers.append(layer)
ladders.append(ladder)
self.data['layers'] = np.array(layers)
self.data['layer'] = np.array(layers)
self.data['ladder'] = np.array(ladders)
def getMCData(self) -> None:
......@@ -372,19 +416,32 @@ class Rootable:
"""
# the monte carlo data, they are longer than the cluster data
pdg = self.eventTree.arrays(self.mcData[0], library='np')[self.mcData[0]]
momentumX = self.eventTree.arrays(self.mcData[1], library='np')[self.mcData[1]]
momentumY = self.eventTree.arrays(self.mcData[2], library='np')[self.mcData[2]]
momentumZ = self.eventTree.arrays(self.mcData[3], library='np')[self.mcData[3]]
if self.eventIndices is not None:
pdg = self.eventTree.arrays(self.mcData[0], library='np')[self.mcData[0]][self.eventIndices]
momentumX = self.eventTree.arrays(self.mcData[1], library='np')[self.mcData[1]][self.eventIndices]
momentumY = self.eventTree.arrays(self.mcData[2], library='np')[self.mcData[2]][self.eventIndices]
momentumZ = self.eventTree.arrays(self.mcData[3], library='np')[self.mcData[3]][self.eventIndices]
else:
pdg = self.eventTree.arrays(self.mcData[0], library='np')[self.mcData[0]]
momentumX = self.eventTree.arrays(self.mcData[1], library='np')[self.mcData[1]]
momentumY = self.eventTree.arrays(self.mcData[2], library='np')[self.mcData[2]]
momentumZ = self.eventTree.arrays(self.mcData[3], library='np')[self.mcData[3]]
# this loads the relationships to and from clusters and mc data
# the same kind of indirection is needed as with the cluster digits
clusterToMC = self.eventTree.arrays(self.clusterToMC, library='np')[self.clusterToMC]
mcToCluster = self.eventTree.arrays(self.mcToCluster, library='np')[self.mcToCluster]
if self.eventIndices is not None:
clusterToMC = self.eventTree.arrays(self.clusterToMC, library='np')[self.clusterToMC][self.eventIndices]
mcToCluster = self.eventTree.arrays(self.mcToCluster, library='np')[self.mcToCluster][self.eventIndices]
else:
clusterToMC = self.eventTree.arrays(self.clusterToMC, library='np')[self.clusterToMC]
mcToCluster = self.eventTree.arrays(self.mcToCluster, library='np')[self.mcToCluster]
# it needs the cluster charge as a jagged/ragged array; maybe the event
# numbers could simply be used instead
clsCharge = self.eventTree.arrays('PXDClusters/PXDClusters.m_clsCharge', library='np')['PXDClusters/PXDClusters.m_clsCharge']
if self.eventIndices is not None:
clsCharge = self.eventTree.arrays('PXDClusters/PXDClusters.m_clsCharge', library='np')['PXDClusters/PXDClusters.m_clsCharge'][self.eventIndices]
else:
clsCharge = self.eventTree.arrays('PXDClusters/PXDClusters.m_clsCharge', library='np')['PXDClusters/PXDClusters.m_clsCharge']
# reorganizing MC data
momentumXList = []
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment