updated uv mapping of filtered out clusters

26253bf7 · johannes bilk · f77722a6 · 26253bf7 · 26253bf7 · 26253bf7
Commit 26253bf7 authored 1 year ago by johannes bilk
--- a/README.md
+++ b/README.md
@@ -37,10 +37,19 @@ loadFromRoot.loadData('/root-files/slow_pions_2.root')
 loadFromRoot.getClusters()
 loadFromRoot.getCoordinates()
 loadFromRoot.getLayers()
+loadFromRoot.getDigits()
 loadFromRoot.getMatrices()
 loadFromRoot.getMCData()
 ```

+The user can define which tree is to be loaded by adding its name using a colon:
+
+```python
+loadFromRoot.loadData('/root-files/slow_pions_2.root:tree')
+```
+
+This is not necessary, because the code defaults to 'tree' as the tree name.
+
 One can now specify that ROI unselected digits should be read and that the cluster
 data should be reconstructed from them. This is still iffy: after including ROI
 unselected clusters, one cannot load Monte Carlo information and the u/v mapping is still
@@ -50,6 +59,7 @@ very wonky.
 loadFromRoot.loadData('/root-files/slow_pions_2.root', includeUnSelected=True)
 ```

+
 The 'get' commands don't have any return value, but instead work in-place.
 All data is then stored inside the object as a dict:

@@ -78,8 +88,12 @@ Here follows a list of keywords contained in the dict:
 - layers:
    - 'layer': int
    - 'ladder': int
+- digits:
+    - 'uCellIDs': array
+    - 'vCellIDs': array
+    - 'cellCharges': array
 - matrices:
-    - 'cluster': array
+    - 'matrix': array
 - Monte Carlo data:
    - 'momentumX': float
    - 'momentumY': float
@@ -120,13 +134,21 @@ loadFromRoot.where('eventNumbers in [0,1,2]')
 And finally you can convert the dict into a structured Numpy array by simply writing:

 ```python
-loadFromRoot.getStructuredArray()
+loadFromRoot.asStructuredArray()
 ```

 This last command returns a NumPy array. From there the user can save it using
 NumPy's built-in functions, convert it to pandas or use it in any way that is
 compatible with NumPy.

+Alternatively one can get it as a pandas DataFrame, which doesn't handle 2D arrays
+properly. So if one uses the pixel matrices, a DataFrame is not advisable.
+
+```python
+loadFromRoot.asDataFrame(popMatrices=True)
+```
+
+
 The class itself is iterable, but it's a bit different from typical Python dicts:
 iteration goes over rows and each row is returned as a dict. Not sure if that's actually useful.

@@ -152,4 +174,4 @@ and then:

 ```bash
 pip3 install .
-```
\ No newline at end of file
+```
--- a/rootable/common/fancyDict.py
+++ b/rootable/common/fancyDict.py
@@ -38,6 +38,12 @@ class FancyDict:
            for key in self.data:
                self.data[key][index] = value[key]

+    def extend(self, value: dict, axis: int = None) -> None:
+        assert isinstance(value, dict), "value must be a dictionary when setting rows"
+        assert set(value.keys()) == set(self.data.keys()), "keys of value must match keys of data"
+        for key in self.data:
+            self.data[key] = np.concatenate((self.data[key], value[key]), axis=axis)
+
    def where(self, *conditions: str) -> dict:
        """
        Filters the data based on the provided conditions.

--- a/rootable/detectors/pxd.py
+++ b/rootable/detectors/pxd.py
@@ -4,6 +4,7 @@ from uproot import TTree
 from ..common import FancyDict
 from concurrent.futures import ThreadPoolExecutor
 from .pxdFilter import FindUnselectedClusters
+import warnings


 class PXD(FancyDict):
@@ -148,7 +149,7 @@ class PXD(FancyDict):
        self.data = data if data is not None else {}

        # in order to find roi unselected clusters
-        self.findUnselectedClusters = FindUnselectedClusters(self.panelIDs)
+        self.findUnselectedClusters = FindUnselectedClusters()
        self.includeUnSelected = False

    def getClusters(self, eventTree: TTree, includeUnSelected: bool = False) -> None:
@@ -404,7 +405,7 @@ class PXD(FancyDict):
        this loads the monte carlo from the root file
        """
        if self.includeUnSelected:
-            raise Warning('mc data are not supported on roi unselected data')
+            warnings.warn('mc data are not supported on roi unselected data')

        # the monte carlo data, they are longer than the cluster data
        pdg = eventTree.arrays(self.mcData[0], library='np')[self.mcData[0]]
@@ -446,6 +447,19 @@ class PXD(FancyDict):
        self.data['clsNumber'] = np.hstack(clusterNumbersList).astype(int)
        self.gotMCData = True

+        if self.includeUnSelected:
+            sampleSize = np.sum(self.data['roiSelected'] == False)
+            missingMCData = self.findUnselectedClusters.fillMCData({
+                    'momentumX': self.data['momentumX'],
+                    'momentumY': self.data['momentumY'],
+                    'momentumZ': self.data['momentumZ'],
+                    'pdg': self.data['pdg'],
+                    'clsNumber': self.data['clsNumber']
+                })
+            for key in missingMCData:
+                self.data[key] = np.hstack((self.data[key], missingMCData[key][0:sampleSize]))
+
+
    @staticmethod
    def _findMissing(lst: list, length: int) -> list:
        """

--- a/rootable/detectors/pxdFilter.py
+++ b/rootable/detectors/pxdFilter.py
@@ -3,7 +3,17 @@ from uproot import TTree


 class FindUnselectedClusters:
-    def __init__(self, panelIDs: list[int]) -> None:
+    """
+    the purpose of this class is to reconstruct the different information
+    of pxd clusters that lay outside of ROIs. instead of adding a flag to
+    entries if they are outside or inside of a ROI or adding additional
+    branches (god I hate that concept) containing all the info, they do
+    this random shit, where there are all digits and I have to piece together
+    what is what, especially since the internal structure is completely
+    different for selected and unselected digits. on top of that, the naming
+    convention sucks and I picked something that is more self-explanatory.
+    """
+    def __init__(self) -> None:
        """
        Initialize the FindUnselectedClusters class.

@@ -16,30 +26,96 @@ class FindUnselectedClusters:
        - uvMapping (dict): Mapping of sensor IDs to their u/v bounds.
        - keyWords (list[str]): Keywords to extract from the event tree.
        """
+        # these are the sensor IDs of the pxd modules/panels from the root file, they are
+        # used to identify on which panels a cluster event happened
+        self.panelIDs = np.array([ 8480,  8512,  8736,  8768,  8992,  9024,  9248,  9280,
+                              9504,  9536,  9760,  9792, 10016, 10048, 10272, 10304,
+                             16672, 16704, 16928, 16960, 17184, 17216, 17440, 17472,
+                             17696, 17728, 17952, 17984, 18208, 18240, 18464, 18496,
+                             18720, 18752, 18976, 19008, 19232, 19264, 19488, 19520])
+        
        # u/v position min/max for layer 1 & 2, they are needed for reconstructing roi unselected cluster locations
        # it's the upper and lower bound of a u/v position on a per sensor id
-        self.uBounds = (2.23725, -2.23725)
-        self.vBounds = [(2.2342727, -2.2364252), (2.23725, -2.1829333), (2.237, -2.23725),
-                        (2.23725, -2.237), (2.0758998, -2.23725), (2.2350502, -2.225),
-                        (2.2272778, -2.23725), (2.23725, -2.230647), (2.237, -2.23725),
-                        (2.23725, -2.2125263), (2.237, -2.23725), (2.23725, -2.231),
-                        (2.237, -2.23725), (2.2364914, -2.237), (2.2292001, -2.23725),
-                        (2.23175, -2.2006087), (3.059299, -3.0685003), (3.0615, -3.05925),
-                        (2.96575, -3.0685), (3.0644617, -2.9605556), (2.971355, -3.0685),
-                        (3.06475, -3.00825), (3.04225, -3.0615), (3.065875, -2.9997501),
-                        (3.02525, -3.0538826), (3.0545, -2.8899581), (3.0539744, -3.0568514),
-                        (3.0685, -2.9997501), (3.05075, -3.0685), (3.0615, -3.0489914),
-                        (3.0629516, -3.0615), (3.0685, -3.0178683), (3.0032272, -3.0685003),
-                        (3.0475001, -3.06775), (3.054042, -3.0685), (3.064417, -3.06775),
-                        (3.04225, -3.0685), (3.0615, -2.82431), (3.03375, -3.0545),
-                        (3.0627446, -3.0569127)]
-
-
-        # all transpormaations are stored in a dict, with the sensor id as a keyword
-        self.panelIDs = panelIDs
-        self.uvMapping = {}
-        for i in range(len(panelIDs)):
-            self.uvMapping[str(panelIDs[i])] = [self.uBounds, self.vBounds[i]]
+        self.uFit = {8480: np.poly1d([ 0.005    , -0.6228546]),
+                     8512: np.poly1d([ 0.005     , -0.62285449]),
+                     8736: np.poly1d([ 0.005    , -0.6228546]),
+                     8768: np.poly1d([ 0.005     , -0.62285449]),
+                     8992: np.poly1d([ 0.005    , -0.6228546]),
+                     9024: np.poly1d([ 0.005     , -0.62285449]),
+                     9248: np.poly1d([ 0.005    , -0.6228546]),
+                     9280: np.poly1d([ 0.005     , -0.62285449]),
+                     9504: np.poly1d([ 0.005    , -0.6228546]),
+                     9536: np.poly1d([ 0.005     , -0.62285449]),
+                     9760: np.poly1d([ 0.005    , -0.6228546]),
+                     9792: np.poly1d([ 0.005     , -0.62285449]),
+                     10016: np.poly1d([ 0.005    , -0.6228546]),
+                     10048: np.poly1d([ 0.005     , -0.62285449]),
+                     10272: np.poly1d([ 0.005    , -0.6228546]),
+                     10304: np.poly1d([ 0.005     , -0.62285449]),
+                     16672: np.poly1d([ 0.005     , -0.62285456]),
+                     16704: np.poly1d([ 0.005     , -0.62285445]),
+                     16928: np.poly1d([ 0.005     , -0.62285456]),
+                     16960: np.poly1d([ 0.005     , -0.62285446]),
+                     17184: np.poly1d([ 0.005     , -0.62285456]),
+                     17216: np.poly1d([ 0.005     , -0.62285446]),
+                     17440: np.poly1d([ 0.005     , -0.62285456]),
+                     17472: np.poly1d([ 0.005     , -0.62285446]),
+                     17696: np.poly1d([ 0.005     , -0.62285456]),
+                     17728: np.poly1d([ 0.005     , -0.62285446]),
+                     17952: np.poly1d([ 0.005     , -0.62285456]),
+                     17984: np.poly1d([ 0.005     , -0.62285446]),
+                     18208: np.poly1d([ 0.005     , -0.62285456]),
+                     18240: np.poly1d([ 0.005     , -0.62285446]),
+                     18464: np.poly1d([ 0.005     , -0.62285456]),
+                     18496: np.poly1d([ 0.005     , -0.62285446]),
+                     18720: np.poly1d([ 0.005     , -0.62285456]),
+                     18752: np.poly1d([ 0.005     , -0.62285446]),
+                     18976: np.poly1d([ 0.005     , -0.62285456]),
+                     19008: np.poly1d([ 0.005     , -0.62285446]),
+                     19232: np.poly1d([ 0.005     , -0.62285456]),
+                     19264: np.poly1d([ 0.005     , -0.62285446]),
+                     19488: np.poly1d([ 0.005     , -0.62285456]),
+                     19520: np.poly1d([ 0.005     , -0.62285445])}
+        self.vFit = {8480: np.poly1d([ 0.00587037, -2.29395374]),
+                     8512: np.poly1d([ 0.00587037, -2.20862039]),
+                     8736: np.poly1d([ 0.00587037, -2.29395374]),
+                     8768: np.poly1d([ 0.00587037, -2.20862039]),
+                     8992: np.poly1d([ 0.00587037, -2.29395375]),
+                     9024: np.poly1d([ 0.00587037, -2.20862039]),
+                     9248: np.poly1d([ 0.00587037, -2.29395375]),
+                     9280: np.poly1d([ 0.00587037, -2.20862039]),
+                     9504: np.poly1d([ 0.00587037, -2.29395375]),
+                     9536: np.poly1d([ 0.00587037, -2.20862039]),
+                     9760: np.poly1d([ 0.00587037, -2.29395375]),
+                     9792: np.poly1d([ 0.00587037, -2.2086204 ]),
+                     10016: np.poly1d([ 0.00587037, -2.29395375]),
+                     10048: np.poly1d([ 0.00587037, -2.20862039]),
+                     10272: np.poly1d([ 0.00587037, -2.29395375]),
+                     10304: np.poly1d([ 0.00587037, -2.20862039]),
+                     16672: np.poly1d([ 1.44676145e-06,  7.00144541e-03, -3.09694398e+00]),
+                     16704: np.poly1d([-1.44676141e-06,  9.22077745e-03, -3.12427848e+00]),
+                     16928: np.poly1d([ 1.44676147e-06,  7.00144538e-03, -3.09694398e+00]),
+                     16960: np.poly1d([-1.44676141e-06,  9.22077745e-03, -3.12427848e+00]),
+                     17184: np.poly1d([ 1.44676151e-06,  7.00144535e-03, -3.09694397e+00]),
+                     17216: np.poly1d([-1.44676138e-06,  9.22077742e-03, -3.12427847e+00]),
+                     17440: np.poly1d([ 1.44676148e-06,  7.00144538e-03, -3.09694398e+00]),
+                     17472: np.poly1d([-1.44676141e-06,  9.22077744e-03, -3.12427848e+00]),
+                     17696: np.poly1d([ 1.44676154e-06,  7.00144533e-03, -3.09694397e+00]),
+                     17728: np.poly1d([-1.44676144e-06,  9.22077747e-03, -3.12427849e+00]),
+                     17952: np.poly1d([ 1.44676148e-06,  7.00144539e-03, -3.09694398e+00]),
+                     17984: np.poly1d([-1.44676143e-06,  9.22077746e-03, -3.12427848e+00]),
+                     18208: np.poly1d([ 1.44676142e-06,  7.00144543e-03, -3.09694399e+00]),
+                     18240: np.poly1d([-1.44676147e-06,  9.22077748e-03, -3.12427848e+00]),
+                     18464: np.poly1d([ 1.44676148e-06,  7.00144539e-03, -3.09694398e+00]),
+                     18496: np.poly1d([-1.44676139e-06,  9.22077742e-03, -3.12427847e+00]),
+                     18720: np.poly1d([ 1.44676152e-06,  7.00144535e-03, -3.09694397e+00]),
+                     18752: np.poly1d([-1.44676141e-06,  9.22077744e-03, -3.12427848e+00]),
+                     18976: np.poly1d([ 1.44676153e-06,  7.00144534e-03, -3.09694397e+00]),
+                     19008: np.poly1d([-1.44676139e-06,  9.22077743e-03, -3.12427848e+00]),
+                     19232: np.poly1d([ 1.44676152e-06,  7.00144537e-03, -3.09694398e+00]),
+                     19264: np.poly1d([-1.44676145e-06,  9.22077748e-03, -3.12427849e+00]),
+                     19488: np.poly1d([ 1.44676150e-06,  7.00144538e-03, -3.09694398e+00]),
+                     19520: np.poly1d([-1.44676143e-06,  9.22077746e-03, -3.12427848e+00])}

        # Keywords for extracting data from the event tree
        self.keyWords = [
@@ -60,23 +136,10 @@ class FindUnselectedClusters:
        Returns:
        - tuple[float]: The u/v positions.
        """
-        # Define the pixel index bounds
-        ux1, ux2 = 249, 0
-        vx1, vx2 = 767, 0
-
-        # Get the u/v position bounds for the given sensor ID
-        [(uy1, uy2), (vy1, vy2)] = self.uvMapping.get(str(sensorID))
-
-        # Calculate the slope for u and v
-        um = (uy2 - uy1) / (ux2 - ux1)
-        vm = (vy2 - vy1) / (vx2 - vx1)
-
-        # Calculate the intercept for u and v
-        ub = uy1 - um * ux1
-        vb = vy1 - vm * vx1
-
+        uMapped = self.uFit[sensorID](uvIndex[0])
+        vMapped = self.vFit[sensorID](uvIndex[1])
        # Calculate and return the u/v positions for the given pixel index
-        return um * uvIndex[0] + ub, vm * uvIndex[1] + vb
+        return uMapped, vMapped

    def getClusters(self, eventTree: TTree) -> dict:
        """
@@ -102,6 +165,19 @@ class FindUnselectedClusters:
        """
        return self._process(eventTree, 'digits')

+    @staticmethod
+    def fillMCData(sampleData: dict) -> dict:
+        """
+        this method generates bullshit mc data for unselected clusters
+        I use it for filling out empty slots in the mc data, maybe I
+        will figure out how to find the proper data inside the root files
+        """
+        fakeMCData = {}
+        for key in sampleData:
+            fakeMCData[key] = np.zeros_like(sampleData[key], dtype=type(sampleData[key][0]))
+
+        return fakeMCData
+
    def _process(self, eventTree: TTree, processType: str = 'clusters') -> dict:
        """
        Common method to process either clusters or digits based on the given processType.

--- a/vPixelMapper.json
+++ b/vPixelMapper.json
+{
+    
\ No newline at end of file