Source code for dclab.rtdc_dataset.core

"""RT-DC dataset core classes and methods"""
import abc
import hashlib
import json
import logging
import os.path
import pathlib
import random
import threading
import traceback
from typing import Literal
import uuid
import warnings

import numpy as np

from .. import definitions as dfn
from .. import downsampling
from ..kde import KernelDensityEstimator
from ..kde import methods as kde_methods
from ..polygon_filter import PolygonFilter
from ..util import hashobj
from . import feat_basin
from .export import Export
from .feat_anc_core import FEATURES_RAPID, AncillaryFeature
from .filter import Filter


class FeatureShouldExistButNotFoundWarning(UserWarning):
    pass


class LocalBasinForbiddenWarning(UserWarning):
    pass


class LogTransformWarning(UserWarning):
    pass



[docs]
class RTDCBase(abc.ABC):
    def __init__(self, identifier=None, enable_basins=True):
        """RT-DC measurement base class

        Notes
        -----
        Besides the filter arrays for each data feature, there is a manual
        boolean filter array ``RTDCBase.filter.manual`` that can be edited
        by the user - a boolean value of ``False`` means that the event is
        excluded from all computations.
        """
        #: Local basins are basins that are defined on the user's file system.
        #: For reasons of data security (leaking data from a server or from a
        #: user's file system), dclab only allows remote basins (see
        #: :func:`basins_retrieve`) by default. This variable is set to True
        #: for the RTDC_HDF5 file format, because it implies the data are
        #: located on the user's computer.
        self._local_basins_allowed = False

        #: Dataset format (derived from class name)
        self.format = self.__class__.__name__.split("_")[-1].lower()

        # Lock for being thread-safe when modifying filters
        self.lock = threading.Lock()

        # Cache attribute used for __len__()-function
        self._length = None
        self._polygon_filter_ids = []
        # Events have the feature name as keys and contain nD ndarrays.
        self._events = {}
        # Ancillaries have the feature name as keys and a
        # tuple containing feature and hash as value.
        self._ancillaries = {}
        # Temporary features are defined by the user ad hoc at runtime.
        self._usertemp = {}
        # List of :class:`.Basin` for external features
        self._basins = None
        # List of basin identifiers that should be ignored, used to
        # avoid circular basin dependencies
        self._basins_ignored = []
        # List of all features available via basins
        self._basins_features = None
        #: Configuration of the measurement
        self.config = None
        #: Export functionalities; instance of
        #: :class:`dclab.rtdc_dataset.export.Export`.
        self.export = Export(self)
        # Filtering functionalities; instance of
        # :class:`dclab.rtdc_dataset.filter.Filter`.
        self._ds_filter = None
        #: Dictionary of log files. Each log file is a list of strings
        #: (one string per line).
        self.logs = {}
        #: Dictionary of tables. Each table is an indexable compound numpy
        #: array.
        self.tables = {}
        #: Title of the measurement
        self.title = None
        #: Path or DCOR identifier of the dataset (set to "none"
        #: for :class:`RTDC_Dict`)
        self.path = None
        # Unique, random identifier
        if identifier is None:
            # Generate a unique, random identifier for this dataset
            rhex = [random.choice('0123456789abcdef') for _n in range(7)]
            self._identifier = "mm-{}_{}".format(self.format, "".join(rhex))
        else:
            self._identifier = identifier

        # Basins are initialized in the "basins" property function
        self._enable_basins = enable_basins

        self._logger = None

    def __contains__(self, feat):
        ct = False
        if (feat in self._events
                or feat in self._usertemp
                or feat in self.features_basin):
            ct = True
        else:
            # Check ancillary features data
            if feat in self._ancillaries:
                # already computed
                ct = True
            elif feat in AncillaryFeature.feature_names:
                # get all instance of AncillaryFeature that
                # check availability of the feature `feat`
                instlist = AncillaryFeature.get_instances(feat)
                for inst in instlist:
                    if inst.is_available(self):
                        # to be computed
                        ct = True
                        break
        return ct

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close()

    def __getitem__(self, feat):
        if feat in self._events:
            return self._events[feat]
        elif feat in self._usertemp:
            return self._usertemp[feat]
        # 1. Check for cached ancillary data
        data = self._get_ancillary_feature_data(feat, no_compute=True)
        if data is not None:
            return data
        # 2. Check for h5dataset-based, file-based, or other basin data,
        #    in that order.
        for basin_type in ["internal", "file", None]:
            data = self._get_basin_feature_data(feat, basin_type=basin_type)
            if data is not None:
                return data
        # 3. Check for ancillary features that can be computed
        data = self._get_ancillary_feature_data(feat)
        if data is not None:
            return data
        if feat in self:
            warnings.warn(f"The feature {feat} is supposedly defined in "
                          f"{self}, but I cannot get its data. Please "
                          f"make sure you have not defined any unreachable "
                          f"remote basins.",
                          FeatureShouldExistButNotFoundWarning)
        # Not here ¯\_(ツ)_/¯
        raise KeyError(f"Feature '{feat}' does not exist in {self}!")

    def __iter__(self):
        """An iterator over all valid scalar features"""
        mycols = []
        for col in self._feature_candidates:
            if col in self:
                mycols.append(col)
        mycols.sort()
        for col in mycols:
            yield col

    def __len__(self):
        if self._length is None:
            self._length = self._get_length()
        return self._length

    def _get_length(self):
        # Try to get length from metadata.
        length = self.config["experiment"].get("event count")
        if length is not None:
            return length
        # Try to get the length from the feature sizes
        keys = list(self._events.keys()) or self.features_basin
        keys.sort()
        with self.lock:
            for kk in keys:
                length = len(self[kk])
                if length:
                    return length
            else:
                raise ValueError(
                    f"Could not determine size of dataset '{self}'.")

    def __repr__(self):
        repre = "<{} '{}' at {}".format(self.__class__.__name__,
                                        self.identifier,
                                        hex(id(self)))
        if self.path != "none":
            repre += " ({})>".format(self.path)
        return repre

    @property
    def basins(self):
        """Basins with upstream features from internal/external locations

        If the instance was created with `enable_basins=False`, then
        only internal basins are returned.
        """
        if self._basins is None:
            self._basins = self.basins_retrieve()
        return self._basins

    @property
    def filter(self):
        """Filtering functionalities; instance of :class:`.Filter`"""
        self._assert_filter()
        return self._ds_filter

    @property
    def logger(self):
        if self._logger is None:
            self._logger = logging.getLogger(__name__).getChild(self.hash)
        return self._logger

    def _assert_filter(self):
        if self._ds_filter is None:
            self._ds_filter = Filter(self)

    def _get_ancillary_feature_data(self,
                                    feat: str,
                                    no_compute: bool = False):
        """Return feature data of ancillary features

        Parameters
        ----------
        feat: str
            Name of the feature
        no_compute: bool
            Whether to bother computing the feature. If it is not
            already computed, return None instead

        Returns
        -------
        data:
            The feature object (array-like) or None if it could not
            be found or was not computed.
        """
        data = None
        anhash = None
        if feat in AncillaryFeature.feature_names:
            # Try to find the feature in the ancillary features
            # (see feat_anc_core submodule for more information).
            # These features are cached in `self._ancillaries`.
            ancol = AncillaryFeature.available_features(self)
            if feat in ancol:
                # The feature is generally available.
                if feat in self._ancillaries:
                    # We have already computed the feature. Make sure that we
                    # have the updated one by checking the hash.
                    anhash = ancol[feat].hash(self)
                    if self._ancillaries[feat][0] == anhash:
                        # Use cached value
                        data = self._ancillaries[feat][1]
                # We either already have the ancillary feature or have to
                # compute it. We only compute it if we are asked to.
                if data is None and not no_compute:
                    anhash = anhash or ancol[feat].hash(self)
                    # Compute new value
                    data_dict = ancol[feat].compute(self)
                    for okey in data_dict:
                        # Store computed value in `self._ancillaries`.
                        self._ancillaries[okey] = (anhash, data_dict[okey])
                    data = data_dict[feat]
        return data

    def _get_basin_feature_data(
            self,
            feat: str,
            basin_type: Literal["file", "internal", "remote", None] = None):
        """Return feature data from basins

        Parameters
        ----------
        feat: str
            Name of the feature
        basin_type: str or bool
            The basin type to look at, which is either "file"-based
            (e.g. local on disk), "remote"-based (e.g. S3), or
            "internal"-type (e.g. h5py.Dataset inside the current HDF5 file).
            Defaults to `None` which means no preference.

        Returns
        -------
        data:
            The feature object (array-like) or None if it could not
            be found or was not computed.
        """
        data = None
        if self.basins:
            for bn in list(self.basins):
                if basin_type is not None and basin_type != bn.basin_type:
                    # User asked for specific basin type
                    continue
                try:
                    # There are all kinds of errors that may happen here.
                    # Note that `bn.features` can already trigger an
                    # availability check that may raise a ValueError.
                    # TODO:
                    #  Introduce some kind of callback so the user knows
                    #  why the data are not available. The current solution
                    #  (fail silently) is not sufficiently transparent,
                    #  especially when considering networking issues.
                    if feat in bn.features:
                        data = bn.get_feature_data(feat)
                        self.logger.info(f"Feature '{feat}' found in '{bn}'")
                        # The data are available, we may abort the search.
                        break
                except feat_basin.BasinIdentifierMismatchError:
                    # Likely a basin identifier mismatch
                    warnings.warn(traceback.format_exc())
                except (KeyError, OSError, PermissionError):
                    # Basin data not available
                    pass
                except feat_basin.BasinNotAvailableError:
                    # remove the basin from the list
                    # TODO:
                    #  Check whether this has an actual effect. It could be
                    #  that due to some iterative process `self`
                    #  gets re-initialized and we have to go through this
                    #  again.
                    self._basins.remove(bn)
                    warnings.warn(
                        f"Removed unavailable basin {bn} from {self}")
                except BaseException:
                    warnings.warn(f"Could not access {feat} in {self}:\n"
                                  f"{traceback.format_exc()}")
                    pass
        return data


[docs]
    @staticmethod
    def get_kde_spacing(a, scale="linear", method=kde_methods.bin_width_doane,
                        method_kw=None, feat="undefined", ret_scaled=False):
        """Convenience function for computing the contour spacing

        Parameters
        ----------
        a: ndarray
            feature data
        scale: str
            how the data should be scaled ("log" or "linear")
        method: callable
            KDE method to use (see `kde_methods` submodule)
        method_kw: dict
            keyword arguments to `method`
        feat: str
            feature name for debugging
        ret_scaled: bool
            whether to return the scaled array of `a`
        """
        return KernelDensityEstimator.estimate_spacing(
            a=a,
            scale=scale,
            method=method,
            method_kw=method_kw,
            feat=feat,
            ret_scaled=ret_scaled,
        )


    @property
    def _feature_candidates(self):
        """List of feature candidates for this dataset

        Use with caution! Features in this list might not actually
        be available. Always check against `__contains__`.
        """
        feats = list(self._events.keys())
        feats += list(self._usertemp.keys())
        feats += list(AncillaryFeature.feature_names)
        feats += self.features_basin
        feats = sorted(set(feats))
        # exclude non-standard features
        featsv = [ff for ff in feats if dfn.feature_exists(ff)]
        return featsv

    @property
    def _filter(self):
        """return the current filter boolean array"""
        warnings.warn("RTDCBase._filter is deprecated. Please use "
                      + "RTDCBase.filter.all instead.",
                      DeprecationWarning)
        return self.filter.all

    @property
    def _plot_filter(self):
        raise NotImplementedError(
            "RTDCBase._plot_filter has been removed in dclab 0.16.0. "
            + "Please use the output of RTDCBase.downsample_scatter "
            + "with the argument ret_mask instead.")

    @property
    def identifier(self):
        """Unique (unreproducible) identifier"""
        return self._identifier

    @property
    def features(self):
        """All available features"""
        features = []
        for col in self._feature_candidates:
            if col in self:
                features.append(col)
        features.sort()
        return features

    @property
    def features_ancillary(self):
        """All available ancillary features

        This includes all ancillary features, excluding the features
        that are already in `self.features_innate`. This means that
        there may be overlap between `features_ancillary` and e.g.
        `self.features_basin`.

        .. versionadded:: 0.58.0

        """
        features_innate = self.features_innate
        features_ancillary = []
        for feat in AncillaryFeature.feature_names:
            if feat not in features_innate and feat in self:
                features_ancillary.append(feat)
        return sorted(features_ancillary)

    @property
    def features_basin(self):
        """All features accessed via upstream basins from other locations"""
        if self._basins_features is None:
            if self.basins:
                features = []
                for bn in self.basins:
                    if bn.features and set(bn.features) <= set(features):
                        # We already have the features from a different basin.
                        # There might be a basin availability check going on
                        # somewhere, but we are not interested in it.
                        continue
                    if bn.is_available():
                        features += bn.features
                self._basins_features = sorted(set(features))
            else:
                self._basins_features = []
        return self._basins_features

    @property
    def features_innate(self):
        """All features excluding ancillary, basin, or temporary features

        Internal basin features are included since version 0.71.4.
        """
        innate = [ft for ft in self.features if ft in self._events]
        for bn_dict in self.basins_get_dicts():
            if bn_dict.get("type") == "internal":
                for feat in bn_dict.get("features", []):
                    if feat in self.features_basin:
                        innate.append(feat)
        return innate

    @property
    def features_loaded(self):
        """All features that have been computed

        This includes ancillary features and temporary features.

        Notes
        -----
        Ancillary features that are computationally cheap to compute are
        always included. They are defined in
        :const:`dclab.rtdc_dataset.feat_anc_core.FEATURES_RAPID`.
        """
        features_loaded = self.features_local + self.features_innate
        features_loaded += [f for f in self.features if f in FEATURES_RAPID]
        return sorted(set(features_loaded))

    @property
    def features_local(self):
        """All features that are, with certainty, really fast to access

        Local features is a slimmed down version of `features_loaded`.
        Nothing needs to be computed, not even rapid features
        (:const:`dclab.rtdc_dataset.feat_anc_core.FEATURES_RAPID`).
        And features from remote sources that have not been downloaded
        already are excluded. Ancillary and temporary features that are
        available are included.
        """
        features_local = []
        # Note that the hierarchy format just calls its hparent's
        # `features_local`.
        if hasattr(self._events, "_cached_events"):
            features_local += list(self._events._cached_events.keys())

        if self.format == "hdf5":
            features_local += list(self._events.keys())

        # Get into the basins.
        for bn in self.basins:
            if (bn.basin_format == "hdf5"
                    and bn.basin_type == "file"
                    and bn.is_available()):
                features_local += bn.ds.features_local
            elif bn._ds is not None:
                features_local += bn.ds.features_local

        # If they are here, then we use them:
        features_local += list(self._ancillaries.keys())
        features_local += list(self._usertemp.keys())

        return sorted(set(features_local))

    @property
    def features_scalar(self):
        """All scalar features available"""
        sclr = [ft for ft in self.features if dfn.scalar_feature_exists(ft)]
        return sclr

    @property
    @abc.abstractmethod
    def hash(self) -> str:
        """Reproducible dataset hash (defined by derived classes)"""


[docs]
    def ignore_basins(self, basin_identifiers):
        """Ignore these basin identifiers when looking for features

        This is used to avoid circular basin dependencies.
        """
        self._basins_ignored += basin_identifiers



[docs]
    def apply_filter(self, force=None):
        """Compute the filters for the dataset"""
        with self.lock:
            if force is None:
                force = []
            self.filter.update(rtdc_ds=self, force=force)



[docs]
    def close(self):
        """Close any open files or connections, including basins

        If implemented in a subclass, the subclass must call this
        method via `super`, otherwise basins are not closed. The
        subclass is responsible for closing its specific file handles.
        """
        if self._basins:
            for bn in self._basins:
                bn.close()



[docs]
    def get_downsampled_scatter(self, xax="area_um", yax="deform",
                                downsample=0, xscale="linear",
                                yscale="linear", remove_invalid=False,
                                ret_mask=False):
        """Downsampling by removing points at dense locations

        Parameters
        ----------
        xax: str
            Identifier for x axis (e.g. "area_um", "aspect", "deform")
        yax: str
            Identifier for y axis
        downsample: int
            Number of points to draw in the down-sampled plot.
            This number is either

            - >=1: exactly downsample to this number by randomly adding
                   or removing points
            - 0  : do not perform downsampling
        xscale: str
            If set to "log", take the logarithm of the x-values before
            performing downsampling. This is useful when data are are
            displayed on a log-scale. Defaults to "linear".
        yscale: str
            See `xscale`.
        remove_invalid: bool
            Remove nan and inf values before downsampling; if set to
            `True`, the actual number of samples returned might be
            smaller than `downsample` due to infinite or nan values
            (e.g. due to logarithmic scaling).
        ret_mask: bool
            If set to `True`, returns a boolean array of length
            `len(self)` where `True` values identify the filtered
            data.

        Returns
        -------
        xnew, xnew: 1d ndarray of lenght `N`
            Filtered data; `N` is either identical to `downsample`
            or smaller (if `remove_invalid==True`)
        mask: 1d boolean array of length `len(RTDCBase)`
            Array for identifying the downsampled data points
        """
        if downsample < 0:
            raise ValueError("`downsample` must be zero or positive!")

        downsample = int(downsample)
        xax = xax.lower()
        yax = yax.lower()

        # Get data
        with self.lock:
            filter_all = self.filter.all
            x = self[xax][filter_all]
            y = self[yax][filter_all]

        # Apply scale (no change for linear scale)
        xs = KernelDensityEstimator.apply_scale(x, xscale, xax)
        ys = KernelDensityEstimator.apply_scale(y, yscale, yax)

        _, _, idx = downsampling.downsample_grid(xs, ys,
                                                 samples=downsample,
                                                 remove_invalid=remove_invalid,
                                                 ret_idx=True)

        if ret_mask:
            # Mask is a boolean array of len(self)
            mask = np.zeros(len(self), dtype=bool)
            mids = np.where(filter_all)[0]
            mask[mids] = idx
            return x[idx], y[idx], mask
        else:
            return x[idx], y[idx]



[docs]
    def get_kde_contour(self, xax="area_um", yax="deform", xacc=None,
                        yacc=None, kde_type="histogram", kde_kwargs=None,
                        xscale="linear", yscale="linear"):
        """Evaluate the kernel density estimate for contour plots

        Parameters
        ----------
        xax: str
            Identifier for X axis (e.g. "area_um", "aspect", "deform")
        yax: str
            Identifier for Y axis
        xacc: float
            Contour accuracy in x direction
        yacc: float
            Contour accuracy in y direction
        kde_type: str
            The KDE method to use
        kde_kwargs: dict
            Additional keyword arguments to the KDE method
        xscale: str
            If set to "log", take the logarithm of the x-values before
            computing the KDE. This is useful when data are
            displayed on a log-scale. Defaults to "linear".
        yscale: str
            See `xscale`.

        Returns
        -------
        X, Y, Z : coordinates
            The kernel density Z evaluated on a rectangular grid (X,Y).
        """
        kde_instance = KernelDensityEstimator(rtdc_ds=self)
        xmesh, ymesh, density = kde_instance.get_raster(
            xax=xax, yax=yax, xacc=xacc, yacc=yacc, kde_type=kde_type,
            kde_kwargs=kde_kwargs, xscale=xscale, yscale=yscale
        )

        return xmesh, ymesh, density



[docs]
    def get_kde_scatter(self, xax="area_um", yax="deform", positions=None,
                        kde_type="histogram", kde_kwargs=None, xscale="linear",
                        yscale="linear"):
        """Evaluate the kernel density estimate for scatter plots

        Parameters
        ----------
        xax: str
            Identifier for X axis (e.g. "area_um", "aspect", "deform")
        yax: str
            Identifier for Y axis
        positions: list of two 1d ndarrays or ndarray of shape (2, N)
            The positions where the KDE will be computed. Note that
            the KDE estimate is computed from the points that
            are set in `self.filter.all`.
        kde_type: str
            The KDE method to use, see :const:`.kde_methods.methods`
        kde_kwargs: dict
            Additional keyword arguments to the KDE method
        xscale: str
            If set to "log", take the logarithm of the x-values before
            computing the KDE. This is useful when data are are
            displayed on a log-scale. Defaults to "linear".
        yscale: str
            See `xscale`.

        Returns
        -------
        density : 1d ndarray
            The kernel density evaluated for the filtered data points.
        """
        kde_instance = KernelDensityEstimator(rtdc_ds=self)
        density = kde_instance.get_scatter(
            xax=xax, yax=yax, positions=positions, kde_type=kde_type,
            kde_kwargs=kde_kwargs, xscale=xscale, yscale=yscale
        )

        return density



[docs]
    def basins_get_dicts(self):
        """Return the list of dictionaries describing the dataset's basins"""
        # Only implement this for classes that support this
        return []



[docs]
    def basins_retrieve(self):
        """Load all basins available

        .. versionadded:: 0.54.0

        In dclab 0.51.0, we introduced basins, a simple way of combining
        HDF5-based datasets (including the :class:`.HDF5_S3` format).
        The idea is to be able to store parts of the dataset
        (e.g. images) in a separate file that could then be located
        someplace else (e.g. an S3 object store).

        If an RT-DC file has "basins" defined, then these are sought out and
        made available via the `features_basin` property.

        .. versionchanged:: 0.57.5

            "file"-type basins are only available for subclasses that
            set the `_local_basins_allowed` attribute to True.

        .. versionchanged:: 0.71.5

            If the instance was created with `enable_basins=False`, then
            only internal basins are returned.
            The `enable_basins` check was previously done in the logic
            of in the `basins` property.
        """
        basins = []
        if self._enable_basins:
            bc = feat_basin.get_basin_classes()
        else:
            # Only allow internal basins
            bc = {"h5dataset": feat_basin.InternalH5DatasetBasin}
        # Sort basins according to priority
        bdicts_srt = sorted(self.basins_get_dicts(),
                            key=feat_basin.basin_priority_sorted_key)
        # complement basin "key"s (we do the same in writer)
        for bdict in bdicts_srt:
            if "key" not in bdict:
                b_dat = json.dumps(bdict, indent=2, sort_keys=True).split("\n")
                bdict["key"] = hashobj(b_dat)

        bd_keys = [bd["key"] for bd in bdicts_srt]
        bd_keys += self._basins_ignored
        for bdict in bdicts_srt:
            if bdict["format"] not in bc:
                warnings.warn(f"Ignored basin of format '{bdict['format']}'!")
                continue
            if bdict["key"] in self._basins_ignored:
                warnings.warn(
                    f"Encountered cyclic basin dependency '{bdict['key']}'",
                    feat_basin.CyclicBasinDependencyFoundWarning)
                continue

            # Basin initialization keyword arguments
            kwargs = {
                "name": bdict.get("name"),
                "description": bdict.get("description"),
                # Honor features intended by basin creator.
                "features": bdict.get("features"),
                # Which mapping we are using ("same", "basinmap1", ...)
                "mapping": bdict.get("mapping", "same"),
                # For non-identical mapping ("basinmap1", etc.), we
                # need the referring dataset.
                "mapping_referrer": self,
                # Make sure the measurement identifier is checked.
                "referrer_identifier": self.get_measurement_identifier(),
                # Make sure the basin identifier is checked.
                "basin_identifier": bdict.get("identifier"),
                # allow to ignore basins
                "ignored_basins": bd_keys,
                # basin key
                "key": bdict["key"],
                # whether the basin is perishable or not
                "perishable": bdict.get("perishable", False),
            }

            # Check whether this basin is supported and exists
            if bdict["type"] == "internal":
                b_cls = bc[bdict["format"]]
                bna = b_cls(bdict["paths"][0], **kwargs)
                basins.append(bna)
            elif bdict["type"] == "file":
                if not self._local_basins_allowed:
                    warnings.warn(f"Basin type 'file' not allowed for format "
                                  f"'{self.format}'",
                                  LocalBasinForbiddenWarning)
                    # stop processing this basin
                    continue
                p_paths = list(bdict["paths"])
                # translate Windows and Unix relative paths
                for pi in list(p_paths):  # [sic] create a copy of the list
                    if pi.count(".."):
                        if pi[2:].count("/") and os.path.sep == r"\\":
                            # Windows
                            p_paths.append(pi.replace("/", r"\\"))
                        elif pi[2:].count(r"\\") and os.path.sep == "/":
                            # Unix
                            p_paths.append(pi.replace(r"\\", "/"))
                # perform the actual check
                for pp in p_paths:
                    pp = pathlib.Path(pp)
                    # Instantiate the proper basin class
                    b_cls = bc[bdict["format"]]
                    # Try absolute path
                    bna = b_cls(pp, **kwargs)

                    try:
                        absolute_exists = bna.verify_basin()
                    except BaseException:
                        pass
                    else:
                        if absolute_exists:
                            basins.append(bna)
                            break
                    # Try relative path
                    this_path = pathlib.Path(self.path)
                    if this_path.exists():

                        # Insert relative path
                        bnr = b_cls(this_path.parent / pp, **kwargs)
                        if bnr.verify_basin():
                            basins.append(bnr)
                            break
            elif bdict["type"] == "remote":
                for url in bdict["urls"]:
                    # Instantiate the proper basin class
                    b_cls = bc[bdict["format"]]
                    bna = b_cls(url, **kwargs)
                    # In contrast to file-type basins, we just add all remote
                    # basins without checking first. We do not check for
                    # the availability of remote basins, because they could
                    # be temporarily inaccessible (unstable network connection)
                    # and because checking the availability of remote basins
                    # normally takes a lot of time.
                    basins.append(bna)
            else:
                warnings.warn(
                    f"Encountered unsupported basin type '{bdict['type']}'!")
        return basins



[docs]
    def get_measurement_identifier(self):
        """Return a unique measurement identifier

        Return the [experiment]:"run identifier" configuration feat, if it
        exists. Otherwise, return the MD5 sum computed from the measurement
        time, date, and setup identifier.

        Returns `None` if no identifier could be found or computed.

        .. versionadded:: 0.51.0

        """
        identifier = self.config.get("experiment", {}).get("run identifier",
                                                           None)
        if identifier is None:
            time = self.config.get("experiment", {}).get("time", None) or None
            date = self.config.get("experiment", {}).get("date", None) or None
            sid = self.config.get("setup", {}).get("identifier", None) or None
            if None not in [time, date, sid]:
                # only compute an identifier if all of the above are defined.
                hasher = hashlib.md5(f"{time}_{date}_{sid}".encode("utf-8"))
                identifier = str(uuid.UUID(hex=hasher.hexdigest()))
        return identifier



[docs]
    def polygon_filter_add(self, filt):
        """Associate a Polygon Filter with this instance

        Parameters
        ----------
        filt: int or instance of `PolygonFilter`
            The polygon filter to add
        """
        with self.lock:
            self._assert_filter()  # [sic] initialize filter if not done yet
            if not isinstance(filt, (PolygonFilter, int, float)):
                msg = "`filt` must be a number or instance of PolygonFilter!"
                raise ValueError(msg)

            if isinstance(filt, PolygonFilter):
                uid = filt.unique_id
            else:
                uid = int(filt)
            # append item
            self.config["filtering"]["polygon filters"].append(uid)



[docs]
    def polygon_filter_rm(self, filt):
        """Remove a polygon filter from this instance

        Parameters
        ----------
        filt: int or instance of `PolygonFilter`
            The polygon filter to remove
        """
        with self.lock:
            if not isinstance(filt, (PolygonFilter, int, float)):
                msg = "`filt` must be a number or instance of PolygonFilter!"
                raise ValueError(msg)

            if isinstance(filt, PolygonFilter):
                uid = filt.unique_id
            else:
                uid = int(filt)
            # remove item
            self.config["filtering"]["polygon filters"].remove(uid)



[docs]
    def reset_filter(self):
        """Reset the current filter"""
        with self.lock:
            # reset filter instance
            self.filter.reset()
            # reset configuration
            # remember hierarchy parent
            hp = self.config["filtering"]["hierarchy parent"]
            self.config["filtering"].clear()
            self.config._init_default_filter_values()
            self.config["filtering"]["hierarchy parent"] = hp