Source code for dclab.definitions.feat_logic

import re

from . import feat_const


ML_SCORE_REGEX = re.compile(r"^ml_score_[a-z0-9]{3}$")


[docs] def check_feature_shape(name, data): """Check if (non)-scalar feature matches with its data's dimensionality Parameters ---------- name: str name of the feature data: array-like data whose dimensionality will be checked Raises ------ ValueError If the data's shape does not match its scalar description """ if len(data.shape) == 1 and not scalar_feature_exists(name): raise ValueError( f"Feature '{name}' is not a scalar feature, but " "a 1D array was given for `data`!" ) elif len(data.shape) != 1 and scalar_feature_exists(name): raise ValueError( f"Feature '{name}' is a scalar feature, but the " "`data` array is not 1D!" )
[docs] def feature_exists(name, scalar_only=False): """Return True if `name` is a valid feature name This function not only checks whether `name` is in :const:`feature_names`, but also validates against the machine learning scores `ml_score_???` (where `?` can be a digit or a lower-case letter in the English alphabet). Parameters ---------- name: str name of a feature scalar_only : bool Specify whether the check should only search in scalar features Returns ------- valid: bool True if name is a valid feature, False otherwise. See Also -------- scalar_feature_exists: Wraps `feature_exists` with `scalar_only=True` """ valid = False if name in feat_const.scalar_feature_names: # scalar feature valid = True elif not scalar_only and name in feat_const.feature_names: # non-scalar feature valid = True elif ML_SCORE_REGEX.match(name): # machine-learning score feature ml_score_??? valid = True return valid
def feature_register(name, label=None, is_scalar=True): """Register a new feature for usage in dclab Used by temporary features and plugin features to add new feature names and labels to `dclab.definitions`. Parameters ---------- name: str name of a feature label: str, optional feature label corresponding to the feature name. If set to None, then a label is constructed for the feature name. is_scalar: bool Specify whether the feature of an event is a scalar (True) or not (False) Raises ------ ValueError If the feature already exists. """ allowed_chars = "abcdefghijklmnopqrstuvwxyz_1234567890" feat = "".join([f for f in name if f in allowed_chars]) if feat != name: raise ValueError( "`feature` must only contain lower-case characters, " f"digits, and underscores; got '{name}'!" ) if label is None: label = f"User-defined feature {name}" if feature_exists(name): raise ValueError(f"Feature '{name}' already exists!") # Populate the new feature in all dictionaries and lists # (we don't need global here) feat_const.feature_names.append(name) feat_const.feature_labels.append(label) feat_const.feature_name2label[name] = label if is_scalar: feat_const.scalar_feature_names.append(name) def feature_deregister(name): """Unregister a feature from dclab Used by temporary features and plugin features to remove the feature names and labels from `dclab.definitions`. Parameters ---------- name: str name of a feature Warnings -------- This function should only be used internally, i.e., You should not use this function. This function can break things. """ label = get_feature_label(name) feat_const.feature_names.remove(name) feat_const.feature_labels.remove(label) feat_const.feature_name2label.pop(name) if name in feat_const.scalar_feature_names: feat_const.scalar_feature_names.remove(name)
[docs] def get_feature_label(name, rtdc_ds=None, with_unit=True): """Return the label corresponding to a feature name This function not only checks :const:`feature_name2label`, but also supports registered `ml_score_???` features. Parameters ---------- name: str name of a feature with_unit: bool set to False to remove units in square brackets Returns ------- label: str feature label corresponding to the feature name Notes ----- TODO: extract feature label from ancillary information when an rtdc_ds is given. """ if name in feat_const.feature_name2label: label = feat_const.feature_name2label[name] elif ML_SCORE_REGEX.match(name): # use a generic name for machine-learning features label = f"ML score {name[-3:].upper()}" else: exists = feature_exists(name) msg = f"Could not find label for '{name}'" msg += " (feature does not exist)" if not exists else "" raise ValueError(msg) if not with_unit: if label.endswith("]") and label.count("["): label = label.rsplit("[", 1)[0].strip() return label
[docs] def scalar_feature_exists(name): """Convenience method wrapping `feature_exists(..., scalar_only=True)`""" return feature_exists(name, scalar_only=True)