Source code for dclab.definitions.feat_logic

from . import feat_const


def check_feature_shape(name, data):
    """Verify that the dimensionality of `data` fits the feature kind

    A feature that is known as scalar (see `scalar_feature_exists`)
    must be passed as a 1D array; any other feature must not be 1D.

    Parameters
    ----------
    name: str
        name of the feature
    data: array-like
        data whose dimensionality will be checked; it must expose
        a `shape` attribute

    Raises
    ------
    ValueError
        If the data's shape does not match its scalar description
    """
    ndim = len(data.shape)
    is_scalar = scalar_feature_exists(name)

    if ndim == 1:
        # 1D data are only acceptable for scalar features
        if not is_scalar:
            raise ValueError(f"Feature '{name}' is not a scalar feature, but "
                             "a 1D array was given for `data`!")
    elif is_scalar:
        # anything that is not 1D cannot belong to a scalar feature
        raise ValueError(f"Feature '{name}' is a scalar feature, but the "
                         "`data` array is not 1D!")
def feature_exists(name, scalar_only=False):
    """Tell whether `name` denotes a valid feature

    Besides the registered names in :const:`feature_names`, the
    machine learning scores `ml_score_???` are accepted as well
    (each `?` is a digit or a lower-case letter of the English
    alphabet).

    Parameters
    ----------
    name: str
        name of a feature
    scalar_only : bool
        If True, registered non-scalar features are not considered.
        The `ml_score_???` pattern is checked regardless of this flag.

    Returns
    -------
    valid: bool
        True if name is a valid feature, False otherwise.

    See Also
    --------
    scalar_feature_exists: Wraps `feature_exists` with `scalar_only=True`
    """
    # registered scalar feature
    if name in feat_const.scalar_feature_names:
        return True

    # any registered feature (only when non-scalar ones are allowed)
    if not scalar_only and name in feat_const.feature_names:
        return True

    # otherwise only an `ml_score_???` name can still be valid
    prefix = "ml_score_"
    if not name.startswith(prefix):
        return False
    if len(name) != len(prefix) + 3:
        return False
    alphanumeric = "0123456789abcdefghijklmnopqrstuvwxyz"
    return all(char in alphanumeric for char in name[-3:])
def feature_register(name, label=None, is_scalar=True):
    """Register a new feature for usage in dclab

    Used by temporary features and plugin features to add new feature
    names and labels to `dclab.definitions`.

    Parameters
    ----------
    name: str
        name of a feature
    label: str, optional
        feature label corresponding to the feature name. If set to None,
        then a label is constructed for the feature name.
    is_scalar: bool
        Specify whether the feature of an event is a scalar (True)
        or not (False)

    Raises
    ------
    ValueError
        If `name` is empty, if it contains characters other than
        lower-case letters, digits, and underscores, or if the
        feature already exists.
    """
    if not name:
        # An empty string trivially passes the character filter below
        # and would otherwise be registered as a feature.
        raise ValueError("Feature name must not be empty!")

    allowed_chars = frozenset("abcdefghijklmnopqrstuvwxyz_1234567890")
    if any(char not in allowed_chars for char in name):
        raise ValueError("`feature` must only contain lower-case characters, "
                         f"digits, and underscores; got '{name}'!")

    if label is None:
        label = f"User-defined feature {name}"

    if feature_exists(name):
        raise ValueError(f"Feature '{name}' already exists!")

    # Populate the new feature in all dictionaries and lists
    # (the containers are mutated in place, so no `global` is needed).
    # Name and label are appended together so that both lists stay
    # aligned for `feature_deregister`.
    feat_const.feature_names.append(name)
    feat_const.feature_labels.append(label)
    feat_const.feature_name2label[name] = label
    if is_scalar:
        feat_const.scalar_feature_names.append(name)


def feature_deregister(name):
    """Unregister a feature from dclab

    Used by temporary features and plugin features to remove the
    feature names and labels from `dclab.definitions`.

    Parameters
    ----------
    name: str
        name of a feature

    Raises
    ------
    ValueError
        If `name` is not in the list of feature names or its label
        is not in the list of feature labels.
    KeyError
        If `name` has no entry in the name-to-label dictionary.

    Warnings
    --------
    This function should only be used internally, i.e., You should not
    use this function. This function can break things.
    """
    label = get_feature_label(name)

    names = feat_const.feature_names
    labels = feat_const.feature_labels

    # Look up the position before removing the name.
    index = names.index(name)
    names.pop(index)

    # Remove the label that belongs to *this* feature. A plain
    # `labels.remove(label)` deletes the first equal label, which may
    # belong to a different feature when two features share a label;
    # that would shift the labels relative to the names.
    if index < len(labels) and labels[index] == label:
        labels.pop(index)
    else:
        # The lists are not aligned at this position; fall back to
        # removal by value.
        labels.remove(label)

    feat_const.feature_name2label.pop(name)
    if name in feat_const.scalar_feature_names:
        feat_const.scalar_feature_names.remove(name)
def get_feature_label(name, rtdc_ds=None, with_unit=True):
    """Look up the human-readable label of a feature

    The label is taken from :const:`feature_name2label` if possible.
    Otherwise, the registered ancillary features are searched for an
    output with that name, and as a last resort a generic
    `ml_score_???` label is generated.

    Parameters
    ----------
    name: str
        name of a feature
    rtdc_ds: optional
        currently unused (see Notes)
    with_unit: bool
        set to False to remove units in square brackets

    Returns
    -------
    label: str
        feature label corresponding to the feature name

    Raises
    ------
    AssertionError
        If `name` is not a valid feature (the check is an `assert`
        statement and is therefore skipped when Python runs with `-O`).

    Notes
    -----
    TODO: extract feature label from ancillary information when an
    rtdc_ds is given.
    """
    # TODO: Is there another way of avoiding this circular import?
    from ..rtdc_dataset.feat_anc_core.ancillary_feature import AncillaryFeature

    assert feature_exists(name)

    name2label = feat_const.feature_name2label
    if name in name2label:
        label = name2label[name]
    else:
        # Take the first ancillary feature that carries this name.
        candidates = (anc for anc in AncillaryFeature.features
                      if anc.feature_name == name)
        ancillary = next(candidates, None)
        if ancillary is None:
            # No ancillary feature matched; use a generic name.
            label = "ML score {}".format(name[-3:].upper())
        else:
            position = ancillary.data.outputs.index(name)
            label = ancillary.data.output_labels[position]

    # Strip a trailing "[unit]" part if requested.
    if not with_unit and label.endswith("]") and "[" in label:
        label = label.rsplit("[", 1)[0].strip()

    return label
def scalar_feature_exists(name):
    """Convenience method wrapping `feature_exists(..., scalar_only=True)`

    Parameters
    ----------
    name: str
        name of a feature

    Returns
    -------
    valid: bool
        The result of `feature_exists` with `scalar_only=True`.
    """
    return feature_exists(name=name, scalar_only=True)