Source code for dclab.cli.task_condense

"""Create .rtdc files with scalar-only features"""
import argparse
import pathlib
import warnings

import h5py
import hdf5plugin

from ..rtdc_dataset import fmt_hdf5, new_dataset, rtdc_copy, RTDCWriter
from .. import util
from .._version import version

from . import common


def condense(path_out=None, path_in=None, ancillaries=True,
             check_suffix=True):
    """Condense a measurement file into a scalar-only .rtdc file

    Parameters
    ----------
    path_out:
        Path of the condensed output file. If this or `path_in` is
        None, input path, output path, and the ancillary switch are
        all taken from the command line via :func:`condense_parser`.
    path_in:
        Path of the input measurement file.
    ancillaries: bool
        Passed on to :func:`condense_dataset`; overridden by the
        ``--no-ancillary-features`` flag when the command line is
        parsed.
    check_suffix: bool
        If False, the suffix of `path_in` is added to the list of
        allowed input suffixes (``.rtdc`` and ``.tdms``).
    """
    use_command_line = path_out is None or path_in is None
    if use_command_line:
        cli_args = condense_parser().parse_args()
        path_in = cli_args.input
        path_out = cli_args.output
        ancillaries = not cli_args.no_ancillaries

    # When suffix checking is disabled, whatever suffix the input has
    # is declared acceptable.
    extra_suffixes = [] if check_suffix else [pathlib.Path(path_in).suffix]
    suffixes = [".rtdc", ".tdms"] + extra_suffixes

    src_path, dst_path, tmp_path = common.setup_task_paths(
        path_in, path_out, allowed_input_suffixes=suffixes)

    # Record every warning raised while condensing so that it can be
    # handed to `condense_dataset` as `warnings_list`.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        with new_dataset(src_path) as ds:
            with h5py.File(tmp_path, "w") as h5_cond:
                condense_dataset(ds=ds,
                                 h5_cond=h5_cond,
                                 ancillaries=ancillaries,
                                 warnings_list=caught)

    # The data were written to the temporary path; move the finished
    # file to its final location.
    tmp_path.rename(dst_path)
def condense_dataset(ds, h5_cond, ancillaries=True, warnings_list=None):
    """Write the scalar features and condense logs of `ds` to `h5_cond`

    Parameters
    ----------
    ds:
        Open input dataset. If it is an instance of
        `fmt_hdf5.RTDC_HDF5`, its scalar features, logs, and tables
        are first copied with `rtdc_copy`; any remaining features are
        written one by one.
    h5_cond:
        Writable HDF5 file object that receives the condensed data
        (:func:`condense` passes an `h5py.File`).
    ancillaries: bool
        If True, all features in `ds.features_scalar` are written.
        If False, only those scalar features that are also listed in
        `ds.features_loaded` are written.
    warnings_list:
        Optional list of recorded warnings; if non-empty, it is
        stored in the "dclab-condense-warnings" log.
    """
    compression = hdf5plugin.Zstd(clevel=5)

    # HDF5 input can be bulk-copied with rtdc_copy; for any other
    # input (e.g. .tdms), every feature is written individually in the
    # loop further below.
    if isinstance(ds, fmt_hdf5.RTDC_HDF5):
        rtdc_copy(src_h5file=ds.h5file,
                  dst_h5file=h5_cond,
                  features="scalar",
                  include_logs=True,
                  include_tables=True,
                  meta_prefix="")

    # The "logs" group is accessed below regardless of the input type.
    h5_cond.require_group("logs")

    # All scalar features, the subset that is already loaded, and the
    # remainder (which has to be computed).
    scalar_feats = ds.features_scalar
    loaded_scalar = [ff for ff in ds.features_loaded if ff in scalar_feats]
    missing_scalar = list(set(scalar_feats) - set(loaded_scalar))

    # Extra information stored alongside the command log
    log_info = {"features_original_innate": ds.features_innate}

    if not ancillaries:
        features = loaded_scalar
    else:
        features = scalar_feats
        log_info["features_computed"] = missing_scalar
        if missing_scalar:
            print("Computing ancillary features:", " ".join(missing_scalar))
        else:
            print("No ancillary features to compute.")

    logs = {
        "dclab-condense": common.get_command_log(paths=[ds.path],
                                                 custom_dict=log_info),
    }

    # Logs from a previous condense run would collide with the ones
    # written below. Keep them under a name that carries the hash of
    # the dataset configuration.
    logs_group = h5_cond["logs"]
    for old_name in ("dclab-condense", "dclab-condense-warnings"):
        if old_name not in logs_group:
            continue
        # The original notes that this hash is cached, so computing it
        # once per log name is not a concern.
        cfg_hash = util.hashobj(ds.config)
        archived_name = f"{old_name}_{cfg_hash}"
        # If the same file was condensed repeatedly, the archived log
        # already exists (the metadata did not change) and there is no
        # benefit in storing it again.
        if archived_name not in logs_group:
            logs_group[archived_name] = logs_group[old_name]
        del logs_group[old_name]

    with RTDCWriter(h5_cond,
                    mode="append",
                    compression_kwargs=compression) as hw:
        # Store every requested feature that is not in the output yet
        # (for .tdms input, these are *all* scalar features).
        for feat in features:
            if feat not in h5_cond["events"]:
                hw.store_feature(feat=feat, data=ds[feat])

        # Add the warnings log only if warnings were recorded.
        if warnings_list:
            logs["dclab-condense-warnings"] = \
                common.assemble_warnings(warnings_list)

        for log_name, log_lines in logs.items():
            hw.store_log(log_name, log_lines)
def condense_parser():
    """Return the argument parser of the dclab-condense command

    The parser takes two positional arguments (``INPUT`` and
    ``OUTPUT``), the flag ``--no-ancillary-features`` (stored as
    ``no_ancillaries``), and ``--version``.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Reduce an RT-DC measurement to its scalar-only features "
            "(i.e. without `contour`, `image`, `mask`, or `trace`). "
            "All available ancillary features are computed."
        ),
    )

    # Positional arguments: (name, help text)
    positionals = (
        ("input", "Input path (.tdms or .rtdc file)"),
        ("output", "Output path (.rtdc file)"),
    )
    for arg_name, help_text in positionals:
        parser.add_argument(arg_name,
                            metavar=arg_name.upper(),
                            type=str,
                            help=help_text)

    parser.add_argument(
        "--no-ancillary-features",
        dest="no_ancillaries",
        action="store_true",
        help="Do not compute expensive ancillary features such as volume",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"dclab-condense {version}",
    )
    return parser