# Source code for dclab.cli.task_tdms2rtdc

"""Convert .tdms to .rtdc files"""
import argparse
import pathlib
import warnings

import hdf5plugin

from ..rtdc_dataset import fmt_tdms, new_dataset, RTDCWriter
from .._version import version

from . import common


def tdms2rtdc(path_tdms=None, path_rtdc=None, compute_features=False,
              skip_initial_empty_image=True, skip_final_empty_image=True,
              verbose=False):
    """Convert .tdms datasets to the hdf5-based .rtdc file format

    If `path_tdms` or `path_rtdc` is `None`, *all* options are read
    from the command line (see :func:`tdms2rtdc_parser`), overriding
    the other keyword arguments, and `verbose` is enabled.

    Parameters
    ----------
    path_tdms: str or pathlib.Path
        Path to input .tdms file or to a directory that is searched
        for .tdms files
    path_rtdc: str or pathlib.Path
        Path to output .rtdc file; if `path_tdms` is a directory, this
        is the output directory in which the relative paths of the
        input files are reproduced (with the suffix ".rtdc")
    compute_features: bool
        If `True`, compute all ancillary features and store them in the
        output file
    skip_initial_empty_image: bool
        In old versions of Shape-In, the first image was sometimes
        not stored in the resulting .avi file. In dclab, such images
        are represented as zero-valued images. If `True` (default),
        this first image is not included in the resulting .rtdc file.
    skip_final_empty_image: bool
        In other versions of Shape-In, the final image is sometimes
        also not stored in the .avi file. If `True` (default), this
        final image is not included in the resulting .rtdc file.
    verbose: bool
        If `True`, print messages to stdout

    Raises
    ------
    ValueError
        If `path_tdms` is a directory and `path_rtdc` is an existing
        file
    """
    cmp_kw = hdf5plugin.Zstd(clevel=5)
    if path_tdms is None or path_rtdc is None:
        parser = tdms2rtdc_parser()
        args = parser.parse_args()

        path_tdms = pathlib.Path(args.tdms_path).resolve()
        path_rtdc = pathlib.Path(args.rtdc_path)
        compute_features = args.compute_features
        skip_initial_empty_image = not args.include_empty_boundary_images
        skip_final_empty_image = not args.include_empty_boundary_images
        verbose = True
    else:
        # The documented interface accepts plain strings, but the code
        # below relies on pathlib methods (`is_dir`, `relative_to`, `/`).
        path_tdms = pathlib.Path(path_tdms)
        path_rtdc = pathlib.Path(path_rtdc)

    # Determine whether input path is a tdms file or a directory
    if path_tdms.is_dir():
        # we have a directory to search
        files_tdms = fmt_tdms.get_tdms_files(path_tdms)
        if path_rtdc.is_file():
            raise ValueError(
                f"Output path is a file, expected folder: '{path_rtdc}'!")
        files_rtdc = []
        for path_in in files_tdms:
            path_in = pathlib.Path(path_in)
            rp = path_in.relative_to(path_tdms)
            # determine output file name (same relative path)
            rpr = path_rtdc / rp.with_suffix(".rtdc")
            files_rtdc.append(rpr)
    else:
        # we have a single file or a non-existent path
        files_tdms = [path_tdms]
        files_rtdc = [path_rtdc]

    files_tdms, files_rtdc, files_temp = common.setup_task_paths(
        paths_in=files_tdms,
        paths_out=files_rtdc,
        allowed_input_suffixes=[".tdms"]
    )

    for ii, (path_in, path_out, path_temp) in enumerate(
            zip(files_tdms, files_rtdc, files_temp)):
        if verbose:
            common.print_info(
                f"Converting {ii+1:d}/{len(files_tdms):d}: {path_in}")
        # create directory
        path_out.parent.mkdir(parents=True, exist_ok=True)
        # load and export dataset; warnings are recorded in `w` so that
        # they can be stored as a log in the output file
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            # ignore ResourceWarning: unclosed file <_io.BufferedReader...>
            warnings.simplefilter("ignore", ResourceWarning)  # noqa: F821
            # ignore SlowVideoWarning
            warnings.simplefilter("ignore",
                                  fmt_tdms.event_image.SlowVideoWarning)
            if skip_initial_empty_image:
                # If the initial frame is skipped when empty,
                # suppress any related warning messages.
                warnings.simplefilter(
                    "ignore",
                    fmt_tdms.event_image.InitialFrameMissingWarning)

            with new_dataset(path_in) as ds:
                # determine features to export
                if compute_features:
                    features = ds.features
                else:
                    # consider special case for "image", "trace", and "contour"
                    # (This will export both "mask" and "contour".
                    # The "mask" is computed from "contour" and it is needed
                    # by dclab for other ancillary features. We still keep
                    # "contour" because it is original data.)
                    features = ds.features_innate

                common.skip_empty_image_events(
                    ds=ds,
                    initial=skip_initial_empty_image,
                    final=skip_final_empty_image)
                # export as hdf5 (to the temporary path first)
                ds.export.hdf5(path=path_temp,
                               features=features,
                               filtered=True,
                               override=True,
                               compression_kwargs=cmp_kw)

                # write logs
                # computed features (a set, so that "mask" cannot be
                # listed twice)
                cfeats = set(features) - set(ds.features_innate)
                if "mask" in features:
                    # Mask is always computed from contour data
                    cfeats.add("mask")
                custom_dict = {"ancillary features": sorted(cfeats)}

                # command log
                logs = {"dclab-tdms2rtdc": common.get_command_log(
                    paths=[path_in], custom_dict=custom_dict)}
                # warnings log
                if w:
                    logs["dclab-tdms2rtdc-warnings"] = \
                        common.assemble_warnings(w)
                logs.update(ds.logs)
                with RTDCWriter(path_temp, compression_kwargs=cmp_kw) as hw:
                    for name in logs:
                        hw.store_log(name, logs[name])

                # Finally, rename temp to out
                path_temp.rename(path_out)


def tdms2rtdc_parser():
    """Build the command-line argument parser for dclab-tdms2rtdc

    Returns
    -------
    parser: argparse.ArgumentParser
        Parser with the positional arguments ``TDMS_PATH`` and
        ``RTDC_PATH`` and the options ``--compute-ancillary-features``,
        ``--include-empty-boundary-images``, and ``--version``
    """
    description = (
        "Convert RT-DC .tdms files to the hdf5-based .rtdc file format. "
        "Note: Do not delete original .tdms files after conversion. "
        "The conversion might be incomplete."
    )
    help_ancillary = (
        "Compute features, such as volume or emodulus, "
        "that are otherwise computed on-the-fly. "
        "Use this if you want to minimize analysis "
        "time in e.g. Shape-Out. CAUTION: ancillary "
        "feature recipes might be subject to change "
        "(e.g. if an error is found in the recipe). "
        "Disabling this option maximizes "
        "compatibility with future versions and "
        "allows to isolate the original data."
    )
    help_boundary = (
        "In old versions of Shape-In, the first or last "
        "images were sometimes not stored in the "
        "resulting .avi file. In dclab, such images "
        "are represented as zero-valued images. Set "
        "this option, if you wish to include these "
        "events with empty image data."
    )
    help_input = "Input path (tdms file or folder containing tdms files)"
    help_output = ("Output path (file or folder), existing data "
                   "will be overridden")

    parser = argparse.ArgumentParser(description=description)

    # optional flags (both are off unless given on the command line)
    parser.add_argument("--compute-ancillary-features",
                        dest="compute_features",
                        action="store_true",
                        help=help_ancillary)
    parser.add_argument("--include-empty-boundary-images",
                        dest="include_empty_boundary_images",
                        action="store_true",
                        help=help_boundary)
    parser.set_defaults(compute_features=False,
                        include_empty_boundary_images=False)

    # positional input and output paths
    parser.add_argument("tdms_path", metavar="TDMS_PATH", type=str,
                        help=help_input)
    parser.add_argument("rtdc_path", metavar="RTDC_PATH", type=str,
                        help=help_output)

    parser.add_argument("--version", action="version",
                        version=f"dclab-tdms2rtdc {version}")
    return parser