Source code for dclab.cli.task_repack

"""Repack (similar to h5repack) .rtdc files"""
from __future__ import annotations

import argparse
import pathlib

import h5py

from ..rtdc_dataset import rtdc_copy
from .._version import version

from . import common


def repack(
        path_in: str | pathlib.Path | None = None,
        path_out: str | pathlib.Path | None = None,
        strip_basins: bool = False,
        strip_logs: bool = False,
        check_suffix: bool = True,
        ret_path: bool = False,
) -> pathlib.Path | None:
    """Repack/recreate an .rtdc file, optionally stripping basins and logs

    The input file is copied with :func:`rtdc_copy` (all features,
    tables included) to a temporary file which is renamed to the
    output path once copying has finished. If copying fails, the
    temporary file is removed again and the exception is re-raised.

    If both `path_in` and `path_out` are None, `path_in`, `path_out`,
    `strip_basins`, and `strip_logs` are taken from the command line
    (see :func:`repack_parser`).

    Parameters
    ----------
    path_in: str or pathlib.Path
        file to repack
    path_out: str or pathlib.Path
        output file path
    strip_basins: bool
        do not write basin information to the output file
    strip_logs: bool
        do not write logs to the output file
    check_suffix: bool
        check suffixes for input and output files; if False, the
        suffix of `path_in` is accepted in addition to ".rtdc"
    ret_path: bool
        whether to return the output path

    Returns
    -------
    path_out: pathlib.Path
        output path (with possibly corrected suffix); only returned
        if `ret_path` is True, otherwise None is returned
    """
    if path_in is None and path_out is None:
        # Invoked as a command-line entry point
        parser = repack_parser()
        args = parser.parse_args()
        path_in = args.input
        path_out = args.output
        strip_basins = args.strip_basins
        strip_logs = args.strip_logs

    allowed_input_suffixes = [".rtdc"]
    if not check_suffix:
        # Accept whatever suffix the input file actually has
        allowed_input_suffixes.append(pathlib.Path(path_in).suffix)

    path_in, path_out, path_temp = common.setup_task_paths(
        path_in, path_out, allowed_input_suffixes=allowed_input_suffixes)

    try:
        # Open the input explicitly read-only: h5py releases before 3.0
        # did not default to read-only when no mode was given.
        with h5py.File(path_in, "r") as h5, h5py.File(path_temp, "w") as hc:
            rtdc_copy(src_h5file=h5,
                      dst_h5file=hc,
                      features="all",
                      include_basins=not strip_basins,
                      include_logs=not strip_logs,
                      include_tables=True,
                      meta_prefix="")
    except BaseException:
        # Do not leave a partially written temporary file behind.
        # Both HDF5 files are closed at this point. Cleanup is
        # best-effort; the original exception is what matters.
        try:
            path_temp.unlink()
        except OSError:
            pass
        raise

    # Finally, rename temp to out
    path_temp.rename(path_out)
    if ret_path:
        return path_out
def repack_parser():
    """Build the command-line argument parser for dclab-repack

    Returns
    -------
    parser: argparse.ArgumentParser
        parser with the positional arguments ``input`` and ``output``,
        the flags ``--strip-basins`` and ``--strip-logs`` (both False
        unless given), and ``--version``
    """
    description = (
        "Repack an .rtdc file. The difference to dclab-compress "
        "is that no logs are added. Other logs can optionally be "
        "stripped away. Repacking also gets rid of old clutter "
        "data (e.g. previous metadata stored in the HDF5 file)."
    )
    parser = argparse.ArgumentParser(description=description)

    # Positional arguments: input path first, then output path
    for dest, label in (("input", "Input"), ("output", "Output")):
        parser.add_argument(
            dest,
            metavar=dest.upper(),
            type=str,
            help=f"{label} path (.rtdc file)",
        )

    # Optional flags controlling what is left out of the output file
    parser.add_argument(
        "--strip-basins",
        dest="strip_basins",
        action="store_true",
        help="Do not copy any basin information to the output file.",
    )
    parser.add_argument(
        "--strip-logs",
        dest="strip_logs",
        action="store_true",
        help="Do not copy any logs to the output file.",
    )
    parser.set_defaults(strip_basins=False, strip_logs=False)

    parser.add_argument(
        "--version",
        action="version",
        version=f"dclab-repack {version}",
    )
    return parser