"""Split an .rtdc file into smaller .rtdc files"""
from __future__ import annotations
import argparse
import pathlib
import warnings
import hdf5plugin
from ..rtdc_dataset import fmt_tdms, new_dataset, RTDCWriter
from .._version import version
from . import common
[docs]
def split(
path_in: str | pathlib.Path = None,
path_out: str | pathlib.Path = None,
split_events: int = 10000,
skip_initial_empty_image: bool = True,
skip_final_empty_image: bool = True,
ret_out_paths: bool = False,
verbose: bool = False):
"""Split a measurement file
Parameters
----------
path_in: str or pathlib.Path
path of input measurement file
path_out: str or pathlib.Path
path to output directory (optional)
split_events: int
maximum number of events in each output file
skip_initial_empty_image: bool
remove the first event of the dataset if the image is zero
skip_final_empty_image: bool
remove the final event of the dataset if the image is zero
ret_out_paths:
if True, return the list of output file paths
verbose: bool
if True, print messages to stdout
Returns
-------
[out_paths]: list of pathlib.Path
List of generated files (only if `ret_out_paths` is specified)
"""
cmp_kw = hdf5plugin.Zstd(clevel=5)
if path_in is None:
parser = split_parser()
args = parser.parse_args()
path_in = pathlib.Path(args.path_in).resolve()
path_out = args.path_out
split_events = args.split_events
skip_initial_empty_image = not args.include_empty_boundary_images
skip_final_empty_image = not args.include_empty_boundary_images
verbose = True
if path_out in ["SAME", None]: # default to input directory
path_out = path_in.parent
path_in = pathlib.Path(path_in)
path_out = pathlib.Path(path_out)
logs = {"dclab-split": common.get_command_log(paths=[path_in])}
paths_gen = []
paths_temp = []
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
# ignore ResourceWarning: unclosed file <_io.BufferedReader...>
warnings.simplefilter("ignore", ResourceWarning) # noqa: F821
if fmt_tdms.NPTDMS_AVAILABLE: # tdms-related warning filters
# ignore SlowVideoWarning
warnings.simplefilter("ignore",
fmt_tdms.event_image.SlowVideoWarning)
if skip_initial_empty_image:
# If the initial frame is skipped when empty,
# suppress any related warning messages.
warnings.simplefilter(
"ignore",
fmt_tdms.event_image.InitialFrameMissingWarning)
with new_dataset(path_in) as ds:
num_files = len(ds) // split_events
if len(ds) % split_events:
num_files += 1
for ii in range(num_files):
pp = path_out / f"{path_in.stem}_{ii+1:04d}.rtdc"
pt = pp.with_suffix(".rtdc~")
paths_gen.append(pp)
paths_temp.append(pt)
if verbose:
print(f"Generating {ii+1:d}/{num_files:d}: {pt}")
ds.filter.manual[:] = False # reset filter
ds.filter.manual[ii*split_events:(ii+1)*split_events] = True
common.skip_empty_image_events(
ds=ds,
initial=skip_initial_empty_image,
final=skip_final_empty_image)
ds.apply_filter()
ds.export.hdf5(path=pt,
features=ds.features_innate,
logs=True,
tables=True,
basins=True,
filtered=True,
compression_kwargs=cmp_kw,
)
if w:
logs["dclab-split-warnings"] = common.assemble_warnings(w)
sample_name = ds.config["experiment"]["sample"]
# Add the logs and update sample name
for ii, pt in enumerate(paths_temp):
meta = {"experiment": {"sample": f"{sample_name} {ii+1}/{num_files}"}}
with RTDCWriter(pt, compression_kwargs=cmp_kw) as hw:
for name in logs:
hw.store_log(name, logs[name])
hw.store_metadata(meta)
for pt, pp in zip(paths_temp, paths_gen):
pt.rename(pp)
if ret_out_paths:
return paths_gen
def split_parser():
    """Build the command-line argument parser for ``dclab-split``

    Returns
    -------
    parser: argparse.ArgumentParser
        Parser with the positional argument ``PATH_IN`` and the
        options ``--path_out``, ``--split-events``,
        ``--include-empty-boundary-images``, and ``--version``
    """
    cli = argparse.ArgumentParser(
        description=(
            "Split an RT-DC measurement file (.tdms or .rtdc) into multiple "
            "smaller .rtdc files."
        ),
    )

    # Input file and output directory
    cli.add_argument(
        "path_in",
        type=str,
        metavar="PATH_IN",
        help="Input path (.tdms or .rtdc file)",
    )
    cli.add_argument(
        "--path_out",
        type=str,
        metavar="PATH_OUT",
        default="SAME",
        help="Output directory (defaults to same directory)",
    )

    # Chunk size
    cli.add_argument(
        "--split-events",
        type=int,
        default=10000,
        help="Maximum number of events in each output file",
    )

    # Handling of zero-valued images at the dataset boundaries
    cli.add_argument(
        "--include-empty-boundary-images",
        action="store_true",
        dest="include_empty_boundary_images",
        help=(
            "In old versions of Shape-In, the first or last "
            "images were sometimes not stored in the "
            "resulting .avi file. In dclab, such images "
            "are represented as zero-valued images. Set "
            "this option, if you wish to include these "
            "events with empty image data."
        ),
    )
    cli.set_defaults(include_empty_boundary_images=False)

    cli.add_argument(
        "--version",
        action="version",
        version=f"dclab-split {version}",
    )
    return cli