Source code for dclab.rtdc_dataset.export

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Export RT-DC measurement data"""
from __future__ import division, print_function, unicode_literals

import codecs
import pathlib
import warnings

import h5py

from ..compat import PyImportError

try:
    import imageio
except PyImportError:
    IMAGEIO_AVAILABLE = False
else:
    IMAGEIO_AVAILABLE = True

try:
    import fcswrite
except PyImportError:
    FCSWRITE_AVAILABLE = False
else:
    FCSWRITE_AVAILABLE = True

import numpy as np

from .. import definitions as dfn
from .._version import version
from .write_hdf5 import write


class NoImageWarning(UserWarning):
    pass


class LimitingExportSizeWarning(UserWarning):
    pass


class Export(object):
    def __init__(self, rtdc_ds):
        """Export functionalities for RT-DC datasets"""
        self.rtdc_ds = rtdc_ds
    def avi(self, path, filtered=True, override=False):
        """Exports filtered event images to an avi file

        Parameters
        ----------
        path: str
            Path to a .avi file. The ending .avi is added
            automatically.
        filtered: bool
            If set to `True`, only the filtered data
            (index in ds.filter.all) are used.
        override: bool
            If set to `True`, an existing file ``path`` will be
            overridden. If set to `False`, raises `OSError` if
            ``path`` exists.

        Notes
        -----
        Raises OSError if the current dataset does not contain
        image data.
        """
        if not IMAGEIO_AVAILABLE:
            raise PyImportError("Package `imageio` required for avi export!")
        path = pathlib.Path(path)
        ds = self.rtdc_ds
        # Make sure that path ends with .avi
        if path.suffix != ".avi":
            path = path.with_name(path.name + ".avi")
        # Check if file already exists
        if not override and path.exists():
            raise OSError("File already exists: {}\n".format(
                str(path).encode("ascii", "ignore")) +
                "Please use the `override=True` option.")
        # Start exporting
        if "image" in ds:
            # Open video for writing
            vout = imageio.get_writer(uri=path,
                                      format="FFMPEG",
                                      fps=25,
                                      codec="rawvideo",
                                      pixelformat="yuv420p",
                                      macro_block_size=None,
                                      ffmpeg_log_level="error")
            # write the filtered frames to avi file
            for evid in np.arange(len(ds)):
                # skip frames that were filtered out
                if filtered and not ds.filter.all[evid]:
                    continue
                try:
                    image = ds["image"][evid]
                except BaseException:
                    warnings.warn("Could not read image {}!".format(evid),
                                  NoImageWarning)
                    continue
                else:
                    if np.isnan(image[0, 0]):
                        # This is a nan-valued image
                        image = np.zeros_like(image, dtype=np.uint8)
                # Convert image to RGB
                image = image.reshape(image.shape[0], image.shape[1], 1)
                image = np.repeat(image, 3, axis=2)
                vout.append_data(image)
        else:
            msg = "No image data to export: dataset {} !".format(ds.title)
            raise OSError(msg)
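    # A minimal usage sketch (not part of the module): exporting the event
    # images of an opened dataset to an .avi file. The file name "data.rtdc"
    # is a hypothetical example; `ds.export` is the `Export` instance
    # attached to the dataset.
    #
    #     import dclab
    #     ds = dclab.new_dataset("data.rtdc")  # hypothetical input file
    #     ds.apply_filter()                    # populate ds.filter.all
    #     ds.export.avi("events.avi", filtered=True, override=True)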
    def fcs(self, path, features, meta_data={}, filtered=True,
            override=False):
        """Export the data of an RT-DC dataset to an .fcs file

        Parameters
        ----------
        path: str
            Path to an .fcs file. The ending .fcs is added
            automatically.
        features: list of str
            The features in the resulting .fcs file. These are strings
            that are defined in `dclab.definitions.scalar_feature_names`,
            e.g. "area_cvx", "deform", "frame", "fl1_max", "aspect".
        meta_data: dict
            User-defined, optional key-value pairs that are stored
            in the primary TEXT segment of the FCS file; the version
            of dclab is stored there by default.
        filtered: bool
            If set to `True`, only the filtered data
            (index in ds.filter.all) are used.
        override: bool
            If set to `True`, an existing file ``path`` will be
            overridden. If set to `False`, raises `OSError` if
            ``path`` exists.

        Notes
        -----
        Due to incompatibility with the .fcs file format, all events
        with NaN-valued features are not exported.
        """
        if not FCSWRITE_AVAILABLE:
            raise PyImportError("Package `fcswrite` required for fcs export!")
        features = [c.lower() for c in features]
        ds = self.rtdc_ds
        path = pathlib.Path(path)
        # Make sure that path ends with .fcs
        if path.suffix != ".fcs":
            path = path.with_name(path.name + ".fcs")
        # Check if file already exists
        if not override and path.exists():
            raise OSError("File already exists: {}\n".format(
                str(path).encode("ascii", "ignore")) +
                "Please use the `override=True` option.")
        # Check that features are in dfn.scalar_feature_names
        for c in features:
            if c not in dfn.scalar_feature_names:
                msg = "Unknown or unsupported feature name: {}".format(c)
                raise ValueError(msg)

        # Collect the header
        chn_names = [dfn.feature_name2label[c] for c in features]

        # Collect the data
        if filtered:
            data = [ds[c][ds.filter.all] for c in features]
        else:
            data = [ds[c] for c in features]

        data = np.array(data).transpose()
        meta_data["dclab version"] = version
        fcswrite.write_fcs(filename=str(path),
                           chn_names=chn_names,
                           data=data,
                           text_kw_pr=meta_data,
                           )
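    # A minimal usage sketch (not part of the module): writing selected
    # scalar features to an .fcs file via `fcswrite`. The file names and
    # the meta_data entry are hypothetical examples.
    #
    #     import dclab
    #     ds = dclab.new_dataset("data.rtdc")
    #     ds.export.fcs("scalars.fcs",
    #                   features=["area_cvx", "deform", "fl1_max"],
    #                   meta_data={"experiment": "example"},
    #                   filtered=False,
    #                   override=True)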
    def hdf5(self, path, features, filtered=True, override=False,
             compression="gzip"):
        """Export the data of the current instance to an HDF5 file

        Parameters
        ----------
        path: str
            Path to an .rtdc file. The ending .rtdc is added
            automatically.
        features: list of str
            The features in the resulting .rtdc file. These are strings
            that are defined in `dclab.definitions.feature_names`, e.g.
            "area_cvx", "deform", "frame", "fl1_max", "image".
        filtered: bool
            If set to `True`, only the filtered data
            (index in ds.filter.all) are used.
        override: bool
            If set to `True`, an existing file ``path`` will be
            overridden. If set to `False`, raises `OSError` if
            ``path`` exists.
        compression: str or None
            Compression method for "contour", "image", and "trace" data
            as well as logs; one of [None, "lzf", "gzip", "szip"].
        """
        path = pathlib.Path(path)
        # Make sure that path ends with .rtdc
        if not path.suffix == ".rtdc":
            path = path.parent / (path.name + ".rtdc")
        # Check if file already exists
        if not override and path.exists():
            raise OSError("File already exists: {}\n".format(path) +
                          "Please use the `override=True` option.")
        elif path.exists():
            path.unlink()

        meta = {}

        # only export configuration meta data (no user-defined config)
        for sec in dfn.CFG_METADATA:
            if sec in ["fmt_tdms"]:
                # ignored sections
                continue
            if sec in self.rtdc_ds.config:
                meta[sec] = self.rtdc_ds.config[sec].copy()

        if filtered:
            filtarr = self.rtdc_ds.filter.all
        else:
            filtarr = np.ones(len(self.rtdc_ds), dtype=bool)

        # check that all features have the same length and use the
        # smallest common length
        lengths = []
        for feat in features:
            if feat == "trace":
                for tr in list(self.rtdc_ds["trace"].keys()):
                    lengths.append(len(self.rtdc_ds["trace"][tr]))
            else:
                lengths.append(len(self.rtdc_ds[feat]))
        if not np.all(np.array(lengths) == lengths[0]):
            lmin = np.min(lengths)
            lmax = np.max(lengths)
            nev_bef = np.sum(filtarr)
            filtarr[lmin:] = False
            nev_aft = np.sum(filtarr)
            if nev_bef != nev_aft:
                warnings.warn(
                    "Not all features have the same length! "
                    + "Limiting output event count to {} ".format(lmin)
                    + "(max {}) in '{}'.".format(lmax, path),
                    LimitingExportSizeWarning)

        # write meta data
        with write(path_or_h5file=path, meta=meta, mode="append") as h5obj:
            # write each feature individually
            for feat in features:
                hdf5_append(h5obj=h5obj,
                            rtdc_ds=self.rtdc_ds,
                            feat=feat,
                            compression=compression,
                            filtarr=filtarr)
            # update configuration
            hdf5_autocomplete_config(h5obj)
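    # A minimal usage sketch (not part of the module): exporting the filtered
    # events of a dataset to a new .rtdc (HDF5) file. The input file name and
    # the filter limit are hypothetical examples.
    #
    #     import dclab
    #     ds = dclab.new_dataset("data.rtdc")
    #     ds.config["filtering"]["deform max"] = 0.1  # example box filter
    #     ds.apply_filter()
    #     ds.export.hdf5("subset.rtdc",
    #                    features=["deform", "area_um", "image"],
    #                    filtered=True,
    #                    override=True)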
    def tsv(self, path, features, meta_data={}, filtered=True,
            override=False):
        """Export the data of the current instance to a .tsv file

        Parameters
        ----------
        path: str
            Path to a .tsv file. The ending .tsv is added
            automatically.
        features: list of str
            The features in the resulting .tsv file. These are strings
            that are defined in `dclab.definitions.scalar_feature_names`,
            e.g. "area_cvx", "deform", "frame", "fl1_max", "aspect".
        meta_data: dict
            User-defined, optional key-value pairs that are stored at
            the beginning of the tsv file - one key-value pair is
            stored per line, each starting with a hash. The version of
            dclab is stored there by default.
        filtered: bool
            If set to `True`, only the filtered data
            (index in ds.filter.all) are used.
        override: bool
            If set to `True`, an existing file ``path`` will be
            overridden. If set to `False`, raises `OSError` if
            ``path`` exists.
        """
        features = [c.lower() for c in features]
        path = pathlib.Path(path)
        ds = self.rtdc_ds
        # Make sure that path ends with .tsv
        if path.suffix != ".tsv":
            path = path.with_name(path.name + ".tsv")
        # Check if file already exists
        if not override and path.exists():
            raise OSError("File already exists: {}\n".format(
                str(path).encode("ascii", "ignore")) +
                "Please use the `override=True` option.")
        # Check that features are in dfn.scalar_feature_names
        for c in features:
            if c not in dfn.scalar_feature_names:
                raise ValueError("Unknown feature name {}".format(c))

        meta_data["dclab version"] = version
        # Write BOM header
        with path.open("wb") as fd:
            fd.write(codecs.BOM_UTF8)
        # Open file
        with path.open("a", encoding="utf-8") as fd:
            # write meta data
            for key in sorted(meta_data.keys()):
                fd.write("# {}: {}\n".format(key, meta_data[key]))
            fd.write("#\n")
            # write header
            header1 = "\t".join([c for c in features])
            fd.write("# "+header1+"\n")
            header2 = "\t".join([dfn.feature_name2label[c] for c in features])
            fd.write("# "+header2+"\n")

        with path.open("ab") as fd:
            # write data
            if filtered:
                data = [ds[c][ds.filter.all] for c in features]
            else:
                data = [ds[c] for c in features]

            np.savetxt(fd,
                       np.array(data).transpose(),
                       fmt=str("%.10e"),
                       delimiter="\t")
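    # A minimal usage sketch (not part of the module): dumping scalar
    # features to a tab-separated text file. "data.rtdc" is a hypothetical
    # input file.
    #
    #     import dclab
    #     ds = dclab.new_dataset("data.rtdc")
    #     ds.export.tsv("scalars.tsv",
    #                   features=["frame", "area_cvx", "deform"],
    #                   filtered=False,
    #                   override=True)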
def hdf5_append(h5obj, rtdc_ds, feat, compression, filtarr=None,
                time_offset=0):
    """Append feature data to an HDF5 file

    Parameters
    ----------
    h5obj: h5py.File
        Opened HDF5 file
    rtdc_ds: dclab.rtdc_dataset.RTDCBase
        Instance from which to obtain the data
    feat: str
        Valid feature name in `rtdc_ds`
    compression: str or None
        Compression method for "contour", "image", and "trace" data
        as well as logs; one of [None, "lzf", "gzip", "szip"].
    filtarr: None or 1d boolean np.ndarray
        Optional boolean array used for filtering. If set to `None`,
        all events are saved.
    time_offset: float
        This value will be added to the "time" and "frame" features
        (used for joining multiple measurements)

    Notes
    -----
    Please update the "experiment:event count" attribute manually.
    You may use :func:`hdf5_autocomplete_config` for that.
    """
    # optional array for filtering events
    if filtarr is None:
        filtarr = np.ones(len(rtdc_ds), dtype=bool)
    # total number of new events
    nev = np.sum(filtarr)

    # Copy event-wise, because
    # - tdms-based datasets don't allow indexing with numpy
    # - there might be memory issues
    if feat == "contour":
        cont_list = []
        cmax = 0
        for ii in range(len(rtdc_ds)):
            if filtarr[ii]:
                dat = rtdc_ds["contour"][ii]
                cont_list.append(dat)
                cmax = max(cmax, dat.max())
        write(h5obj,
              data={"contour": cont_list},
              mode="append",
              compression=compression)
    elif feat in ["mask", "image"]:
        # store image stacks (reduces file size and saves time)
        m = 64
        im0 = rtdc_ds[feat][0]
        imstack = np.zeros((m, im0.shape[0], im0.shape[1]),
                           dtype=im0.dtype)
        jj = 0
        for ii in range(len(rtdc_ds)):
            if filtarr[ii]:
                dat = rtdc_ds[feat][ii]
                imstack[jj] = dat
                if (jj + 1) % m == 0:
                    jj = 0
                    write(h5obj,
                          data={feat: imstack},
                          mode="append",
                          compression=compression)
                else:
                    jj += 1
        # write rest
        if jj:
            write(h5obj,
                  data={feat: imstack[:jj, :, :]},
                  mode="append",
                  compression=compression)
    elif feat == "trace":
        for tr in rtdc_ds["trace"].keys():
            tr0 = rtdc_ds["trace"][tr][0]
            trdat = np.zeros((nev, tr0.size), dtype=tr0.dtype)
            jj = 0
            for ii in range(len(rtdc_ds)):
                if filtarr[ii]:
                    trdat[jj] = rtdc_ds["trace"][tr][ii]
                    jj += 1
            write(h5obj,
                  data={"trace": {tr: trdat}},
                  mode="append",
                  compression=compression)
    elif feat == "index":
        # re-enumerate data index feature (filtered data)
        if "events/index" in h5obj:
            nev0 = len(h5obj["events/index"])
        else:
            nev0 = 0
        write(h5obj,
              data={"index": np.arange(nev0+1, nev0+nev+1)},
              mode="append",
              compression=compression)
    elif feat == "index_online":
        # offset the online index by the last value already stored
        if "events/index_online" in h5obj:
            idxo_offset = h5obj["events/index_online"][-1] + 1
        else:
            idxo_offset = 0
        write(h5obj,
              data={"index_online":
                    rtdc_ds["index_online"][filtarr] + idxo_offset},
              mode="append",
              compression=compression)
    elif feat == "time":
        write(h5obj,
              data={"time": rtdc_ds["time"][filtarr] + time_offset},
              mode="append",
              compression=compression)
    elif feat == "frame":
        fr = rtdc_ds.config["imaging"]["frame rate"]
        frame_offset = time_offset * fr
        write(h5obj,
              data={"frame": rtdc_ds["frame"][filtarr] + frame_offset},
              mode="append",
              compression=compression)
    else:
        write(h5obj,
              data={feat: rtdc_ds[feat][filtarr]},
              mode="append",
              compression=compression)


def hdf5_autocomplete_config(path_or_h5obj):
    """Autocomplete the configuration of the RT-DC measurement

    The following configuration keys are updated:

    - experiment:event count
    - fluorescence:samples per event
    - imaging:roi size x (if image or mask is given)
    - imaging:roi size y (if image or mask is given)

    The following configuration keys are added if not present:

    - fluorescence:channel count

    Parameters
    ----------
    path_or_h5obj: pathlib.Path or str or h5py.File
        Path to or opened RT-DC measurement
    """
    if not isinstance(path_or_h5obj, h5py.File):
        close = True
        h5obj = h5py.File(path_or_h5obj, "a")
    else:
        close = False
        h5obj = path_or_h5obj
    # set event count
    feats = sorted(h5obj["events"].keys())
    if feats:
        h5obj.attrs["experiment:event count"] = len(h5obj["events"][feats[0]])
    else:
        raise ValueError("No features in '{}'!".format(path_or_h5obj))
    # set samples per event
    if "trace" in feats:
        traces = list(h5obj["events"]["trace"].keys())
        trsize = h5obj["events"]["trace"][traces[0]].shape[1]
        h5obj.attrs["fluorescence:samples per event"] = trsize
    # set channel count
    chcount = sum(["fl1_max" in feats,
                   "fl2_max" in feats,
                   "fl3_max" in feats])
    if chcount:
        if "fluorescence:channel count" not in h5obj.attrs:
            h5obj.attrs["fluorescence:channel count"] = chcount
    # set roi size x/y
    if "image" in h5obj["events"]:
        shape = h5obj["events"]["image"][0].shape
    elif "mask" in h5obj["events"]:
        shape = h5obj["events"]["mask"][0].shape
    else:
        shape = None
    if shape is not None:
        # update shape
        h5obj.attrs["imaging:roi size x"] = shape[1]
        h5obj.attrs["imaging:roi size y"] = shape[0]
    if close:
        h5obj.close()
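# A minimal usage sketch (not part of the module): appending the "deform"
# events of a second measurement to an existing .rtdc file and then updating
# the event count via hdf5_autocomplete_config. The file names are
# hypothetical and "joined.rtdc" is assumed to already hold compatible
# metadata.
#
#     import h5py
#     import dclab
#     ds = dclab.new_dataset("run2.rtdc")
#     with h5py.File("joined.rtdc", "a") as h5obj:
#         hdf5_append(h5obj=h5obj, rtdc_ds=ds, feat="deform",
#                     compression="gzip")
#         hdf5_autocomplete_config(h5obj)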