Source code for dclab.rtdc_dataset.export

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Export RT-DC measurement data"""
from __future__ import division, print_function, unicode_literals

import pathlib
import warnings

import imageio
import fcswrite
import numpy as np

from .. import definitions as dfn
from .write_hdf5 import write


class NoImageWarning(UserWarning):
    """Warning category used when an event image could not be read"""


class Export(object):
    def __init__(self, rtdc_ds):
        """Export functionalities for RT-DC datasets"""
        self.rtdc_ds = rtdc_ds

[docs]    def avi(self, path, filtered=True, override=False):
        """Exports filtered event images to an avi file

        Parameters
        ----------
        path: str
            Path to a .tsv file. The ending .tsv is added automatically.
        filtered: bool
            If set to `True`, only the filtered data (index in ds._filter)
            are used.
        override: bool
            If set to `True`, an existing file ``path`` will be overridden.
            If set to `False`, raises `OSError` if ``path`` exists.

        Notes
        -----
        Raises OSError if current dataset does not contain image data
        """
        path = pathlib.Path(path)
        ds = self.rtdc_ds
        # Make sure that path ends with .avi
        if path.suffix != ".avi":
            path = path.with_name(path.name + ".avi")
        # Check if file already exist
        if not override and path.exists():
            raise OSError("File already exists: {}\n".format(
                str(path).encode("ascii", "ignore")) +
                "Please use the `override=True` option.")
        # Start exporting
        if "image" in ds:
            # Open video for writing
            vout = imageio.get_writer(uri=path,
                                      format="FFMPEG",
                                      fps=25,
                                      codec="rawvideo",
                                      pixelformat="yuv420p",
                                      macro_block_size=None,
                                      ffmpeg_log_level="error")
            # write the filtered frames to avi file
            for evid in np.arange(len(ds)):
                # skip frames that were filtered out
                if filtered and not ds._filter[evid]:
                    continue
                try:
                    image = ds["image"][evid]
                except BaseException:
                    warnings.warn("Could not read image {}!".format(evid),
                                  NoImageWarning)
                    continue
                else:
                    if np.isnan(image[0, 0]):
                        # This is a nan-valued image
                        image = np.zeros_like(image, dtype=np.uint8)
                # Convert image to RGB
                image = image.reshape(image.shape[0], image.shape[1], 1)
                image = np.repeat(image, 3, axis=2)
                vout.append_data(image)
        else:
            msg = "No image data to export: dataset {} !".format(ds.title)
            raise OSError(msg)

[docs]    def fcs(self, path, features, filtered=True, override=False):
        """Export the data of an RT-DC dataset to an .fcs file

        Parameters
        ----------
        mm: instance of dclab.RTDCBase
            The dataset that will be exported.
        path: str
            Path to a .tsv file. The ending .tsv is added automatically.
        features: list of str
            The features in the resulting .tsv file. These are strings
            that are defined in `dclab.definitions.scalar_feature_names`,
            e.g. "area_cvx", "deform", "frame", "fl1_max", "aspect".
        filtered: bool
            If set to `True`, only the filtered data (index in ds._filter)
            are used.
        override: bool
            If set to `True`, an existing file ``path`` will be overridden.
            If set to `False`, raises `OSError` if ``path`` exists.

        Notes
        -----
        Due to incompatibility with the .fcs file format, all events with
        NaN-valued features are not exported.
        """
        features = [c.lower() for c in features]
        ds = self.rtdc_ds

        path = pathlib.Path(path)
        # Make sure that path ends with .fcs
        if path.suffix != ".fcs":
            path = path.with_name(path.name + ".fcs")
        # Check if file already exist
        if not override and path.exists():
            raise OSError("File already exists: {}\n".format(
                str(path).encode("ascii", "ignore")) +
                "Please use the `override=True` option.")
        # Check that features are in dfn.scalar_feature_names
        for c in features:
            if c not in dfn.scalar_feature_names:
                msg = "Unknown or unsupported feature name: {}".format(c)
                raise ValueError(msg)

        # Collect the header
        chn_names = [dfn.feature_name2label[c] for c in features]

        # Collect the data
        if filtered:
            data = [ds[c][ds._filter] for c in features]
        else:
            data = [ds[c] for c in features]

        data = np.array(data).transpose()
        fcswrite.write_fcs(filename=str(path),
                           chn_names=chn_names,
                           data=data)

[docs]    def hdf5(self, path, features, filtered=True, override=False,
             compression="gzip"):
        """Export the data of the current instance to an HDF5 file

        Parameters
        ----------
        path: str
            Path to an .rtdc file. The ending .rtdc is added
            automatically.
        features: list of str
            The features in the resulting .tsv file. These are strings
            that are defined in `dclab.definitions.feature_names`, e.g.
            "area_cvx", "deform", "frame", "fl1_max", "image".
        filtered: bool
            If set to `True`, only the filtered data (index in ds._filter)
            are used.
        override: bool
            If set to `True`, an existing file ``path`` will be overridden.
            If set to `False`, raises `OSError` if ``path`` exists.
        compression: str or None
            Compression method for "contour", "image", and "trace" data
            as well as logs; one of [None, "lzf", "gzip", "szip"].
        """
        path = pathlib.Path(path)
        # Make sure that path ends with .rtdc
        if not path.suffix == ".rtdc":
            path = path.parent / (path.name + ".rtdc")
        # Check if file already exist
        if not override and path.exists():
            raise OSError("File already exists: {}\n".format(
                          str(path).encode("ascii", "ignore")) +
                          "Please use the `override=True` option.")
        elif path.exists():
            path.unlink()

        meta = {}

        # only export configuration meta data (no user-defined config)
        for sec in dfn.CFG_METADATA:
            if sec in ["fmt_tdms"]:
                # ignored sections
                continue
            if sec in self.rtdc_ds.config:
                meta[sec] = self.rtdc_ds.config[sec].copy()
        # update number of events
        if filtered:
            meta["experiment"]["event count"] = np.sum(self.rtdc_ds.filter.all)
        else:
            meta["experiment"]["event count"] = self.rtdc_ds.filter.all.size

        if filtered:
            filtarr = self.rtdc_ds.filter.all
            nev = np.sum(filtarr)
        else:
            nev = len(self.rtdc_ds)
            filtarr = np.ones(nev, dtype=bool)

        # write meta data
        with write(path_or_h5file=path, meta=meta, mode="append") as h5obj:
            # write each feature individually
            for feat in features:
                # event-wise, because
                # - tdms-based datasets don't allow indexing with numpy
                # - there might be memory issues
                if feat == "contour":
                    cont_list = []
                    cmax = 0
                    for ii in range(len(self.rtdc_ds)):
                        if filtarr[ii]:
                            dat = self.rtdc_ds["contour"][ii]
                            cont_list.append(dat)
                            cmax = max(cmax, dat.max())
                    write(h5obj,
                          data={"contour": cont_list},
                          mode="append",
                          compression=compression)
                elif feat in ["mask", "image"]:
                    # store image stacks (reduced file size and save time)
                    m = 64
                    im0 = self.rtdc_ds[feat][0]
                    imstack = np.zeros((m, im0.shape[0], im0.shape[1]),
                                       dtype=im0.dtype)
                    jj = 0
                    for ii in range(len(self.rtdc_ds)):
                        if filtarr[ii]:
                            dat = self.rtdc_ds[feat][ii]
                            imstack[jj] = dat
                            if (jj + 1) % m == 0:
                                jj = 0
                                write(h5obj,
                                      data={feat: imstack},
                                      mode="append",
                                      compression=compression)
                            else:
                                jj += 1
                    # write rest
                    if jj:
                        write(h5obj,
                              data={feat: imstack[:jj, :, :]},
                              mode="append",
                              compression=compression)
                elif feat == "trace":
                    for tr in self.rtdc_ds["trace"].keys():
                        tr0 = self.rtdc_ds["trace"][tr][0]
                        trdat = np.zeros((nev, tr0.size), dtype=tr0.dtype)
                        jj = 0
                        for ii in range(len(self.rtdc_ds)):
                            if filtarr[ii]:
                                trdat[jj] = self.rtdc_ds["trace"][tr][ii]
                                jj += 1
                        write(h5obj,
                              data={"trace": {tr: trdat}},
                              mode="append",
                              compression=compression)
                else:
                    write(h5obj,
                          data={feat: self.rtdc_ds[feat][filtarr]},
                          mode="append")

[docs]    def tsv(self, path, features, filtered=True, override=False):
        """Export the data of the current instance to a .tsv file

        Parameters
        ----------
        path: str
            Path to a .tsv file. The ending .tsv is added automatically.
        features: list of str
            The features in the resulting .tsv file. These are strings
            that are defined in `dclab.definitions.scalar_feature_names`,
            e.g. "area_cvx", "deform", "frame", "fl1_max", "aspect".
        filtered: bool
            If set to `True`, only the filtered data (index in ds._filter)
            are used.
        override: bool
            If set to `True`, an existing file ``path`` will be overridden.
            If set to `False`, raises `OSError` if ``path`` exists.
        """
        features = [c.lower() for c in features]
        path = pathlib.Path(path)
        ds = self.rtdc_ds
        # Make sure that path ends with .tsv
        if path.suffix != ".tsv":
            path = path.with_name(path.name + ".tsv")
        # Check if file already exist
        if not override and path.exists():
            raise OSError("File already exists: {}\n".format(
                str(path).encode("ascii", "ignore")) +
                "Please use the `override=True` option.")
        # Check that features are in dfn.scalar_feature_names
        for c in features:
            if c not in dfn.scalar_feature_names:
                raise ValueError("Unknown feature name {}".format(c))

        # Open file
        with path.open("w") as fd:
            # write header
            header1 = "\t".join([c for c in features])
            fd.write("# "+header1+"\n")
            header2 = "\t".join([dfn.feature_name2label[c] for c in features])
            fd.write("# "+header2+"\n")

        with path.open("ab") as fd:
            # write data
            if filtered:
                data = [ds[c][ds._filter] for c in features]
            else:
                data = [ds[c] for c in features]

            np.savetxt(fd,
                       np.array(data).transpose(),
                       fmt=str("%.10e"),
                       delimiter="\t")