#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Export RT-DC measurement data"""
from __future__ import division, print_function, unicode_literals
import pathlib
import warnings
import imageio
import fcswrite
import numpy as np
from .. import definitions as dfn
from .write_hdf5 import write
class NoImageWarning(UserWarning):
    """Warning issued when the image of an event could not be read

    `Export.avi` emits this warning and skips the affected frame
    instead of aborting the whole export.
    """
class Export(object):
    """Export functionalities for RT-DC datasets

    The dataset passed to the constructor is stored as ``rtdc_ds``;
    each public method writes (a subset of) its data to a file.
    """

    def __init__(self, rtdc_ds):
        """Export functionalities for RT-DC datasets"""
        self.rtdc_ds = rtdc_ds

    @staticmethod
    def _prepare_path(path, suffix, override):
        """Return `path` with `suffix` enforced, honoring `override`

        Parameters
        ----------
        path: str or pathlib.Path
            Requested output location.
        suffix: str
            File extension including the leading dot, e.g. ".avi".
            It is appended if `path` does not already end with it.
        override: bool
            If `False`, raise `OSError` if the resulting path exists.

        Returns
        -------
        path: pathlib.Path
            The output path, guaranteed to end with `suffix`.
        """
        path = pathlib.Path(path)
        if path.suffix != suffix:
            path = path.parent / (path.name + suffix)
        if not override and path.exists():
            # Drop non-ASCII characters, then decode again so that the
            # message contains plain text rather than the repr of a
            # bytes object (``b'...'``) on Python 3.
            shown = str(path).encode("ascii", "ignore").decode("ascii")
            raise OSError("File already exists: {}\n".format(shown)
                          + "Please use the `override=True` option.")
        return path

    def _collect_scalar_data(self, features, filtered):
        """Return the data of `features` as an array of shape (N, C)

        One column per feature; if `filtered` is `True`, only the
        events selected by ``ds._filter`` are included.
        """
        ds = self.rtdc_ds
        if filtered:
            columns = [ds[feat][ds._filter] for feat in features]
        else:
            columns = [ds[feat] for feat in features]
        return np.array(columns).transpose()

    def avi(self, path, filtered=True, override=False):
        """Exports filtered event images to an avi file

        Parameters
        ----------
        path: str
            Path to an .avi file. The ending .avi is added
            automatically.
        filtered: bool
            If set to `True`, only the filtered data (index in
            ds._filter) are used.
        override: bool
            If set to `True`, an existing file ``path`` will be
            overridden. If set to `False`, raises `OSError` if
            ``path`` exists.

        Notes
        -----
        Raises OSError if current dataset does not contain image data.
        Events whose image cannot be read are skipped with a
        `NoImageWarning`; NaN-valued images are written as black
        frames.
        """
        path = self._prepare_path(path, ".avi", override)
        ds = self.rtdc_ds
        if "image" not in ds:
            msg = "No image data to export: dataset {} !".format(ds.title)
            raise OSError(msg)
        # The context manager closes the writer even if an error occurs
        # while frames are appended, so the file handle is not leaked.
        with imageio.get_writer(uri=path,
                                format="FFMPEG",
                                fps=25,
                                codec="rawvideo",
                                pixelformat="yuv420p",
                                macro_block_size=None,
                                ffmpeg_log_level="error") as vout:
            for evid in range(len(ds)):
                # skip frames that were filtered out
                if filtered and not ds._filter[evid]:
                    continue
                try:
                    image = ds["image"][evid]
                except Exception:
                    # Best-effort export: a single unreadable frame must
                    # not abort the video. `Exception` (rather than
                    # `BaseException`) keeps Ctrl+C working.
                    warnings.warn("Could not read image {}!".format(evid),
                                  NoImageWarning)
                    continue
                if np.isnan(image[0, 0]):
                    # This is a nan-valued image
                    image = np.zeros_like(image, dtype=np.uint8)
                # Convert image to RGB by repeating the single channel
                image = image.reshape(image.shape[0], image.shape[1], 1)
                image = np.repeat(image, 3, axis=2)
                vout.append_data(image)

    def fcs(self, path, features, filtered=True, override=False):
        """Export the data of an RT-DC dataset to an .fcs file

        Parameters
        ----------
        path: str
            Path to an .fcs file. The ending .fcs is added
            automatically.
        features: list of str
            The features in the resulting .fcs file. These are strings
            that are defined in `dclab.definitions.scalar_feature_names`,
            e.g. "area_cvx", "deform", "frame", "fl1_max", "aspect".
        filtered: bool
            If set to `True`, only the filtered data (index in
            ds._filter) are used.
        override: bool
            If set to `True`, an existing file ``path`` will be
            overridden. If set to `False`, raises `OSError` if
            ``path`` exists.

        Raises
        ------
        ValueError
            If a feature is not in `dfn.scalar_feature_names`.

        Notes
        -----
        Due to incompatibility with the .fcs file format, all events
        with NaN-valued features are not exported.
        """
        features = [c.lower() for c in features]
        path = self._prepare_path(path, ".fcs", override)
        # Check that features are in dfn.scalar_feature_names
        for c in features:
            if c not in dfn.scalar_feature_names:
                msg = "Unknown or unsupported feature name: {}".format(c)
                raise ValueError(msg)
        # Collect the header
        chn_names = [dfn.feature_name2label[c] for c in features]
        # Collect the data
        data = self._collect_scalar_data(features, filtered)
        fcswrite.write_fcs(filename=str(path),
                           chn_names=chn_names,
                           data=data)

    def hdf5(self, path, features, filtered=True, override=False,
             compression="gzip"):
        """Export the data of the current instance to an HDF5 file

        Parameters
        ----------
        path: str
            Path to an .rtdc file. The ending .rtdc is added
            automatically.
        features: list of str
            The features in the resulting .rtdc file. These are strings
            that are defined in `dclab.definitions.feature_names`, e.g.
            "area_cvx", "deform", "frame", "fl1_max", "image".
        filtered: bool
            If set to `True`, only the filtered data (index in
            ds.filter.all) are used.
        override: bool
            If set to `True`, an existing file ``path`` will be
            overridden. If set to `False`, raises `OSError` if
            ``path`` exists.
        compression: str or None
            Compression method for "contour", "image", and "trace" data
            as well as logs; one of [None, "lzf", "gzip", "szip"].
        """
        path = self._prepare_path(path, ".rtdc", override)
        if path.exists():
            # Data are written in "append" mode below, so an existing
            # file has to be removed to actually override it.
            path.unlink()
        ds = self.rtdc_ds
        meta = {}
        # only export configuration meta data (no user-defined config)
        for sec in dfn.CFG_METADATA:
            if sec in ["fmt_tdms"]:
                # ignored sections
                continue
            if sec in ds.config:
                meta[sec] = ds.config[sec].copy()
        # determine the event selection and the number of events
        if filtered:
            filtarr = ds.filter.all
            nev = np.sum(filtarr)
            event_count = nev
        else:
            event_count = ds.filter.all.size
            nev = len(ds)
            filtarr = np.ones(nev, dtype=bool)
        # update number of events (create the section if the dataset
        # configuration does not have one)
        meta.setdefault("experiment", {})["event count"] = event_count
        # write meta data
        with write(path_or_h5file=path, meta=meta, mode="append") as h5obj:
            # write each feature individually
            for feat in features:
                # event-wise, because
                # - tdms-based datasets don't allow indexing with numpy
                # - there might be memory issues
                if feat == "contour":
                    cont_list = [ds["contour"][ii]
                                 for ii in range(len(ds)) if filtarr[ii]]
                    write(h5obj,
                          data={"contour": cont_list},
                          mode="append",
                          compression=compression)
                elif feat in ["mask", "image"]:
                    # store image stacks (reduced file size and save time)
                    m = 64
                    im0 = ds[feat][0]
                    imstack = np.zeros((m, im0.shape[0], im0.shape[1]),
                                       dtype=im0.dtype)
                    # jj is the fill level of the current stack
                    jj = 0
                    for ii in range(len(ds)):
                        if not filtarr[ii]:
                            continue
                        imstack[jj] = ds[feat][ii]
                        if (jj + 1) % m == 0:
                            # stack is full: flush it and start over
                            jj = 0
                            write(h5obj,
                                  data={feat: imstack},
                                  mode="append",
                                  compression=compression)
                        else:
                            jj += 1
                    # write rest
                    if jj:
                        write(h5obj,
                              data={feat: imstack[:jj, :, :]},
                              mode="append",
                              compression=compression)
                elif feat == "trace":
                    for tr in ds["trace"].keys():
                        tr0 = ds["trace"][tr][0]
                        trdat = np.zeros((nev, tr0.size), dtype=tr0.dtype)
                        jj = 0
                        for ii in range(len(ds)):
                            if filtarr[ii]:
                                trdat[jj] = ds["trace"][tr][ii]
                                jj += 1
                        write(h5obj,
                              data={"trace": {tr: trdat}},
                              mode="append",
                              compression=compression)
                else:
                    # remaining features are written without compression
                    write(h5obj,
                          data={feat: ds[feat][filtarr]},
                          mode="append")

    def tsv(self, path, features, filtered=True, override=False):
        """Export the data of the current instance to a .tsv file

        Parameters
        ----------
        path: str
            Path to a .tsv file. The ending .tsv is added automatically.
        features: list of str
            The features in the resulting .tsv file. These are strings
            that are defined in `dclab.definitions.scalar_feature_names`,
            e.g. "area_cvx", "deform", "frame", "fl1_max", "aspect".
        filtered: bool
            If set to `True`, only the filtered data (index in
            ds._filter) are used.
        override: bool
            If set to `True`, an existing file ``path`` will be
            overridden. If set to `False`, raises `OSError` if
            ``path`` exists.

        Raises
        ------
        ValueError
            If a feature is not in `dfn.scalar_feature_names`.
        """
        features = [c.lower() for c in features]
        path = self._prepare_path(path, ".tsv", override)
        # Check that features are in dfn.scalar_feature_names
        for c in features:
            if c not in dfn.scalar_feature_names:
                raise ValueError("Unknown feature name {}".format(c))
        # The two header lines (feature names, then human-readable
        # labels) are written in text mode ...
        with path.open("w") as fd:
            header1 = "\t".join(features)
            fd.write("# "+header1+"\n")
            header2 = "\t".join([dfn.feature_name2label[c] for c in features])
            fd.write("# "+header2+"\n")
        # ... and the numeric data are appended in binary mode.
        with path.open("ab") as fd:
            np.savetxt(fd,
                       self._collect_scalar_data(features, filtered),
                       fmt=str("%.10e"),
                       delimiter="\t")