Gene-wise Aggregate Scatter and Distribution Plots¶
Displays the output of selected aggregations upon the count array on a scatter plot with optional adjoined kernel density estimates. e.g. mean counts vs mean variance etc. By default, such outputs are \(log_2\) transformed as well.
The available axis aggregation functions are: + frequency + mean + variance + standard_dev + fano + mean_rank + cv_squared
The default axis selections are ‘mean’ vs ‘variance’.
Plotting Guide Setup
A shared setup for all plotting guides.
# OS-independent path management.
from os import environ
from pathlib import Path
import numpy as np
import GSForge as gsf
import holoviews as hv
hv.extension('bokeh')
OSF_PATH = Path(environ.get("GSFORGE_DEMO_DATA", default="~/GSForge_demo_data/")).expanduser().joinpath("osfstorage", "oryza_sativa")
NORMED_GEM_PATH = OSF_PATH.joinpath("AnnotatedGEMs", "oryza_sativa_hisat2_normed.nc")
TOUR_BORUTA = OSF_PATH.joinpath("GeneSetCollections", "tour_boruta")
agem = gsf.AnnotatedGEM(NORMED_GEM_PATH)
agem
<GSForge.AnnotatedGEM>
Name: Oryza Sativa
Selected GEM Variable: 'counts'
Gene 66338
Sample 475
gsc = gsf.GeneSetCollection.from_folder(
gem=agem, target_dir=TOUR_BORUTA, name="Boruta Results")
gsc
<GSForge.GeneSetCollection>
Boruta Results
GeneSets (2 total): Support Count
Boruta_treatment: 771
Boruta_genotype: 663
Creating a Gene-wise scatter plot¶
gsf.plots.gem.GenewiseAggregateScatter(agem, datashade=True)
Select an Aggregation method¶
gsf.plots.gem.GenewiseAggregateScatter(agem, y_axis_selector="cv_squared", datashade=True)
Select and Group Genes¶
We can also limit and select to sets of genes.
Note that the aggregation transform occurs on the x_count_data
.
Selecting more than one gene set returns the union of those sets by default.
gsf.plots.gem.GenewiseAggregateScatter(
gsc, datashade=False, selected_gene_sets=["Boruta_treatment", "Boruta_genotype"])
/home/travis/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py:281: UserWarning: 1434 Unavailable genes of the 1434 requested. Using the 0 available.
UserWarning)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in _getitem(self, key)
103 original_array = self.get_array(needs_lock=False)
--> 104 array = getitem(original_array, key)
105 except IndexError:
src/netCDF4/_netCDF4.pyx in netCDF4._netCDF4.Variable.__getitem__()
~/virtualenv/python3.7.1/lib/python3.7/site-packages/netCDF4/utils.py in _out_array_shape(count)
457 if n == 1:
--> 458 c = count[..., i].ravel()[0] # All elements should be identical.
459 out.append(c)
IndexError: index 0 is out of bounds for axis 0 with size 0
During handling of the above exception, another exception occurred:
IndexError Traceback (most recent call last)
/tmp/ipykernel_5622/305074663.py in <module>
1 gsf.plots.gem.GenewiseAggregateScatter(
----> 2 gsc, datashade=False, selected_gene_sets=["Boruta_treatment", "Boruta_genotype"])
~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/utils/__init__.py in call(*args, **kwargs)
34
35 else:
---> 36 result = func(*args, **kwargs)
37 return result
38
~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py in __new__(cls, *args, **params)
407 params["selected_gene_sets"] = [params.get("selected_gene_sets")]
408 inst = cls.instance(**params) # See the param code for more on this `instance` function.
--> 409 return inst.__call__()
410
411 def __call__(self):
~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/plots/gem/_genewise_aggregate_scatter.py in __call__(self)
175
176 def __call__(self):
--> 177 counts = self.x_count_data
178
179 x_axis_name = f'{self.x_axis_selector}'
~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py in x_count_data(self)
286
287 logger.info(f'Preparing count data using mask mode {self.count_mask}.')
--> 288 counts = mask_modes[self.count_mask](counts)
289
290 # Optional transform.
~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py in <lambda>(c)
241
242 mask_modes = {
--> 243 "complete": lambda c: c.fillna(0),
244 "masked": lambda c: c.where(c > 0.0),
245 "dropped": lambda c: c.where(c > 0.0).dropna(dim=self.gem.gene_index_name),
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/dataarray.py in fillna(self, value)
2445 "fillna on a DataArray"
2446 )
-> 2447 out = ops.fillna(self, value)
2448 return out
2449
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/ops.py in fillna(data, other, join, dataset_join)
151 dataset_join=dataset_join,
152 dataset_fill_value=np.nan,
--> 153 keep_attrs=True,
154 )
155
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/computation.py in apply_ufunc(func, input_core_dims, output_core_dims, exclude_dims, vectorize, join, dataset_join, dataset_fill_value, keep_attrs, kwargs, dask, output_dtypes, output_sizes, meta, dask_gufunc_kwargs, *args)
1178 join=join,
1179 exclude_dims=exclude_dims,
-> 1180 keep_attrs=keep_attrs,
1181 )
1182 # feed Variables directly through apply_variable_ufunc
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/computation.py in apply_dataarray_vfunc(func, signature, join, exclude_dims, keep_attrs, *args)
291
292 data_vars = [getattr(a, "variable", a) for a in args]
--> 293 result_var = func(*data_vars)
294
295 if signature.num_outputs > 1:
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/computation.py in apply_variable_ufunc(func, signature, exclude_dims, dask, output_dtypes, vectorize, keep_attrs, dask_gufunc_kwargs, *args)
650 if isinstance(arg, Variable)
651 else arg
--> 652 for arg, core_dims in zip(args, signature.input_core_dims)
653 ]
654
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/computation.py in <listcomp>(.0)
650 if isinstance(arg, Variable)
651 else arg
--> 652 for arg, core_dims in zip(args, signature.input_core_dims)
653 ]
654
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/computation.py in broadcast_compat_data(variable, broadcast_dims, core_dims)
562 core_dims: Tuple[Hashable, ...],
563 ) -> Any:
--> 564 data = variable.data
565
566 old_dims = variable.dims
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/variable.py in data(self)
342 return self._data
343 else:
--> 344 return self.values
345
346 @data.setter
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/variable.py in values(self)
515 def values(self):
516 """The variable's data as a numpy.ndarray"""
--> 517 return _as_array_or_item(self._data)
518
519 @values.setter
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/variable.py in _as_array_or_item(data)
257 TODO: remove this (replace with np.asarray) once these issues are fixed
258 """
--> 259 data = np.asarray(data)
260 if data.ndim == 0:
261 if data.dtype.kind == "M":
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype)
546
547 def __array__(self, dtype=None):
--> 548 self._ensure_cached()
549 return np.asarray(self.array, dtype=dtype)
550
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/indexing.py in _ensure_cached(self)
543 def _ensure_cached(self):
544 if not isinstance(self.array, NumpyIndexingAdapter):
--> 545 self.array = NumpyIndexingAdapter(np.asarray(self.array))
546
547 def __array__(self, dtype=None):
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype)
516
517 def __array__(self, dtype=None):
--> 518 return np.asarray(self.array, dtype=dtype)
519
520 def __getitem__(self, key):
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype)
417 def __array__(self, dtype=None):
418 array = as_indexable(self.array)
--> 419 return np.asarray(array[self.key], dtype=None)
420
421 def transpose(self, order):
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in __getitem__(self, key)
90 def __getitem__(self, key):
91 return indexing.explicit_indexing_adapter(
---> 92 key, self.shape, indexing.IndexingSupport.OUTER, self._getitem
93 )
94
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/indexing.py in explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method)
708 """
709 raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support)
--> 710 result = raw_indexing_method(raw_key.tuple)
711 if numpy_indices.tuple:
712 # index the loaded np.ndarray
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in _getitem(self, key)
112 "your data into memory first by calling .load()."
113 )
--> 114 raise IndexError(msg)
115 return array
116
IndexError: The indexing operation you are attempting to perform is not valid on netCDF4.Variable object. Try loading your data into memory first by calling .load().