Gene vs Counts¶
Here genes form the x-axis, with intensity values on the y-axis.
This method is intended to be used to compare a subset of selected genes,
and will give a warning if you try to pass too many genes.
This can be overridden by setting soft_max=<some integer>
.
Plotting Guide Setup
Shared setup for all plotting guides.
# OS-independent path management.
from os import environ
from pathlib import Path
import numpy as np
import GSForge as gsf
import holoviews as hv
hv.extension('bokeh')
OSF_PATH = Path(environ.get("GSFORGE_DEMO_DATA", default="~/GSForge_demo_data/")).expanduser().joinpath("osfstorage", "oryza_sativa")
GEM_PATH = OSF_PATH.joinpath("AnnotatedGEMs", "oryza_sativa_hisat2_raw.nc")
TOUR_BORUTA = OSF_PATH.joinpath("GeneSetCollections", "tour_boruta")
agem = gsf.AnnotatedGEM(GEM_PATH)
agem
<GSForge.AnnotatedGEM>
Name: Oryza Sativa
Selected GEM Variable: 'counts'
Gene 66338
Sample 475
gsc = gsf.GeneSetCollection.from_folder(
gem=agem, target_dir=TOUR_BORUTA, name="Boruta Tour Results")
gsc
<GSForge.GeneSetCollection>
Boruta Tour Results
GeneSets (2 total): Support Count
Boruta_treatment: 771
Boruta_genotype: 663
Plot Genes vs Gene Counts¶
Select Genes of Interest
There are a few too many genes for us to look at all the genes in our collection. Rather we will get the 10 largest (by mean) intensity genes.
counts, _ = gsf.get_gem_data(gsc, selected_gene_sets=["Boruta_treatment"])
selected_genes = counts.isel(Gene=np.argsort(counts.mean(dim="Sample").values)[-10:])["Gene"].values
selected_genes
/home/travis/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py:281: UserWarning: 771 Unavailable genes of the 771 requested. Using the 0 available.
UserWarning)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in _getitem(self, key)
103 original_array = self.get_array(needs_lock=False)
--> 104 array = getitem(original_array, key)
105 except IndexError:
src/netCDF4/_netCDF4.pyx in netCDF4._netCDF4.Variable.__getitem__()
~/virtualenv/python3.7.1/lib/python3.7/site-packages/netCDF4/utils.py in _out_array_shape(count)
457 if n == 1:
--> 458 c = count[..., i].ravel()[0] # All elements should be identical.
459 out.append(c)
IndexError: index 0 is out of bounds for axis 0 with size 0
During handling of the above exception, another exception occurred:
IndexError Traceback (most recent call last)
/tmp/ipykernel_5592/2252436617.py in <module>
----> 1 counts, _ = gsf.get_gem_data(gsc, selected_gene_sets=["Boruta_treatment"])
2 selected_genes = counts.isel(Gene=np.argsort(counts.mean(dim="Sample").values)[-10:])["Gene"].values
3 selected_genes
~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/utils/__init__.py in call(*args, **kwargs)
34
35 else:
---> 36 result = func(*args, **kwargs)
37 return result
38
~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py in __new__(cls, *args, **params)
407 params["selected_gene_sets"] = [params.get("selected_gene_sets")]
408 inst = cls.instance(**params) # See the param code for more on this `instance` function.
--> 409 return inst.__call__()
410
411 def __call__(self):
~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/operations/core.py in __call__(self)
42
43 def __call__(self):
---> 44 return self.get_gem_data(self.single_object, self.output_type)
~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py in get_gem_data(self, single_object, output_type, **params)
390 f'types: {list(modes.keys())}')
391
--> 392 return modes[key](self)
393
394
~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py in xarray_tuple(self_)
351 def xarray_tuple(self_):
352 logger.info('Returning counts as an xarray.DataArray and annotations as an xarray.Dataset.')
--> 353 return self_.x_count_data, self_.y_annotation_data
354
355 def pandas_single(self_):
~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py in x_count_data(self)
286
287 logger.info(f'Preparing count data using mask mode {self.count_mask}.')
--> 288 counts = mask_modes[self.count_mask](counts)
289
290 # Optional transform.
~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py in <lambda>(c)
241
242 mask_modes = {
--> 243 "complete": lambda c: c.fillna(0),
244 "masked": lambda c: c.where(c > 0.0),
245 "dropped": lambda c: c.where(c > 0.0).dropna(dim=self.gem.gene_index_name),
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/dataarray.py in fillna(self, value)
2445 "fillna on a DataArray"
2446 )
-> 2447 out = ops.fillna(self, value)
2448 return out
2449
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/ops.py in fillna(data, other, join, dataset_join)
151 dataset_join=dataset_join,
152 dataset_fill_value=np.nan,
--> 153 keep_attrs=True,
154 )
155
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/computation.py in apply_ufunc(func, input_core_dims, output_core_dims, exclude_dims, vectorize, join, dataset_join, dataset_fill_value, keep_attrs, kwargs, dask, output_dtypes, output_sizes, meta, dask_gufunc_kwargs, *args)
1178 join=join,
1179 exclude_dims=exclude_dims,
-> 1180 keep_attrs=keep_attrs,
1181 )
1182 # feed Variables directly through apply_variable_ufunc
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/computation.py in apply_dataarray_vfunc(func, signature, join, exclude_dims, keep_attrs, *args)
291
292 data_vars = [getattr(a, "variable", a) for a in args]
--> 293 result_var = func(*data_vars)
294
295 if signature.num_outputs > 1:
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/computation.py in apply_variable_ufunc(func, signature, exclude_dims, dask, output_dtypes, vectorize, keep_attrs, dask_gufunc_kwargs, *args)
650 if isinstance(arg, Variable)
651 else arg
--> 652 for arg, core_dims in zip(args, signature.input_core_dims)
653 ]
654
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/computation.py in <listcomp>(.0)
650 if isinstance(arg, Variable)
651 else arg
--> 652 for arg, core_dims in zip(args, signature.input_core_dims)
653 ]
654
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/computation.py in broadcast_compat_data(variable, broadcast_dims, core_dims)
562 core_dims: Tuple[Hashable, ...],
563 ) -> Any:
--> 564 data = variable.data
565
566 old_dims = variable.dims
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/variable.py in data(self)
342 return self._data
343 else:
--> 344 return self.values
345
346 @data.setter
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/variable.py in values(self)
515 def values(self):
516 """The variable's data as a numpy.ndarray"""
--> 517 return _as_array_or_item(self._data)
518
519 @values.setter
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/variable.py in _as_array_or_item(data)
257 TODO: remove this (replace with np.asarray) once these issues are fixed
258 """
--> 259 data = np.asarray(data)
260 if data.ndim == 0:
261 if data.dtype.kind == "M":
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype)
546
547 def __array__(self, dtype=None):
--> 548 self._ensure_cached()
549 return np.asarray(self.array, dtype=dtype)
550
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/indexing.py in _ensure_cached(self)
543 def _ensure_cached(self):
544 if not isinstance(self.array, NumpyIndexingAdapter):
--> 545 self.array = NumpyIndexingAdapter(np.asarray(self.array))
546
547 def __array__(self, dtype=None):
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype)
516
517 def __array__(self, dtype=None):
--> 518 return np.asarray(self.array, dtype=dtype)
519
520 def __getitem__(self, key):
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype)
417 def __array__(self, dtype=None):
418 array = as_indexable(self.array)
--> 419 return np.asarray(array[self.key], dtype=None)
420
421 def transpose(self, order):
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in __getitem__(self, key)
90 def __getitem__(self, key):
91 return indexing.explicit_indexing_adapter(
---> 92 key, self.shape, indexing.IndexingSupport.OUTER, self._getitem
93 )
94
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/indexing.py in explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method)
708 """
709 raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support)
--> 710 result = raw_indexing_method(raw_key.tuple)
711 if numpy_indices.tuple:
712 # index the loaded np.ndarray
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in _getitem(self, key)
112 "your data into memory first by calling .load()."
113 )
--> 114 raise IndexError(msg)
115 return array
116
IndexError: The indexing operation you are attempting to perform is not valid on netCDF4.Variable object. Try loading your data into memory first by calling .load().
plot = gsf.plots.gem.GeneVsCountsScatter(
gsc, selected_genes=selected_genes, hue="treatment",
count_transform=lambda counts: np.log2(counts.where(counts > 0))
)
plot