Gene vs Counts

Here genes form the x-axis, with intensity values on the y-axis. This method is intended to be used to compare a subset of selected genes, and will give a warning if you try to pass too many genes. This can be overridden by setting soft_max=<some integer>.

Plotting Guide Setup

Shared setup for all plotting guides.

# OS-independent path management.
from os import environ
from pathlib import Path
import numpy as np
import GSForge as gsf
import holoviews as hv
hv.extension('bokeh')

OSF_PATH = Path(environ.get("GSFORGE_DEMO_DATA", default="~/GSForge_demo_data/")).expanduser().joinpath("osfstorage", "oryza_sativa")
GEM_PATH = OSF_PATH.joinpath("AnnotatedGEMs", "oryza_sativa_hisat2_raw.nc")
TOUR_BORUTA = OSF_PATH.joinpath("GeneSetCollections", "tour_boruta")
agem = gsf.AnnotatedGEM(GEM_PATH)
agem
<GSForge.AnnotatedGEM>
Name: Oryza Sativa
Selected GEM Variable: 'counts'
    Gene   66338
    Sample 475
gsc = gsf.GeneSetCollection.from_folder(
    gem=agem, target_dir=TOUR_BORUTA, name="Boruta Tour Results")
gsc
<GSForge.GeneSetCollection>
Boruta Tour Results
GeneSets (2 total): Support Count
    Boruta_treatment: 771
    Boruta_genotype: 663

Plot Genes vs Gene Counts

Select Genes of Interest

There are a few too many genes for us to look at all the genes in our collection. Rather we will get the 10 largest (by mean) intensity genes.

counts, _ = gsf.get_gem_data(gsc, selected_gene_sets=["Boruta_treatment"])
selected_genes = counts.isel(Gene=np.argsort(counts.mean(dim="Sample").values)[-10:])["Gene"].values
selected_genes
/home/travis/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py:281: UserWarning: 771 Unavailable genes of the 771 requested. Using the 0 available.
  UserWarning)
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in _getitem(self, key)
    103                 original_array = self.get_array(needs_lock=False)
--> 104                 array = getitem(original_array, key)
    105         except IndexError:

src/netCDF4/_netCDF4.pyx in netCDF4._netCDF4.Variable.__getitem__()

~/virtualenv/python3.7.1/lib/python3.7/site-packages/netCDF4/utils.py in _out_array_shape(count)
    457         if n == 1:
--> 458             c = count[..., i].ravel()[0] # All elements should be identical.
    459             out.append(c)

IndexError: index 0 is out of bounds for axis 0 with size 0

During handling of the above exception, another exception occurred:

IndexError                                Traceback (most recent call last)
/tmp/ipykernel_5592/2252436617.py in <module>
----> 1 counts, _ = gsf.get_gem_data(gsc, selected_gene_sets=["Boruta_treatment"])
      2 selected_genes = counts.isel(Gene=np.argsort(counts.mean(dim="Sample").values)[-10:])["Gene"].values
      3 selected_genes

~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/utils/__init__.py in call(*args, **kwargs)
     34 
     35         else:
---> 36             result = func(*args, **kwargs)
     37         return result
     38 

~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py in __new__(cls, *args, **params)
    407                 params["selected_gene_sets"] = [params.get("selected_gene_sets")]
    408         inst = cls.instance(**params)  # See the param code for more on this `instance` function.
--> 409         return inst.__call__()
    410 
    411     def __call__(self):

~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/operations/core.py in __call__(self)
     42 
     43     def __call__(self):
---> 44         return self.get_gem_data(self.single_object, self.output_type)

~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py in get_gem_data(self, single_object, output_type, **params)
    390                              f'types: {list(modes.keys())}')
    391 
--> 392         return modes[key](self)
    393 
    394 

~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py in xarray_tuple(self_)
    351         def xarray_tuple(self_):
    352             logger.info('Returning counts as an xarray.DataArray and annotations as an xarray.Dataset.')
--> 353             return self_.x_count_data, self_.y_annotation_data
    354 
    355         def pandas_single(self_):

~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py in x_count_data(self)
    286 
    287         logger.info(f'Preparing count data using mask mode {self.count_mask}.')
--> 288         counts = mask_modes[self.count_mask](counts)
    289 
    290         # Optional transform.

~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py in <lambda>(c)
    241 
    242         mask_modes = {
--> 243             "complete": lambda c: c.fillna(0),
    244             "masked": lambda c: c.where(c > 0.0),
    245             "dropped": lambda c: c.where(c > 0.0).dropna(dim=self.gem.gene_index_name),

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/dataarray.py in fillna(self, value)
   2445                 "fillna on a DataArray"
   2446             )
-> 2447         out = ops.fillna(self, value)
   2448         return out
   2449 

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/ops.py in fillna(data, other, join, dataset_join)
    151         dataset_join=dataset_join,
    152         dataset_fill_value=np.nan,
--> 153         keep_attrs=True,
    154     )
    155 

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/computation.py in apply_ufunc(func, input_core_dims, output_core_dims, exclude_dims, vectorize, join, dataset_join, dataset_fill_value, keep_attrs, kwargs, dask, output_dtypes, output_sizes, meta, dask_gufunc_kwargs, *args)
   1178             join=join,
   1179             exclude_dims=exclude_dims,
-> 1180             keep_attrs=keep_attrs,
   1181         )
   1182     # feed Variables directly through apply_variable_ufunc

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/computation.py in apply_dataarray_vfunc(func, signature, join, exclude_dims, keep_attrs, *args)
    291 
    292     data_vars = [getattr(a, "variable", a) for a in args]
--> 293     result_var = func(*data_vars)
    294 
    295     if signature.num_outputs > 1:

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/computation.py in apply_variable_ufunc(func, signature, exclude_dims, dask, output_dtypes, vectorize, keep_attrs, dask_gufunc_kwargs, *args)
    650         if isinstance(arg, Variable)
    651         else arg
--> 652         for arg, core_dims in zip(args, signature.input_core_dims)
    653     ]
    654 

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/computation.py in <listcomp>(.0)
    650         if isinstance(arg, Variable)
    651         else arg
--> 652         for arg, core_dims in zip(args, signature.input_core_dims)
    653     ]
    654 

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/computation.py in broadcast_compat_data(variable, broadcast_dims, core_dims)
    562     core_dims: Tuple[Hashable, ...],
    563 ) -> Any:
--> 564     data = variable.data
    565 
    566     old_dims = variable.dims

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/variable.py in data(self)
    342             return self._data
    343         else:
--> 344             return self.values
    345 
    346     @data.setter

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/variable.py in values(self)
    515     def values(self):
    516         """The variable's data as a numpy.ndarray"""
--> 517         return _as_array_or_item(self._data)
    518 
    519     @values.setter

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/variable.py in _as_array_or_item(data)
    257     TODO: remove this (replace with np.asarray) once these issues are fixed
    258     """
--> 259     data = np.asarray(data)
    260     if data.ndim == 0:
    261         if data.dtype.kind == "M":

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype)
    546 
    547     def __array__(self, dtype=None):
--> 548         self._ensure_cached()
    549         return np.asarray(self.array, dtype=dtype)
    550 

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/indexing.py in _ensure_cached(self)
    543     def _ensure_cached(self):
    544         if not isinstance(self.array, NumpyIndexingAdapter):
--> 545             self.array = NumpyIndexingAdapter(np.asarray(self.array))
    546 
    547     def __array__(self, dtype=None):

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype)
    516 
    517     def __array__(self, dtype=None):
--> 518         return np.asarray(self.array, dtype=dtype)
    519 
    520     def __getitem__(self, key):

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/indexing.py in __array__(self, dtype)
    417     def __array__(self, dtype=None):
    418         array = as_indexable(self.array)
--> 419         return np.asarray(array[self.key], dtype=None)
    420 
    421     def transpose(self, order):

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in __getitem__(self, key)
     90     def __getitem__(self, key):
     91         return indexing.explicit_indexing_adapter(
---> 92             key, self.shape, indexing.IndexingSupport.OUTER, self._getitem
     93         )
     94 

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/core/indexing.py in explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method)
    708     """
    709     raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support)
--> 710     result = raw_indexing_method(raw_key.tuple)
    711     if numpy_indices.tuple:
    712         # index the loaded np.ndarray

~/virtualenv/python3.7.1/lib/python3.7/site-packages/xarray/backends/netCDF4_.py in _getitem(self, key)
    112                 "your data into memory first by calling .load()."
    113             )
--> 114             raise IndexError(msg)
    115         return array
    116 

IndexError: The indexing operation you are attempting to perform is not valid on netCDF4.Variable object. Try loading your data into memory first by calling .load().
plot = gsf.plots.gem.GeneVsCountsScatter(
    gsc, selected_genes=selected_genes, hue="treatment",
    count_transform=lambda counts: np.log2(counts.where(counts > 0))
)
plot