Gene vs Counts

Plotting Guide Setup

In [1]:
import os
import numpy as np

import holoviews as hv
# Load the Bokeh plotting backend for HoloViews before any plot is created.
hv.extension('bokeh')

import GSForge as gsf

Declare the paths used in this notebook

In [2]:
# OS-independent path management.
from os import fspath, environ
from pathlib import Path
In [3]:
# Root of the demo data: taken from the GSFORGE_DEMO_DATA environment variable when set,
# otherwise ~/GSForge_demo_data. `expanduser()` resolves a leading "~" in either case.
OSF_PATH = Path(environ.get("GSFORGE_DEMO_DATA", default="~/GSForge_demo_data")).expanduser()
AGEM_PATH = OSF_PATH.joinpath("osfstorage", "rice.nc")
BOR_COLL_PATH = OSF_PATH.joinpath("osfstorage", "boruta_gene_sets")

# Fail early, with the offending path in the message, instead of erroring deep inside a loader.
assert AGEM_PATH.exists(), f"AnnotatedGEM file not found: {AGEM_PATH}"
# BOR_COLL_PATH is passed as `target_dir` to `GeneSetCollection.from_folder` further down.
assert BOR_COLL_PATH.is_dir(), f"Gene set collection folder not found: {BOR_COLL_PATH}"

Load an AnnotatedGEM

In [4]:
# Construct the AnnotatedGEM from the file at AGEM_PATH.
agem = gsf.AnnotatedGEM(AGEM_PATH)
# A bare name as the last line of the cell displays the object's summary.
agem
Out[4]:
<GSForge.AnnotatedGEM>
Name: Rice
Selected GEM Variable: 'counts'
    Gene   55986
    Sample 475
In [5]:
# Show the dataset held by the AnnotatedGEM (counts plus the sample and gene annotations).
agem.data
Out[5]:
<xarray.Dataset>
Dimensions:     (Gene: 55986, Sample: 475)
Coordinates:
  * Gene        (Gene) object 'LOC_Os06g05820' ... 'LOC_Os07g03418'
  * Sample      (Sample) object 'SRX1423934' 'SRX1423935' ... 'SRX1424408'
Data variables:
    SampleSRR   (Sample) object ...
    Treatment   (Sample) object ...
    Time        (Sample) int64 ...
    Tissue      (Sample) object ...
    Genotype    (Sample) object ...
    Subspecies  (Sample) object ...
    counts      (Sample, Gene) int64 ...
    lengths     (Gene) float64 ...
Attributes:
    __GSForge.AnnotatedGEM.params:  {"count_array_name": "counts", "gene_inde...
In [6]:
# Build a GeneSetCollection named "Boruta Results" from the contents of BOR_COLL_PATH,
# attached to the AnnotatedGEM loaded above.
gsc = gsf.GeneSetCollection.from_folder(
    gem=agem,
    target_dir=BOR_COLL_PATH,
    name="Boruta Results",
)
gsc
Out[6]:
<GSForge.GeneSetCollection>
Boruta Results
GeneSets (3 total): Support Count
    Boruta_Treatment: 681
    Boruta_Genotype: 661
    Boruta_Subspecies: 231

Select Genes of Interest

In [7]:
# Query the collection for the "Boruta_Treatment" gene set; the first returned item is kept
# as `counts`. The second item is not used in this notebook. It is bound to a named
# throwaway rather than `_`, because assigning `_` in IPython stops `_` from tracking the
# most recent cell output.
counts, _unused = gsf.get_data(gsc, selected_gene_sets=["Boruta_Treatment"])

Select the genes with the highest mean count across samples (this alternative is left commented out).

In [8]:
# Alternative selection (disabled): the ten genes with the highest mean count over the
# "Sample" dimension of `counts`. It is left commented out because the following cell
# assigns `selected_genes` from a gene set instead.
# selected_genes = counts.isel(Gene=np.argsort(counts.mean(dim="Sample").values)[-10:])["Gene"].values
# selected_genes

Select some members of a gene set.

In [9]:
# Take the first ten entries of the "Boruta_Treatment" gene set's support array.
treatment_gene_set = gsc.gene_sets["Boruta_Treatment"]
selected_genes = treatment_gene_set.gene_support()[:10]
selected_genes
Out[9]:
array(['LOC_Os03g11550', 'LOC_Os10g28020', 'LOC_Os02g53130',
       'LOC_Os02g32550', 'LOC_Os03g59760', 'LOC_Os02g43410',
       'LOC_Os02g01100', 'LOC_Os01g09640', 'LOC_Os04g45480',
       'LOC_Os10g42780'], dtype=object)

Gene vs Count Scatter Plots

In [10]:
# Scatter options gathered in one place, one per line, so each is easy to tweak.
scatter_style = hv.opts.Scatter(
    jitter=0.2,
    width=800,
    height=500,
    legend_position="right",
    xrotation=90,
    padding=0.1,
)
# One scatter of counts per selected gene, coloured by the "Treatment" annotation.
gsf.plots.GenesVsCounts(gsc, selected_genes=selected_genes, hue="Treatment").opts(scatter_style)
Out[10]:


Right-click to download this notebook from GitHub.