Sample-wise Distributions

Plotting Guide Setup

In [1]:
import os
import numpy as np

import holoviews as hv
# Register the bokeh and matplotlib plotting backends with HoloViews.
hv.extension('bokeh', 'matplotlib')
import matplotlib.pyplot as plt
%matplotlib inline

import GSForge as gsf

Declare the paths used in this notebook

In [2]:
# OS-independent path management.
from os import fspath, environ
from pathlib import Path
In [3]:
# Root of the demo data. Set the GSFORGE_DEMO_DATA environment variable to
# override the default location under the user's home directory.
OSF_PATH = Path(environ.get("GSFORGE_DEMO_DATA", default="~/GSForge_demo_data")).expanduser()
AGEM_PATH = OSF_PATH.joinpath("osfstorage", "rice.nc")
BOR_COLL_PATH = OSF_PATH.joinpath("osfstorage", "boruta_gene_sets")

# Fail early, naming the missing path, instead of failing later inside a loader.
assert AGEM_PATH.exists(), f"AnnotatedGEM file not found: {AGEM_PATH}"
assert BOR_COLL_PATH.is_dir(), f"Gene set folder not found: {BOR_COLL_PATH}"

Load an AnnotatedGEM

In [4]:
# Build the AnnotatedGEM from the file at AGEM_PATH; the bare name on the last
# line displays its summary (name, selected GEM variable, gene/sample counts).
agem = gsf.AnnotatedGEM(AGEM_PATH)
agem
Out[4]:
<GSForge.AnnotatedGEM>
Name: Rice
Selected GEM Variable: 'counts'
    Gene   55986
    Sample 475
In [5]:
# Collect the gene sets stored under BOR_COLL_PATH into one named collection
# tied to the AnnotatedGEM loaded above, then display its summary.
gsc = gsf.GeneSetCollection.from_folder(
    gem=agem,
    target_dir=BOR_COLL_PATH,
    name="Boruta Results",
)
gsc
Out[5]:
<GSForge.GeneSetCollection>
Boruta Results
GeneSets (3 total): Support Count
    Boruta_Treatment: 681
    Boruta_Genotype: 661
    Boruta_Subspecies: 231

Creating a Sample-wise Distribution plot

In [6]:
# Fetch the count matrix together with the "Treatment" sample annotation;
# the result is displayed as a (counts, annotation) tuple.
gsf.get_data(agem, annotation_variables="Treatment")
Out[6]:
(<xarray.DataArray 'counts' (Sample: 475, Gene: 55986)>
 array([[ 20,   0,   0, ...,   0, 637,   0],
        [  2,   0,   0, ...,   0, 186,   0],
        [ 22,   0,   0, ...,   0, 545,   0],
        ...,
        [  8,   0,   0, ...,   0, 411,   0],
        [  8,   0,   0, ...,   0, 311,   0],
        [ 21,   0,   0, ...,   0, 666,   0]])
 Coordinates:
   * Gene     (Gene) object 'LOC_Os06g05820' ... 'LOC_Os07g03418'
   * Sample   (Sample) object 'SRX1423934' 'SRX1423935' ... 'SRX1424408',
 <xarray.DataArray 'Treatment' (Sample: 475)>
 array(['CONTROL', 'CONTROL', 'CONTROL', ..., 'RECOV_DROUGHT', 'RECOV_DROUGHT',
        'RECOV_DROUGHT'], dtype=object)
 Coordinates:
   * Sample   (Sample) object 'SRX1423934' 'SRX1423935' ... 'SRX1424408')
In [7]:
# Sample-wise distribution plot for the "Treatment" annotation, drawn on an
# explicitly sized matplotlib axis.
fig, ax = plt.subplots(figsize=(10, 7))
gsf.plots.SampleWiseDistribution(
    agem,
    ax=ax,
    annotation_variables="Treatment",
    # Only strictly positive counts are kept before taking log2, which avoids
    # log2(0); the others are masked out (presumably as NaN, via xarray's .where).
    count_transform=lambda raw: np.log2(raw.where(raw > 0)),
)
Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f02fc4d4748>
In [8]:
# Sample-wise distribution plot for the "Genotype" annotation, drawn on an
# explicitly sized matplotlib axis.
fig, ax = plt.subplots(figsize=(10, 7))
gsf.plots.SampleWiseDistribution(
    agem,
    ax=ax,
    annotation_variables="Genotype",
    # Only strictly positive counts are kept before taking log2, which avoids
    # log2(0); the others are masked out (presumably as NaN, via xarray's .where).
    count_transform=lambda raw: np.log2(raw.where(raw > 0)),
)
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f03062452e8>


Right click to download this notebook from GitHub.