UpSetPlots¶
from os import environ
from pathlib import Path
import numpy as np
import pandas as pd
import xarray as xr
import GSForge as gsf
import upsetplot
import matplotlib.pyplot as plt
import holoviews as hv
# IPython line magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the Bokeh plotting backend for HoloViews.
hv.extension('bokeh')
# Root of the demo data set. The base directory is read from the
# GSFORGE_DEMO_DATA environment variable and falls back to
# ~/GSForge_demo_data/ (with "~" expanded) when the variable is unset.
OSF_PATH = (
    Path(environ.get("GSFORGE_DEMO_DATA", "~/GSForge_demo_data/")).expanduser()
    / "osfstorage"
    / "oryza_sativa"
)

# File handed to gsf.AnnotatedGEM.
GEM_PATH = OSF_PATH / "AnnotatedGEMs" / "oryza_sativa_hisat2_raw.nc"

# Directories handed to gsf.GeneSetCollection.from_folder.
LIT_DGE_GSC_PATH = OSF_PATH / "GeneSetCollections" / "literature" / "DGE"
LIT_TF_PATH = OSF_PATH / "GeneSetCollections" / "literature" / "TF"
BORUTA_GSC_PATH = OSF_PATH / "GeneSetCollections" / "boruta"
# Load the annotated gene expression matrix (AnnotatedGEM) stored at GEM_PATH.
agem = gsf.AnnotatedGEM(GEM_PATH)
# Bare expression so the notebook displays the object's summary.
agem
<GSForge.AnnotatedGEM>
Name: Oryza Sativa
Selected GEM Variable: 'counts'
Gene 66338
Sample 475
%%time
lit_dge_coll = gsf.GeneSetCollection.from_folder(gem=agem, target_dir=LIT_DGE_GSC_PATH, name="Literature DGE")
lit_tf_coll = gsf.GeneSetCollection.from_folder(gem=agem, target_dir=LIT_TF_PATH, name="Literature TF")
boruta_gsc = gsf.GeneSetCollection.from_folder(gem=agem, target_dir=BORUTA_GSC_PATH, name="Boruta Results")
tf_geneset = gsf.GeneSet.from_GeneSets(*list(lit_tf_coll.gene_sets.values()), name='transcription factors')
CPU times: user 17 s, sys: 59.5 ms, total: 17 s
Wall time: 17 s
# Pool the literature DGE gene sets, the Boruta gene sets and the single
# transcription-factor gene set into one collection. With dict unpacking,
# entries listed later replace earlier ones that share a key.
combined_gsc = gsf.GeneSetCollection(
    gem=agem,
    gene_sets={
        **lit_dge_coll.gene_sets,
        **boruta_gsc.gene_sets,
        'transcription factors': tf_geneset,
    },
)
# Overlap heat map for the gene sets within the combined collection.
gsf.plots.collections.WithinCollectionOverlapHeatMap(combined_gsc)
# Request an UpSet plot of the combined collection through the GSForge
# interface, forwarding orientation='vertical' to upsetplot.
# NOTE(review): in the recorded run this call raised the AttributeError shown in
# the traceback that follows (upsetplot received the ValueError type where it
# expected a Series/DataFrame) — confirm against the installed GSForge and
# upsetplot versions before relying on this cell.
gsf.plots.collections.UpsetPlotInterface(combined_gsc, min_overlap_size=2,
upset_kwargs=dict(orientation='vertical'))
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/tmp/ipykernel_5780/614032527.py in <module>
1 gsf.plots.collections.UpsetPlotInterface(combined_gsc, min_overlap_size=2,
----> 2 upset_kwargs=dict(orientation='vertical'))
~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/utils/__init__.py in call(*args, **kwargs)
34
35 else:
---> 36 result = func(*args, **kwargs)
37 return result
38
~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/models/_Interface.py in __new__(cls, *args, **params)
407 params["selected_gene_sets"] = [params.get("selected_gene_sets")]
408 inst = cls.instance(**params) # See the param code for more on this `instance` function.
--> 409 return inst.__call__()
410
411 def __call__(self):
~/virtualenv/python3.7.1/lib/python3.7/site-packages/GSForge/plots/collections/_upsetplot_interface.py in __call__(self, *args, **kwargs)
44 upset_series = self.build_membership_series(self.gene_set_collection.as_dict(keys=self.selected_gene_sets),
45 min_size=self.min_overlap_size)
---> 46 return upsetplot.UpSet(upset_series, **self.upset_kwargs)
~/virtualenv/python3.7.1/lib/python3.7/site-packages/upsetplot/plotting.py in __init__(self, data, orientation, sort_by, sort_categories_by, subset_size, sum_over, min_subset_size, max_subset_size, min_degree, max_degree, facecolor, other_dots_color, shading_color, with_lines, element_size, intersection_plot_elements, totals_plot_elements, show_counts, show_percentages)
423 min_degree=min_degree,
424 max_degree=max_degree,
--> 425 reverse=not self._horizontal)
426 self.subset_styles = [{"facecolor": facecolor}
427 for i in range(len(self.intersections))]
~/virtualenv/python3.7.1/lib/python3.7/site-packages/upsetplot/plotting.py in _process_data(df, sort_by, sort_categories_by, subset_size, sum_over, min_subset_size, max_subset_size, min_degree, max_degree, reverse)
150 sum_over, min_subset_size=None, max_subset_size=None,
151 min_degree=None, max_degree=None, reverse=False):
--> 152 df, agg = _aggregate_data(df, subset_size, sum_over)
153 total = agg.sum()
154 df = _check_index(df)
~/virtualenv/python3.7.1/lib/python3.7/site-packages/upsetplot/plotting.py in _aggregate_data(df, subset_size, sum_over)
28 raise ValueError('subset_size should be one of %s. Got %r'
29 % (_SUBSET_SIZE_VALUES, subset_size))
---> 30 if df.ndim == 1:
31 # Series
32 input_name = df.name
AttributeError: type object 'ValueError' has no attribute 'ndim'
# Build the membership series for the combined collection by calling the
# interface's helper directly instead of going through UpsetPlotInterface.
# NOTE(review): min_size=5 presumably drops overlaps smaller than 5 — confirm.
data = gsf.plots.collections.UpsetPlotInterface.build_membership_series(combined_gsc.as_dict(), min_size=5)
# Create the figure at the desired resolution; the figure size is left at the
# matplotlib default (the size setting below is disabled).
# fig_inches = 3.5
fig_dpi = 300 # 300 is a common DPI requirement.
# Construct the figure.
fig, ax = plt.subplots(dpi=fig_dpi)
# Hide the axes created by plt.subplots.
# NOTE(review): presumably upsetplot lays out its own axes on `fig` — confirm.
ax.axis('off')
# Draw the UpSet plot onto `fig` with a vertical orientation.
upsetplot.plot(data, fig=fig, orientation='vertical')
# Rebuild the membership series using only the literature DGE collection.
data = gsf.plots.collections.UpsetPlotInterface.build_membership_series(lit_dge_coll.as_dict(), min_size=5)
# Bare expression to inspect the resulting series.
data
# Vertical-orientation variant, currently disabled:
# upsetplot.UpSet(data=data, orientation='vertical')
# Construct the UpSet object with upsetplot's default settings.
upsetplot.UpSet(data=data)