05-Reduced Dimension Embeddings

UMAP Embeddings

import numpy as np
import pandas as pd
import xarray as xr
import umap
import umap.plot
import GSForge as gsf
import holoviews as hv
import matplotlib.pyplot as plt
hv.extension("bokeh")
# OS-independent path management.
from os import  environ
from pathlib import Path
OSF_PATH = Path(environ.get("GSFORGE_DEMO_DATA", default="~/GSForge_demo_data/")).expanduser().joinpath("osfstorage", "oryza_sativa")
NORMED_GEM_PATH = OSF_PATH.joinpath("AnnotatedGEMs", "oryza_sativa_hisat2_normed.nc")
agem = gsf.AnnotatedGEM(NORMED_GEM_PATH)
agem
<GSForge.AnnotatedGEM>
Name: Oryza sativa
Selected GEM Variable: 'counts'
    Gene   55986
    Sample 475
agem.count_array_names
['counts',
 'TPM_counts',
 'uq_raw_counts',
 'uq_tpm_counts',
 'quantile_counts',
 'tmm_counts']
counts, labels = gsf.get_gem_data(agem, annotation_variables=['treatment', 'genotype', 'time'])
mapper = umap.UMAP(random_state=50, metric='manhattan').fit(counts.values)
fig, axes = plt.subplots(1, 3, figsize=(21, 7))
umap.plot.points(mapper, labels=labels['treatment'], background='black', ax=axes[0], color_key_cmap='Set1');
umap.plot.points(mapper, labels=labels['genotype'], background='black', ax=axes[1], color_key_cmap='Set2');
umap.plot.points(mapper, labels=labels['time'], background='black', ax=axes[2], color_key_cmap='plasma');
<AxesSubplot:>
../../_images/05-Dimensional_Reductions_5_1.png
counts, labels = gsf.get_gem_data(agem, annotation_variables=['treatment', 'genotype', 'time'])
mapper = umap.UMAP(densmap=True, random_state=50, metric='manhattan').fit(counts.values)
fig, axes = plt.subplots(1, 3, figsize=(21, 7))
umap.plot.points(mapper, labels=labels['treatment'], background='black', ax=axes[0], color_key_cmap='Set1');
umap.plot.points(mapper, labels=labels['genotype'], background='black', ax=axes[1], color_key_cmap='Set2');
umap.plot.points(mapper, labels=labels['time'], background='black', ax=axes[2], color_key_cmap='plasma');
<AxesSubplot:>
../../_images/05-Dimensional_Reductions_6_1.png

TPM Counts

counts, labels = gsf.get_gem_data(agem, annotation_variables=['treatment', 'genotype', 'time'], count_variable='TPM_counts')
mapper = umap.UMAP(random_state=66, metric='manhattan').fit(counts.values)
fig, axes = plt.subplots(1, 3, figsize=(21, 7))
umap.plot.points(mapper, labels=labels['treatment'], background='black', ax=axes[0], color_key_cmap='Set1');
umap.plot.points(mapper, labels=labels['genotype'], background='black', ax=axes[1], color_key_cmap='Set2');
umap.plot.points(mapper, labels=labels['time'], background='black', ax=axes[2], color_key_cmap='plasma');
<AxesSubplot:>
../../_images/05-Dimensional_Reductions_8_1.png
counts, labels = gsf.get_gem_data(agem, annotation_variables=['treatment', 'genotype', 'time'], count_variable='TPM_counts')
mapper = umap.UMAP(densmap=True, random_state=95, metric='manhattan').fit(counts.values)
fig, axes = plt.subplots(1, 3, figsize=(21, 7))
umap.plot.points(mapper, labels=labels['treatment'], background='black', ax=axes[0], color_key_cmap='Set1');
umap.plot.points(mapper, labels=labels['genotype'], background='black', ax=axes[1], color_key_cmap='Set2');
umap.plot.points(mapper, labels=labels['time'], background='black', ax=axes[2], color_key_cmap='plasma');
<AxesSubplot:>
../../_images/05-Dimensional_Reductions_9_1.png

TMM Counts

counts, labels = gsf.get_gem_data(agem, annotation_variables=['treatment', 'genotype', 'time'], count_variable='tmm_counts')
mapper = umap.UMAP(densmap=True, random_state=50, metric='manhattan').fit(counts.values)
fig, axes = plt.subplots(1, 3, figsize=(21, 7))
umap.plot.points(mapper, labels=labels['treatment'], background='black', ax=axes[0], color_key_cmap='Set1');
umap.plot.points(mapper, labels=labels['genotype'], background='black', ax=axes[1], color_key_cmap='Set2');
umap.plot.points(mapper, labels=labels['time'], background='black', ax=axes[2], color_key_cmap='plasma');
<AxesSubplot:>
../../_images/05-Dimensional_Reductions_11_1.png

Quantile Transformed

counts, labels = gsf.get_gem_data(agem, annotation_variables=['treatment', 'genotype', 'time'], count_variable='quantile_counts')
mapper = umap.UMAP(densmap=True, random_state=50, metric='manhattan').fit(counts.values)
fig, axes = plt.subplots(1, 3, figsize=(21, 7))
umap.plot.points(mapper, labels=labels['treatment'], background='black', ax=axes[0], color_key_cmap='Set1');
umap.plot.points(mapper, labels=labels['genotype'], background='black', ax=axes[1], color_key_cmap='Set2');
umap.plot.points(mapper, labels=labels['time'], background='black', ax=axes[2], color_key_cmap='plasma');
<AxesSubplot:>
../../_images/05-Dimensional_Reductions_13_1.png

Upper Quantile of Raw

counts, labels = gsf.get_gem_data(agem, annotation_variables=['treatment', 'genotype', 'time'], count_variable='uq_raw_counts')
mapper = umap.UMAP(densmap=True, random_state=50, metric='manhattan').fit(counts.values)
fig, axes = plt.subplots(1, 3, figsize=(21, 7))
umap.plot.points(mapper, labels=labels['treatment'], background='black', ax=axes[0], color_key_cmap='Set1');
umap.plot.points(mapper, labels=labels['genotype'], background='black', ax=axes[1], color_key_cmap='Set2');
umap.plot.points(mapper, labels=labels['time'], background='black', ax=axes[2], color_key_cmap='plasma');
<AxesSubplot:>
../../_images/05-Dimensional_Reductions_15_1.png

Upper Quantile of TPM

counts, labels = gsf.get_gem_data(agem, annotation_variables=['treatment', 'genotype', 'time'], count_variable='uq_tpm_counts')
mapper = umap.UMAP(densmap=True, random_state=50, metric='manhattan').fit(counts.values)
fig, axes = plt.subplots(1, 3, figsize=(21, 7))
umap.plot.points(mapper, labels=labels['treatment'], background='black', ax=axes[0], color_key_cmap='Set1');
umap.plot.points(mapper, labels=labels['genotype'], background='black', ax=axes[1], color_key_cmap='Set2');
umap.plot.points(mapper, labels=labels['time'], background='black', ax=axes[2], color_key_cmap='plasma');
<AxesSubplot:>
../../_images/05-Dimensional_Reductions_17_1.png
# counts, labels = gsf.get_gem_data(agem, annotation_variables=['treatment', 'genotype', 'time'], count_variable='TPM_counts')
# mapper = umap.UMAP(densmap=True, random_state=66, metric='manhattan').fit(counts.values)
# fig, axes = plt.subplots(1, 2, figsize=(10, 5), dpi=300)
# umap.plot.points(mapper, labels=labels['treatment'], background='black', ax=axes[0], color_key_cmap='Set1');
# umap.plot.points(mapper, labels=labels['genotype'], background='black', ax=axes[1], color_key_cmap='Set2');
# umap.plot.points(mapper, labels=labels['time'], background='black', ax=axes[2], color_key_cmap='plasma');
# treatments = labels['treatment'].to_series().unique()
# treatment_colors = hv.plotting.util.process_cmap('tab10', ncolors=len(treatments), categorical=True)
# treatment_cmap = {g: c for g, c in zip(treatments, treatment_colors)}
# treatment_cmap = {
#     'CONTROL': '#1f78b4',
#     'HEAT': '#e31a1c',
#     'RECOV_HEAT': '#fb9a99',
#     'DROUGHT': '#6a3d9a',
#     'RECOV_DROUGHT': '#cab2d6',
# }
# # treatment_cmap

# times = labels['time'].to_series().unique()
# time_colors = hv.plotting.util.process_cmap('viridis', ncolors=len(times), categorical=False)
# time_cmap = {g: c for g, c in zip(times, time_colors)}

# plt.rcParams.update({'font.size': 10, 'font.family': 'serif'})
# # counts, labels = gsf.get_gem_data(agem, annotation_variables=['treatment', 'genotype', 'time'], count_variable='TPM_counts',
# #                                   count_transform=lambda c: np.log2(c + 0.25))
# # mapper = umap.UMAP(random_state=42, metric='manhattan', densmap=False, n_neighbors=50).fit(counts.values)


# fig_inches = 3.5
# fig_dpi = 300
# fig_pixels = int(fig_inches * fig_dpi)

# fig, axes = plt.subplots(figsize=(fig_inches, fig_inches), dpi=fig_dpi)



# # colors = labels['treatment'].to_series().map(treatment_cmap).values
# # colors = labels['time'].to_series().map(time_cmap).values
# colors = labels['genotype'].to_series().map(genotype_cmap).values


# axes.scatter(mapper.embedding_[:, 0], mapper.embedding_[:, 1], s=10, marker='.', linewidths=0, c=colors);


# axes.xaxis.set_visible(False)
# axes.yaxis.set_visible(False)

# for spine in axes.spines.values():
#     spine.set_edgecolor('grey')
#     spine.set_linewidth(0.5)
    
# fig, axes = plt.subplots(dpi=fig_dpi, figsize=(3, 0.75))
# axes.axis('off')
# handles = [mpatches.Patch(color=c, label=k) for k, c in genotype_cmap.items()]
# axes.legend(handles=handles, ncol=2);