Spatially-informed cell type identification

[1]:
import os
import pandas as pd
import numpy as np
import scanpy as sc
import squidpy as sq
import space as sp
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as clr
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib import cm
from matplotlib import colors
from sklearn.metrics import adjusted_rand_score as ARI
%matplotlib inline
/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/torch_geometric/typing.py:47: UserWarning: An issue occurred while importing 'pyg-lib'. Disabling its usage. Stacktrace: /lib/x86_64-linux-gnu/libm.so.6: version `GLIBC_2.29' not found (required by /home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/libpyg.so)
  warnings.warn(f"An issue occurred while importing 'pyg-lib'. "
/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/torch_geometric/typing.py:90: UserWarning: An issue occurred while importing 'torch-spline-conv'. Disabling its usage. Stacktrace: /lib/x86_64-linux-gnu/libm.so.6: version `GLIBC_2.27' not found (required by /home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/torch_spline_conv/_basis_cuda.so)
  warnings.warn(
/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/torch_geometric/typing.py:101: UserWarning: An issue occurred while importing 'torch-sparse'. Disabling its usage. Stacktrace: /lib/x86_64-linux-gnu/libm.so.6: version `GLIBC_2.29' not found (required by /home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/torch_sparse/_metis_cuda.so)
  warnings.warn(f"An issue occurred while importing 'torch-sparse'. "
/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
[ ]:
# Notebook-wide plotting defaults. Matplotlib is configured first, then scanpy.
COLOR = 'black'
plt.rcParams.update({
    'axes.unicode_minus': False,
    'font.family': 'Helvetica',
    'pdf.fonttype': 42,
    # NOTE(review): sc.set_figure_params(dpi=120, ...) further down is also
    # given a dpi, which presumably supersedes this value — confirm which one
    # is intended.
    'figure.dpi': 200,
    # Use a single colour for all text, axis labels and tick marks.
    'text.color': COLOR,
    'axes.labelcolor': COLOR,
    'xtick.color': COLOR,
    'ytick.color': COLOR,
})

# scanpy verbosity levels: errors (0), warnings (1), info (2), hints (3)
sc.settings.verbosity = 0
# Print the versions of scanpy and its main dependencies for provenance.
sc.logging.print_header()
sc.set_figure_params(
    dpi=120,
    dpi_save=300,
    facecolor='w',
    frameon=True,
    figsize=(4, 4),
)
%config InlineBackend.figure_format='retina'
%matplotlib inline
scanpy==1.9.6 anndata==0.10.3 umap==0.5.4 numpy==1.23.4 scipy==1.11.3 pandas==2.1.1 scikit-learn==1.3.2 statsmodels==0.14.0 igraph==0.10.8 pynndescent==0.5.10
[ ]:
# Colour ramp for feature plots: light greys at the low end, then a
# yellow -> orange -> dark red gradient. The last colour is listed twice.
featurePlotCols = [
    "lightgrey", "whitesmoke",
    "#ffffcc", "#ffeda0", "#fed976", "#feb24c",
    "#fd8d3c", "#fc4e2a", "#e31a1c", "#bd0026",
    "#800026", "#800026",
]
# Interpolate the listed colours into a 512-level continuous colormap.
mymap2 = colors.LinearSegmentedColormap.from_list(
    'my_colormap',
    featurePlotCols,
    N=512,
)
[ ]:
# Display the version of the `space` package (imported as `sp`) used for this run.
sp.__version__
'0.6.9'
[ ]:
# Cap the number of threads PyTorch may use for CPU operations at 10.
import torch
torch.set_num_threads(10)

Plot Functions

[ ]:
def heatmapPlot(data,
                cmap=None,
                fmt = 'd',
                minimum=0,
                figsize=(9.5,7.8),):
    """Draw an annotated seaborn heatmap of a 2-D table.

    A new figure is created, ``data`` is drawn with every row and column
    labelled, and each cell whose value is strictly greater than ``minimum``
    is annotated with its formatted value.

    Parameters
    ----------
    data : pandas.DataFrame
        Table to plot; accessed through ``.iloc``, ``.shape`` and
        ``.max().max()``.
    cmap : matplotlib colormap, optional
        Colormap for the heatmap. Defaults to a light seaborn cubehelix
        palette.
    fmt : str
        Format spec passed to ``format()`` for the cell annotations. The
        default ``'d'`` only works for integer values; pass e.g. ``'0.2f'``
        for fractions.
    minimum : float
        Lower bound of the colour scale (``vmin``) and the threshold a value
        must exceed to be annotated.
    figsize : tuple of float
        Figure size in inches, ``(width, height)``.

    Returns
    -------
    matplotlib.axes.Axes
        The axes holding the heatmap, so callers can customise it further.

    Notes
    -----
    ``sns.set(font_scale=1.5)`` changes the global seaborn/matplotlib theme,
    so it also affects figures drawn after this call.
    """
    import itertools
    from matplotlib import pyplot as plt
    import seaborn as sns

    plt.figure(figsize=figsize)

    if cmap is None:
        cmap = sns.cubehelix_palette(light=1, as_cmap=True, rot=.3)

    sns.set(font_scale=1.5)
    ax = sns.heatmap(data,
                     xticklabels=1,
                     yticklabels=1,
                     cmap=cmap, vmin=minimum,
                     cbar_kws={"shrink": 1})

    # Cells darker than this get white text so the label stays readable.
    thresh = data.max().max() / 1.5
    n_rows, n_cols = data.shape[0], data.shape[1]
    for i, j in itertools.product(range(n_rows), range(n_cols)):
        value = data.iloc[i, j]
        if value > minimum:
            # Heatmap cell (i, j) spans x in [j, j+1] and y in [i, i+1];
            # the offsets place the label near the middle of the cell.
            ax.text(j+0.5, i+0.62, format(value, fmt),
                    ha='center',
                    va='bottom',
                    fontsize=8,
                    color="white" if value > thresh else "black")
    return ax

Load Data

[9]:
# Read the AnnData (.h5ad) file for MERFISH slice 153-1.
# NOTE(review): this is an absolute, machine-specific path — point it at your
# own copy of the file before running.
adata = sc.read('/home/zhangjinsong/SPAGAT/SPACE_pkg/MERFISH_slice153-1/slice153-1_annot_refined_sp_adata.h5ad')
[10]:
# Display the AnnData summary: dimensions and the keys stored in obs/var/uns/obsm/obsp.
adata
[10]:
AnnData object with n_obs × n_vars = 7409 × 254
    obs: 'fovID', 'fov_x', 'fov_y', 'volume', 'center_x', 'center_y', 'slice_id', 'sample_id', 'BICCN_cluster_label', 'BICCN_subclass_label', 'BICCN_class_label', 'BICCN_ontology_term_id', 'assay_ontology_term_id', 'disease_ontology_term_id', 'tissue_ontology_term_id', 'cell_type_ontology_term_id', 'ethnicity_ontology_term_id', 'development_stage_ontology_term_id', 'sex_ontology_term_id', 'is_primary_data', 'organism_ontology_term_id', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'ethnicity', 'development_stage', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'mouse', 'slice', 'dataset', 'sample', 'subclass', 'subclass_preprocessed', 'leiden_SPACE', 'louvain_SPACE', 'leiden_c8', 'leiden_c5', 'leiden_c0', 'leiden_c0-10-15', 'leiden_c7', 'leiden_c4', 'leiden_c1', 'leiden_c15', 'leiden_c27', 'leiden_c15-27', 'leiden_c23', 'leiden_inh', 'leiden_c0-2', 'celltype_SPACE', 'leiden_c16', 'celltype_SPACE_oldname'
    var: 'feature_biotype', 'feature_is_filtered', 'feature_name', 'feature_reference', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'
    uns: 'Exp', 'SPACE', 'X_normalization', 'celltype_SPACE_colors', 'dendrogram_celltype_SPACE', 'dendrogram_leiden_SPACE', 'dendrogram_leiden_c0-2', 'dendrogram_leiden_c16', 'dendrogram_leiden_c23', 'dendrogram_leiden_inh', 'layer_descriptions', 'leiden', 'leiden_SPACE_colors', 'leiden_c0-10-15_colors', 'leiden_c0-2_colors', 'leiden_c0_colors', 'leiden_c15-27_colors', 'leiden_c15_colors', 'leiden_c1_colors', 'leiden_c27_colors', 'leiden_c4_colors', 'leiden_c5_colors', 'leiden_c7_colors', 'leiden_c8_colors', 'louvain', 'metadata', 'publication_doi', 'schema_version', 'subclass_preprocessed_colors', 'title', 'umap'
    obsm: 'UmapExp', 'UmapSPACE', 'X_pca', 'X_umap', 'latent', 'spatial'
    obsp: 'Exp_connectivities', 'Exp_distances', 'SPACE_connectivities', 'SPACE_distances'

Run SPACE

[9]:
# Run SPACE on `adata` and rebind `adata` to the object it returns.
# NOTE(review): the exact meaning of alpha / GPU / patience / epoch is defined
# by sp.SPACE and is not visible in this notebook. GPU=8 presumably selects a
# device index — confirm it exists on your machine. outdir is presumably where
# SPACE writes its results — confirm.
adata=sp.SPACE(adata=adata, outdir='MERFISH_slice153-1',alpha=0.05, GPU=8,patience=50, epoch=3000)
Construct Graph
Average links: 22.05
Load SPACE model
SPACE_Graph(
  (encoder): GAT_Encoder(
    (hidden_layer1): GATv2Conv(254, 128, heads=6)
    (hidden_layer2): GATv2Conv(768, 128, heads=6)
    (conv_z): GATv2Conv(768, 10, heads=6)
  )
  (decoder): InnerProductDecoder()
  (decoder_x): Sequential(
    (0): Linear(in_features=10, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=128, out_features=254, bias=True)
    (5): ReLU()
  )
)
Train SPACE model
  0%|                                                                                                                                                                                        | 0/3001 [00:00<?, ?it/s]/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/space/train.py:142: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.
  df_loss=pd.concat([df_loss,pd.Series({'feature_loss':loss2,'graph_loss':loss1,'epoch':epoch}).to_frame().T],ignore_index=True)
 72%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                               | 2173/3001 [12:31<04:46,  2.89it/s]
====> Epoch: 2173, Loss: 17.9199
EarlyStopping: run 2173 iteration

../_images/tutorial_spatial_celltype_slice153_12_4.png
[10]:
# Leiden clustering at resolution 1 on the neighbour graph stored under the
# 'SPACE' key; labels are written to adata.obs['leiden_SPACE'].
sc.tl.leiden(adata,resolution=1,neighbors_key='SPACE',key_added='leiden_SPACE')
[11]:
# Keep a copy of the current UMAP coordinates under a dedicated key, so the
# plots below can refer to it as basis='UmapSPACE'.
# NOTE(review): assumes X_umap currently holds the SPACE-based embedding — confirm.
adata.obsm['UmapSPACE'] = adata.obsm['X_umap'].copy()
[11]:
# Plot the 'UmapSPACE' embedding twice: coloured by the SPACE Leiden clusters
# and by the original (preprocessed) subclass labels, with labels on the data.
sc.set_figure_params(
    dpi=120,
    dpi_save=300,
    facecolor='w',
    frameon=True,
    figsize=(4, 4.5),
)
sc.pl.embedding(
    adata,
    basis='UmapSPACE',
    color=['leiden_SPACE', 'subclass_preprocessed'],
    legend_loc='on data',
    legend_fontsize=8,
    legend_fontoutline=1,
)
/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
../_images/tutorial_spatial_celltype_slice153_15_1.png

Annotation

[13]:
# Side-by-side UMAPs: original subclass labels vs. SPACE-refined cell types.
sc.set_figure_params(
    dpi=120,
    facecolor='w',
    frameon=True,
    figsize=(4, 4.5),
)
sc.pl.embedding(
    adata,
    basis='UmapSPACE',
    color=['subclass_preprocessed', 'celltype_SPACE'],
    ncols=2,
    legend_fontsize=8,
    legend_loc='on data',
    legend_fontoutline=1,
)
/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
../_images/tutorial_spatial_celltype_slice153_17_1.png

Confusion matrix against the original cell types

[14]:
# Confusion matrix between the SPACE-refined cell types and the original
# subclass labels (scanpy normalises it by default, so entries are fractions).
cmtx = sc.metrics.confusion_matrix(
    "celltype_SPACE",
    "subclass_preprocessed",
    adata.obs,
)

# Display order of the original subclass labels (heatmap rows).
y_ordered = [
    'Astrocytes', 'Microglia', 'Oligodendrocytes', 'OPC',
    'L2/3 IT', 'L4/5 IT', 'L5 IT', 'L6 IT', 'L6 IT Car3',
    'L5 PT', 'L6 CT', 'L6b', 'L5/6 NP',
    'Pvalb', 'Sst', 'Sst Chodl', 'Vip', 'Lamp5', 'Sncg',
    'Endothelial', 'Pericytes', 'PVM', 'SMC', 'VLMC',
]
# Display order of the SPACE-refined cell types (heatmap columns).
x_ordered = [
    'c1-Astrocytes', 'c2-Astrocytes', 'c3-Astrocytes', 'Microglia',
    'c1-Oligodendrocytes', 'c2-Oligodendrocytes', 'c3-Oligodendrocytes', 'OPC',
    'c1-L2/3 IT', 'c2-L2/3 IT',
    'c1-L4/5 IT', 'c2-L4/5 IT', 'c3-L4/5 IT',
    'L5 IT', 'c1-L5/6 IT', 'c2-L5/6 IT', 'L6 IT', 'L6 IT Car3',
    'L5 PT', 'L6 CT', 'L6b', 'L5/6 NP',
    'c1-Pvalb', 'c2-Pvalb', 'Sst', 'Vip', 'Lamp5',
    'c1-Endothelial', 'c2-Endothelial',
    'Pericytes', 'PVM', 'SMC', 'VLMC',
]

# Transpose so rows are original labels, then reorder both axes for display.
# Only cells above 0.05 are annotated.
cmtx_ordered = cmtx.T.loc[y_ordered, x_ordered]
heatmapPlot(cmtx_ordered, fmt='0.2f', minimum=0.05, figsize=(18, 12))
plt.ylabel('Original cell type')
plt.xlabel('SPACE refined cell type')
plt.tight_layout()
# To save the figure, uncomment:
# plt.savefig('./MERFISH_slice153-1/confusion_matrix_bwt_space_and_ori.pdf', dpi=300, bbox_inches='tight')
../_images/tutorial_spatial_celltype_slice153_19_0.png
[15]:
# Adjusted Rand index between the original subclass labels and the
# SPACE-refined cell types.
ARI(
    adata.obs['subclass_preprocessed'].values,
    adata.obs['celltype_SPACE'].values,
)
[15]:
0.597624867956563

Sankey plot

[11]:
# Count cells for every (original subclass, SPACE cell type) pair.
# observed=False is passed explicitly: it keeps the current behaviour (one row
# per category combination, including empty ones) and silences the pandas
# FutureWarning about the changing default.
scm = (
    adata.obs
    .groupby(['subclass_preprocessed', 'celltype_SPACE'], observed=False)
    .size()
    .reset_index()
)
scm.columns = ['source','target','value']
/tmp/ipykernel_29999/2280780624.py:1: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  scm=adata.obs.groupby(['subclass_preprocessed','celltype_SPACE']).size().reset_index()
[12]:
# Show the colours stored for 'celltype_SPACE'; they are reused below as the
# Sankey node colours for the SPACE cell types.
adata.uns['celltype_SPACE_colors']
[12]:
array(['#ffff00', '#1ce6ff', '#ff34ff', '#ff4a46', '#008941', '#006fa6',
       '#a30059', '#ffdbe5', '#7a4900', '#0000a6', '#63ffac', '#b79762',
       '#004d43', '#8fb0ff', '#997d87', '#5a0007', '#809693', '#6a3a4c',
       '#1b4400', '#4fc601', '#3b5dff', '#4a3b53', '#ff2f80', '#61615a',
       '#ba0900', '#6b7900', '#00c2a0', '#ffaa92', '#ff90c9', '#b903aa',
       '#d16100', '#ddefff', '#000035', '#7b4f4b', '#a1c299'],
      dtype=object)
[13]:
import plotly
import plotly.graph_objects as go

# Count cells for every (original subclass, SPACE cell type) pair.
# observed=False is explicit so the result does not change when the pandas
# default flips, and so the FutureWarning is silenced.
scm = (
    adata.obs
    .groupby(['subclass_preprocessed', 'celltype_SPACE'], observed=False)
    .size()
    .reset_index()
)
scm.columns = ['source','target','value']

# Sankey nodes are numbered with all source categories first, followed by all
# target categories. Each link's endpoints are taken from the category codes
# of its own row, so sources/targets/values always have the same length and
# order regardless of which category pairs the groupby returns.
source_labels = list(scm.source.cat.categories)
target_labels = list(scm.target.cat.categories)
n_sources = len(source_labels)
sources = scm.source.cat.codes.astype(int).tolist()
targets = (scm.target.cat.codes.astype(int) + n_sources).tolist()
values = scm.value.values

# Links inherit the colour of their source node, made semi-transparent.
opacity=0.3
source_color_dict = dict(zip(list(adata.obs.subclass_preprocessed.cat.categories), list(adata.uns['subclass_preprocessed_colors'])))
link_colors = []
for source_name in scm.source.values:
    r,g,b,a = colors.to_rgba(source_color_dict[source_name], alpha=opacity)
    link_colors.append('rgba({},{},{},{})'.format(r*255.0,g*255.0,b*255.0,a))

fig = go.Figure(data=[go.Sankey(
    valueformat = ".0f",
    node = dict(
      pad = 5,
      thickness = 20,
      line = dict(color = "black", width = 0),
      label = source_labels + target_labels,
      # NOTE(review): assumes each stored colour list has one entry per
      # category, in category order — confirm.
      color = list(adata.uns['subclass_preprocessed_colors']) + list(adata.uns['celltype_SPACE_colors'])
    ),
    link = dict(
      source = sources,  # node index of the original subclass
      target = targets,  # node index of the SPACE cell type
      value = values,
      color = link_colors
  ))])

fig.update_layout(title_text="Orignal cell-types vs. SPACE refined cell-types (Slice153)",
                  font_color="black",
                  font_size=10,
                  autosize=False,
                  width=600,
                  height=800,
)
fig.show()
# Make sure scanpy's figure directory exists before exporting the PDF.
os.makedirs(sc.settings.figdir, exist_ok=True)
fig.write_image(os.path.join(sc.settings.figdir,'celltype_sankey.pdf'))
/tmp/ipykernel_29999/2561098115.py:5: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  scm=adata.obs.groupby(['subclass_preprocessed','celltype_SPACE']).size().reset_index()

UMAP and spatial plots

[14]:
# UMAP coloured by the SPACE-refined cell types; `save` also writes the figure
# to a file using the given suffix.
sc.set_figure_params(
    dpi=120,
    dpi_save=300,
    facecolor='w',
    frameon=True,
    figsize=(4, 4.5),
)
sc.pl.embedding(
    adata,
    basis='UmapSPACE',
    color=['celltype_SPACE'],
    legend_fontsize=8,
    save='_celltype_SPACE.pdf',
)
/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning:

The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning

/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

../_images/tutorial_spatial_celltype_slice153_26_1.png
[15]:
# UMAP coloured by the original (preprocessed) subclass labels; `save` also
# writes the figure to a file using the given suffix.
sc.set_figure_params(
    dpi=120,
    dpi_save=300,
    facecolor='w',
    frameon=True,
    figsize=(4, 4.5),
)
sc.pl.embedding(
    adata,
    basis='UmapSPACE',
    color=['subclass_preprocessed'],
    legend_fontsize=8,
    save='_subclass.pdf',
)
/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning:

The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning

/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning:

No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored

../_images/tutorial_spatial_celltype_slice153_27_1.png

Astrocytes subtypes

[16]:
# Spatial map highlighting only the three astrocyte subtypes; all other cells
# are drawn in 'whitesmoke' and left out of the legend.
astro_subtypes = ['c1-Astrocytes', 'c2-Astrocytes', 'c3-Astrocytes']
sc.pl.spatial(
    adata,
    spot_size=30,
    color='celltype_SPACE',
    groups=astro_subtypes,
    na_color='whitesmoke',
    na_in_legend=False,
    save='_spatial_Astro.pdf',
)
/home/zhangjinsong/anaconda3/envs/space-srt/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning:

The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning

../_images/tutorial_spatial_celltype_slice153_29_1.png

Oligodendrocytes subtypes

[ ]:
# Spatial map highlighting only the three oligodendrocyte subtypes.
oligo_subtypes = ['c1-Oligodendrocytes', 'c2-Oligodendrocytes', 'c3-Oligodendrocytes']
sc.pl.spatial(
    adata,
    spot_size=30,
    color='celltype_SPACE',
    groups=oligo_subtypes,
    save='_spatial_Oligo_.pdf',
)
/home/zhangjinsong/anaconda3/lib/python3.8/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning:

Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead.

WARNING: saving figure to file MERFISH_slice153-1/show_spatial_Oligo_.pdf
../_images/tutorial_spatial_celltype_slice153_31_2.png
[ ]: