Example: Classification of cell populations

A typical application of scRNA-Seq is the classification of cell populations in a heterogeneous tissue. In this example, ~200 Peripheral Blood Mononuclear Cells (PBMCs) are classified using feature selection, dimensionality reduction, and unsupervised clustering.

import matplotlib.pyplot as plt
from singlet.dataset import Dataset

# Load the counts table registered under the name 'example_PBMC'
ds = Dataset(counts_table='example_PBMC')

# Normalization: counts per million, then a log transform (both in place)
ds.counts.normalize(method='counts_per_million', inplace=True)
ds.counts.log(inplace=True)

# Feature selection: keep expressed features first, then the overdispersed
# ones, 20 per stratum (both in place)
ds.feature_selection.expressed(n_samples=3, exp_min=1, inplace=True)
ds.feature_selection.overdispersed_strata(n_features_per_stratum=20, inplace=True)

# Dimensionality reduction: two-dimensional t-SNE embedding
embedding = ds.dimensionality.tsne(n_dims=2, theta=0.5, perplexity=0.8)

# Swap the counts for the (transposed) embedding so that the clustering
# below works on the reduced values
ds.counts = embedding.T

# Clustering: store one label column per algorithm in the samplesheet
ds.samplesheet['dbscan'] = ds.cluster.dbscan(axis='samples', eps=5)
ds.samplesheet['kmeans'] = ds.cluster.kmeans(axis='samples', n_clusters=7)

# Plot the t-SNE embedding twice, side by side, colored by each clustering
fig, axs = plt.subplots(
    figsize=(8, 4), nrows=1, ncols=2, sharex=True, sharey=True)

# (samplesheet column to color by, panel title), in left-to-right order
panels = (
    ('dbscan', 'DBSCAN'),
    ('kmeans', 'K-means, 7 clusters'),
)
for ax, (column, title) in zip(axs, panels):
    ds.plot.scatter_reduced_samples(embedding, color_by=column, ax=ax, zorder=10)
    ax.set_title(title)

plt.tight_layout()
plt.show()

You should get a figure similar to the following one:

PBMC clusters