Computing Metacells - Projection Process

In this vignette we will demonstrate how to compute metacells starting with brand-new data, while heavily relying on a previously computed metacell atlas for similar data. This is very similar to the one-pass process, except that we have an "oracle" (the atlas) that helps us with the initial decisions (and with the type annotations). However, this oracle is not perfect, so it does not replace the iterative process, where we repeatedly revise our decisions; rather, it allows us to greatly reduce the number of iterations by starting from a much better initial point.

Here in the projection vignette we'll just show how to compute the first iteration to start this process. As time goes by, we hope to establish a library of atlases for various data sets, so that this becomes the most common procedure for analyzing new data.

We will use the final results of the one-pass process as our atlas. To remove doubt, this atlas and the results presented here are not suitable for use in any serious analysis. This is an example of the process, nothing more.

1. Setup

We'll start with importing the python libraries we'll be using and set up some global configuration. Just importing takes a few seconds, mainly because Python's importing of C++ extensions with a large number of functions is inefficient.

Then, configure the packages we use. Feel free to tweak as necessary.

Even though the process here is truly "one-pass", realistically it should be followed by additional iterations. We'll therefore save files under output/projection/preliminary to drive the point home - in fact, we'll need output/projection/corrected as well below.

2. Reading the data

Our input here is the "clean" data. That is, unlike in the one-pass and iterative process vignettes, here we assume we already excluded all the bad genes and cells. Naturally you will have to go through the steps described there to obtain the clean data, but there's no value in repeating them here in this vignette.

Note that the atlas isn't much help in cleaning your data. The thresholds used for the atlas do not apply to the new data set. The list of excluded genes might apply, but the atlas by definition does not include its own excluded genes. It is convenient to look at the documentation published with the atlas for the list of genes that were excluded and use it as a basis. It is good practice to include this list in the atlas documentation; for that matter, it is good practice to have atlas documentation in the first place. But even so, this isn't automated.

We also need to read our atlas. We'll just load the results of the one-pass process here, with the understanding that this is just an example. This means you have to run the one-pass vignette before you can run this one. Note that we only need to load the metacells data. We do not need the per-cell data to use an atlas, and typically it is not published as part of the atlas as it is very large and is not used in the atlas-based analysis.

Note that the new data set should use the "same" gene names as the atlas. This isn't a problem if both data sets were sequenced using the same (or "compatible") technologies. Note that it is acceptable to have some genes only in the atlas, or only in the new data set; what is important is that if the "same" gene appears in both, it has the same name.

Alas, while there definitely are common conventions for gene names, there are no strong standards for gene names - some (important!) genes have multiple names used in different systems. That's true even for well-studied organisms such as mice and men. If you are working with less studied, exotic organisms such as, say, jellyfish or fungi, the situation is even worse. We expect that as atlases accumulate this will become less of a problem as "de-facto" standards emerge. For now, you are on your own here. In this vignette we have the names already aligned.
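As a concrete (if trivial) sanity check, you can intersect the two name lists before projecting, and eyeball whatever is atlas-only or query-only. The gene names below are made up for illustration:

```python
# Hypothetical gene-name sanity check before projecting onto an atlas.
# These gene names are made up for illustration only.
atlas_genes = {"Cd8a", "Foxp3", "Mki67", "Hbb-bs"}
query_genes = {"Cd8a", "Foxp3", "Mki67", "Gata1"}

shared = atlas_genes & query_genes      # usable by the projection
atlas_only = atlas_genes - query_genes  # ignored for this query
query_only = query_genes - atlas_genes  # can't be projected at all

print(sorted(shared))      # the genes the projection can actually use
print(sorted(atlas_only))  # worth checking these aren't renamed genes
print(sorted(query_only))  # likewise
```

If a gene shows up as "atlas-only" and a suspiciously similar one as "query-only", that's a hint the two data sets use different names for the same gene.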

3. Compute the metacells

3.1 Decisions

Even though we have the clean cells data, we can't compute metacells for it before making a few more decisions.

3.1.1 Lateral genes

A crucial decision when running metacells is the list of genes that are lateral, that is, genes that should not be used to group cells together. The poster children for this are the cell-cycle genes. These genes are strongly expressed, and any clustering algorithm will therefore prefer to group together cells in the same cell-cycle state, at the expense of mixing up other (reasonably close) cell states, which are what we are actually interested in. Note that lateral genes are still used in deviant cell detection; that is, each lateral gene should still have a consistent expression level in all the cells of each metacell.

Since we have an atlas computed for similar data, we can leverage the work that went into computing it by simply starting with the list used for creating this atlas. Here in the projection vignette we'll use it as-is. Realistically, there's no guarantee this list would be perfect for our new data, and in theory it may contain genes that we may wish to keep, so it is a good idea to review it, and refine it in following iterations. Still, this is much easier than the start-from-scratch procedure described in the iterative process vignette, and the number of follow-up iterations "should" be much lower.

3.1.2 Noisy genes

The same considerations apply to noisy genes. We'll just start with the list from the atlas. Here in the projection vignette we'll use it as-is. Realistically it may be revised in follow-up iterations as usual.

3.1.3 Parallelization

Finally, we need to decide on how much parallelization to use. This is a purely technical decision - it does not affect the results, just the performance.

The more parallel piles we use, the faster the computation will be (up to the number of physical processors, but that is handled automatically for us).

However, having more parallel piles means using more memory. If we run out of memory, we'll need to reduce the number of parallel piles. You can track the memory usage by running top or htop during the computation.

We provide a guesstimator for the maximal number of parallel piles that will fit in memory. This is by no means perfect, but it is a starting point.
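As a back-of-the-envelope illustration of the tradeoff (this is not the package's guesstimator; the function name and the memory numbers below are made up):

```python
import os

def estimate_max_parallel_piles(total_ram_gb, gb_per_pile):
    """Illustrative only: cap the number of parallel piles by both
    available memory and the number of processors on the machine."""
    by_memory = int(total_ram_gb // gb_per_pile)
    by_cpus = os.cpu_count() or 1
    return max(1, min(by_memory, by_cpus))

# E.g., 64GB of RAM at a (made-up) ~4GB per pile allows at most 16
# parallel piles - fewer if the machine has fewer processors.
max_piles = estimate_max_parallel_piles(64, 4)
```

The real guesstimator measures the actual data rather than taking a fixed GB-per-pile constant, but the shape of the calculation is the same: memory bounds parallelism, and processors bound useful parallelism.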

3.2 Computation

We are ready to actually group the cells into metacells.

3.2.1 Hyper-parameters

The metacells pipeline has a lot of hyper-parameters you can tweak. The defaults were chosen so that scRNA-seq data, especially 10x data, should work "well" out of the box. You should read the documentation and have a good understanding of the effect of any parameter you may want to tweak, keeping in mind the synergy between some of the parameters.

If we had to call out one hyper-parameter you might wish to tweak, it would be the target_metacell_size. This specifies the "ideal" number of cells in each metacell. The algorithm works hard to keep the actual metacell size close to this value - in particular, metacells larger than twice this size will be split, and metacells which are much smaller than this size will be merged, or dissolved (become outliers).

By default this value is set to 96. Setting it to a smaller value will create more metacells, which may allow capturing more subtle differences between cell states (e.g. along a gradient); this, however, would come at the cost of making less robust estimations of each gene's expression level in each metacell. This might be a worthwhile tradeoff if your cells are of higher quality.
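The split/merge policy described above is simple enough to paraphrase in code. Note this is only a paraphrase of the stated behavior: the "much smaller" threshold below is a made-up placeholder, not the package's actual parameter:

```python
TARGET_METACELL_SIZE = 96  # the default mentioned above

def size_action(n_cells, target=TARGET_METACELL_SIZE, min_fraction=0.25):
    """Paraphrase of the policy described in the text: split metacells
    larger than twice the target; merge or dissolve metacells that are
    much smaller than it (min_fraction is a made-up placeholder)."""
    if n_cells > 2 * target:
        return "split"
    if n_cells < min_fraction * target:
        return "merge-or-dissolve"
    return "keep"
```

So with the default target of 96, a 200-cell metacell would be split, while a 10-cell one would be merged into a neighbor or dissolved into outliers.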

One technical "hyper-parameter" you must specify is the random_seed. A non-zero value will generate reproducible results. A zero value will generate non-reproducible results and will be slightly faster. We strongly urge you to use a non-zero value, as reproducible results are much easier to deal with.

3.2.2 Assigning cells to metacells

This is the core of the method. It can take a while. The dataset used in this example is trivial - it contains ~50K cells. This takes ~2 minutes to compute on our hefty (48 HT cores, 0.5TB RAM) server.

3.2.3 Collecting the metacells

The above merely computed a metacell name and index for each cell ("Outliers" and negative for outlier cells). We still need to collect all the cells of each metacell, to create a new AnnData where each profile is a metacell. Note that in this new metacells data, we no longer have UMIs per gene; instead, for each gene, we have an estimate of the fraction of its UMIs out of the total UMIs. Since AnnData can only hold a single 2D dataset, the result must be a separate object (with each "observation" being a metacell), so we copy all the per-gene annotations from the cells dataset to the result.
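To make the UMIs-to-fractions step concrete, here is a tiny self-contained NumPy sketch with synthetic counts (the real collection step is more careful, but the arithmetic per metacell is this):

```python
import numpy as np

# Synthetic UMI counts: 5 cells x 3 genes, plus a metacell index per cell
# (-1 marks an outlier cell, as described above).
umis = np.array([[10, 0, 5],
                 [8, 1, 6],
                 [0, 20, 1],
                 [1, 18, 0],
                 [9, 2, 4]])
metacell_of_cell = np.array([0, 0, 1, 1, -1])

# For each metacell, sum the UMIs of its cells per gene, then convert the
# totals to fractions of the metacell's total UMIs. Outliers are skipped.
n_metacells = metacell_of_cell.max() + 1
fractions = np.zeros((n_metacells, umis.shape[1]))
for mc_index in range(n_metacells):
    totals = umis[metacell_of_cell == mc_index].sum(axis=0)
    fractions[mc_index] = totals / totals.sum()

# Each row of `fractions` sums to 1; e.g. metacell 0 has fractions
# [18/30, 1/30, 11/30] for the three genes.
```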

If the input data contains per-cell annotations of interest, then here would be a good place to convey that data to the metacells object. Our example data here doesn't contain any annotations of interest so we'll skip this part.

3.3 Computing for MCView

So we have our metacells data. This is pretty opaque in and of itself. MCView is our interactive tool for exploring the data, and for assigning type labels to the metacells.

However, in order to use MCView, we first need to compute all sorts of quality control data for MCView to display. This again may take a while (but much less than computing the metacells above).

Here's a preview of the 2D UMAP view of the data (without any color annotations, as we do not have type annotations yet):

4. Projection

Normally, we would now use MCView to manually add type annotations to the metacells, review the gene lists, and in general continue with the iterative process. However, we have an atlas with type annotations, which has much more data, and can be used to give us immediate automatic insights about our data, including initial type annotations. Again, this does not excuse us from performing additional iterations to review and refine the analysis, but it significantly reduces our effort.

4.1 Computing the projection

Computing the projection requires no decisions from us - that's the whole point. The one required option is whether we want to use the reproducible (slightly slower) implementation or we are OK with a non-reproducible (slightly faster) implementation. We strongly recommend using the reproducible implementation as that makes life much easier when managing the analysis process.

(Actually, we told a lie: there is one important decision to make, described below. For now we'll pretend there isn't.)

The return value is a sparse weights matrix - for each metacell of our data, it gives its "best" representation as a weighted average of a few atlas metacells.

In addition to the weights, this also sets various annotations in the data. One of these is the projected_type, which answers: "if you forced me to pick a type annotation for each new data metacell, what would it be?". Note this does not necessarily mean this is the "right" type of the new data metacell. You should definitely review this (e.g. using MCView). For example, the new data metacell may be a doublet. Sometimes the projection algorithm will detect this, and indicate it by providing a non-empty projected_secondary_type for the metacell. Sometimes, the new data metacell may be of a new cell type that does not exist in the atlas. The projection algorithm will try to detect this, and indicate it by setting the similar mask to False for that metacell. And, of course, the projection algorithm is fallible. So you should not blindly accept the results.
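In spirit, the weights and the projected type relate like this (toy numbers; the actual algorithm chooses the weights per metacell, and decides types and secondary types much more carefully):

```python
import numpy as np

# Toy atlas: 3 atlas metacells x 4 genes (fractions), each with a type.
atlas_fractions = np.array([[0.50, 0.30, 0.10, 0.10],
                            [0.10, 0.10, 0.50, 0.30],
                            [0.25, 0.25, 0.25, 0.25]])
atlas_types = np.array(["T-cell", "B-cell", "Monocyte"])

# Sparse-in-spirit weights of 1 query metacell over the atlas metacells.
weights = np.array([[0.7, 0.3, 0.0]])

# The projected image is the weighted average of atlas metacells...
projected_image = weights @ atlas_fractions

# ...and a naive "projected type" is the type carrying the most weight.
projected_type = atlas_types[np.argmax(weights, axis=1)]
```

Here the projected image is `[0.38, 0.24, 0.22, 0.16]` and the naive projected type is "T-cell"; a large secondary weight on a different type is exactly the kind of signal that would suggest a doublet.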

At any rate, here's the same UMAP as above, but this time together with the projected_type annotations:

4.2 Saving the data

We'll save the results on disk for future reference, and to allow them to be imported into MCView.

This time, we also have the weights to consider. We'll want MCView to read them as well, so it will be able to provide us with nice features for dealing with the projection results.

5. Importing into MCView

This vignette focuses on the metacells package, not MCView, which deserves a full vignette of its own. Still, here are some basics about how to use it.

5.1 Installing MCView

MCView is written in R but is not a standard CRAN package. To install it, you should type (in R):

install.packages("remotes")
remotes::install_github("tanaylab/MCView")

5.2 Importing the data set

Since MCView is written in R, it isn't easy to run it inside a Python notebook. Instead we've provided a small R script that will load the data we saved above, and import it into an MCView application. Here is the code of the script for reference:

library("MCView")

args <- commandArgs(trailingOnly=TRUE)

if (length(args) == 6) {
    prefix <- args[1]
    name <- args[2]
    title <- args[3]
    type <- args[4]
    atlas_prefix <- args[5]
    atlas_name <- args[6]
    import_dataset(
        sprintf("../mcview/%s", name),                           # The directory to create
        sprintf("%s-%s", prefix, gsub("/", "-", name)),          # The name of the dataset
        sprintf("../output/%s/%s.metacells.h5ad", name, prefix), # The metacells h5ad file
        metadata_fields = "all",                                 # Ask to import all the metadata
        title = title,                                           # A title for the GUI
        cell_type_field = type,                                  # The name of the type field
        cell_type_colors_file = "../captured/type_colors.csv",   # The type colors CSV file
        projection_weights_file = sprintf("../output/%s/%s.atlas_weights.csv", name, prefix),
        atlas_project = sprintf("../mcview/%s", atlas_name),
        atlas_dataset = sprintf("%s-%s", atlas_prefix, gsub("/", "-", atlas_name))
    )

} else if (length(args) == 4) {
    prefix <- args[1]
    name <- args[2]
    title <- args[3]
    type <- args[4]
    import_dataset(
        sprintf("../mcview/%s", name),                           # The directory to create
        sprintf("%s-%s", prefix, gsub("/", "-", name)),          # The name of the dataset
        sprintf("../output/%s/%s.metacells.h5ad", name, prefix), # The metacells h5ad file
        metadata_fields = "all",                                 # Ask to import all the metadata
        title = title,                                           # A title for the GUI
        cell_type_field = type,                                  # The name of the type field
        cell_type_colors_file = "../captured/type_colors.csv"    # The type colors CSV file
    )

} else if (length(args) == 3) {
    prefix <- args[1]
    name <- args[2]
    title <- args[3]
    import_dataset(
        sprintf("../mcview/%s", name),                           # The directory to create
        sprintf("%s-%s", prefix, gsub("/", "-", name)),          # The name of the dataset
        sprintf("../output/%s/%s.metacells.h5ad", name, prefix), # The metacells h5ad file
        metadata_fields = "all",                                 # Ask to import all the metadata
        title = title                                            # A title for the GUI
    )

} else {
    stopifnot(FALSE)
}

We'll just run it as an external process using Rscript:
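For reference, invoking the script from Python might look like the sketch below. The script file name and the argument values are illustrative (they follow the four-argument branch shown above), and the actual invocation is left commented out:

```python
import subprocess

# Arguments matching the 4-argument branch of the R script above.
# All of these values are illustrative - adjust to your own layout.
command = [
    "Rscript", "import_dataset.r",  # illustrative script path
    "preliminary",                  # prefix
    "projection/preliminary",       # name
    "Projection Preliminary",       # title for the GUI
    "projected_type",               # the type field to color by
]

# subprocess.run(command, check=True)  # uncomment to actually run it
```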

5.3 Running MCView

The simplest way to run MCView is, in R, to type:

library(MCView)
run_app("mcview/projection/preliminary") # The path we imported the dataset into

Since MCView is a shiny application, you have many other options, which are outside the scope of this vignette.

6. Technology-Corrected Projection

We'll not go over all the MCView features for dealing with projection (a topic worthy of a vignette of its own). We'll note, however, that there's a new Projection QC tab which shows quality controls of the projection. We'll take a look at "Projected correlation per metacell". This shows us the distribution (across metacells) of the correlation between the new data set metacell and the projected image of the metacell on the atlas (that is, the weighted average of some atlas metacells). This is computed only for the genes the projection algorithm thinks it actually managed to project "well", so we expect it to be high. In fact, we expect it to be very high. Specifically, when we see a median of ~0.87 in the QC tab, we worry. That's an R^2 of ~0.75, that is, ~25% of the variance of the genes in the new data set is not captured by the projection on the atlas (we zoomed to the range 0.8 - 1.0):
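The arithmetic behind that worry is just squaring the median correlation:

```python
# Median projected correlation reported by the QC tab.
median_correlation = 0.87

# Squaring a correlation gives the fraction of variance explained.
r_squared = median_correlation ** 2  # ~0.757
unexplained = 1.0 - r_squared        # ~0.243, i.e. ~25% of the variance
```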

Uncorrected Correlation

So why the poor result? Well, it turns out that the new data set we are projecting was collected using 10X v3, while the atlas was computed from 10X v2 data. In general, any scRNA-seq technology has different sensitivity for different mRNA molecules, and this is equivalent to applying some technology-dependent multiplicative factor to the number of UMIs of each gene. For this reason, our projection mechanism will optionally try to estimate a multiplicative correction factor for each (relevant) gene, such that this will (ideally) cancel out the difference between the technologies.
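The idea of a per-gene multiplicative factor can be sketched with synthetic numbers. This is only the concept: the package's actual estimator restricts itself to relevant, significantly-expressed genes and is applied inside the projection, while this toy version just takes the ratio of means:

```python
import numpy as np

# Synthetic mean fractions of 4 genes in the atlas vs. the new data set.
# Pretend the new technology "sees" gene 2 about twice as strongly.
atlas_mean = np.array([0.40, 0.30, 0.10, 0.20])
query_mean = np.array([0.38, 0.28, 0.21, 0.13])

# A naive multiplicative correction: scale each query gene so that its
# mean matches the atlas. Gene 2 gets a factor of ~0.48 (scaled down).
correction = atlas_mean / query_mean

# By construction, applying the correction cancels the technology bias.
corrected = query_mean * correction
```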

This is not the default as applying it to data which is not expected to need such corrections may overfit the new data on the atlas. This overfitting is mild; for example, nucleus-only scRNA-seq data gives very non-linear biases for some mRNA molecules when compared to whole-cell data, so using this correction will not force-fit the data sets together (that is, you will still get lower R^2 and a lot of not-similar metacells). Still, overfitting is overfitting, and should never be the default. Use with care, YMMV, etc.

At any rate, let's re-run the projection with this correction. We'll start by renaming the data to distinguish it from the previous run. Note we do not need to re-compute the metacells:

And recompute all the projection annotations, this time with project_correction:

Here's the UMAP with these (improved) type annotations:

And we'll save the data again for MCView:

If we look at the projected QC tab now, we see much better results (again we zoomed to the range 0.8 - 1.0):

Corrected Correlation

Now the median is ~0.95, which is an R^2 of ~0.90; that is, only ~10% of the variance (of the genes we did manage to fit) is not explained by the projection. Much better.

7. Follow-up iterations

In theory, that's it. We now have our final results, both as h5ad files and as an MCView application, to use as we see fit.

In practice, not so fast.

We really need to manually review the gene lists and the type annotations. Most likely, we'll at least modify the type annotations. We'll need to consider the similar and projected_secondary_type annotations, possibly marking some metacells as doublets. Note that the projection algorithm doesn't merely give you these high-level indicators. It allows you to look at the differential expression between the new data and the atlas, even for a specific metacell, to allow making judgment calls about whether the new data metacell is "really" of some atlas type, or possibly requires a new cell type label which does not exist in the atlas.

Also, if we discover we need to modify the lateral or noisy genes lists, we'll have to recompute the new data metacells. If we do so, we may wish to project the improved metacells on the atlas for reference, but apply the previous iteration types, since they would be of higher quality.

In short, what we have done so far is effectively to merely skip to the N-th iteration of the iterative process, saving us a lot of effort, but we are by no means done.

To remove doubt, the results presented here are not suitable for use in any serious analysis. This is an example of the process, nothing more.