Running ONTraC on a MERFISH dataset

Download the data

Warning: The MERFISH dataset is quite large, so running ONTraC on it with CPU only will take a long time.

Download merfish_dataset.csv from Zenodo

Running ONTraC

The commands below are written for Bash. If your default shell is not Bash, please adapt them accordingly.

ONTraC will run on CPU if CUDA is not available.

conda activate ONTraC
# The log is redirected into log/, so make sure that directory exists first;
# otherwise the shell refuses the redirect before ONTraC even starts.
mkdir -p log
# --meta-input is the merfish_dataset.csv file downloaded in the previous step.
# The --NN-dir / --GNN-dir / --NT-dir values are the output directories that
# the visualization code later reads the results from.
ONTraC --meta-input merfish_dataset.csv \
    --NN-dir merfish_NN \
    --GNN-dir merfish_GNN \
    --NT-dir merfish_NT \
    --device cuda --epochs 1000 --batch-size 10 -s 42 --lr 0.03 \
    --hidden-feats 4 -k 6 --modularity-loss-weight 0.3 \
    --regularization-loss-weight 0.1 --purity-loss-weight 300 \
    --beta 0.03 > log/merfish.log

Results visualization

Please see the Visualizations tutorials for details.

Loading results

from optparse import Values

from ONTraC.analysis.data import AnaData

# AnaData is constructed from an options object; build one by hand instead of
# parsing a command line.
options = Values()
# These paths must match the --NN-dir / --GNN-dir / --NT-dir arguments and the
# log redirect used in the ONTraC command of this tutorial.
options.NN_dir = 'merfish_NN'
options.GNN_dir = 'merfish_GNN'
options.NT_dir = 'merfish_NT'
options.log = 'log/merfish.log'
options.reverse = True  # Set it to False if you don't want to reverse the NT score
options.output = None  # We save the output figures ourselves here
ana_data = AnaData(options)

Plotting preparation

import numpy as np
import pandas as pd

import matplotlib as mpl

# Configure fonts before pyplot is imported, as in the original setup.
# Font type 42 embeds TrueType fonts in PDF/PS output so that text stays
# editable in vector-graphics editors.
mpl.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.family': 'Arial',
})

import matplotlib.pyplot as plt
import seaborn as sns

Spatial cell type distribution

# All cell type names known to the loaded results.
cell_types = ana_data.cell_type_codes['Cell_Type'].tolist()
# Cell types that get their own colour; everything else is drawn in gray.
selected_cell_types = [
    "VLMC",
    'L2/3 IT',
    'L4/5 IT',
    'L5 IT',
    "L5 ET",
    "L5/6 NP",
    'L6 IT',
    "L6 CT",
    "L6 IT Car3",
]

rainbow_cmap = mpl.colormaps['gist_rainbow']

# VLMC is pinned to the very start of the colormap.
my_pal = {"VLMC": rainbow_cmap(0)}

# The remaining highlighted types are spread along the colormap in steps of
# 0.7 / (number of selected types - 1). The index starts at 0 and the formula
# uses (index - 1), so the first of them sits one step below 0.3.
n_selected = len(selected_cell_types)
for idx, highlighted in enumerate(selected_cell_types[1:]):
    my_pal[highlighted] = rainbow_cmap(0.3 + 0.7 * (idx - 1) / (n_selected - 1))

# Every cell type that is not highlighted shares the same gray.
for other in cell_types:
    if other not in selected_cell_types:
        my_pal[other] = 'gray'

We only show two samples here

# Samples shown in the figures below.
seleted_samples = ['mouse1_slice91', 'mouse1_slice131']

# Keep only the meta-data rows whose 'Sample' value is one of the chosen samples.
sample_mask = ana_data.meta_data_df['Sample'].isin(seleted_samples)
data_df = ana_data.meta_data_df[sample_mask]

from pathlib import Path

# Draw one scatter plot per selected sample, coloured by cell type, and save
# the combined figure to figures/spatial_cell_type.png.

# Legend/colour order: highlighted cell types first, then all remaining types.
# It does not depend on the sample, so compute it once.
hue_order = selected_cell_types + [x for x in cell_types if x not in selected_cell_types]

with sns.axes_style('white', rc={
        'xtick.bottom': True,
        'ytick.left': True,
}), sns.plotting_context('paper', rc={
        'axes.titlesize': 8,
        'axes.labelsize': 8,
        'xtick.labelsize': 6,
        'ytick.labelsize': 6,
        'legend.fontsize': 6,
}):
    N = len(seleted_samples)
    # squeeze=False always returns a 2-D array of axes, so a single sample
    # needs no special casing.
    fig, axes = plt.subplots(1, N, figsize=(4 * N, 4), squeeze=False)
    for ax, sample in zip(axes[0], seleted_samples):
        sample_df = data_df.loc[data_df['Sample'] == sample]
        sns.scatterplot(data=sample_df,
                        x='x',
                        y='y',
                        hue='Cell_Type',
                        palette=my_pal,
                        hue_order=hue_order,
                        edgecolor=None,
                        s=4,
                        ax=ax)
        # Spatial coordinates carry no meaning for the reader; hide the ticks.
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title(f"{sample}")
        # Place the legend below the panel so it does not cover any cells.
        ax.legend(loc='upper left', bbox_to_anchor=(0, -0.2), ncol=4)

    fig.tight_layout()
    # savefig does not create missing directories, so create figures/ first.
    out_path = Path('figures/spatial_cell_type.png')
    out_path.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(out_path, dpi=300)

Cell-level NT score spatial distribution

from pathlib import Path

# Draw one scatter plot per selected sample, coloured by the cell-level NT
# score, and save the combined figure to figures/cell_level_NT_score.png.
N = len(seleted_samples)
# squeeze=False always returns a 2-D array of axes, so a single sample needs
# no special casing.
fig, axes = plt.subplots(1, N, figsize=(3.5 * N, 3), squeeze=False)
for ax, sample in zip(axes[0], seleted_samples):
    sample_df = data_df.loc[data_df['Sample'] == sample]
    # Attach each cell's NT score by index.
    sample_df = sample_df.join(ana_data.NT_score['Cell_NTScore'])
    # The colour is 1 - score, i.e. the direction of the NT score is flipped.
    # To plot the score without flipping it, pass c=sample_df['Cell_NTScore'].
    # NOTE(review): the loading step sets options.reverse = True; if AnaData
    # already applies that reversal to NT_score, this flips it back - confirm.
    scatter = ax.scatter(sample_df['x'],
                         sample_df['y'],
                         c=1 - sample_df['Cell_NTScore'],
                         cmap='rainbow',
                         vmin=0,
                         vmax=1,
                         s=1)
    ax.set_xticks([])
    ax.set_yticks([])
    # Attach the colorbar to this panel explicitly instead of relying on
    # pyplot's implicit choice of axes.
    fig.colorbar(scatter, ax=ax)
    ax.set_title(f"{sample} cell-level NT score")

fig.tight_layout()
# savefig does not create missing directories, so create figures/ first.
out_path = Path('figures/cell_level_NT_score.png')
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path, dpi=300)