Source code for cytomulate.creation.cell_type

# Math computation
import numpy as np
from copy import deepcopy

# Statistical models
from scipy.stats import truncnorm
from scipy.stats import invwishart
from sklearn.mixture import GaussianMixture

# Typing
from typing import Union
from cytomulate.creation.cell_graph import CreationCellGraph

# Superclass
from cytomulate.cell_type_general import GeneralCellType


[docs] class CreationCellType(GeneralCellType): def __init__(self, label: Union[str, int], cell_id: int, n_markers: int) -> None: """Initialize the GeneralCellType object Parameters ---------- label: str or int The label (name) for the cell type cell_id: int The id number assigned to the cell type n_markers: int Number of markers used in the experiment """ super().__init__(label, cell_id, n_markers) self.highly_expressed_markers = None self.lowly_expressed_markers = None # Gating markers are the markers that will not change during differentiation self.gating_markers = None # The probability of a marker being highly expressed self.p = 0.4
[docs] def generate_marker_expression_patterns(self, cell_types: dict, cell_graph: CreationCellGraph) -> None: """Generate marker patterns for the cell types Parameters ---------- cell_types: dict A dictionary of CreationCellType objects cell_graph: CreationCellGraph A cell graph object """ predecessor_cell_labels = list(cell_graph.predecessors(self.label)) if len(predecessor_cell_labels) == 0: # This means this is a root indicator = np.random.binomial(n=1, p=self.p, size=self.n_markers) self.highly_expressed_markers = self.markers[np.where(indicator == 1)] self.lowly_expressed_markers = self.markers[np.where(indicator == 0)] self.gating_markers = np.random.choice(self.markers, size=2, replace=False) elif len(predecessor_cell_labels) == 1: # First inherit from the parent lists self.highly_expressed_markers = deepcopy(cell_types[predecessor_cell_labels[0]].highly_expressed_markers) self.lowly_expressed_markers = deepcopy(cell_types[predecessor_cell_labels[0]].lowly_expressed_markers) self.gating_markers = deepcopy(cell_types[predecessor_cell_labels[0]].gating_markers) # Then for the markers that can be changed # we randomly choose a subset to flip their signs changeable_markers = np.array(list(set(self.markers) - set(self.gating_markers))) indicator = np.random.binomial(n=1, p=self.p/2, size=len(changeable_markers)) markers_to_change = changeable_markers[np.where(indicator == 1)] gating_counter = 0 for m in markers_to_change: if m in self.highly_expressed_markers: self.highly_expressed_markers = np.delete(self.highly_expressed_markers, np.where(self.highly_expressed_markers == m)) self.lowly_expressed_markers = np.append(self.lowly_expressed_markers, m) else: self.lowly_expressed_markers = np.delete(self.lowly_expressed_markers, np.where(self.lowly_expressed_markers == m)) self.highly_expressed_markers = np.append(self.highly_expressed_markers, m) if gating_counter <= 1: self.gating_markers = np.append(self.gating_markers, m) gating_counter += 1 else: raise ValueError('Cell graph is not a tree or collection of non-overlapping trees')
[docs] def generate_marker_expressions(self, cell_types: dict, cell_graph: CreationCellGraph, high_expressions: np.ndarray, low_expressions: np.ndarray) -> None: """Generate the actual expressions Parameters ---------- cell_types: dict A dictionary of CreationCellType objects cell_graph: CreationCellGraph A cell graph object high_expressions: np.ndarray An array of high expression levels low_expressions: np.ndarray An array of low expression levels """ predecessor_cell_labels = list(cell_graph.predecessors(self.label)) # We first generate the means of the cell expression menas if len(predecessor_cell_labels) == 0: # This means this is a root for m in self.highly_expressed_markers: self.cell_mean[m] = np.random.choice(high_expressions, size=1)[0] for m in self.lowly_expressed_markers: self.cell_mean[m] = np.random.choice(low_expressions, size=1)[0] elif len(predecessor_cell_labels) == 1: predecessor_gating_markers = cell_types[predecessor_cell_labels[0]].gating_markers for m in self.markers: if m in predecessor_gating_markers: self.cell_mean[m] = cell_types[predecessor_cell_labels[0]].cell_mean[m] else: if m in self.highly_expressed_markers: self.cell_mean[m] = np.random.choice(high_expressions, size=1)[0] if m in self.lowly_expressed_markers: self.cell_mean[m] = np.random.choice(low_expressions, size=1)[0] else: raise ValueError('Cell graph is not a tree or collection of non-overlapping trees') # Now we use the means to generate the actual expressions for m in self.markers: self.cell_mean[m] = truncnorm.rvs(a=0, b=np.inf, loc=self.cell_mean[m], scale=0.01, size=1)[0]
[docs] def generate_model(self, n_components: int, variance_mode: float = 0.01) -> None: """Randomly generate models for cell types Parameters ---------- n_components: int Number of components in a GMM variance_mode: float The mode of the variance of the inverse wishart distribution """ X = np.random.multivariate_normal(mean=self.cell_mean, cov=np.eye(self.n_markers), size=np.max([self.n_markers**2, n_components])) self.model = GaussianMixture(n_components=n_components).fit(X) for n in range(n_components): self.model.means_[n, :] = self.cell_mean base_correlation = invwishart.rvs(df=self.n_markers + 2, scale=np.eye(self.n_markers), size=1).reshape((1, self.n_markers, self.n_markers)) base_var = np.diag(base_correlation[0,:,:]) base_var = np.diag(np.sqrt(1/base_var)) base_correlation = base_var @ base_correlation @ base_var scaled_matrices = [] variance_var = 0.01 shape_par = (variance_mode**2)/variance_var + 2 scale_par = variance_mode * (shape_par - 1) for n in range(n_components): scale_matrix = np.diag(1/np.random.gamma(shape_par, 1/scale_par, size=self.n_markers)) scaled_matrices.append(scale_matrix @ base_correlation[0,:,:] @ scale_matrix) scaled_matrices = np.array(scaled_matrices) self.model.covariances_ = scaled_matrices # self.model.covariances_ = invwishart.rvs(df=self.n_markers + 2, scale=np.eye(self.n_markers) * variance_mode, # size=n_components).reshape((n_components, self.n_markers, self.n_markers)) self.model.weights_ = np.random.dirichlet(np.ones(n_components), size=1).reshape(-1)