Source code for cytomulate.creation.cytof_data

# Progress bar
from tqdm import tqdm

# Math
import numpy as np
from scipy.stats import truncnorm

# Classes to use
from cytomulate.creation.cell_type import CreationCellType
from cytomulate.creation.cell_graph import CreationCellGraph

# Superclass
from cytomulate.cytof_data_general import GeneralCytofData

# Typing
from typing import Optional, Callable, Union


class CreationCytofData(GeneralCytofData):
    def __init__(self,
                 n_batches: int = 1,
                 n_types: int = 10,
                 n_markers: int = 20,
                 n_trees: int = 2,
                 background_noise_model: Optional[Union[Callable, dict]] = None) -> None:
        """Entry point for the Creation Mode of Cytomulate.

        The constructor fixes the key dimensions of the simulation: the
        number of batches, cell types, protein markers, and trees. The
        number of cells is not set here; it is chosen later, at the
        sampling step.

        Parameters
        ----------
        n_batches: int
            Number of batches to be simulated. All the other parameters
            apply to every batch.
        n_types: int
            Number of cell types to be simulated.
        n_markers: int
            Number of protein markers (columns) to be simulated.
        n_trees: int
            Number of trees in the cell graph. Each tree represents the
            relationship between cell types. Trees are always used, but
            adding trajectories in a complex simulation remains optional.
        background_noise_model: Callable or dict
            The function used to generate background noise. It should take
            exactly one input: size. When batches use different noise
            models, pass a dictionary with batch numbers as keys and
            functions as values.
        """
        super().__init__(n_batches, background_noise_model)

        self.n_markers = n_markers
        self.cell_graph = CreationCellGraph()

        # Create one cell type per label. Labels are the integers
        # 0..n_types-1 and ids are handed out in the same order.
        for cell_id, c_type in enumerate(np.arange(n_types)):
            self.cell_type_labels_to_ids[c_type] = cell_id
            self.cell_type_ids_to_labels[cell_id] = c_type
            self.cell_types[c_type] = CreationCellType(label=c_type,
                                                       cell_id=cell_id,
                                                       n_markers=n_markers)

        # Marker patterns and expressions are simulated from the cell graph,
        # so the graph has to be initialized before anything else.
        self.cell_graph.initialize_graph(self.cell_types, n_trees)

    def initialize_cell_types(self,
                              L: int = 4,
                              scale: float = 0.5,
                              n_components: int = 1,
                              variance_mode: float = 0.01) -> None:
        """Initialize the model of every cell type.

        A Gaussian Mixture Model is generated for each cell type at this
        stage, according to the parameters given.

        Parameters
        ----------
        L: int
            Number of expression levels. The levels differentiate cell
            types whose expressions for the same marker may differ. At
            least 2 is recommended, but not too many.
        scale: float
            Scale used when generating the means of the expression levels,
            which are drawn from a truncated normal distribution on the
            positive reals. ``scale`` is the standard deviation of that
            distribution: the larger it is, the more spread out the
            expression levels are, and vice versa.
        n_components: int
            Number of components in a GMM.
        variance_mode: float
            The mode of the variance of the inverse Wishart distribution.
        """
        # Every draw is positive (truncated at 0), so the cumulative sums
        # are ordered from the lowest level to the highest.
        # The high levels are drawn first, then the low ones.
        high_levels = np.cumsum(
            truncnorm.rvs(a=0, b=np.inf, loc=0, scale=scale, size=L)
        )
        # The low levels use a tenth of the scale and always start at 0.
        low_levels = np.append(
            0,
            np.cumsum(truncnorm.rvs(a=0, b=np.inf, loc=0, scale=scale/10, size=L-1)),
        )

        # Cell types are processed in the order given by the serialized graph.
        for c_type in tqdm(self.cell_graph.serialized_graph):
            cell_type = self.cell_types[c_type]
            cell_type.generate_marker_expression_patterns(self.cell_types,
                                                          self.cell_graph.graph)
            cell_type.generate_marker_expressions(self.cell_types,
                                                  self.cell_graph.graph,
                                                  high_levels,
                                                  low_levels)
            cell_type.generate_model(n_components, variance_mode)

    def generate_cell_graph(self, **kwargs) -> None:
        """Generate cell differentiation paths.

        Part of the cellular trajectory simulation used in complex
        simulations. The differentiation paths generated here are used
        later, at the sampling stage.

        Parameters
        ----------
        kwargs:
            Parameters used for trajectory generation; forwarded unchanged
            to the cell graph's ``generate_trajectories``.
        """
        self.cell_graph.generate_trajectories(self.cell_types, **kwargs)