Source code for cytomulate.emulation.cell_type

# Math computation
import numpy as np

# Statistical models
from sklearn.mixture import GaussianMixture

# Typing
from typing import Union

# Superclass
from cytomulate.cell_type_general import GeneralCellType


[docs] class EmulationCellType(GeneralCellType): def __init__(self, label: Union[str, int], cell_id: int, n_markers: int) -> None: """Initialize the EmulationCellType object Parameters ---------- label: str or int The label (name) for the cell type cell_id: int The id number assigned to the cell type n_markers: int Number of markers used in the experiment """ super().__init__(label, cell_id, n_markers)
[docs] def fit(self, data: np.ndarray, max_components: int, min_components: int, covariance_types: Union[list, tuple]) -> None: """Fit cell type models using the data provided The model selection is done using BIC Parameters ---------- data: np.ndarray The expression matrix corresponding to the target cell type max_components: int The maximum number of components to be included in the GMM min_components: int The minimum number of components to be included in the GMM covariance_types: list or tuple A list of strings specifying the type of covariances to be considered during the model fitting """ # We first get the number of data points n = data.shape[0] # If the number of component the greater than the number of points # we will change them to the number of points min_components = np.min([min_components, n]) max_components = np.min([max_components, n]) self.cell_mean = np.mean(data, axis=0) self.cell_covariance = np.cov(data, rowvar=False) for m in range(self.n_markers): self.zero_probabilities[m] = np.mean(data[:, m] < 0.0001) # We use BIC (the smaller the better) to perform model selection smallest_bic = np.Inf current_bic = 0 best_gm = None for n_components in range(min_components, max_components + 1): for cv_type in covariance_types: gm = GaussianMixture(n_components=n_components, covariance_type=cv_type).fit(data) current_bic = gm.bic(data) if current_bic < smallest_bic: smallest_bic = current_bic best_gm = gm self.model = best_gm # The EM algorithm will not give 1 so we normalize it if self.model.n_components == 1: self.model.weights_[0] = 1.