diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index dbeb387706431aee3182104bd321fda1d0e9d015..926a2fa8a94ff1054606d893e4f7377287318786 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,6 +1,7 @@
 stages:
   - checks
+  - build_package
   - publish
 
 
 black:
@@ -20,12 +21,26 @@ tests:
     - pip install -r requirements.txt
     - pytest
 
+build_package:
+  stage: build_package
+  image: "registry.forgemia.inra.fr/jbleger/docker-image-pandas-torch-sphinx:master"
+  before_script:
+    - pip install build
+  script:
+    - rm -rf dist/
+    - python -m build
+  artifacts:
+    untracked: true
+    expire_in: 1 week
+  tags:
+    - docker
+
 publish_package:
   stage: publish
   image: "python:3.9"
-  script:
+  before_script:
     - pip install twine
-    - python setup.py bdist_wheel
+  script:
     - TWINE_PASSWORD=${pypln_token} TWINE_USERNAME=__token__ python -m twine upload dist/*
   tags:
     - docker
diff --git a/README.md b/README.md
index e98669f64e980c17448f4a166c732fda3813ece4..212b1ce9f8233386939238fb68a45a3a78aa72d6 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@ pip install pyPLNmodels
 The package comes with an ecological data set to present the functionality
 ```
 import pyPLNmodels
-from pyPLNmodels.models import PLNPCA, PLN
+from pyPLNmodels.models import PlnPCAcollection, Pln
 from pyPLNmodels.oaks import load_oaks
 oaks = load_oaks()
 ```
@@ -36,16 +36,16 @@ oaks = load_oaks()
 
 ### Unpenalized Poisson lognormal model (aka PLN)
 
 ```
-pln = PLN("counts ~ 1 + tree + dist2ground + orientation ", data = oaks, take_log_offsets = True)
+pln = Pln.from_formula("counts ~ 1 + tree + dist2ground + orientation ", data = oaks, take_log_offsets = True)
 pln.fit()
 print(pln)
 ```
 
-### Rank Constrained Poisson lognormal for Poisson Principal Component Analysis (aka PLNPCA)
+### Rank Constrained Poisson lognormal for Poisson Principal Component Analysis (aka PlnPCAcollection)
 
 ```
-pca = PLNPCA("counts ~ 1 + tree + dist2ground + orientation ", data = oaks, take_log_offsets = True, ranks = [3,4,5])
+pca = PlnPCAcollection.from_formula("counts ~ 1 + tree + dist2ground + orientation ", data = oaks, take_log_offsets = True, ranks = [3,4,5])
 pca.fit()
 print(pca)
 ```
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 71513b20f7628bf8882b7a15cc63dd205e25516d..8bfed1a86657b1c3f331f59d8645f9b2739795cc 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -12,6 +12,7 @@ Welcome to pyPLNmodels's documentation!
 
    ./module.rst
 
+
 Indices and tables working once again
 =====================================
 
diff --git a/docs/source/module.rst b/docs/source/module.rst
index 7c22fce646290f47f5a58f3e77d18f2dc513dda4..de0c03d0b4179547d31d4fb0f39713131c76c596 100644
--- a/docs/source/module.rst
+++ b/docs/source/module.rst
@@ -1,15 +1,23 @@
 API documentation
 =================
 
-.. autoclass:: pyPLNmodels.PLN
+.. autoclass:: pyPLNmodels.PlnPCAcollection
+    :members:
+    :show-inheritance:
+    :special-members: __init__
+    :undoc-members:
+
+
+.. autoclass:: pyPLNmodels.PlnPCA
     :members:
     :inherited-members:
     :special-members: __init__
     :undoc-members:
     :show-inheritance:
 
-.. autoclass:: pyPLNmodels.PLNPCA
+..
autoclass:: pyPLNmodels.Pln :members: - :show-inheritance: + :inherited-members: :special-members: __init__ :undoc-members: + :show-inheritance: diff --git a/pyPLNmodels/__init__.py b/pyPLNmodels/__init__.py index d895c7cd30d9f39e8069cb6793b48a4ddb2dac79..e619625eec72fb550b3a16cb98ba29e774fe8e6e 100644 --- a/pyPLNmodels/__init__.py +++ b/pyPLNmodels/__init__.py @@ -1,22 +1,24 @@ -from .models import PLNPCA, PLN # pylint:disable=[C0114] +from .models import PlnPCAcollection, Pln, PlnPCA # pylint:disable=[C0114] +from .oaks import load_oaks from .elbos import profiled_elbo_pln, elbo_plnpca, elbo_pln from ._utils import ( get_simulated_count_data, get_real_count_data, load_model, - load_plnpca, + load_plnpcacollection, load_pln, ) __all__ = ( - "PLNPCA", - "PLN", + "PlnPCAcollection", + "Pln", + "PlnPCA", "profiled_elbo_pln", "elbo_plnpca", "elbo_pln", "get_simulated_count_data", "get_real_count_data", "load_model", - "load_plnpca", + "load_plnpcacollection", "load_pln", ) diff --git a/pyPLNmodels/_utils.py b/pyPLNmodels/_utils.py index 4b977e0a830375ea43417b979bc5bdbe126f070a..0206b3b4a0c87cf4f3d447121ac7a3e1e2ed083f 100644 --- a/pyPLNmodels/_utils.py +++ b/pyPLNmodels/_utils.py @@ -10,6 +10,7 @@ from matplotlib.patches import Ellipse import matplotlib.pyplot as plt from patsy import dmatrices from typing import Optional, Dict, Any, Union +import pkg_resources torch.set_default_dtype(torch.float64) @@ -93,7 +94,7 @@ def _init_covariance( counts: torch.Tensor, covariates: torch.Tensor, coef: torch.Tensor ) -> torch.Tensor: """ - Initialization for covariance for the PLN model. Take the log of counts + Initialization for covariance for the Pln model. Take the log of counts (careful when counts=0), remove the covariates effects X@coef and then do as a MLE for Gaussians samples. @@ -124,7 +125,7 @@ def _init_components( counts: torch.Tensor, covariates: torch.Tensor, coef: torch.Tensor, rank: int ) -> torch.Tensor: """ - Initialization for components for the PLN model. Get a first guess for covariance + Initialization for components for the Pln model. Get a first guess for covariance that is easier to estimate and then takes the rank largest eigenvectors to get components. Parameters @@ -235,7 +236,7 @@ def sample_pln( seed: int = None, ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ - Sample from the Poisson Log-Normal (PLN) model. + Sample from the Poisson Log-Normal (Pln) model. Parameters ---------- @@ -273,7 +274,7 @@ def sample_pln( parameter = torch.exp(offsets + gaussian) if _coef_inflation is not None: - print("ZIPLN is sampled") + print("ZIPln is sampled") zero_inflated_mean = torch.matmul(covariates, _coef_inflation) ksi = torch.bernoulli(1 / (1 + torch.exp(-zero_inflated_mean))) else: @@ -368,7 +369,7 @@ def log_posterior( coef: torch.Tensor, ) -> torch.Tensor: """ - Compute the log posterior of the Poisson Log-Normal (PLN) model. + Compute the log posterior of the Poisson Log-Normal (Pln) model. Parameters ---------- @@ -646,32 +647,6 @@ def _format_model_param( return counts, covariates, offsets -def _remove_useless_intercepts(covariates: torch.Tensor) -> torch.Tensor: - """ - Remove useless intercepts from covariates. - - Parameters - ---------- - covariates : torch.Tensor, shape (n, d) - Covariate data. - - Returns - ------- - torch.Tensor - Covariate data with useless intercepts removed. 
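The renamed public API composes end to end. A minimal sketch, assuming the bundled oaks data set and a shortened formula; every name used here (`Pln`, `PlnPCAcollection`, `load_oaks`, `from_formula`, `best_model`) appears elsewhere in this diff:

```
from pyPLNmodels import Pln, PlnPCAcollection, load_oaks

oaks = load_oaks()

# Full-covariance model, fitted via the profiled ELBO.
pln = Pln.from_formula("counts ~ 1 + tree", data=oaks, take_log_offsets=True)
pln.fit()

# One PlnPCA model per requested rank; select by information criterion.
pca = PlnPCAcollection.from_formula(
    "counts ~ 1 + tree", data=oaks, take_log_offsets=True, ranks=[3, 4, 5]
)
pca.fit()
best = pca.best_model(criterion="BIC")
print(best.rank)
```

Note that `load_oaks` is imported in `__init__.py` but not listed in `__all__`, so it is reachable as a plain attribute import while star-imports will not pick it up.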
- """ - covariates = _format_data(covariates) - if covariates.shape[1] < 2: - return covariates - first_column = covariates[:, 0] - second_column = covariates[:, 1] - diff = first_column - second_column - if torch.sum(torch.abs(diff - diff[0])) == 0: - print("removing one") - return covariates[:, 1:] - return covariates - - def _check_data_shape( counts: torch.Tensor, covariates: torch.Tensor, offsets: torch.Tensor ) -> None: @@ -902,9 +877,11 @@ def get_real_count_data(n_samples: int = 270, dim: int = 100) -> np.ndarray: f"\nTaking the whole 100 variables. Requested:dim={dim}, returned:100" ) dim = 100 - counts = pd.read_csv("../example_data/real_data/Y_mark.csv").values[ - :n_samples, :dim - ] + counts_stream = pkg_resources.resource_stream(__name__, "data/scRT/Y_mark.csv") + counts = pd.read_csv(counts_stream).values[:n_samples, :dim] + # counts = pd.read_csv("./pyPLNmodels/data/scRT/Y_mark.csv").values[ + # :n_samples, :dim + # ] print(f"Returning dataset of size {counts.shape}") return counts @@ -964,39 +941,39 @@ def load_model(path_of_directory: str) -> Dict[str, Any]: def load_pln(path_of_directory: str) -> Dict[str, Any]: """ - Load PLN models from the given directory. + Load Pln models from the given directory. Parameters ---------- path_of_directory : str - The path to the directory containing the PLN models. + The path to the directory containing the Pln models. Returns ------- Dict[str, Any] - A dictionary containing the loaded PLN models. + A dictionary containing the loaded Pln models. """ return load_model(path_of_directory) -def load_plnpca( +def load_plnpcacollection( path_of_directory: str, ranks: Optional[list[int]] = None ) -> Dict[int, Dict[str, Any]]: """ - Load PLNPCA models from the given directory. + Load PlnPCAcollection models from the given directory. Parameters ---------- path_of_directory : str - The path to the directory containing the PLNPCA models. + The path to the directory containing the PlnPCAcollection models. ranks : list[int], optional A list of ranks specifying which models to load. If None, all models in the directory will be loaded. Returns ------- Dict[int, Dict[str, Any]] - A dictionary containing the loaded PLNPCA models, with ranks as keys. + A dictionary containing the loaded PlnPCAcollection models, with ranks as keys. 
Raises ------ @@ -1019,7 +996,7 @@ def load_plnpca( ranks.append(rank) datas = {} for rank in ranks: - datas[rank] = load_model(f"_PLNPCA_rank_{rank}") + datas[rank] = load_model(f"PlnPCA_rank_{rank}") os.chdir(working_dir) return datas diff --git a/example_data/real_data/Y_mark.csv b/pyPLNmodels/data/scRT/Y_mark.csv similarity index 100% rename from example_data/real_data/Y_mark.csv rename to pyPLNmodels/data/scRT/Y_mark.csv diff --git a/example_data/test_data/O_test.csv b/pyPLNmodels/data/test_data/O_test.csv similarity index 100% rename from example_data/test_data/O_test.csv rename to pyPLNmodels/data/test_data/O_test.csv diff --git a/example_data/test_data/Y_test.csv b/pyPLNmodels/data/test_data/Y_test.csv similarity index 100% rename from example_data/test_data/Y_test.csv rename to pyPLNmodels/data/test_data/Y_test.csv diff --git a/example_data/test_data/cov_test.csv b/pyPLNmodels/data/test_data/cov_test.csv similarity index 100% rename from example_data/test_data/cov_test.csv rename to pyPLNmodels/data/test_data/cov_test.csv diff --git a/example_data/test_data/true_parameters/true_Sigma_test.csv b/pyPLNmodels/data/test_data/true_parameters/true_Sigma_test.csv similarity index 100% rename from example_data/test_data/true_parameters/true_Sigma_test.csv rename to pyPLNmodels/data/test_data/true_parameters/true_Sigma_test.csv diff --git a/example_data/test_data/true_parameters/true_beta_test.csv b/pyPLNmodels/data/test_data/true_parameters/true_beta_test.csv similarity index 100% rename from example_data/test_data/true_parameters/true_beta_test.csv rename to pyPLNmodels/data/test_data/true_parameters/true_beta_test.csv diff --git a/pyPLNmodels/elbos.py b/pyPLNmodels/elbos.py index c5f8927cd355ab2c9cb9030266cf4dbce49b4382..68b7324752009030256d0985a35ce77c9631612d 100644 --- a/pyPLNmodels/elbos.py +++ b/pyPLNmodels/elbos.py @@ -16,7 +16,7 @@ def elbo_pln( coef: torch.Tensor, ) -> torch.Tensor: """ - Compute the ELBO (Evidence Lower Bound) for the PLN model. + Compute the ELBO (Evidence Lower Bound) for the Pln model. Parameters: ---------- @@ -75,7 +75,7 @@ def profiled_elbo_pln( latent_var: torch.Tensor, ) -> torch.Tensor: """ - Compute the ELBO (Evidence Lower Bound) for the PLN model with profiled parameters. + Compute the ELBO (Evidence Lower Bound) for the Pln model with profiled parameters. Parameters: ---------- @@ -122,7 +122,7 @@ def elbo_plnpca( coef: torch.Tensor, ) -> torch.Tensor: """ - Compute the ELBO (Evidence Lower Bound) for the PLN model with PCA parametrization. + Compute the ELBO (Evidence Lower Bound) for the Pln model with PCA parametrization. Parameters: ---------- @@ -186,7 +186,7 @@ def elbo_zi_pln( _coef_inflation, dirac, ): - """Compute the ELBO (Evidence LOwer Bound) for the Zero Inflated PLN model. + """Compute the ELBO (Evidence LOwer Bound) for the Zero Inflated Pln model. See the doc for more details on the computation. 
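Given the `PlnPCA_rank_{rank}` layout that `load_plnpcacollection` now expects, a save/load round trip looks roughly like the sketch below. The directory name is illustrative; it is produced by the `path_to_directory` and `directory_name` properties defined later in this diff:

```
from pyPLNmodels import PlnPCAcollection, load_plnpcacollection

pca = PlnPCAcollection.from_formula("counts ~ 1", data=oaks, ranks=[3, 4])
pca.fit()
pca.save()  # one subdirectory per rank: PlnPCA_rank_3, PlnPCA_rank_4

# Reload as {rank: dict of parameters}, reusable as initialization:
inits = load_plnpcacollection("PlnPCAcollection_nbcov_1_dim_114")
warm = PlnPCAcollection.from_formula(
    "counts ~ 1", data=oaks, ranks=[3, 4], dict_of_dict_initialization=inits
)
```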
Args: diff --git a/pyPLNmodels/models.py b/pyPLNmodels/models.py index c3849fb9cce9eeda827cfa6a5f02ba759139c9b4..9997d705cf041fb554a303b7359c9356b0d4303c 100644 --- a/pyPLNmodels/models.py +++ b/pyPLNmodels/models.py @@ -1,10 +1,9 @@ import time from abc import ABC, abstractmethod -import pickle import warnings import os -from functools import singledispatchmethod from collections.abc import Iterable +from typing import Optional, Dict, List, Type, Any import pandas as pd import torch @@ -12,7 +11,6 @@ import numpy as np import seaborn as sns import matplotlib.pyplot as plt from sklearn.decomposition import PCA -from patsy import dmatrices from ._closed_forms import ( @@ -31,9 +29,7 @@ from ._utils import ( _format_model_param, _nice_string_of_dict, _plot_ellipse, - _closest, _check_data_shape, - _check_right_rank, _extract_data_from_formula, _get_dict_initialization, array2tensor, @@ -50,15 +46,7 @@ else: NB_CHARACTERS_FOR_NICE_PLOT = 70 -class _PLN(ABC): - """ - Virtual class for all the PLN models. - - This class must be derivatived. The methods `get_covariance`, `compute_elbo`, - `_random_init_latent_parameters` and `_list_of_parameters_needing_gradient` must - be defined. - """ - +class _Pln(ABC): _WINDOW = 15 n_samples: int dim: int @@ -73,17 +61,31 @@ class _PLN(ABC): def __init__( self, - counts, - covariates=None, - offsets=None, - offsets_formula="logsum", - dict_initialization=None, - take_log_offsets=False, + counts: torch.Tensor, + covariates: Optional[torch.Tensor] = None, + offsets: Optional[torch.Tensor] = None, + offsets_formula: str = "logsum", + dict_initialization: Optional[dict] = None, + take_log_offsets: bool = False, ): """ - Simple initialization method wors fine. - """ + Initializes the _Pln class. + Parameters + ---------- + counts : torch.Tensor + The count data. + covariates : torch.Tensor, optional + The covariate data. Defaults to None. + offsets : torch.Tensor, optional + The offsets data. Defaults to None. + offsets_formula : str, optional + The formula for offsets. Defaults to "logsum". + dict_initialization : dict, optional + The initialization dictionary. Defaults to None. + take_log_offsets : bool, optional + Whether to take the log of offsets. Defaults to False. + """ self._counts, self._covariates, self._offsets = _format_model_param( counts, covariates, offsets, offsets_formula, take_log_offsets ) @@ -98,10 +100,31 @@ class _PLN(ABC): cls, formula: str, data: dict, - offsets_formula="logsum", - dict_initialization=None, - take_log_offsets=False, + offsets_formula: str = "logsum", + dict_initialization: Optional[dict] = None, + take_log_offsets: bool = False, ): + """ + Create a _Pln instance from a formula and data. + + Parameters + ---------- + formula : str + The formula. + data : dict + The data dictionary. + offsets_formula : str, optional + The formula for offsets. Defaults to "logsum". + dict_initialization : dict, optional + The initialization dictionary. Defaults to None. + take_log_offsets : bool, optional + Whether to take the log of offsets. Defaults to False. + + Returns + ------- + _Pln + The initialized _Pln instance. + """ counts, covariates, offsets = _extract_data_from_formula(formula, data) return cls( counts, @@ -112,7 +135,15 @@ class _PLN(ABC): take_log_offsets, ) - def _set_init_parameters(self, dict_initialization): + def _set_init_parameters(self, dict_initialization: dict): + """ + Set initial parameters based on a dictionary. + + Parameters + ---------- + dict_initialization : dict + The initialization dictionary. 
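The two constructors documented here are interchangeable: `from_formula` runs the patsy formula through `_extract_data_from_formula` and forwards the resulting arrays. A minimal sketch with simulated counts:

```
import numpy as np
from pyPLNmodels import Pln

counts = np.random.poisson(2.0, size=(50, 10))

# Direct construction; covariates and offsets default to None.
pln_a = Pln(counts)
# Formula-based construction; "~ 1" yields an intercept-only design matrix.
pln_b = Pln.from_formula("counts ~ 1", data={"counts": counts})
```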
+ """ if "coef" not in dict_initialization.keys(): print("No coef is initialized.") self.coef = None @@ -121,51 +152,110 @@ class _PLN(ABC): setattr(self, key, array) @property - def fitted(self): + def fitted(self) -> bool: + """ + Whether the model is fitted. + + Returns + ------- + bool + True if the model is fitted, False otherwise. + """ return self._fitted @property - def nb_iteration_done(self): + def nb_iteration_done(self) -> int: + """ + The number of iterations done. + + Returns + ------- + int + The number of iterations done. + """ return len(self._plotargs._elbos_list) @property - def n_samples(self): + def n_samples(self) -> int: + """ + The number of samples. + + Returns + ------- + int + The number of samples. + """ return self._counts.shape[0] @property - def dim(self): + def dim(self) -> int: + """ + The dimension. + + Returns + ------- + int + The dimension. + """ return self._counts.shape[1] @property - def nb_cov(self): + def nb_cov(self) -> int: + """ + The number of covariates. + + Returns + ------- + int + The number of covariates. + """ if self.covariates is None: return 0 return self.covariates.shape[1] def _smart_init_coef(self): + """ + Initialize coefficients smartly. + """ self._coef = _init_coef(self._counts, self._covariates, self._offsets) def _random_init_coef(self): + """ + Randomly initialize coefficients. + """ if self.nb_cov == 0: self._coef = None self._coef = torch.randn((self.nb_cov, self.dim), device=DEVICE) @abstractmethod def _random_init_model_parameters(self): - pass - - @abstractmethod - def _smart_init_model_parameters(self): + """ + Abstract method to randomly initialize model parameters. + """ pass @abstractmethod def _random_init_latent_parameters(self): + """ + Abstract method to randomly initialize latent parameters. + """ pass def _smart_init_latent_parameters(self): + """ + Initialize latent parameters smartly. + """ pass - def _init_parameters(self, do_smart_init): + def _init_parameters(self, do_smart_init: bool): + """ + Initialize model parameters. + + Parameters + ---------- + do_smart_init : bool + Whether to perform smart initialization. + """ print("Initialization ...") if do_smart_init: self._smart_init_model_parameters() @@ -176,36 +266,50 @@ class _PLN(ABC): print("Initialization finished") def _put_parameters_to_device(self): + """ + Move parameters to the device. + """ for parameter in self._list_of_parameters_needing_gradient: parameter.requires_grad_(True) @property def _list_of_parameters_needing_gradient(self): """ - A list containing all the parameters that needs to be upgraded via a gradient step. + A list containing all the parameters that need to be upgraded via a gradient step. + + Returns + ------- + List[torch.Tensor] + List of parameters needing gradient. """ + ... def fit( self, - nb_max_iteration=50000, - lr=0.01, - class_optimizer=torch.optim.Rprop, - tol=1e-3, - do_smart_init=True, - verbose=False, + nb_max_iteration: int = 50000, + lr: float = 0.01, + class_optimizer: torch.optim.Optimizer = torch.optim.Rprop, + tol: float = 1e-3, + do_smart_init: bool = True, + verbose: bool = False, ): """ - Main function of the class. Fit a PLN to the data. + Fit the model. + Parameters ---------- - counts : torch.tensor or ndarray or DataFrame. - 2-d count data. - covariates : torch.tensor or ndarray or DataFrame or - None, default = None - If not `None`, the first dimension should equal the first - dimension of `counts`. - offsets : torch.tensor or ndarray or DataFrame or None, default = None - Model offset. 
If not `None`, size should be the same as `counts`. + nb_max_iteration : int, optional + The maximum number of iterations. Defaults to 50000. + lr : float, optional + The learning rate. Defaults to 0.01. + class_optimizer : torch.optim.Optimizer, optional + The optimizer class. Defaults to torch.optim.Rprop. + tol : float, optional + The tolerance for convergence. Defaults to 1e-3. + do_smart_init : bool, optional + Whether to perform smart initialization. Defaults to True. + verbose : bool, optional + Whether to print training progress. Defaults to False. """ self._pring_beginning_message() self._beginning_time = time.time() @@ -217,7 +321,7 @@ class _PLN(ABC): self._put_parameters_to_device() self.optim = class_optimizer(self._list_of_parameters_needing_gradient, lr=lr) stop_condition = False - while self.nb_iteration_done < nb_max_iteration and stop_condition == False: + while self.nb_iteration_done < nb_max_iteration and not stop_condition: loss = self._trainstep() criterion = self._compute_criterion_and_update_plotargs(loss, tol) if abs(criterion) < tol: @@ -229,7 +333,12 @@ class _PLN(ABC): def _trainstep(self): """ - simple docstrings with black errors + Perform a single training step. + + Returns + ------- + torch.Tensor + The loss value. """ self.optim.zero_grad() loss = -self.compute_elbo() @@ -238,7 +347,20 @@ class _PLN(ABC): self._update_closed_forms() return loss - def pca_projected_latent_variables(self, n_components=None): + def pca_projected_latent_variables(self, n_components: Optional[int] = None): + """ + Perform PCA on the latent variables and project them onto a lower-dimensional space. + + Parameters + ---------- + n_components : int, optional + The number of components to keep. If None, all components are kept. Defaults to None. + + Returns + ------- + numpy.ndarray + The projected latent variables. + """ if n_components is None: n_components = self._get_max_components() if n_components > self.dim: @@ -251,9 +373,22 @@ class _PLN(ABC): @property @abstractmethod def latent_variables(self): + """ + Abstract property representing the latent variables. + """ pass - def _print_end_of_fitting_message(self, stop_condition, tol): + def _print_end_of_fitting_message(self, stop_condition: bool, tol: float): + """ + Print the end-of-fitting message. + + Parameters + ---------- + stop_condition : bool + Whether the stop condition was met. + tol : float + The tolerance for convergence. + """ if stop_condition is True: print( f"Tolerance {tol} reached " @@ -268,12 +403,30 @@ class _PLN(ABC): ) def print_stats(self): + """ + Print the training statistics. + """ print("-------UPDATE-------") print("Iteration number: ", self._plotargs.iteration_number) print("Criterion: ", np.round(self._plotargs.criterions[-1], 8)) print("ELBO:", np.round(self._plotargs._elbos_list[-1], 6)) def _compute_criterion_and_update_plotargs(self, loss, tol): + """ + Compute the convergence criterion and update the plot arguments. + + Parameters + ---------- + loss : torch.Tensor + The loss value. + tol : float + The tolerance for convergence. + + Returns + ------- + float + The computed criterion. + """ self._plotargs._elbos_list.append(-loss.item()) self._plotargs.running_times.append(time.time() - self._beginning_time) if self._plotargs.iteration_number > self._WINDOW: @@ -286,6 +439,9 @@ class _PLN(ABC): return tol def _update_closed_forms(self): + """ + Update closed-form expressions. 
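The reworked `fit` signature makes the optimizer and the stopping rule explicit. Reusing the `pln` model from the earlier sketch:

```
import torch

pln.fit(
    nb_max_iteration=10000,
    lr=0.01,
    class_optimizer=torch.optim.Rprop,  # the default; any optimizer class works
    tol=1e-3,          # stop once the windowed ELBO criterion falls below this
    do_smart_init=True,
    verbose=True,
)
proj = pln.pca_projected_latent_variables(n_components=2)  # numpy array, (n, 2)
```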
+ """ pass @abstractmethod @@ -294,24 +450,20 @@ class _PLN(ABC): Compute the Evidence Lower BOund (ELBO) that will be maximized by pytorch. """ + pass def display_covariance(self, ax=None, savefig=False, name_file=""): """ - Display a heatmap of covariance to visualize correlations. - - If covariance is too big (size is > 400), will only display the - first block of size (400,400). + Display the covariance matrix. Parameters ---------- - ax : matplotlib Axes, optional - Axes in which to draw the plot, otherwise use the - currently-active Axes. - savefig: bool, optional - If True the figure will be saved. Default is False. + ax : matplotlib.axes.Axes, optional + The axes to plot on. If None, a new figure will be created. Defaults to None. + savefig : bool, optional + Whether to save the figure. Defaults to False. name_file : str, optional - The name of the file the graphic will be saved to if saved. - Default is an empty string. + The name of the file to save. Defaults to "". """ if self.dim > 400: warnings.warn("Only displaying the first 400 variables.") @@ -321,9 +473,17 @@ class _PLN(ABC): sns.heatmap(self.covariance, ax=ax) if savefig: plt.savefig(name_file + self._NAME) - plt.show() # to avoid displaying a blanck screen + plt.show() # to avoid displaying a blank screen + + def __repr__(self): + """ + Generate the string representation of the object. - def __str__(self): + Returns + ------- + str + The string representation of the object. + """ delimiter = "=" * NB_CHARACTERS_FOR_NICE_PLOT string = f"A multivariate Poisson Lognormal with {self._description} \n" string += f"{delimiter}\n" @@ -335,19 +495,33 @@ class _PLN(ABC): string += f" {self._useful_methods_strings}\n" string += f"* Additional properties for {self._NAME}\n" string += f" {self._additional_properties_string}\n" - string += f"* Additionial methods for {self._NAME}\n" + string += f"* Additional methods for {self._NAME}\n" string += f" {self._additional_methods_string}" return string @property def _additional_methods_string(self): + """ + Abstract property representing the additional methods string. + """ pass @property def _additional_properties_string(self): + """ + Abstract property representing the additional properties string. + """ pass def show(self, axes=None): + """ + Show plots. + + Parameters + ---------- + axes : numpy.ndarray, optional + The axes to plot on. If None, a new figure will be created. Defaults to None. + """ print("Likelihood:", -self.loglike) if self._fitted is False: nb_axes = 1 @@ -365,10 +539,21 @@ class _PLN(ABC): @property def _elbos_list(self): + """ + Property representing the list of ELBO values. + """ return self._plotargs._elbos_list @property def loglike(self): + """ + Property representing the log-likelihood. + + Returns + ------- + float + The log-likelihood. + """ if self._fitted is False: t0 = time.time() self._plotargs._elbos_list.append(self.compute_elbo().item()) @@ -377,22 +562,62 @@ class _PLN(ABC): @property def BIC(self): + """ + Property representing the Bayesian Information Criterion (BIC). + + Returns + ------- + float + The BIC value. + """ return -self.loglike + self.number_of_parameters / 2 * np.log(self.n_samples) @property def AIC(self): + """ + Property representing the Akaike Information Criterion (AIC). + + Returns + ------- + float + The AIC value. + """ return -self.loglike + self.number_of_parameters @property def latent_parameters(self): + """ + Property representing the latent parameters. + + Returns + ------- + dict + The dictionary of latent parameters. 
+ """ return {"latent_var": self.latent_var, "latent_mean": self.latent_mean} @property def model_parameters(self): + """ + Property representing the model parameters. + + Returns + ------- + dict + The dictionary of model parameters. + """ return {"coef": self.coef, "covariance": self.covariance} @property def dict_data(self): + """ + Property representing the data dictionary. + + Returns + ------- + dict + The dictionary of data. + """ return { "counts": self.counts, "covariates": self.covariates, @@ -401,27 +626,80 @@ class _PLN(ABC): @property def _model_in_a_dict(self): + """ + Property representing the model in a dictionary. + + Returns + ------- + dict + The dictionary representing the model. + """ return self.dict_data | self._dict_parameters @property def _dict_parameters(self): + """ + Property representing the dictionary of parameters. + + Returns + ------- + dict + The dictionary of parameters. + """ return self.model_parameters | self.latent_parameters @property def coef(self): + """ + Property representing the coefficients. + + Returns + ------- + torch.Tensor or None + The coefficients or None. + """ return self._cpu_attribute_or_none("_coef") @property def latent_mean(self): + """ + Property representing the latent mean. + + Returns + ------- + torch.Tensor or None + The latent mean or None. + """ return self._cpu_attribute_or_none("_latent_mean") @property def latent_var(self): + """ + Property representing the latent variance. + + Returns + ------- + torch.Tensor or None + The latent variance or None. + """ return self._cpu_attribute_or_none("_latent_var") @latent_mean.setter @array2tensor def latent_mean(self, latent_mean): + """ + Setter for the latent mean property. + + Parameters + ---------- + latent_mean : torch.Tensor + The latent mean. + + Raises + ------ + ValueError + If the shape of the latent mean is incorrect. + """ if latent_mean.shape != (self.n_samples, self.dim): raise ValueError( f"Wrong shape. Expected {self.n_samples, self.dim}, got {latent_mean.shape}" @@ -431,6 +709,19 @@ class _PLN(ABC): @latent_var.setter @array2tensor def latent_var(self, latent_var): + """ + Setter for the latent variance property. + + Parameters + ---------- + latent_var : torch.Tensor + The latent variance. + + Raises + ------ + ValueError + If the shape of the latent variance is incorrect. + """ if latent_var.shape != (self.n_samples, self.dim): raise ValueError( f"Wrong shape. Expected {self.n_samples, self.dim}, got {latent_var.shape}" @@ -438,6 +729,19 @@ class _PLN(ABC): self._latent_var = latent_var def _cpu_attribute_or_none(self, attribute_name): + """ + Get the CPU attribute or return None. + + Parameters + ---------- + attribute_name : str + The attribute name. + + Returns + ------- + torch.Tensor or None + The attribute value or None. + """ if hasattr(self, attribute_name): attr = getattr(self, attribute_name) if isinstance(attr, torch.Tensor): @@ -445,7 +749,15 @@ class _PLN(ABC): return attr return None - def save(self, path_of_directory="./"): + def save(self, path_of_directory: str = "./"): + """ + Save the model parameters to disk. + + Parameters + ---------- + path_of_directory : str, optional + The path of the directory to save the parameters, by default "./". + """ path = f"{path_of_directory}/{self.path_to_directory}{self.directory_name}" os.makedirs(path, exist_ok=True) for key, value in self._dict_parameters.items(): @@ -461,19 +773,56 @@ class _PLN(ABC): @property def counts(self): + """ + Property representing the counts. 
+ + Returns + ------- + torch.Tensor or None + The counts or None. + """ return self._cpu_attribute_or_none("_counts") @property def offsets(self): + """ + Property representing the offsets. + + Returns + ------- + torch.Tensor or None + The offsets or None. + """ return self._cpu_attribute_or_none("_offsets") @property def covariates(self): + """ + Property representing the covariates. + + Returns + ------- + torch.Tensor or None + The covariates or None. + """ return self._cpu_attribute_or_none("_covariates") @counts.setter @array2tensor def counts(self, counts): + """ + Setter for the counts property. + + Parameters + ---------- + counts : torch.Tensor + The counts. + + Raises + ------ + ValueError + If the shape of the counts is incorrect or if the input is not integers. + """ if self.counts.shape != counts.shape: raise ValueError( f"Wrong shape for the counts. Expected {self.counts.shape}, got {counts.shape}" @@ -485,6 +834,19 @@ class _PLN(ABC): @offsets.setter @array2tensor def offsets(self, offsets): + """ + Setter for the offsets property. + + Parameters + ---------- + offsets : torch.Tensor + The offsets. + + Raises + ------ + ValueError + If the shape of the offsets is incorrect. + """ if self.offsets.shape != offsets.shape: raise ValueError( f"Wrong shape for the offsets. Expected {self.offsets.shape}, got {offsets.shape}" @@ -494,12 +856,38 @@ class _PLN(ABC): @covariates.setter @array2tensor def covariates(self, covariates): + """ + Setter for the covariates property. + + Parameters + ---------- + covariates : torch.Tensor + The covariates. + + Raises + ------ + ValueError + If the shape of the covariates or counts is incorrect. + """ _check_data_shape(self.counts, covariates, self.offsets) self._covariates = covariates @coef.setter @array2tensor def coef(self, coef): + """ + Setter for the coef property. + + Parameters + ---------- + coef : torch.Tensor or None + The coefficients. + + Raises + ------ + ValueError + If the shape of the coef is incorrect. + """ if coef is None: pass elif coef.shape != (self.nb_cov, self.dim): @@ -510,6 +898,14 @@ class _PLN(ABC): @property def dict_for_printing(self): + """ + Property representing the dictionary for printing. + + Returns + ------- + dict + The dictionary for printing. + """ return { "Loglike": np.round(self.loglike, 2), "Dimension": self.dim, @@ -520,22 +916,79 @@ class _PLN(ABC): @property def optim_parameters(self): + """ + Property representing the optimization parameters. + + Returns + ------- + dict + The dictionary of optimization parameters. + """ return {"Number of iterations done": self.nb_iteration_done} @property def _useful_properties_string(self): - return ".latent_variables, .model_parameters, .latent_parameters, \ -.optim_parameters" + """ + Property representing the useful properties as a string. + + Returns + ------- + str + The string representation of the useful properties. + """ + return ".latent_variables, .model_parameters, .latent_parameters, .optim_parameters" @property def _useful_methods_strings(self): - return ".show(), .coef() .transform(), .sigma(), .predict(), \ -.pca_projected_latent_variables()" + """ + Property representing the useful methods as a string. + + Returns + ------- + str + The string representation of the useful methods. + """ + return ".show(), .coef() .transform(), .sigma(), .predict(), .pca_projected_latent_variables()" def sigma(self): + """ + Method returning the covariance matrix. + + Returns + ------- + torch.Tensor or None + The covariance matrix or None. 
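The new setters validate shapes (and, for counts, integrality) before assignment instead of failing later in the ELBO. For instance:

```
import numpy as np

pln.counts = np.random.poisson(2.0, size=pln.counts.shape)  # same shape: accepted
try:
    pln.counts = np.zeros((1, 1))
except ValueError as err:
    print(err)  # "Wrong shape for the counts. Expected ..., got ..."
```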
+ """ return self.covariance def predict(self, covariates=None): + """ + Method for making predictions. + + Parameters + ---------- + covariates : torch.Tensor, optional + The covariates, by default None. + + Returns + ------- + torch.Tensor or None + The predicted values or None. + + Raises + ------ + AttributeError + If there are no covariates in the model. + RuntimeError + If the shape of thecovariates is incorrect. + + Notes + ----- + - If `covariates` is not provided and there are no covariates in the model, None is returned. + - If `covariates` is provided, it should have the shape `(n_samples, nb_cov)`, where `n_samples` is the number of samples and `nb_cov` is the number of covariates. + - The predicted values are obtained by multiplying the covariates by the coefficients. + + """ if covariates is not None and self.nb_cov == 0: raise AttributeError("No covariates in the model, can't predict") if covariates is None: @@ -544,31 +997,63 @@ class _PLN(ABC): return None return self.covariates @ self.coef if covariates.shape[-1] != self.nb_cov: - error_string = f"X has wrong shape ({covariates.shape}).Should" + error_string = f"X has wrong shape ({covariates.shape}). Should" error_string += f" be ({self.n_samples, self.nb_cov})." raise RuntimeError(error_string) return covariates @ self.coef @property def directory_name(self): + """ + Property representing the directory name. + + Returns + ------- + str + The directory name. + """ return f"{self._NAME}_nbcov_{self.nb_cov}_dim_{self.dim}" @property def path_to_directory(self): + """ + Property representing the path to the directory. + + Returns + ------- + str + The path to the directory. + """ return "" # need to do a good init for M and S -class PLN(_PLN): - _NAME = "PLN" +class Pln(_Pln): + _NAME = "Pln" coef: torch.Tensor @property def _description(self): + """ + Property representing the description of the model. + + Returns + ------- + str + The description of the model. + """ return "full covariance model." @property def coef(self): + """ + Property representing the coefficients. + + Returns + ------- + torch.Tensor or None + The coefficients or None. + """ if ( hasattr(self, "_latent_mean") and hasattr(self, "_covariates") @@ -579,12 +1064,26 @@ class PLN(_PLN): @coef.setter def coef(self, coef): + """ + Setter for the coef property. + + Parameters + ---------- + coef : torch.Tensor + The coefficients. + """ pass def _smart_init_latent_parameters(self): + """ + Method for smartly initializing the latent parameters. + """ self._random_init_latent_parameters() def _random_init_latent_parameters(self): + """ + Method for randomly initializing the latent parameters. + """ if not hasattr(self, "_latent_var"): self._latent_var = 1 / 2 * torch.ones((self.n_samples, self.dim)).to(DEVICE) if not hasattr(self, "_latent_mean"): @@ -592,16 +1091,35 @@ class PLN(_PLN): @property def _list_of_parameters_needing_gradient(self): + """ + Property representing the list of parameters needing gradient. + + Returns + ------- + list + The list of parameters needing gradient. + """ return [self._latent_mean, self._latent_var] def _get_max_components(self): + """ + Method for getting the maximum number of components. + + Returns + ------- + int + The maximum number of components. + """ return self.dim def compute_elbo(self): """ - Compute the Evidence Lower BOund (ELBO) that will be - maximized by pytorch. Here we use the profiled ELBO - for the full covariance matrix. + Method for computing the evidence lower bound (ELBO). 
+ + Returns + ------- + torch.Tensor + The computed ELBO. """ return profiled_elbo_pln( self._counts, @@ -612,19 +1130,41 @@ class PLN(_PLN): ) def _smart_init_model_parameters(self): + """ + Method for smartly initializing the model parameters. + """ # no model parameters since we are doing a profiled ELBO pass def _random_init_model_parameters(self): + """ + Method for randomly initializing the model parameters. + """ # no model parameters since we are doing a profiled ELBO pass @property def _coef(self): + """ + Property representing the coefficients. + + Returns + ------- + torch.Tensor + The coefficients. + """ return _closed_formula_coef(self._covariates, self._latent_mean) @property def _covariance(self): + """ + Property representing the covariance matrix. + + Returns + ------- + torch.Tensor or None + The covariance matrix or None. + """ return _closed_formula_covariance( self._covariates, self._latent_mean, @@ -634,21 +1174,56 @@ class PLN(_PLN): ) def _pring_beginning_message(self): - print(f"Fitting a PLN model with {self._description}") + """ + Method for printing the beginning message. + """ + print(f"Fitting a Pln model with {self._description}") @property def latent_variables(self): + """ + Property representing the latent variables. + + Returns + ------- + torch.Tensor + The latent variables. + """ return self.latent_mean @property def number_of_parameters(self): + """ + Property representing the number of parameters. + + Returns + ------- + int + The number of parameters. + """ return self.dim * (self.dim + self.nb_cov) def transform(self): + """ + Method for transforming the model. + + Returns + ------- + torch.Tensor + The transformed model. + """ return self.latent_variables @property def covariance(self): + """ + Property representing the covariance matrix. + + Returns + ------- + torch.Tensor or None + The covariance matrix or None. + """ if all( hasattr(self, attr) for attr in [ @@ -664,28 +1239,79 @@ class PLN(_PLN): @covariance.setter def covariance(self, covariance): - pass + """ + Setter for the covariance property. + + Parameters + ---------- + covariance : torch.Tensor + The covariance matrix. + """ + pass -class PLNPCA: - _NAME = "PLNPCA" +class PlnPCAcollection: + _NAME = "PlnPCAcollection" + _dict_models: dict def __init__( self, - counts, - covariates=None, - offsets=None, - offsets_formula="logsum", - ranks=range(3, 5), - dict_of_dict_initialization=None, - take_log_offsets=False, + counts: torch.Tensor, + covariates: Optional[torch.Tensor] = None, + offsets: Optional[torch.Tensor] = None, + offsets_formula: str = "logsum", + ranks: Iterable[int] = range(3, 5), + dict_of_dict_initialization: Optional[dict] = None, + take_log_offsets: bool = False, ): + """ + Constructor for PlnPCAcollection. + + Parameters + ---------- + counts : torch.Tensor + The counts. + covariates : torch.Tensor, optional + The covariates, by default None. + offsets : torch.Tensor, optional + The offsets, by default None. + offsets_formula : str, optional + The formula for offsets, by default "logsum". + ranks : Iterable[int], optional + The range of ranks, by default range(3, 5). + dict_of_dict_initialization : dict, optional + The dictionary of initialization, by default None. + take_log_offsets : bool, optional + Whether to take the logarithm of offsets, by default False. 
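For the `Pln` model above, `coef` and `covariance` are not free parameters: they are recovered in closed form from the latent parameters, which is why their setters are deliberate no-ops and only the latent mean and variance carry gradients. Schematically (the exact argument list of the covariance formula is only partly visible in this diff):

```
# Closed forms backing the Pln properties (see _closed_forms.py):
#   _coef       = _closed_formula_coef(covariates, latent_mean)
#   _covariance = _closed_formula_covariance(covariates, latent_mean,
#                                            latent_var, coef, n_samples)
pln._list_of_parameters_needing_gradient  # [self._latent_mean, self._latent_var]
```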
+ """ + self._dict_models = {} self._init_data(counts, covariates, offsets, offsets_formula, take_log_offsets) self._init_models(ranks, dict_of_dict_initialization) def _init_data( - self, counts, covariates, offsets, offsets_formula, take_log_offsets + self, + counts: torch.Tensor, + covariates: Optional[torch.Tensor], + offsets: Optional[torch.Tensor], + offsets_formula: str, + take_log_offsets: bool, ): + """ + Method for initializing the data. + + Parameters + ---------- + counts : torch.Tensor + The counts. + covariates : torch.Tensor, optional + The covariates, by default None. + offsets : torch.Tensor, optional + The offsets, by default None. + offsets_formula : str + The formula for offsets. + take_log_offsets : bool + Whether to take the logarithm of offsets. + """ self._counts, self._covariates, self._offsets = _format_model_param( counts, covariates, offsets, offsets_formula, take_log_offsets ) @@ -697,11 +1323,34 @@ class PLNPCA: cls, formula: str, data: dict, - offsets_formula="logsum", - ranks=range(3, 5), - dict_of_dict_initialization=None, - take_log_offsets=False, - ): + offsets_formula: str = "logsum", + ranks: Iterable[int] = range(3, 5), + dict_of_dict_initialization: Optional[dict] = None, + take_log_offsets: bool = False, + ) -> "PlnPCAcollection": + """ + Create an instance of PlnPCAcollection from a formula. + + Parameters + ---------- + formula : str + The formula. + data : dict + The data dictionary. + offsets_formula : str, optional + The formula for offsets, by default "logsum". + ranks : Iterable[int], optional + The range of ranks, by default range(3, 5). + dict_of_dict_initialization : dict, optional + The dictionary of initialization, by default None. + take_log_offsets : bool, optional + Whether to take the logarithm of offsets, by default False. + + Returns + ------- + PlnPCAcollection + The created PlnPCAcollection instance. + """ counts, covariates, offsets = _extract_data_from_formula(formula, data) return cls( counts, @@ -714,130 +1363,270 @@ class PLNPCA: ) @property - def covariates(self): - return self.list_models[0].covariates + def covariates(self) -> torch.Tensor: + """ + Property representing the covariates. + + Returns + ------- + torch.Tensor + The covariates. + """ + return self[self.ranks[0]].covariates @property - def counts(self): - return self.list_models[0].counts + def counts(self) -> torch.Tensor: + """ + Property representing the counts. + + Returns + ------- + torch.Tensor + The counts. + """ + return self[self.ranks[0]].counts @property - def coef(self): - return {model.rank: model.coef for model in self.list_models} + def coef(self) -> Dict[int, torch.Tensor]: + """ + Property representing the coefficients. + + Returns + ------- + Dict[int, torch.Tensor] + The coefficients. + """ + return {model.rank: model.coef for model in self.values()} @property - def components(self): - return {model.rank: model.components for model in self.list_models} + def components(self) -> Dict[int, torch.Tensor]: + """ + Property representing the components. + + Returns + ------- + Dict[int, torch.Tensor] + The components. + """ + return {model.rank: model.components for model in self.values()} @property - def latent_mean(self): - return {model.rank: model.latent_mean for model in self.list_models} + def latent_mean(self) -> Dict[int, torch.Tensor]: + """ + Property representing the latent means. + + Returns + ------- + Dict[int, torch.Tensor] + The latent means. 
+ """ + return {model.rank: model.latent_mean for model in self.values()} @property - def latent_var(self): - return {model.rank: model.latent_var for model in self.list_models} + def latent_var(self) -> Dict[int, torch.Tensor]: + """ + Property representing the latent variances. + + Returns + ------- + Dict[int, torch.Tensor] + The latent variances. + """ + return {model.rank: model.latent_var for model in self.values()} @counts.setter @array2tensor - def counts(self, counts): - for model in self.list_models: + def counts(self, counts: torch.Tensor): + """ + Setter for the counts property. + + Parameters + ---------- + counts : torch.Tensor + The counts. + """ + for model in self.values(): model.counts = counts @coef.setter @array2tensor - def coef(self, coef): - for model in self.list_models: + def coef(self, coef: torch.Tensor): + """ + Setter for the coef property. + + Parameters + ---------- + coef : torch.Tensor + The coefficients. + """ + for model in self.values(): model.coef = coef @covariates.setter @array2tensor - def covariates(self, covariates): - for model in self.list_models: + def covariates(self, covariates: torch.Tensor): + """ + Setter for the covariates property. + + Parameters + ---------- + covariates : torch.Tensor + The covariates. + """ + for model in self.values(): model.covariates = covariates @property - def offsets(self): - return self.list_models[0].offsets + def offsets(self) -> torch.Tensor: + """ + Property representing the offsets. + + Returns + ------- + torch.Tensor + The offsets. + """ + return self[self.ranks[0]].offsets @offsets.setter @array2tensor - def offsets(self, offsets): - for model in self.list_models: + def offsets(self, offsets: torch.Tensor): + """ + Setter for the offsets property. + + Parameters + ---------- + offsets : torch.Tensor + The offsets. + """ + for model in self.values(): model.offsets = offsets - def _init_models(self, ranks, dict_of_dict_initialization): + def _init_models( + self, ranks: Iterable[int], dict_of_dict_initialization: Optional[dict] + ): + """ + Method for initializing the models. + + Parameters + ---------- + ranks : Iterable[int] + The range of ranks. + dict_of_dict_initialization : dict, optional + The dictionary of initialization. + """ if isinstance(ranks, (Iterable, np.ndarray)): - self.list_models = [] for rank in ranks: if isinstance(rank, (int, np.integer)): dict_initialization = _get_dict_initialization( rank, dict_of_dict_initialization ) - self.list_models.append( - _PLNPCA( - counts=self._counts, - covariates=self._covariates, - offsets=self._offsets, - rank=rank, - dict_initialization=dict_initialization, - ) + self._dict_models[rank] = PlnPCA( + counts=self._counts, + covariates=self._covariates, + offsets=self._offsets, + rank=rank, + dict_initialization=dict_initialization, ) else: raise TypeError( - f"Please instantiate with either a list " - f"of integers or an integer." + "Please instantiate with either a list " + "of integers or an integer." ) elif isinstance(ranks, (int, np.integer)): dict_initialization = _get_dict_initialization( ranks, dict_of_dict_initialization ) - self.list_models = [ - _PLNPCA( - self._counts, - self._covariates, - self._offsets, - ranks, - dict_initialization, - ) - ] + self._dict_models[rank] = PlnPCA( + self._counts, + self._covariates, + self._offsets, + ranks, + dict_initialization, + ) + else: raise TypeError( f"Please instantiate with either a list " f"of integers or an integer." 
) @property - def ranks(self): - return [model.rank for model in self.list_models] + def ranks(self) -> List[int]: + """ + Property representing the ranks. - @property - def dict_models(self): - return {model.rank: model for model in self.list_models} + Returns + ------- + List[int] + The ranks. + """ + return [model.rank for model in self.values()] - def _pring_beginning_message(self): - return f"Adjusting {len(self.ranks)} PLN models for PCA analysis \n" + def _pring_beginning_message(self) -> str: + """ + Method for printing the beginning message. + + Returns + ------- + str + The beginning message. + """ + return f"Adjusting {len(self.ranks)} Pln models for PCA analysis \n" @property - def dim(self): + def dim(self) -> int: + """ + Property representing the dimension. + + Returns + ------- + int + The dimension. + """ return self[self.ranks[0]].dim @property - def nb_cov(self): + def nb_cov(self) -> int: + """ + Property representing the number of covariates. + + Returns + ------- + int + The number of covariates. + """ return self[self.ranks[0]].nb_cov - ## should do something for this weird init. pb: if doing the init of self._counts etc - ## only in PLNPCA, then we don't do it for each _PLNPCA but then PLN is not doing it. def fit( self, - nb_max_iteration=100000, - lr=0.01, - class_optimizer=torch.optim.Rprop, - tol=1e-3, - do_smart_init=True, - verbose=False, + nb_max_iteration: int = 100000, + lr: float = 0.01, + class_optimizer: Type[torch.optim.Optimizer] = torch.optim.Rprop, + tol: float = 1e-3, + do_smart_init: bool = True, + verbose: bool = False, ): + """ + Fit the PlnPCAcollection. + + Parameters + ---------- + nb_max_iteration : int, optional + The maximum number of iterations, by default 100000. + lr : float, optional + The learning rate, by default 0.01. + class_optimizer : Type[torch.optim.Optimizer], optional + The optimizer class, by default torch.optim.Rprop. + tol : float, optional + The tolerance, by default 1e-3. + do_smart_init : bool, optional + Whether to do smart initialization, by default True. + verbose : bool, optional + Whether to print verbose output, by default False. + """ self._pring_beginning_message() - for i in range(len(self.list_models)): - model = self.list_models[i] + for i in range(len(self.values())): + model = self[self.ranks[i]] model.fit( nb_max_iteration, lr, @@ -846,18 +1635,33 @@ class PLNPCA: do_smart_init, verbose, ) - if i < len(self.list_models) - 1: - next_model = self.list_models[i + 1] + if i < len(self.values()) - 1: + next_model = self[self.ranks[i + 1]] self.init_next_model_with_previous_parameters(next_model, model) self._print_ending_message() - def init_next_model_with_previous_parameters(self, next_model, current_model): + def init_next_model_with_previous_parameters( + self, next_model: Any, current_model: Any + ): + """ + Initialize the next model with the parameters of the current model. + + Parameters + ---------- + next_model : Any + The next model to initialize. + current_model : Any + The current model. + """ next_model.coef = current_model.coef next_model.components = torch.zeros(self.dim, next_model.rank) with torch.no_grad(): next_model._components[:, : current_model.rank] = current_model._components def _print_ending_message(self): + """ + Method for printing the ending message. 
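Fitting a collection runs through the ranks in order, warm-starting each model from the previous one's `coef` and components; afterwards the per-rank criteria are exposed as dictionaries. A sketch, reusing the `pca` collection from the README:

```
pca.fit(nb_max_iteration=100000, tol=1e-3, verbose=False)
print(pca.BIC)   # e.g. {3: ..., 4: ..., 5: ...}
print(pca.AIC)
pca.show()       # plots BIC, AIC and negative log-likelihood per rank

# Data setters fan out: assigning counts, covariates or offsets on the
# collection updates every underlying PlnPCA model.
```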
+ """ delimiter = "=" * NB_CHARACTERS_FOR_NICE_PLOT print(f"{delimiter}\n") print("DONE!") @@ -865,32 +1669,170 @@ class PLNPCA: print(f" Best model(lower AIC): {self._criterion_dict('AIC')}\n ") print(f"{delimiter}\n") - def _criterion_dict(self, criterion="AIC"): + def _criterion_dict(self, criterion: str = "AIC") -> int: + """ + Return the rank of the best model according to the specified criterion. + + Parameters + ---------- + criterion : str, optional + The criterion to use ('AIC' or 'BIC'), by default 'AIC'. + + Returns + ------- + int + The rank of the best model. + """ return self.best_model(criterion).rank - def __getitem__(self, rank): - if (rank in self.ranks) is False: - asked_rank = rank - rank = _closest(self.ranks, asked_rank) - warning_string = " \n No such a model in the collection." - warning_string += "Returning model with _closest value.\n" - warning_string += f"Requested: {asked_rank}, returned: {rank}" - warnings.warn(message=warning_string) - return self.dict_models[rank] + def __getitem__(self, rank: int) -> Any: + """ + Get the model with the specified rank. + + Parameters + ---------- + rank : int + The rank of the model. + + Returns + ------- + Any + The model with the specified rank. + """ + return self._dict_models[rank] + + def __len__(self) -> int: + """ + Get the number of models in the collection. + + Returns + ------- + int + The number of models in the collection. + """ + return len(self._dict_models) + + def __iter__(self): + """ + Iterate over the models in the collection. + + Returns + ------- + Iterator + Iterator over the models. + """ + return iter(self._dict_models) + + def __contains__(self, rank: int) -> bool: + """ + Check if a model with the specified rank exists in the collection. + + Parameters + ---------- + rank : int + The rank to check. + + Returns + ------- + bool + True if a model with the specified rank exists, False otherwise. + """ + return rank in self._dict_models.keys() + + def keys(self): + """ + Get the ranks of the models in the collection. + + Returns + ------- + KeysView + The ranks of the models. + """ + return self._dict_models.keys() + + def get(self, key: Any, default: Any) -> Any: + """ + Get the model with the specified key, or return a default value if the key does not exist. + + Parameters + ---------- + key : Any + The key to search for. + default : Any + The default value to return if the key does not exist. + + Returns + ------- + Any + The model with the specified key, or the default value if the key does not exist. + """ + if key in self: + return self[key] + else: + return default + + def values(self): + """ + Get the models in the collection. + + Returns + ------- + ValuesView + The models in the collection. + """ + return self._dict_models.values() + + def items(self): + """ + Get the key-value pairs of the models in the collection. + + Returns + ------- + ItemsView + The key-value pairs of the models. + """ + return self._dict_models.items() @property - def BIC(self): - return {model.rank: int(model.BIC) for model in self.list_models} + def BIC(self) -> Dict[int, int]: + """ + Property representing the BIC scores of the models in the collection. + + Returns + ------- + Dict[int, int] + The BIC scores of the models. + """ + return {model.rank: int(model.BIC) for model in self.values()} @property - def AIC(self): - return {model.rank: int(model.AIC) for model in self.list_models} + def AIC(self) -> Dict[int, int]: + """ + Property representing the AIC scores of the models in the collection. 
+ + Returns + ------- + Dict[int, int] + The AIC scores of the models. + """ + return {model.rank: int(model.AIC) for model in self.values()} @property - def loglikes(self): - return {model.rank: model.loglike for model in self.list_models} + def loglikes(self) -> Dict[int, Any]: + """ + Property representing the log-likelihoods of the models in the collection. + + Returns + ------- + Dict[int, Any] + The log-likelihoods of the models. + """ + return {model.rank: model.loglike for model in self.values()} def show(self): + """ + Show a plot with BIC scores, AIC scores, and negative log-likelihoods of the models. + """ bic = self.BIC aic = self.AIC loglikes = self.loglikes @@ -915,62 +1857,116 @@ class PLNPCA: plt.show() @property - def best_BIC_model_rank(self): + def best_BIC_model_rank(self) -> int: + """ + Property representing the rank of the best model according to the BIC criterion. + + Returns + ------- + int + The rank of the best model. + """ return self.ranks[np.argmin(list(self.BIC.values()))] @property - def best_AIC_model_rank(self): + def best_AIC_model_rank(self) -> int: + """ + Property representing the rank of the best model according to the AIC criterion. + + Returns + ------- + int + The rank of the best model. + """ return self.ranks[np.argmin(list(self.AIC.values()))] - def best_model(self, criterion="AIC"): + def best_model(self, criterion: str = "AIC") -> Any: + """ + Get the best model according to the specified criterion. + + Parameters + ---------- + criterion : str, optional + The criterion to use ('AIC' or 'BIC'), by default 'AIC'. + + Returns + ------- + Any + The best model. + """ if criterion == "BIC": return self[self.best_BIC_model_rank] if criterion == "AIC": return self[self.best_AIC_model_rank] raise ValueError(f"Unknown criterion {criterion}") - def save(self, path_of_directory="./", ranks=None): + def save(self, path_of_directory: str = "./", ranks: Optional[List[int]] = None): + """ + Save the models in the specified directory. + + Parameters + ---------- + path_of_directory : str, optional + The path of the directory to save the models, by default "./". + ranks : Optional[List[int]], optional + The ranks of the models to save, by default None. + """ if ranks is None: ranks = self.ranks - for model in self.list_models: + for model in self.values(): if model.rank in ranks: model.save(path_of_directory) @property - def directory_name(self): + def directory_name(self) -> str: + """ + Property representing the directory name. + + Returns + ------- + str + The directory name. + """ return f"{self._NAME}_nbcov_{self.nb_cov}_dim_{self.dim}" @property - def n_samples(self): - return self.list_models[0].n_samples + def n_samples(self) -> int: + """ + Property representing the number of samples. - @property - def _p(self): - return self[self.ranks[0]].p + Returns + ------- + int + The number of samples. + """ + return self[self.ranks[0]].n_samples - @property - def models(self): - return self.dict_models.values() + def __repr__(self) -> str: + """ + Return a string representation of the PlnPCAcollection object. - def __str__(self): - nb_models = len(self.list_models) + Returns + ------- + str + The string representation of the PlnPCAcollection object. + """ + nb_models = len(self) delimiter = "\n" + "-" * NB_CHARACTERS_FOR_NICE_PLOT + "\n" to_print = delimiter - to_print += f"Collection of {nb_models} PLNPCA models with \ + to_print += f"Collection of {nb_models} PlnPCAcollection models with \ {self.dim} variables." 
         to_print += delimiter
         to_print += f" - Ranks considered:{self.ranks}\n"
         dict_bic = {"rank": "criterion"} | self.BIC
         to_print += f" - BIC metric:\n{_nice_string_of_dict(dict_bic)}\n"
         dict_to_print = self.best_model(criterion="BIC")._rank
         to_print += f" Best model(lower BIC): {dict_to_print}\n \n"
         dict_aic = {"rank": "criterion"} | self.AIC
         to_print += f" - AIC metric:\n{_nice_string_of_dict(dict_aic)}\n"
         to_print += f" Best model(lower AIC): \
-                {self.best_model(criterion = 'AIC')._rank}\n"
+                {self.best_model(criterion='AIC')._rank}\n"
         to_print += delimiter
-        to_print += f"* Useful properties\n"
+        to_print += "* Useful properties\n"
         to_print += f" {self._useful_properties_string}\n"
         to_print += "* Useful methods \n"
         to_print += f" {self._useful_methods_strings}"
@@ -978,28 +1974,62 @@ class PLNPCA:
         return to_print
 
     @property
-    def _useful_methods_strings(self):
+    def _useful_methods_strings(self) -> str:
+        """
+        Property representing the useful methods.
+
+        Returns
+        -------
+        str
+            The string representation of the useful methods.
+        """
         return ".show(), .best_model()"
 
     @property
-    def _useful_properties_string(self):
+    def _useful_properties_string(self) -> str:
+        """
+        Property representing the useful properties.
+
+        Returns
+        -------
+        str
+            The string representation of the useful properties.
+        """
         return ".BIC, .AIC, .loglikes"
 
 
 # Here, setting the value for each key in _dict_parameters
-class _PLNPCA(_PLN):
-    _NAME = "_PLNPCA"
+class PlnPCA(_Pln):
+    _NAME: str = "PlnPCA"
 
     _components: torch.Tensor
 
     def __init__(
         self,
-        counts,
-        covariates=None,
-        offsets=None,
-        offsets_formula="logsum",
-        rank=5,
-        dict_initialization=None,
+        counts: torch.Tensor,
+        covariates: Optional[torch.Tensor] = None,
+        offsets: Optional[torch.Tensor] = None,
+        offsets_formula: str = "logsum",
+        rank: int = 5,
+        dict_initialization: Optional[Dict[str, torch.Tensor]] = None,
     ):
+        """
+        Initialize the PlnPCA object.
+
+        Parameters
+        ----------
+        counts : torch.Tensor
+            The counts tensor.
+        covariates : torch.Tensor, optional
+            The covariates tensor, by default None.
+        offsets : torch.Tensor, optional
+            The offsets tensor, by default None.
+        offsets_formula : str, optional
+            The offsets formula, by default "logsum".
+        rank : int, optional
+            The rank of the approximation, by default 5.
+        dict_initialization : Dict[str, torch.Tensor], optional
+            The dictionary for initialization, by default None.
+        """
         self._rank = rank
         self._counts, self._covariates, self._offsets = _format_model_param(
             counts, covariates, offsets, None, take_log_offsets=False
@@ -1013,14 +2043,43 @@ class _PLNPCA(_PLN):
 
     @classmethod
     def from_formula(
-        cls, formula, data, rank=5, offsets_formula="logsum", dict_initialization=None
+        cls,
+        formula: str,
+        data: Any,
+        rank: int = 5,
+        offsets_formula: str = "logsum",
+        dict_initialization: Optional[Dict[str, torch.Tensor]] = None,
     ):
+        """
+        Create a PlnPCA object from a formula.
+
+        Parameters
+        ----------
+        formula : str
+            The formula.
+        data : Any
+            The data.
+        rank : int, optional
+            The rank of the approximation, by default 5.
+        offsets_formula : str, optional
+            The offsets formula, by default "logsum".
+        dict_initialization : Dict[str, torch.Tensor], optional
+            The dictionary for initialization, by default None.
+
+        Returns
+        -------
+        PlnPCA
+            The created PlnPCA object.
+ """ counts, covariates, offsets = _extract_data_from_formula(formula, data) return cls( counts, covariates, offsets, offsets_formula, rank, dict_initialization ) def _check_if_rank_is_too_high(self): + """ + Check if the rank is too high and issue a warning if necessary. + """ if self.dim < self.rank: warning_string = ( f"\nThe requested rank of approximation {self.rank} " @@ -1031,16 +2090,40 @@ class _PLNPCA(_PLN): self._rank = self.dim @property - def latent_mean(self): + def latent_mean(self) -> torch.Tensor: + """ + Property representing the latent mean. + + Returns + ------- + torch.Tensor + The latent mean tensor. + """ return self._cpu_attribute_or_none("_latent_mean") @property - def latent_var(self): + def latent_var(self) -> torch.Tensor: + """ + Property representing the latent variance. + + Returns + ------- + torch.Tensor + The latent variance tensor. + """ return self._cpu_attribute_or_none("_latent_var") @latent_mean.setter @array2tensor - def latent_mean(self, latent_mean): + def latent_mean(self, latent_mean: torch.Tensor): + """ + Setter for the latent mean. + + Parameters + ---------- + latent_mean : torch.Tensor + The latent mean tensor. + """ if latent_mean.shape != (self.n_samples, self.rank): raise ValueError( f"Wrong shape. Expected {self.n_samples, self.rank}, got {latent_mean.shape}" @@ -1049,7 +2132,15 @@ class _PLNPCA(_PLN): @latent_var.setter @array2tensor - def latent_var(self, latent_var): + def latent_var(self, latent_var: torch.Tensor): + """ + Setter for the latent variance. + + Parameters + ---------- + latent_var : torch.Tensor + The latent variance tensor. + """ if latent_var.shape != (self.n_samples, self.rank): raise ValueError( f"Wrong shape. Expected {self.n_samples, self.rank}, got {latent_var.shape}" @@ -1057,42 +2148,103 @@ class _PLNPCA(_PLN): self._latent_var = latent_var @property - def directory_name(self): + def directory_name(self) -> str: + """ + Property representing the directory name. + + Returns + ------- + str + The directory name. + """ return f"{self._NAME}_rank_{self._rank}" - # return f"PLNPCA_nbcov_{self.nb_cov}_dim_{self.dim}/{self._NAME}_rank_{self._rank}" @property - def covariates(self): + def covariates(self) -> torch.Tensor: + """ + Property representing the covariates. + + Returns + ------- + torch.Tensor + The covariates tensor. + """ return self._cpu_attribute_or_none("_covariates") @covariates.setter @array2tensor - def covariates(self, covariates): + def covariates(self, covariates: torch.Tensor): + """ + Setter for the covariates. + + Parameters + ---------- + covariates : torch.Tensor + The covariates tensor. + """ _check_data_shape(self.counts, covariates, self.offsets) self._covariates = covariates print("Setting coef to initialization") self._smart_init_coef() @property - def path_to_directory(self): - return f"PLNPCA_nbcov_{self.nb_cov}_dim_{self.dim}/" + def path_to_directory(self) -> str: + """ + Property representing the path to the directory. + + Returns + ------- + str + The path to the directory. + """ + return f"PlnPCAcollection_nbcov_{self.nb_cov}_dim_{self.dim}/" @property - def rank(self): + def rank(self) -> int: + """ + Property representing the rank. + + Returns + ------- + int + The rank. + """ return self._rank - def _get_max_components(self): + def _get_max_components(self) -> int: + """ + Get the maximum number of components. + + Returns + ------- + int + The maximum number of components. + """ return self._rank def _pring_beginning_message(self): + """ + Print the beginning message. 
+ """ print("-" * NB_CHARACTERS_FOR_NICE_PLOT) - print(f"Fitting a PLNPCA model with {self._rank} components") + print(f"Fitting a PlnPCAcollection model with {self._rank} components") @property - def model_parameters(self): + def model_parameters(self) -> Dict[str, torch.Tensor]: + """ + Property representing the model parameters. + + Returns + ------- + Dict[str, torch.Tensor] + The model parameters. + """ return {"coef": self.coef, "components": self.components} def _smart_init_model_parameters(self): + """ + Initialize the model parameters smartly. + """ if not hasattr(self, "_coef"): super()._smart_init_coef() if not hasattr(self, "_components"): @@ -1101,14 +2253,23 @@ class _PLNPCA(_PLN): ) def _random_init_model_parameters(self): + """ + Randomly initialize the model parameters. + """ super()._random_init_coef() self._components = torch.randn((self.dim, self._rank)).to(DEVICE) def _random_init_latent_parameters(self): + """ + Randomly initialize the latent parameters. + """ self._latent_var = 1 / 2 * torch.ones((self.n_samples, self._rank)).to(DEVICE) self._latent_mean = torch.ones((self.n_samples, self._rank)).to(DEVICE) def _smart_init_latent_parameters(self): + """ + Initialize the latent parameters smartly. + """ if not hasattr(self, "_latent_mean"): self._latent_mean = ( _init_latent_mean( @@ -1128,11 +2289,27 @@ class _PLNPCA(_PLN): @property def _list_of_parameters_needing_gradient(self): + """ + Property representing the list of parameters needing gradient. + + Returns + ------- + List[torch.Tensor] + The list of parameters needing gradient. + """ if self._coef is None: return [self._components, self._latent_mean, self._latent_var] return [self._components, self._coef, self._latent_mean, self._latent_var] - def compute_elbo(self): + def compute_elbo(self) -> torch.Tensor: + """ + Compute the evidence lower bound (ELBO). + + Returns + ------- + torch.Tensor + The ELBO value. + """ return elbo_plnpca( self._counts, self._covariates, @@ -1144,20 +2321,52 @@ class _PLNPCA(_PLN): ) @property - def number_of_parameters(self): + def number_of_parameters(self) -> int: + """ + Property representing the number of parameters. + + Returns + ------- + int + The number of parameters. + """ return self.dim * (self.nb_cov + self._rank) - self._rank * (self._rank - 1) / 2 @property - def _additional_properties_string(self): + def _additional_properties_string(self) -> str: + """ + Property representing the additional properties string. + + Returns + ------- + str + The additional properties string. + """ return ".projected_latent_variables" @property - def _additional_methods_string(self): + def _additional_methods_string(self) -> str: + """ + Property representing the additional methods string. + + Returns + ------- + str + The additional methods string. + """ string = " only for rank=2: .viz()" return string @property - def covariance(self): + def covariance(self) -> Optional[torch.Tensor]: + """ + Property representing the covariance. + + Returns + ------- + Optional[torch.Tensor] + The covariance tensor or None if components are not present. + """ if hasattr(self, "_components"): cov_latent = self._latent_mean.T @ self._latent_mean cov_latent += torch.diag(torch.sum(torch.square(self._latent_var), dim=0)) @@ -1166,19 +2375,58 @@ class _PLNPCA(_PLN): return None @property - def _description(self): + def _description(self) -> str: + """ + Property representing the description. + + Returns + ------- + str + The description string. + """ return f" {self.rank} principal component." 
@property - def latent_variables(self): + def latent_variables(self) -> torch.Tensor: + """ + Property representing the latent variables. + + Returns + ------- + torch.Tensor + The latent variables. + """ return torch.matmul(self._latent_mean, self._components.T) @property - def projected_latent_variables(self): + def projected_latent_variables(self) -> torch.Tensor: + """ + Property representing the projected latent variables. + + Returns + ------- + torch.Tensor + The projected latent variables. + """ ortho_components = torch.linalg.qr(self._components, "reduced")[0] return torch.mm(self.latent_variables, ortho_components).detach().cpu() - def pca_projected_latent_variables(self, n_components=None): + def pca_projected_latent_variables( + self, n_components: Optional[int] = None + ) -> np.ndarray: + """ + Perform PCA on projected latent variables. + + Parameters + ---------- + n_components : Optional[int] + Number of components to keep. Defaults to None. + + Returns + ------- + np.ndarray + The transformed projected latent variables. + """ if n_components is None: n_components = self._get_max_components() if n_components > self.dim: @@ -1189,12 +2437,33 @@ class _PLNPCA(_PLN): return pca.fit_transform(self.projected_latent_variables.detach().cpu()) @property - def components(self): + def components(self) -> torch.Tensor: + """ + Property representing the components. + + Returns + ------- + torch.Tensor + The components. + """ return self._cpu_attribute_or_none("_components") @components.setter @array2tensor - def components(self, components): + def components(self, components: torch.Tensor): + """ + Setter for the components. + + Parameters + ---------- + components : torch.Tensor + The components to set. + + Raises + ------ + ValueError + If the components have an invalid shape. + """ if components.shape != (self.dim, self.rank): raise ValueError( f"Wrong shape. Expected {self.dim, self.rank}, got {components.shape}" @@ -1202,6 +2471,26 @@ class _PLNPCA(_PLN): self._components = components def viz(self, ax=None, colors=None): + """ + Visualize the PlnPCA model. + + Parameters + ---------- + ax : Optional[Any], optional + The matplotlib axis to use. If None, the current axis is used, by default None. + colors : Optional[Any], optional + The colors to use for plotting, by default None. + + Raises + ------ + RuntimeError + If the rank is less than 2. + + Returns + ------- + Any + The matplotlib axis. + """ if ax is None: ax = plt.gca() if self._rank < 2: @@ -1218,14 +2507,27 @@ class _PLNPCA(_PLN): _plot_ellipse(x[i], y[i], cov=covariances[i], ax=ax) return ax - def transform(self, project=True): + def transform(self, project: bool = True) -> torch.Tensor: + """ + Transform the model. + + Parameters + ---------- + project : bool, optional + Whether to project the latent variables, by default True. + + Returns + ------- + torch.Tensor + The transformed model. + """ if project is True: return self.projected_latent_variables return self.latent_variables -class ZIPLN(PLN): - _NAME = "ZIPLN" +class ZIPln(Pln): + _NAME = "ZIPln" _pi: torch.Tensor _coef_inflation: torch.Tensor diff --git a/pyproject.toml b/pyproject.toml index 94b2dc2a7d819ecd920828f1fa82abe1552dffca..fd481fbfe17985f8b4f5e71ff209b961b222e415 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,4 +43,11 @@ classifiers = [ # that you indicate whether you support Python 2, Python 3 or both. 
"Programming Language :: Python :: 3 :: Only", ] -dependencies = {file = ["requirements.txt"]} +dependencies = [ +"matplotlib", +"numpy", +"pandas", +"scipy", +"seaborn", +"torch" +] diff --git a/setup.py b/setup.py deleted file mode 100644 index bdb83ce6b66d8572e37a0280ab55c6cd25835386..0000000000000000000000000000000000000000 --- a/setup.py +++ /dev/null @@ -1,59 +0,0 @@ -# -*- coding: utf-8 -*- -from setuptools import setup, find_packages - -VERSION = "0.0.43" - -with open("README.md", "r") as fh: - long_description = fh.read() -with open("requirements.txt", "r") as fh: - requirements = [line.strip() for line in fh] - -setup( - name="pyPLNmodels", - version=VERSION, - description="Package implementing PLN models", - project_urls={ - "Source": "https://forgemia.inra.fr/bbatardiere/pyplnmodels", - "Documentation": "https://bbatardiere.pages.mia.inra.fr/pyplnmodels/index.html", - }, - author="Bastien Batardière, Julien Chiquet, Joon Kwon", - author_email="bastien.batardiere@gmail.com, julien.chiquet@inrae.fr, joon.kwon@inrae.fr", - long_description=long_description, - packages=find_packages(), - python_requires=">=3", - keywords=[ - "python", - "count", - "data", - "count data", - "high dimension", - "scRNAseq", - "PLN", - ], - install_requires=requirements, - py_modules=[ - "pyPLNmodels._utils", - "pyPLNmodels.elbos", - "pyPLNmodels.models", - "pyPLNmodels._closed_forms", - ], - long_description_content_type="text/markdown", - license="MIT", - # See https://pypi.python.org/pypi?%3Aaction=list_classifiers - classifiers=[ - # How mature is this project? Common values are - # 3 - Alpha - # 4 - Beta - # 5 - Production/Stable - "Development Status :: 3 - Alpha", - # Indicate who your project is intended for - "Intended Audience :: Science/Research", - # Pick your license as you wish (should match "license" above) - "License :: OSI Approved :: MIT License", - # Specify the Python versions you support here. In particular, ensure - # that you indicate whether you support Python 2, Python 3 or both. 
- "Programming Language :: Python :: 3 :: Only", - ], - include_package_data=True, - package_data={"": ["data/oaks/*.csv"]}, -) diff --git a/tests/conftest.py b/tests/conftest.py index f4cdbbbda93ffee62871db3ec65cbe120e5ea013..f70e6acae77dd8fc4e30d7b3a5982b3f206cb428 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,8 +5,8 @@ from functools import singledispatch import pytest import torch from pytest_lazyfixture import lazy_fixture as lf -from pyPLNmodels import load_model, load_plnpca -from pyPLNmodels.models import PLN, _PLNPCA, PLNPCA +from pyPLNmodels import load_model, load_plnpcacollection +from pyPLNmodels.models import Pln, PlnPCA, PlnPCAcollection sys.path.append("../") @@ -58,40 +58,42 @@ instances = [] def convenient_plnpca(*args, **kwargs): dict_init = kwargs.pop("dict_initialization", None) if isinstance(args[0], str): - return _PLNPCA.from_formula( + return PlnPCA.from_formula( *args, **kwargs, dict_initialization=dict_init, rank=RANK ) print("rank:", RANK) - return _PLNPCA(*args, **kwargs, dict_initialization=dict_init, rank=RANK) + return PlnPCA(*args, **kwargs, dict_initialization=dict_init, rank=RANK) def convenientplnpca(*args, **kwargs): dict_init = kwargs.pop("dict_initialization", None) if isinstance(args[0], str): - return PLNPCA.from_formula( + return PlnPCAcollection.from_formula( *args, **kwargs, dict_of_dict_initialization=dict_init, ranks=RANKS ) - return PLNPCA(*args, **kwargs, dict_of_dict_initialization=dict_init, ranks=RANKS) + return PlnPCAcollection( + *args, **kwargs, dict_of_dict_initialization=dict_init, ranks=RANKS + ) def convenientpln(*args, **kwargs): if isinstance(args[0], str): - return PLN.from_formula(*args, **kwargs) - return PLN(*args, **kwargs) + return Pln.from_formula(*args, **kwargs) + return Pln(*args, **kwargs) def generate_new_model(model, *args, **kwargs): name_dir = model.directory_name name = model._NAME - if name in ("PLN", "_PLNPCA"): + if name in ("Pln", "PlnPCA"): path = model.path_to_directory + name_dir init = load_model(path) - if name == "PLN": + if name == "Pln": new = convenientpln(*args, **kwargs, dict_initialization=init) - if name == "_PLNPCA": + if name == "PlnPCA": new = convenient_plnpca(*args, **kwargs, dict_initialization=init) - if name == "PLNPCA": - init = load_plnpca(name_dir) + if name == "PlnPCAcollection": + init = load_plnpcacollection(name_dir) new = convenientplnpca(*args, **kwargs, dict_initialization=init) return new diff --git a/tests/test_common.py b/tests/test_common.py index 4f539f0d9e0fc4bad0066ada154e120cc1511e12..64cb6caed1c09abef3b05cc6d73a5e9791db3e9a 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -10,7 +10,7 @@ from tests.import_data import true_sim_0cov, true_sim_2cov @pytest.mark.parametrize("any_pln", dict_fixtures["fitted_pln"]) -@filter_models(["PLN", "_PLNPCA"]) +@filter_models(["Pln", "PlnPCA"]) def test_properties(any_pln): assert hasattr(any_pln, "latent_parameters") assert hasattr(any_pln, "latent_variables") @@ -24,7 +24,7 @@ def test_print(any_pln): @pytest.mark.parametrize("any_pln", dict_fixtures["fitted_pln"]) -@filter_models(["PLN", "_PLNPCA"]) +@filter_models(["Pln", "PlnPCA"]) def test_show_coef_transform_covariance_pcaprojected(any_pln): any_pln.show() any_pln._plotargs._show_loss() @@ -39,7 +39,7 @@ def test_show_coef_transform_covariance_pcaprojected(any_pln): @pytest.mark.parametrize("sim_pln", dict_fixtures["loaded_and_fitted_pln"]) -@filter_models(["PLN", "_PLNPCA"]) +@filter_models(["Pln", "PlnPCA"]) def test_predict_simulated(sim_pln): if 
sim_pln.nb_cov == 0: assert sim_pln.predict() is None @@ -60,7 +60,7 @@ def test_verbose(any_instance_pln): @pytest.mark.parametrize( "simulated_fitted_any_pln", dict_fixtures["loaded_and_fitted_sim_pln"] ) -@filter_models(["PLN", "_PLNPCA"]) +@filter_models(["Pln", "PlnPCA"]) def test_find_right_covariance(simulated_fitted_any_pln): if simulated_fitted_any_pln.nb_cov == 0: true_covariance = true_sim_0cov["Sigma"] @@ -73,7 +73,7 @@ def test_find_right_covariance(simulated_fitted_any_pln): @pytest.mark.parametrize( "real_fitted_and_loaded_pln", dict_fixtures["loaded_and_fitted_real_pln"] ) -@filter_models(["PLN", "_PLNPCA"]) +@filter_models(["Pln", "PlnPCA"]) def test_right_covariance_shape(real_fitted_and_loaded_pln): assert real_fitted_and_loaded_pln.covariance.shape == (100, 100) @@ -81,7 +81,7 @@ def test_right_covariance_shape(real_fitted_and_loaded_pln): @pytest.mark.parametrize( "simulated_fitted_any_pln", dict_fixtures["loaded_and_fitted_pln"] ) -@filter_models(["PLN", "_PLNPCA"]) +@filter_models(["Pln", "PlnPCA"]) def test_find_right_coef(simulated_fitted_any_pln): if simulated_fitted_any_pln.nb_cov == 2: true_coef = true_sim_2cov["beta"] @@ -92,7 +92,7 @@ def test_find_right_coef(simulated_fitted_any_pln): @pytest.mark.parametrize("pln", dict_fixtures["loaded_and_fitted_pln"]) -@filter_models(["PLN", "_PLNPCA"]) +@filter_models(["Pln", "PlnPCA"]) def test_fail_count_setter(pln): wrong_counts = torch.randint(size=(10, 5), low=0, high=10) with pytest.raises(Exception): @@ -110,7 +110,7 @@ def test__print_end_of_fitting_message(instance): @pytest.mark.parametrize("pln", dict_fixtures["fitted_pln"]) -@filter_models(["PLN", "_PLNPCA"]) +@filter_models(["Pln", "PlnPCA"]) def test_fail_wrong_covariates_prediction(pln): X = torch.randn(pln.n_samples, pln.nb_cov + 1) with pytest.raises(Exception): diff --git a/tests/test_pln_full.py b/tests/test_pln_full.py index a98873536dd581493020e13662ea979db340a7b0..0fa0e7ea26bcd2e9ca451e3541a02af2dea09a9f 100644 --- a/tests/test_pln_full.py +++ b/tests/test_pln_full.py @@ -5,13 +5,13 @@ from tests.utils import filter_models @pytest.mark.parametrize("fitted_pln", dict_fixtures["fitted_pln"]) -@filter_models(["PLN"]) +@filter_models(["Pln"]) def test_number_of_iterations_pln_full(fitted_pln): nb_iterations = len(fitted_pln._elbos_list) assert 50 < nb_iterations < 300 @pytest.mark.parametrize("pln", dict_fixtures["loaded_and_fitted_pln"]) -@filter_models(["PLN"]) +@filter_models(["Pln"]) def test_latent_var_full(pln): assert pln.transform().shape == pln.counts.shape diff --git a/tests/test_plnpca.py b/tests/test_plnpcacollection.py similarity index 72% rename from tests/test_plnpca.py rename to tests/test_plnpcacollection.py index 85f4288517e8253ec5be2997769a2e444790bb96..247335c4db95f792874c7e93da3a5e12860c8152 100644 --- a/tests/test_plnpca.py +++ b/tests/test_plnpcacollection.py @@ -9,14 +9,14 @@ from tests.utils import MSE, filter_models @pytest.mark.parametrize("plnpca", dict_fixtures["loaded_and_fitted_pln"]) -@filter_models(["PLNPCA"]) +@filter_models(["PlnPCAcollection"]) def test_best_model(plnpca): best_model = plnpca.best_model() print(best_model) @pytest.mark.parametrize("plnpca", dict_fixtures["loaded_and_fitted_pln"]) -@filter_models(["PLNPCA"]) +@filter_models(["PlnPCAcollection"]) def test_projected_variables(plnpca): best_model = plnpca.best_model() plv = best_model.projected_latent_variables @@ -24,21 +24,21 @@ def test_projected_variables(plnpca): @pytest.mark.parametrize("fitted_pln", dict_fixtures["fitted_pln"]) 
-@filter_models(["_PLNPCA"]) +@filter_models(["PlnPCA"]) def test_number_of_iterations_plnpca(fitted_pln): nb_iterations = len(fitted_pln._elbos_list) assert 100 < nb_iterations < 5000 @pytest.mark.parametrize("plnpca", dict_fixtures["loaded_and_fitted_pln"]) -@filter_models(["_PLNPCA"]) +@filter_models(["PlnPCA"]) def test_latent_var_pca(plnpca): assert plnpca.transform(project=False).shape == plnpca.counts.shape assert plnpca.transform().shape == (plnpca.n_samples, plnpca.rank) @pytest.mark.parametrize("plnpca", dict_fixtures["loaded_and_fitted_pln"]) -@filter_models(["PLNPCA"]) +@filter_models(["PlnPCAcollection"]) def test_additional_methods_pca(plnpca): plnpca.show() plnpca.BIC @@ -47,10 +47,9 @@ def test_additional_methods_pca(plnpca): @pytest.mark.parametrize("plnpca", dict_fixtures["loaded_and_fitted_pln"]) -@filter_models(["PLNPCA"]) +@filter_models(["PlnPCAcollection"]) def test_viz_pca(plnpca): - models = plnpca.models - for model in models: + for model in plnpca.values(): _, ax = plt.subplots() model.viz(ax=ax) plt.show() @@ -63,14 +62,18 @@ def test_viz_pca(plnpca): @pytest.mark.parametrize("plnpca", dict_fixtures["loaded_and_fitted_pln"]) -@filter_models(["PLNPCA"]) -def test__closest(plnpca): - with pytest.warns(UserWarning): - plnpca[9] - - -@pytest.mark.parametrize("plnpca", dict_fixtures["loaded_and_fitted_pln"]) -@filter_models(["PLNPCA"]) +@filter_models(["PlnPCAcollection"]) def test_wrong_criterion(plnpca): with pytest.raises(ValueError): plnpca.best_model("AIK") + + +@pytest.mark.parametrize("collection", dict_fixtures["loaded_and_fitted_pln"]) +@filter_models(["PlnPCAcollection"]) +def test_item(collection): + print(collection[collection.ranks[0]]) + with pytest.raises(KeyError): + collection[collection.ranks[0] + 50] + assert collection.ranks[0] in collection + assert collection.ranks[0] in list(collection.keys()) + collection.get(collection.ranks[0], None) diff --git a/tests/test_setters.py b/tests/test_setters.py index 727c6ac697d253cf42d8957c4ff2541e926db718..14ad481a165d9754137dd356f897251c2af4d3c3 100644 --- a/tests/test_setters.py +++ b/tests/test_setters.py @@ -15,12 +15,12 @@ def test_data_setter_with_torch(pln): @pytest.mark.parametrize("pln", dict_fixtures["loaded_and_fitted_pln"]) -@filter_models(["PLN", "_PLNPCA"]) +@filter_models(["Pln", "PlnPCA"]) def test_parameters_setter_with_torch(pln): pln.latent_mean = pln.latent_mean pln.latent_var = pln.latent_var pln.coef = pln.coef - if pln._NAME == "_PLNPCA": + if pln._NAME == "PlnPCA": pln.components = pln.components pln.fit() @@ -40,7 +40,7 @@ def test_data_setter_with_numpy(pln): @pytest.mark.parametrize("pln", dict_fixtures["loaded_and_fitted_pln"]) -@filter_models(["PLN", "_PLNPCA"]) +@filter_models(["Pln", "PlnPCA"]) def test_parameters_setter_with_numpy(pln): np_latent_mean = pln.latent_mean.numpy() np_latent_var = pln.latent_var.numpy() @@ -51,7 +51,7 @@ def test_parameters_setter_with_numpy(pln): pln.latent_mean = np_latent_mean pln.latent_var = np_latent_var pln.coef = np_coef - if pln._NAME == "_PLNPCA": + if pln._NAME == "PlnPCA": pln.components = pln.components.numpy() pln.fit() @@ -71,7 +71,7 @@ def test_data_setter_with_pandas(pln): @pytest.mark.parametrize("pln", dict_fixtures["loaded_and_fitted_pln"]) -@filter_models(["PLN", "_PLNPCA"]) +@filter_models(["Pln", "PlnPCA"]) def test_parameters_setter_with_pandas(pln): pd_latent_mean = pd.DataFrame(pln.latent_mean.numpy()) pd_latent_var = pd.DataFrame(pln.latent_var.numpy()) @@ -82,7 +82,7 @@ def test_parameters_setter_with_pandas(pln): 
pln.latent_mean = pd_latent_mean pln.latent_var = pd_latent_var pln.coef = pd_coef - if pln._NAME == "_PLNPCA": + if pln._NAME == "PlnPCA": pln.components = pd.DataFrame(pln.components.numpy()) pln.fit() @@ -113,7 +113,7 @@ def test_fail_data_setter_with_torch(pln): @pytest.mark.parametrize("pln", dict_fixtures["loaded_and_fitted_pln"]) -@filter_models(["PLN", "_PLNPCA"]) +@filter_models(["Pln", "PlnPCA"]) def test_fail_parameters_setter_with_torch(pln): n, dim_latent = pln.latent_mean.shape dim = pln.counts.shape[1] @@ -130,7 +130,7 @@ def test_fail_parameters_setter_with_torch(pln): with pytest.raises(ValueError): pln.latent_var = torch.zeros(n, dim_latent + 1) - if pln._NAME == "_PLNPCA": + if pln._NAME == "PlnPCA": with pytest.raises(ValueError): pln.components = torch.zeros(dim, dim_latent + 1)
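Illustrative usage (editor's sketch, not part of the patch): after the renames
above, a fitted PlnPCAcollection behaves like a dictionary keyed by rank, which
is exactly what the new `test_item` test exercises. Assuming `collection` is an
already-fitted PlnPCAcollection built with the hypothetical ranks [3, 4, 5]:

    assert 3 in collection                   # __contains__, keyed by rank
    model = collection[3]                    # __getitem__; raises KeyError for
                                             # a rank that was never fitted
    fallback = collection.get(50, None)      # dict-style get with a default
    for rank, model in collection.items():   # keys()/values()/items()
        print(rank, model.number_of_parameters)
    best = collection.best_model(criterion="BIC")
    collection.save("./", ranks=[3, 4])      # persists only the chosen ranks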