diff --git a/pyPLNmodels/microcosm.py b/pyPLNmodels/microcosm.py index 7f2575cc1eb33626722aab292fdefeefd9165fce..95d63cf4973de43ed32df5bbcd56e055b5306ac6 100644 --- a/pyPLNmodels/microcosm.py +++ b/pyPLNmodels/microcosm.py @@ -26,9 +26,9 @@ def load_microcosm( Parameters ---------- n_samples : int, optional - Number of samples, by default max_samples. + Number of samples, by default 300. dim : int, optional - Dimension, by default max_dim. + Dimension, by default 200. get_affil: bool, optional (keyword-only) If True, will return the affiliations also. Default to False . cov_list: list, optional (keyword-only). diff --git a/pyPLNmodels/models.py b/pyPLNmodels/models.py index a8deabcc446ece2bb09d46bedfa264c74ac4f1d2..182c599201b5a52ee91a683bc48983167e607793 100644 --- a/pyPLNmodels/models.py +++ b/pyPLNmodels/models.py @@ -13,6 +13,7 @@ import matplotlib.pyplot as plt from sklearn.decomposition import PCA import matplotlib from scipy import stats +from statsmodels.api import OLS from pyPLNmodels._closed_forms import ( _closed_formula_coef, @@ -459,6 +460,53 @@ class _model(ABC): self._print_end_of_fitting_message(stop_condition, tol) self._fitted = True + def summary( + self, + variable_number, + yname: str = None, + xname: list[str] = None, + title: str = None, + alpha: float = 0.05, + slim: bool = False, + ): + """ + Summary from statsmodels on the latent variables. + + Parameters + ---------- + variable_number : int + Index of the latent variable (column) on which the OLS is fitted. + yname : str, Optional + Name of endogenous (response) variable. Default is 'y'. + xname : list[str], Optional + Names for the exogenous variables. Default is var_## for ## + in the number of regressors. + Must match the number of parameters in the model. + + title : str, Optional + Title for the top table. If not None, then this replaces the default title. + alpha : float, optional + The significance level for the confidence intervals. + slim: bool, Optional + Flag indicating whether to produce a reduced set of summary information. Default is False.
+ """ if self.exog is None: print("No exog in the model, can not perform a summary.") else: ols = self._fit_ols(variable_number) return ols.summary( yname=yname, xname=xname, title=title, alpha=alpha, slim=slim ) + + # NOTE: ZIPln overrides _fit_ols because its latent_variables is a tuple. + + def _fit_ols(self, variable_number): return OLS( self.latent_variables.numpy()[:, variable_number], self.exog.numpy(), hasconst=True, ).fit() + @property def fitted(self) -> bool: """ @@ -4577,6 +4623,14 @@ class ZIPln(_model): variables = self.latent_prob return self._viz_variables(variables, colors=colors, ax=ax, show_cov=False) + def _fit_ols(self, variable_number): latent_variables, _ = self.latent_variables return OLS( latent_variables.numpy()[:, variable_number], self.exog.numpy(), hasconst=True, ).fit() + class Brute_ZIPln(ZIPln): @property diff --git a/pyproject.toml b/pyproject.toml index ae5c5f4f3a7806685f09f2f105461cd000deadd3..507915cbf09e6ac1622dac5b13b02bcd64bd0673 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ dependencies = [ "scikit-learn", "patsy", "tqdm", +"statsmodels", ] [project.optional-dependencies] tests = ["pytest","pytest_lazy_fixture"] @@ -70,5 +71,3 @@ include-package-data = true [tool.setuptools.packages.find] include = ["pyPLNmodels", "pyPLNmodels.*"] namespaces = false - - diff --git a/tests/test_common.py b/tests/test_common.py index f4f05d6125afda5643a2d95302c89c38aab4202b..450790df4bcaca78a2062be901edb3c77b859333 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -12,10 +12,8 @@ pln_and_plnpca = ["Pln", "PlnPCA"] single_models = ["Pln", "PlnPCA", "ZIPln"] -# @pytest.mark.parametrize("any_model", dict_fixtures["loaded_and_fitted_model"]) -# @filter_models(single_models) -@pytest.mark.parametrize("any_model", dict_fixtures["loaded_model"]) -@filter_models(["ZIPln"]) +@pytest.mark.parametrize("any_model", dict_fixtures["loaded_and_fitted_model"]) +@filter_models(single_models) def 
test_properties(any_model): assert hasattr(any_model, "latent_parameters") assert hasattr(any_model, "latent_variables") @@ -25,7 +23,7 @@ def test_properties(any_model): @pytest.mark.parametrize("sim_model", dict_fixtures["loaded_and_fitted_model"]) @filter_models(pln_and_plnpca) -def test_predict_simulated(sim_model): +def test_predict_simulated_and_summary(sim_model): if sim_model.nb_cov == 0: assert sim_model.predict() is None with pytest.raises(AttributeError): @@ -35,6 +33,7 @@ def test_predict_simulated(sim_model): prediction = sim_model.predict(X) expected = X @ sim_model.coef assert torch.all(torch.eq(expected, prediction)) + print(sim_model.summary(variable_number=2)) @pytest.mark.parametrize("any_instance_model", dict_fixtures["instances"])