# Source code for susi.SOMRegressor

"""SOMRegressor class."""

import numpy as np
from sklearn.base import RegressorMixin

from .SOMEstimator import SOMEstimator



# [docs]
class SOMRegressor(RegressorMixin, SOMEstimator):
    """Supervised SOM for estimating continuous variables (= regression).

    Parameters
    ----------
    n_rows : int, optional (default=10)
        Number of rows for the SOM grid

    n_columns : int, optional (default=10)
        Number of columns for the SOM grid

    init_mode_unsupervised : str, optional (default="random")
        Initialization mode of the unsupervised SOM

    init_mode_supervised : str, optional (default="random")
        Initialization mode of the supervised SOM. Possible modes:
        {"random", "random_data", "random_minmax"}.

    n_iter_unsupervised : int, optional (default=1000)
        Number of iterations for the unsupervised SOM

    n_iter_supervised : int, optional (default=1000)
        Number of iterations for the supervised SOM

    train_mode_unsupervised : str, optional (default="online")
        Training mode of the unsupervised SOM

    train_mode_supervised : str, optional (default="online")
        Training mode of the supervised SOM

    neighborhood_mode_unsupervised : str, optional (default="linear")
        Neighborhood mode of the unsupervised SOM

    neighborhood_mode_supervised : str, optional (default="linear")
        Neighborhood mode of the supervised SOM

    learn_mode_unsupervised : str, optional (default="min")
        Learning mode of the unsupervised SOM

    learn_mode_supervised : str, optional (default="min")
        Learning mode of the supervised SOM

    distance_metric : str, optional (default="euclidean")
        Distance metric to compare on feature level (not SOM grid).
        Possible metrics: {"euclidean", "manhattan", "mahalanobis",
        "tanimoto", "spectralangle"}. Note that "tanimoto" tends to be slow.

        .. versionadded:: 1.1.1
            Spectral angle metric.

    learning_rate_start : float, optional (default=0.5)
        Learning rate start value

    learning_rate_end : float, optional (default=0.05)
        Learning rate end value (only needed for some lr definitions)

    nbh_dist_weight_mode : str, optional (default="pseudo-gaussian")
        Formula of the neighborhood distance weight. Possible formulas
        are: {"pseudo-gaussian", "mexican-hat"}.

    missing_label_placeholder : int or str or None, optional (default=None)
        Label placeholder for datapoints with no label. This is needed for
        semi-supervised learning.

    n_jobs : int or None, optional (default=None)
        The number of jobs to run in parallel.

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    verbose : int, optional (default=0)
        Controls the verbosity.

    Attributes
    ----------
    node_list_ : np.ndarray of (int, int) tuples
        List of 2-dimensional coordinates of SOM nodes

    radius_max_ : float, int
        Maximum radius of the neighborhood function

    radius_min_ : float, int
        Minimum radius of the neighborhood function

    unsuper_som_ : np.ndarray
        Weight vectors of the unsupervised SOM
        shape = (self.n_rows, self.n_columns, X.shape[1])

    super_som_ : np.ndarray
        Weight vectors of the supervised (regression) SOM
        shape = (self.n_rows, self.n_columns, self.n_regression_vars_)

    X_ : np.ndarray
        Input data

    fitted_ : bool
        States if estimator is fitted to X

    max_iterations_ : int
        Maximum number of iterations for the current training

    bmus_ : list of (int, int) tuples
        List of best matching units (BMUs) of the dataset X

    sample_weights_ : np.ndarray
        Sample weights.

    n_regression_vars_ : int
        Number of regression variables. In most examples, this equals one.

    n_features_in_ : int
        Number of input features

    """

    def _init_super_som(self) -> None:
        """Initialize map for regression.

        Sets `max_iterations_` to `n_iter_supervised`, derives
        `n_regression_vars_` from the shape of `y_` and stores the freshly
        initialized regression map in `super_som_` with
        shape = (self.n_rows, self.n_columns, self.n_regression_vars_).

        The initialization depends on `init_mode_supervised`:

        - "random": values drawn with `np.random.rand`.
        - "random_data": every node is set to a randomly drawn (with
          replacement) row of `y_[labeled_indices_]`.
        - "random_minmax": values drawn uniformly between the overall
          minimum and maximum of `y_[labeled_indices_]`.

        All draws use the global `np.random` generator.

        Raises
        ------
        ValueError
            If `init_mode_supervised` is none of the modes listed above.

        """
        self.max_iterations_ = self.n_iter_supervised

        # A 1-dimensional target holds exactly one regression variable;
        # otherwise each column of the target is one regression variable.
        if len(self.y_.shape) == 1:
            self.n_regression_vars_ = 1
        else:
            self.n_regression_vars_ = self.y_.shape[1]

        som_shape = (self.n_rows, self.n_columns, self.n_regression_vars_)

        # initialize regression SOM
        if self.init_mode_supervised == "random":
            som = np.random.rand(*som_shape)

        elif self.init_mode_supervised == "random_data":
            # Index once: fancy indexing copies the selected rows.
            labeled_targets = self.y_[self.labeled_indices_]
            indices = np.random.randint(
                low=0,
                high=labeled_targets.shape[0],
                size=self.n_rows * self.n_columns,
            )
            # Reshape with n_regression_vars_ instead of y_.shape[1] so that
            # a 1-dimensional target does not fail with an IndexError.
            som = labeled_targets[indices].reshape(som_shape)

        elif self.init_mode_supervised == "random_minmax":
            labeled_targets = self.y_[self.labeled_indices_]
            # Bounds are the global extrema over all regression variables.
            som = np.random.uniform(
                low=np.min(labeled_targets),
                high=np.max(labeled_targets),
                size=som_shape,
            )

        else:
            raise ValueError(
                f"Invalid init_mode_supervised: {self.init_mode_supervised}"
            )

        self.super_som_ = som