Source code for quasildr.graphdr

# -*- coding: utf-8 -*-
"""
This module implements the quaslinear representation method GraphDR and relevant
functions. The method is implemented in the graphdr function. A scikit-learn
Estimator-like interface to graphdr is also provided through the GraphDR class.
"""
import os

import scipy
import numpy as np

from scipy.sparse import csgraph, csr_matrix
from multiprocess import Pool
from scipy.sparse import issparse
from sklearn.decomposition import PCA
from sklearn.base import BaseEstimator
from sklearn.neighbors import kneighbors_graph
from sklearn.exceptions import NotFittedError

from .external import neighbors


[docs]def knn_graph(X, n_neighbors=15, space='l2', space_params=None, method='hnsw', num_threads=8, params={'post': 2},
              verbose=False):
    """Construct an approximate K-nearest neighbors graph with nmslib.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        A input 2D array to construct KNN graph on. Each row represent a sample.
    n_neighbors : int, optional
        Default is `15`. The number of nearest neighbors per sample.
    space : string, optional
        Default is `l2`. The metric/non-metric distance functions to use to compute distances.
        Note that l2 corresponds to Euclidean distance.
        see https://github.com/nmslib/nmslib/blob/master/manual/manual.pdf
            * bit_hamming
            * l1
            * l1_sparse
            * l2, euclidean
            * l2_sparse
            * linf
            * linf_sparse
            * lp:p=...
            * lp_sparse:p=...
            * angulardist, angulardist sparse, angulardist sparse fast
            * jsmetrslow, jsmetrfast, jsmetrfastapprox
            * leven
            * sqfd minus func, sqfd heuristic func:alpha=..., sqfd gaussian func:alpha=...
            * jsdivslow, jsdivfast, jsdivfastapprox
            * cosinesimil, cosine, cosinesimil sparse
            * normleven
            * kldivfast
            * kldivgenslow, kldivgenfast, kldivgenfastrq
            * itakurasaitoslow, itakurasaitofast, itakurasaitofastrq
            * negdotprod_sparse
            * querynorm_negdotprod_sparse
            * renyi_diverg
            * ab_diverg
    space_params : dict or None, optional
        Parameters for configuring the space.
    method : str
        The nmslib method to use. Default is `hnsw`.
    num_threads : int, optional
        Default is `8`. Number of threads to run nmslib with.
    params : dict, optional
        Default is `{'post': 2}`. Input parameters to nmslib index construction.
        See https://github.com/nmslib/nmslib/blob/master/manual/methods.md for details.
    verbose: bool, optional
        Default is False. Print extra information.

    Returns
    -------
    scipy.sparse.csr_matrix
        Sparse matrix representing the constructed KNN graph.
    """

    import nmslib
    if space == 'euclidean':
        space = 'l2'
    elif space == 'manhattan':
        space = 'l1'
    elif space == 'cosine':
        space = 'cosinesimil'

    index = nmslib.init(method=method, space=space, space_params=space_params)
    index.addDataPointBatch(X)
    index.createIndex(params, print_progress=False)
    neighbors = index.knnQueryBatch(X, k=n_neighbors, num_threads=num_threads)

    ind = np.vstack([i for i, d in neighbors])
    sind = np.repeat(np.arange(ind.shape[0]), ind.shape[1])
    tind = ind.flatten()
    g = csr_matrix((np.ones(ind.shape[0] * ind.shape[1]), (sind, tind)), (ind.shape[0], ind.shape[0]))
    return g


[docs]def cg(A, B, x0=None, tol=1e-7, atol=1e-15, use_cuda=False):
    """
    Solve X in linear equation AX = B when A is sparse with conjugate gradient method.
    Support GPU with use_cuda=True.

    Parameters
    ----------
    A : sparse matrix or array
        A N-by-N matrix of the linear system.
    B : array
        Right hand side of the linear system.
    x0 : array or None, optional
        If specified, initialize the solution with x0.
    tol, atol : float, optional
        Tolerances for convergence `norm(residual) <= max(tol*norm(b), atol)`.
        Default is `tol=1e-7, atol=1e-15`.
    use_cuda : bool, optional
        Use GPU acceleration with CUDA. A GPU supporting CUDA is needed and CuPy
        package need to be installed.
        Default is `False`.

    Returns
    ----------
    array
        The solution to X.
    """
    if use_cuda:
        import cupy
        alg = cupy
        A = alg.sparse.csr_matrix(A)
        B = alg.array(B)
    else:
        alg = np

    atol = alg.maximum(alg.sqrt(alg.sum(B * B, axis=0)) * tol, atol)
    if not x0:
        X = alg.ones((A.shape[1], B.shape[1]))
    else:
        X = x0
    R = A.dot(X) - B
    P = - R
    r_k_norms = alg.sum(R * R, axis=0)
    while True:
        AP = A.dot(P)
        alphas = r_k_norms / alg.sum(P * AP, axis=0)
        X += alphas[alg.newaxis, :] * P
        R += alphas[alg.newaxis, :] * AP
        r_kplus1_norms = alg.sum(R * R, axis=0)
        betas = r_kplus1_norms / r_k_norms
        r_k_norms = r_kplus1_norms
        if alg.all(alg.sqrt(r_kplus1_norms) < atol):
            break
        P = betas[alg.newaxis, :] * P - R
    return X.get() if use_cuda else X



[docs]class GraphDR(BaseEstimator):
    """
    Quasilinear data representation that preserves interpretability of a
    linear space.

    This class provides a scikit-learn type interface that wraps around
    the graphdr function.

    Parameters
    ----------
    n_neighbors : int, optional
        Number of nearest neigbors to compute per sample. Default is `10`.
    regularization : float, optional
        Regularization parameter determines the weight on the graph shrinkage term
        relative to the reconstruction error term. A smaller regularization parameter
        leads to output that are more similar to a linear representation. A larger
        regularization parameter provides more contraction based on the graph. Default is `100`.
    n_components : int or None, optional
        Number of dimensions in the output. Specify a n_components smaller than the input
        dimension when `no_rotation=True` can reduce the amount of computation.
        Default is `None`.
    no_rotation : bool, optional
        :math:`W` is fixed to identity matrix so that the output is not rotated
        relative to input. no_rotation can also speed up the computation for large
        input when `n_components` is also specified, because only the required top
        `n_components` are computed. If `no_rotation=True`, you may consider preprocessing
        the input with another linear dimensionality reduction method like PCA.
        Default is `False`.
    metric : str, optional
        distance metric for constructing nearest_neighbors graph. Note that here
        `metric` can be non-metric distance function. The allowed values depend on
        the `method` specified. As three nearest neighbors algorithms are supported,
        and these algorithms support different distance metrics. Default is `euclidean`.
        * 'small' :
            Use sklearn.neighbors.kneighbors_graph
            Supported values are
                * euclidean
                * manhattan
                * chebyshev
                * minkowski
                * wminkowski
                * seuclidean
                * mahalanobis
                * hamming
                * canberra
                * braycurtis
            See https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.DistanceMetric.html
        * 'big_nndescent' or 'big_nnumap' :
            umap.nndescent. Supported values are
                * euclidean
                * manhattan
                * chebyshev
                * minkowski
                * canberra
                * braycurtis
                * mahalanobis
                * wminkowski
                * seuclidean
                * cosine
                * correlation
                * haversine
                * hamming
                * jaccard
                * dice
                * russelrao
                * kulsinski
                * rogerstanimoto
                * sokalmichener
                * sokalsneath
                * yule
        * 'big_nmslib' :
        nmslib. See https://github.com/nmslib/nmslib/blob/master/manual/manual.pdf for details:
                * bit_hamming
                * l1
                * l1_sparse
                * l2, euclidean
                * l2_sparse
                * linf
                * linf_sparse
                * lp:p=...
                * lp_sparse:p=...
                * angulardist, angulardist sparse, angulardist sparse fast
                * jsmetrslow, jsmetrfast, jsmetrfastapprox
                * leven
                * sqfd minus func, sqfd heuristic func:alpha=..., sqfd gaussian func:alpha=...
                * jsdivslow, jsdivfast, jsdivfastapprox
                * cosinesimil, cosine, cosinesimil sparse
                * normleven
                * kldivfast
                * kldivgenslow, kldivgenfast, kldivgenfastrq
                * itakurasaitoslow, itakurasaitofast, itakurasaitofastrq
                * negdotprod_sparse
                * querynorm_negdotprod_sparse
                * renyi_diverg
                * ab_diverg
    metric_params: dict, optional
        Some distance metrics requires additional arguments which can be provided as
        a dictionary. Default is `{}`.
    method: str, optional {'auto', 'small', 'big_nmslib', 'big_umap', 'big_nndescent'}
        Method to use. Different algorithms strongly affect the computation speed.
        Optimal choices are dependent on the input size. Default is `auto`.
        * 'auto'
            Choose automatically based on input size. Recommended for most uses.
        * 'small'
            For input of small size, it is affordable to compute nearest-neigbors exactly
            and use matrix inversion in the algorithm.
        * 'big_nmslib'
            For input of large size. If NMSlib is installed, it is the fastest algorithm that
            is supported.
        * 'big_nndescent'
            For input of large size. The NN-Descent algorithm implemented in the the umap package.
            Do not require external libraries.
        * 'big_nnumap'
            For input of large size. Do not require external libraries.
    _lambda : float or None, optional
        Regularization parameter :math:`\\lambda`. If not specified :math:`\\lambda` is
        computed from `regularization`,
        specifying `regularization` instead of `_lambda` is generally recommended as it
        automatically adjusts based on graph density. Default is None.
    symmetrize : bool, optional
        Symmetrize the nearest-neighbors graph by averaging with its transpose. Default is `True`.
    rescale : bool, optional
        Rescale each dimension in the output to align with the input. It works with and without
        `no_rotation`. Default is `False`
    refine_iter : int, optional
        Number of refinement iterations. Default is `3`.
    refine_threshold : float, optional
        Specify the cutoff for refinement. Unit is IQR (distance between the 25% quantile and the 75% quantile)
        of overall "stress" distribution across all edges. Default is `12`.
    _refine_threshold : float or None, optional
        Used internally. Default is None.
    tol, atol : float, optional
        Tolerances for iterative linear solver `norm(residual) <= max(tol*norm(b), atol)`.
        Default is `tol=1e-7, atol=1e-15`.
    nmslib_params : dict, optional
        Parameters given to nmslib createIndex function.
        See https://github.com/nmslib/nmslib/blob/master/manual/methods.md for details.
        Default is `{'post': 2}`.
    n_jobs : int, optional
        Number of jobs to run in parallel. Specifying `n_jobs > 1` can speed up computation
        when method is one of `big_nmslib`, `big_nndescent`, or `big_nnumap`. Note that
        when `use_cuda=True` specifying `n_jobs` does not run the jobs in parallel but
        can reduce amount of GPU memory required by splitting the problem and run sequentially.
        Default is 1.
    n_jobs_nmslib : int, optional
        Number of threads used by nmslib. Default is `8`.
    use_cuda : bool, optional
        Use CUDA GPU acceleration. CuPy has to be installed (https://github.com/cupy/cupy)
        and a CUDA-enabled GPU is required. Default is `False`.
    verbose : bool, optional
        Print additional information. Default is `False`.

    Attributes
    ----------
    embedding_ : array, shape (n_samples, n_components)
    W_ : array, shape (n_features, n_components)
        Right linear operator in the transformation Z = KXW. W_ is None when `no_rotation=True`.
"""

    def __init__(
            self,
            n_neighbors=10, regularization=1000, n_components=None, no_rotation=False,
            metric='l2', metric_params={}, method='auto',
            _lambda=None, symmetrize=True, rescale=False,
            refine_iter=0, refine_threshold=12, _refine_threshold=None,
            tol=1e-7, atol=1e-15, nmslib_params={'post': 2},
            n_jobs=1, n_jobs_nmslib=8, use_cuda=False, verbose=False):
        """
        Initialize parameters
        """
        self.n_neighbors = n_neighbors
        self.n_components = n_components
        self.regularization = regularization
        self._lambda = _lambda
        self.metric = metric
        self.metric_params = metric_params
        self.method = method
        self.symmetrize = symmetrize
        self.rescale = rescale
        self.no_rotation = no_rotation
        self.refine_iter = refine_iter
        self.refine_threshold = refine_threshold
        self._refine_threshold = _refine_threshold
        self.nmslib_params = nmslib_params
        self.tol = tol
        self.atol = atol
        self.verbose = verbose
        self.n_jobs = n_jobs
        self.n_jobs_nmslib = n_jobs_nmslib
        self.use_cuda = use_cuda

        self.W_ = None
        self.embedding_ = None
        self.fitted = False

    def _validate_parameters(self):
        # TODO
        pass

[docs]    def fit(self, X, y=None, custom_graph=None, refine_protected_graph=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : array, shape (n_samples, n_features).
            X may be a sparse matrix. If self.skip_pca is true, X must be orthogonal.
        y : Ignored
        custom_graph : sparse matrix, shape (n_samples, n_samples), optional
            If provided, use it instead of the graph constructed from X.
        refine_protected_graph : sparse matrix

        Returns
        ----------
        self : object
            Returns the instance itself.
        """
        output = graphdr(X, n_neighbors=self.n_neighbors, regularization=self.regularization,
                         metric=self.metric, metric_params=self.metric_params,
                         n_components=self.n_components, no_rotation=self.no_rotation, _lambda=self._lambda,
                         custom_graph=custom_graph, rescale=self.rescale, symmetrize=self.symmetrize,
                         refine_iter=self.refine_iter, refine_threshold=self.refine_threshold,
                         _refine_threshold=self._refine_threshold, refine_protected_graph=refine_protected_graph,
                         tol=self.tol, atol=self.atol, nmslib_params=self.nmslib_params, verbose=self.verbose,
                         n_jobs=self.n_jobs, n_jobs_nmslib=self.n_jobs_nmslib, use_cuda=self.use_cuda,
                         return_all=True)

        if self.no_rotation:
            Z, _ = output
            self.W_ = None
        else:
            Z, W, _ = output
            self.W_ = W

        self.embedding_ = Z
        self.fitted = True
        return self

[docs]    def fit_transform(self, X, y=None, custom_graph=None, refine_protected_graph=None):
        """Fit the model from data in X and transform X.
        Parameters
        ----------
        X : array, shape (n_samples, n_features)
        y : Ignored

        Returns
        ---------
        Z : array, shape (n_samples, n_components)
            Embedding of the input data.
        """
        self.fit(X, y, custom_graph=custom_graph, refine_protected_graph=refine_protected_graph)
        return self.embedding_

[docs]    def transform(self, X, y=None, custom_graph=None, refine_protected_graph=None):
        """Apply fitted model to transform X.
        Parameters
        ----------
        X : array, shape (n_samples, n_features)
        y : Ignored

        Returns
        ---------
        Z : array, shape (n_samples, n_components)
            Embedding of the input data.
        """
        if not self.fitted:
            raise NotFittedError('This GraphDR object has not been fitted yet.')

        Z = graphdr(X, n_neighbors=self.n_neighbors, regularization=self.regularization,
                         metric=self.metric, metric_params=self.metric_params,
                         n_components=self.n_components, no_rotation=True, _lambda=self._lambda, 
                         custom_graph=custom_graph, rescale=self.rescale, symmetrize=self.symmetrize,
                         refine_iter=self.refine_iter, refine_threshold=self.refine_threshold,
                         _refine_threshold=self._refine_threshold, refine_protected_graph=refine_protected_graph,
                         tol=self.tol, atol=self.atol, nmslib_params=self.nmslib_params, verbose=self.verbose,
                         n_jobs=self.n_jobs, n_jobs_nmslib=self.n_jobs_nmslib, use_cuda=self.use_cuda,
                         return_all=False)
                         
        if self.W_ is not None:
            Z = Z @ self.W_
        return Z

[docs]def graphdr(X, n_neighbors=10, regularization=100, n_components=None, no_rotation=False,
            metric='euclidean', metric_params={}, method='auto',
            _lambda=None, init=None, symmetrize=True,
            custom_graph=None, rescale=False, refine_iter=0, refine_threshold=12,
            _refine_threshold=None, refine_protected_graph=None, tol=1e-7, atol=1e-15,
            nmslib_params={'post': 2}, n_jobs=1, n_jobs_nmslib=8, use_cuda=False, verbose=False,
            return_all=False):
    """
    Quasilinear data representation that preserves interpretability of a linear space.

    Parameters
    ----------
    X : array (n_samples, n_features)
        The input array.
    n_neighbors : int, optional
        Number of nearest neigbors to compute per sample. Default is `10`.
    regularization : float, optional
        Regularization parameter determines the weight on the graph shrinkage term
        relative to the reconstruction error term. A smaller regularization parameter
        leads to output that are more similar to a linear representation. A larger
        regularization parameter provides more contraction based on the graph. Default is `100`.
    n_components : int or None, optional
        Number of dimensions in the output. Specify a n_components smaller than the input
        dimension when `no_rotation=True` can reduce the amount of computation.
        Default is `None`.
    no_rotation : bool, optional
        :param refine_protected_graph:
        :math:`W` is fixed to identity matrix so that the output is not rotated
        relative to input. no_rotation can also speed up the computation for large
        input when `n_components` is also specified, because only the required top
        `n_components` are computed. If `no_rotation=True`, you may consider preprocessing
        the input with another linear dimensionality reduction method like PCA.
        Default is `False`.
    metric : str, optional
        distance metric for constructing nearest_neighbors graph. Note that here
        `metric` can be non-metric distance function. The allowed values depend on
        the `method` specified. As three nearest neighbors algorithms are supported,
        and these algorithms support different distance metrics. Default is `euclidean`.
        * 'small':
            Use sklearn.neighbors.kneighbors_graph
            Supported values are
                * euclidean
                * manhattan
                * chebyshev
                * minkowski
                * wminkowski
                * seuclidean
                * mahalanobis
                * hamming
                * canberra
                * braycurtis
            See https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.DistanceMetric.html
        * 'big_nndescent' or 'big_nnumap':
            umap.nndescent. Supported values are
                * euclidean
                * manhattan
                * chebyshev
                * minkowski
                * canberra
                * braycurtis
                * mahalanobis
                * wminkowski
                * seuclidean
                * cosine
                * correlation
                * haversine
                * hamming
                * jaccard
                * dice
                * russelrao
                * kulsinski
                * rogerstanimoto
                * sokalmichener
                * sokalsneath
                * yule
        * 'big_nmslib'
        nmslib. See https://github.com/nmslib/nmslib/blob/master/manual/manual.pdf for details:
                * bit_hamming
                * l1
                * l1_sparse
                * l2, euclidean
                * l2_sparse
                * linf
                * linf_sparse
                * lp:p=...
                * lp_sparse:p=...
                * angulardist, angulardist sparse, angulardist sparse fast
                * jsmetrslow, jsmetrfast, jsmetrfastapprox
                * leven
                * sqfd minus func, sqfd heuristic func:alpha=..., sqfd gaussian func:alpha=...
                * jsdivslow, jsdivfast, jsdivfastapprox
                * cosinesimil, cosine, cosinesimil sparse
                * normleven
                * kldivfast
                * kldivgenslow, kldivgenfast, kldivgenfastrq
                * itakurasaitoslow, itakurasaitofast, itakurasaitofastrq
                * negdotprod_sparse
                * querynorm_negdotprod_sparse
                * renyi_diverg
                * ab_diverg
    metric_params: dict, optional
        Some distance metrics requires additional arguments which can be provided as
        a dictionary. Default is `{}`.
    method: str, optional {'auto', 'small', 'big_nmslib', 'big_umap', 'big_nndescent'}
        Method to use. Different algorithms strongly affect the computation speed.
        Optimal choices are dependent on the input size. Default is `auto`.
        * 'auto'
            Choose automatically based on input size. Recommended for most uses.
        * 'small'
            For input of small size, it is affordable to compute nearest-neigbors exactly
            and use matrix inversion in the algorithm.
        * 'big_nmslib'
            For input of large size. If NMSlib is installed, it is the fastest algorithm that
            is supported.
        * 'big_nndescent'
            For input of large size. The NN-Descent algorithm implemented in the the umap package.
            Do not require external libraries.
        * 'big_nnumap'
            For input of large size. Do not require external libraries.
    _lambda : float or None, optional
        Regularization parameter :math:`\\lambda`. If not specified :math:`\\lambda` is
        computed from `regularization`,
        specifying `regularization` instead of `_lambda` is generally recommended as it
        automatically adjusts based on graph density. Default is None.
    init : array or None, optional
        Initialize output representation Z with this array if solved through iterative solver.
        `init` is only used when `no_rotation=True` and method is not `small`.
        Default is None.
    symmetrize : bool, optional
        Symmetrize the nearest-neighbors graph by averaging with its transpose. Default is `True`.
    custom_graph : sparse matrix or None, optional
        If specified, use this user-provided graph instead of constructing nearest-neighbors
        graph from `X`. Default is None.
    rescale : bool, optional
        Rescale each dimension in the output to align with the input. It works with and without
        `no_rotation`. Default is `False`
    refine_iter : int, optional
        Number of refinement iterations. Default is `3`.
    refine_threshold : float, optional
        Specify the cutoff for refinement. Unit is IQR (distance between the 25% quantile and the 75% quantile)
        of overall "stress" distribution across all edges. Default is `12`.
    _refine_threshold : float or None, optional
        Used internally. Default is None.
    refine_protected_graph : sparse matrix or None, optional
        Sparse matrix of size (n_samples, n_samples).
        Specify edges (any non-zero edges in refine_protected_graph) that will
        not be removed during refinement. Useful for graph construction for complex experimental designs
        like batch design so that edges connecting batches will not be removed. Default is `None`.
    tol, atol : float, optional
        Tolerances for iterative linear solver `norm(residual) <= max(tol*norm(b), atol)`.
        Default is `tol=1e-7, atol=1e-15`.
    nmslib_params : dict, optional
        Parameters given to nmslib createIndex function.
        See https://github.com/nmslib/nmslib/blob/master/manual/methods.md for details.
        Default is `{'post': 2}`.
    n_jobs : int, optional
        Number of jobs to run in parallel. Specifying `n_jobs > 1` can speed up computation
        when method is one of `big_nmslib`, `big_nndescent`, or `big_nnumap`. Note that
        when `use_cuda=True` specifying `n_jobs` does not run the jobs in parallel but
        can reduce amount of GPU memory required by splitting the problem and run sequentially.
        Default is 1.
    n_jobs_nmslib : int, optional
        Number of threads used by nmslib. Default is `8`.
    use_cuda : bool, optional
        Use CUDA GPU acceleration. CuPy has to be installed (https://github.com/cupy/cupy)
        and a CUDA-enabled GPU is required. Default is `False`.
    verbose : bool, optional
        Print additional information. Default is `False`.
    return_all : bool, optional
        Return a tuple of (Z, W, graph). Default is `False`.

    Returns
    -------
    Z: array, (optional) W: array, (optional) Graph: sparse matrix
    * Z : Embedding of the input data.
    * W : Linear operator on the feature space, returned if `return_all==True`.
    * graph : The graph used for GraphDR embedding, returned if `return_all==True`.

    """
    if method == 'auto':
        if X.shape[0] < 10000:
            method = 'small'
        else:
            try:
                import nmslib
                method = 'big_nmslib'
            except ImportError:
                method = 'big_nndescent'

    X_mean = X.mean(axis=0)

    if method == 'small':
        if custom_graph is None:
            graph = kneighbors_graph(np.asarray(X), int(n_neighbors), metric=metric,
                                     metric_params=metric_params, include_self=False)
        else:
            graph = custom_graph

        if symmetrize:
            graph = 0.5 * (graph + graph.T)

        graphL = csgraph.laplacian(graph)
        if _lambda is None:
            _lambda = regularization / (graph.sum(axis=1).mean() / (X.shape[0] / 10000))
        G = scipy.sparse.eye(X.shape[0]) + _lambda * graphL
        if issparse(G):
            Ginv = np.linalg.inv(G.todense())
        else:
            Ginv = np.linalg.inv(G)

        if no_rotation:
            if n_components:
                try:
                    X = X[:, :n_components]
                    X_mean = X_mean[:n_components]
                except:
                    raise ValueError('n_components value is not valid.')
            Z = np.asarray(np.dot(X.T, Ginv).T)
        else:
            C = np.dot(np.dot(X.T, Ginv), X)
            _, W = np.linalg.eigh(C)
            W = np.array(W)
            W = W[:, ::-1]
            if n_components:
                try:
                    W = W[:, :n_components]
                except:
                    raise ValueError('n_components value is not valid.')
            Z = np.asarray(np.dot(np.dot(W.T, X.T), Ginv).T)

        graph = scipy.sparse.csr_matrix(graph)
        graph.eliminate_zeros()

    else:
        if custom_graph is None:
            if method == 'big_nmslib':
                graph = knn_graph(X, n_neighbors=int(n_neighbors), num_threads=n_jobs_nmslib, space=metric,
                                  params=nmslib_params, space_params=metric_params, verbose=verbose)
            elif method == 'big_nnumap':
                graph = \
                neighbors.neighbors(X, n_neighbors=int(n_neighbors), metric=metric, metric_kwds=metric_params)[0]
            elif method == 'big_nndescent':
                graph = \
                neighbors.neighbors(X, n_neighbors=int(n_neighbors), metric=metric, metric_kwds=metric_params,
                                             smoothknn=False)[0]
            else:
                raise ValueError('Unrecognized method {}' % method)
        else:
            graph = custom_graph

        if symmetrize:
            graph = 0.5 * (graph + graph.T)

        graphL = csgraph.laplacian(graph)

        if _lambda is None:
            _lambda = regularization / (graph.sum(axis=1).mean() / (X.shape[0] / 10000))

        G = scipy.sparse.eye(X.shape[0]) + _lambda * graphL

        def msolve(A, Y, init=None):
            if use_cuda:
                Z = np.asarray(np.hstack(list(
                    map(lambda y: cg(A, y, x0=None, tol=tol, atol=atol, use_cuda=True), np.split(Y, n_jobs, axis=1)))))
            else:
                if n_jobs <= 1:
                    Z = np.asarray(np.hstack([scipy.sparse.linalg.cg(A, Y[:, i],
                                                                     x0=init[:, i] if init is not None else None,
                                                                     tol=tol, atol=atol)[0][:, np.newaxis] for i in
                                              range(Y.shape[1])]))
                else:
                    p = Pool(n_jobs)
                    try:
                        if init is None:
                            Z = np.asarray(np.hstack(list(
                                p.map(lambda y: cg(A, y, x0=None, tol=tol, atol=atol, use_cuda=False),
                                      np.split(Y, Y.shape[1], axis=1)))))
                        else:
                            Z = np.asarray(np.hstack(list(
                                p.map(lambda y, init_: cg(A, y, x0=init_, tol=tol, atol=atol, use_cuda=False),
                                      zip(np.split(Y, Y.shape[1], axis=1), np.split(init, init.shape[1], axis=1))))))
                        p.close()
                        p.join()
                    except KeyboardInterrupt:
                        print("Caught KeyboardInterrupt, terminating workers")
                        p.terminate()
                        p.join()
                        raise
            return Z

        if no_rotation:
            if n_components:
                try:
                    X = X[:, :n_components]
                    X_mean = X_mean[:n_components]
                except:
                    raise ValueError('n_components value is not valid.')
            Z = msolve(G, X, init=init)
        else:
            GinvX = msolve(G, X)
            C = np.dot(X.T, GinvX)
            lambdas, W = np.linalg.eigh(C)
            W = np.array(W)
            W = W[:, ::-1]
            if n_components:
                try:
                    W = W[:, :n_components]
                except:
                    raise ValueError('n_components value is not valid.')
            Z = np.dot(GinvX, W)




    # refine the graph by deleting edges with high stress
    if refine_iter > 0:
        if refine_protected_graph is not None:
            frozen_bool = np.isin(graph.nonzero()[0] * graph.shape[0] + graph.nonzero()[1],
                                  refine_protected_graph.nonzero()[0] * refine_protected_graph.shape[0] + refine_protected_graph.nonzero()[1])
        distZ = np.sqrt(np.sum((Z[graph.nonzero()[0], :] - Z[graph.nonzero()[1], :]) ** 2, axis=1))
        stress = distZ
        if verbose:
            print("Refinement round %d start..." % (refine_iter - 1))
            print("Number of starting edges: %d" % np.sum(graph.data > 0))

        if _refine_threshold is None:
            q1, q3 = np.percentile(stress, [25, 75])
            iqr = q3 - q1
            _refine_threshold = q3 + (iqr * refine_threshold)

        if refine_protected_graph is not None:
            graph.data[frozen_bool] = graph.data[frozen_bool] * ((stress < _refine_threshold)[frozen_bool])
        else:
            graph.data = graph.data * (stress < _refine_threshold)
        graph.eliminate_zeros()
        if verbose:
            print("Number of refined edges: %d" % np.sum(graph.data > 0))

        n_components, labels = scipy.sparse.csgraph.connected_components(graph)
        component_sizes = np.bincount(labels)
        if np.any(component_sizes <= 5):
            # largest_component = np.where(component_sizes == component_sizes.max())[0][0]
            if verbose:
                print("Connected components sizes: %s" % str(component_sizes))
            large_components = np.isin(labels, np.where(component_sizes > 5)[0])
            if np.mean(large_components) < 0.95:
                if verbose:
                    print("Connected component(s) contains few than 95 percent of all data points. Stop refinement.")
            else:
                from sklearn.neighbors import NearestNeighbors
                _, fillinds = NearestNeighbors(n_neighbors=1).fit(X[large_components, :]).kneighbors(
                    X[~large_components, :])
                g_reconnect = scipy.sparse.csr_matrix((np.repeat(2, len(fillinds)), (
                    np.where(~large_components)[0], np.where(large_components)[0][fillinds[:, 0]])), graph.shape)
                graph = graph + g_reconnect
                if verbose:
                    print("Number of reconnection edges: %d" % np.sum(g_reconnect.data > 0))
                Z = graphdr(X, custom_graph=graph, init=Z, _lambda=_lambda, method=method, refine_iter=refine_iter - 1,
                            refine_threshold=refine_threshold, _refine_threshold=_refine_threshold,
                            no_rotation=no_rotation, rescale=False, n_jobs_nmslib=n_jobs_nmslib, n_jobs=n_jobs,
                            use_cuda=use_cuda)
        else:
            if verbose:
                print("Number of reconnection edges: %d" % 0)
            Z = graphdr(X, custom_graph=graph, init=Z, _lambda=_lambda, method=method, refine_iter=refine_iter - 1,
                        refine_threshold=refine_threshold, _refine_threshold=_refine_threshold,
                        no_rotation=no_rotation, rescale=False, n_jobs_nmslib=n_jobs_nmslib, n_jobs=n_jobs,
                        use_cuda=use_cuda)


    if rescale:
        if no_rotation:
            scale_factor = (np.mean(X * Z, axis=0) - X.mean(axis=0) * Z.mean(axis=0)) / (
                        np.mean(Z ** 2, axis=0) - np.mean(Z, axis=0) ** 2)
            Z = Z * scale_factor[np.newaxis, :]
            Z = Z - Z.mean(axis=0)[np.newaxis, :] + X_mean[np.newaxis, :]
        else:
            XW = np.dot(X, W)
            scale_factor = (np.mean(XW * Z, axis=0) - XW.mean(axis=0) * Z.mean(axis=0)) / (
                        np.mean(Z ** 2, axis=0) - np.mean(Z, axis=0) ** 2)
            Z = Z * scale_factor[np.newaxis, :]
            Z = Z - Z.mean(axis=0)[np.newaxis, :]

    if return_all:
        if no_rotation:
            return Z, graph
        else:
            return Z, W, graph
    else:
        return Z