Source code for crispy.scms

"""
Subspace Constrained Mean Shift (SCMS) algorithm for density ridge estimation.

This module provides functions to identify density ridges in high-dimensional data using the SCMS algorithm,
including support for parallel computation and filtering to improve efficiency. Core functionalities include
walker initialization, Gaussian kernel evaluation, ridge-shifting processes, and multiprocessing utilities.
"""

import numpy as np
from joblib import Parallel, delayed, cpu_count
import time
import sys

#======================================================================================================================#


[docs]
def find_ridge(X, G, D=3, h=1, d=1, eps=1e-6, maxT=1000, weights=None, converge_frac=99, ncpu=None,
               return_unconverged=True, f_h=5):
    """
    Identify density ridges in data using the Subspace Constrained Mean Shift (SCMS) algorithm.

    This function iteratively shifts walkers towards the density ridges of the input data by computing
    local density estimates and projecting onto the subspace of interest.

    Parameters
    ----------
    X : ndarray
        Coordinates of the data points, shape (n, D, 1).

    G : ndarray
        Initial coordinates of the walkers, shape (m, D, 1).

    D : int, optional, default=3
        Dimensionality of the data points.

    h : float, optional, default=1
        Smoothing bandwidth for the Gaussian kernel.

    d : int, optional, default=1
        Number of dimensions to retain in the ridge subspace.

    eps : float, optional, default=1e-6
        Convergence criterion. The maximum allowable error for a walker to be considered converged.

    maxT : int, optional, default=1000
        Maximum number of iterations.

    weights : ndarray, optional
        Weights for the data points. If `None`, all points are equally weighted.

    converge_frac : float, optional, default=99
        Fraction of walkers that must converge to terminate the algorithm, expressed as a percentage.

    ncpu : int, optional
        Number of CPUs to use for parallel processing. Defaults to the number of available CPUs.

    return_unconverged : bool, optional, default=True
        If True, returns both converged and unconverged walkers. Otherwise, only converged walkers are returned.

    f_h : float, optional, default=5
        Factor for filtering data points based on their distance to walkers. Points farther than `f_h * h`
        are excluded to reduce computational overhead.

    Returns
    -------
    G_converged : ndarray
        Coordinates of the converged walkers.

    G_unconverged : ndarray, optional
        Coordinates of unconverged walkers, returned only if `return_unconverged=True`.

    Notes
    -----
    - The algorithm stops when either the fraction of converged walkers meets `converge_frac`
      or the maximum number of iterations (`maxT`) is reached.
    - The SCMS algorithm leverages Gaussian kernels to compute local density estimates and iteratively
      shifts walkers toward regions of high density.

    Examples
    --------
    Find ridges in a dataset with 3D coordinates:

    >>> import numpy as np
    >>> from crispy import scms
    >>> data = np.random.random((100, 3, 1))  # Random 3D data
    >>> walkers = np.random.random((20, 3, 1))  # Random walker positions
    >>> ridges, unconverged = scms.find_ridge(data, walkers, h=0.5, maxT=500)
    """
    # Convert data to float32 for efficiency
    G = G.astype(np.float32)
    X = X.astype(np.float32)
    h = np.float32(h)
    eps = np.float32(eps)

    if weights is None:
        weights = np.float32(1)
    else:
        weights = weights.astype(np.float32)
    converge_frac = np.float32(converge_frac)

    n = len(X)
    m = len(G)
    t = 0

    error = np.full(m, 1e+08, dtype=np.float32)

    print("==========================================================================")
    print(f"Starting the run. Number of data points: {n}, Number of walkers: {m}")
    print("==========================================================================")

    # Start timing
    start_time = time.time()
    last_print_time = start_time

    pct_error = np.percentile(error, converge_frac)
    ncpu = cpu_count() if ncpu is None else ncpu

    while ((pct_error > eps) & (t < maxT)):
        # Loop through iterations
        t += 1

        itermask = error > eps
        GjList = G[itermask]

        # Filter out data points too far away to save computation time
        X, c, weights, dist = wgauss_n_filtered_points_multiproc(X, GjList, h, weights, f_h=f_h, ncpu=ncpu)

        mask = dist < f_h * h

        ni, mi = len(X), len(GjList)

        current_time = time.time()
        if current_time - last_print_time >= 1:
            elapsed_time = current_time - start_time
            formatted_time = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))
            sys.stdout.write(
                f"\rIteration {t} | Data points: {ni} | Walkers remaining: {mi}/{m} ({100 - mi / m * 100:0.1f}% complete) | {converge_frac}-percentile error: {pct_error:0.3f} | Total run time: {formatted_time}")
            sys.stdout.flush()

        GRes, errorRes = shift_wakers_multiproc(GjList, X, h, d, c, mask, ncpu)
        G[itermask], error[itermask]  = GRes, errorRes

        pct_error = np.percentile(error, converge_frac)

    sys.stdout.write("\n")
    mask = error < eps

    ncpu = cpu_count() if ncpu is None else ncpu
    print("the number of cpu used: ", ncpu)

    if return_unconverged:
        return G[mask], G[~mask]
    else:
        return G[mask]




[docs]
def wgauss_n_filtered_points(X, G, h, weights, f_h=5):
    """
    Compute weighted Gaussian values for data points relative to walker positions,
    filtering out distant points to optimize computation.

    This function calculates the Gaussian weights for data points (`X`) centered at walker
    positions (`G`) using a Gaussian kernel with bandwidth `h`. Data points farther than
    `f_h * h` from all walkers are excluded to reduce computational cost.

    Parameters
    ----------
    X : ndarray
        Coordinates of the data points, shape (n, D, 1), where `n` is the number of points
        and `D` is the dimensionality.

    G : ndarray
        Coordinates of the walkers, shape (m, D, 1), where `m` is the number of walkers.

    h : float
        Smoothing bandwidth of the Gaussian kernel.

    weights : ndarray
        Weights of the data points, shape (n,).

    f_h : float, optional, default=5
        Distance multiplier cutoff for filtering points. Data points farther than
        `f_h * h` from all walkers are excluded.

    Returns
    -------
    X_filtered : ndarray
        Filtered coordinates of the data points, shape (k, D, 1), where `k` is the number of
        points that passed the filtering.

    c : ndarray
        Weighted Gaussian values for each data point, shape (k,).

    weights_filtered : ndarray
        Filtered weights corresponding to `X_filtered`, shape (k,).

    dist : ndarray
        Distances between remaining data points and walker positions, shape (m, k).

    Notes
    -----
    - The filtering step significantly reduces the number of data points to consider,
      which improves the efficiency of subsequent calculations.

    Examples
    --------
    Filter and compute Gaussian weights for a dataset:

    >>> import numpy as np
    >>> from crispy import scms
    >>> data = np.random.random((100, 3, 1))  # 3D data points
    >>> walkers = np.random.random((10, 3, 1))  # 3D walker positions
    >>> weights = np.ones(100)  # Equal weights for data points
    >>> X_filtered, c, weights_filtered, dist = scms.wgauss_n_filtered_points(data, walkers, h=0.5, weights=weights)
    """
    # Find data points that are too far from walkers
    dist, diff = euclidean_dist(X, G)
    toofar = np.all(dist > f_h * h, axis=0)

    # Filter out distant data
    X = X[~toofar, :, :]
    diff = diff[:, ~toofar, :]
    dist = dist[:, ~toofar]
    weights = weights[~toofar]

    # Calculate the Gaussian values
    inv_cov = 1 / (h**2)
    exponent = -0.5 * np.sum(diff**2 * inv_cov, axis=-1)
    c = np.exp(exponent)

    return X, c * weights, weights, dist



    # Use available_cpus as the default if ncpu is -1

[docs]
def chunk_data(ncpu, data_list, data_size):
    """
    Divide data into chunks for multiprocessing.

    This function splits data into approximately equal-sized chunks to facilitate
    parallel processing across multiple CPUs.

    Parameters
    ----------
    ncpu : int
        Number of CPUs to use for parallel processing. If `ncpu` is negative, the entire
        dataset is treated as a single chunk.

    data_list : list of ndarray
        List of data arrays to be chunked. Each array should have the same size along
        the first axis (`data_size`).

    data_size : int
        The total number of data points (size of the first dimension of arrays in `data_list`).

    Returns
    -------
    chunks : tuple of lists of ndarray
        A tuple where each element corresponds to a list of chunks for a particular array in
        `data_list`. The total number of chunks is determined by `ncpu`.

    Notes
    -----
    - The function computes the chunk size as `data_size // ncpu` to ensure chunks are of
      approximately equal size.
    - If `ncpu` is negative, the entire dataset is returned as a single chunk.

    Examples
    --------
    Divide data into chunks for parallel processing:

    >>> import numpy as np
    >>> from crispy import scms
    >>> data1 = np.random.random((100, 3))  # Dataset 1
    >>> data2 = np.random.random((100, 3))  # Dataset 2
    >>> ncpu = 4
    >>> chunks = scms.chunk_data(ncpu, [data1, data2], data_size=100)
    >>> for chunk1, chunk2 in zip(*chunks):
    ...     print(chunk1.shape, chunk2.shape)
    """
    chunk_size = max(1, data_size // ncpu) if ncpu > 0 else data_size
    chunks = ()
    for data in data_list:
        chunks += ([data[i:i + chunk_size] for i in range(0, data_size, chunk_size)],)
    return chunks




[docs]
def wgauss_n_filtered_points_multiproc(X, G, h, weights, f_h, ncpu):
    """
    Compute weighted Gaussian values for data points relative to walker positions
    in parallel, filtering out distant points to optimize computation.

    This function extends `wgauss_n_filtered_points` to support multiprocessing for
    efficient computation on large datasets.

    Parameters
    ----------
    X : ndarray
        Coordinates of the data points, shape (n, D, 1), where `n` is the number of points
        and `D` is the dimensionality.

    G : ndarray
        Coordinates of the walkers, shape (m, D, 1), where `m` is the number of walkers.

    h : float
        Smoothing bandwidth of the Gaussian kernel.

    weights : ndarray
        Weights of the data points, shape (n,).

    f_h : float
        Distance multiplier cutoff for filtering points. Data points farther than
        `f_h * h` from all walkers are excluded.

    ncpu : int
        Number of CPUs to use for parallel processing. If set to `None`, defaults
        to the number of available CPUs.

    Returns
    -------
    X_filtered : ndarray
        Filtered coordinates of the data points, shape (k, D, 1), where `k` is the number of
        points that passed the filtering.

    c : ndarray
        Weighted Gaussian values for each filtered data point, shape (k,).

    weights_filtered : ndarray
        Filtered weights corresponding to `X_filtered`, shape (k,).

    dist : ndarray
        Distances between remaining data points and walker positions, shape (m, k).

    Notes
    -----
    - This function splits the dataset (`X` and `weights`) into chunks for parallel processing
      using the specified number of CPUs.
    - Multiprocessing accelerates computation on large datasets, especially when the number
      of walkers and data points is high.
    - Internally, this function calls `wgauss_n_filtered_points` for each chunk of the data.

    Examples
    --------
    Compute Gaussian weights in parallel:

    >>> import numpy as np
    >>> from crispy import scms
    >>> data = np.random.random((1000, 3, 1))  # Large dataset of 3D data points
    >>> walkers = np.random.random((20, 3, 1))  # Walker positions
    >>> weights = np.ones(1000)  # Equal weights for all data points
    >>> X_filtered, c, weights_filtered, dist = scms.wgauss_n_filtered_points_multiproc(
    ...     data, walkers, h=0.5, weights=weights, f_h=5, ncpu=4)
    """
    ncpu = cpu_count() if ncpu is None else ncpu

    # Split GjList into chunks for parallel processing
    chunks = chunk_data(ncpu, [X, weights], len(X))

    results = Parallel(n_jobs=ncpu)(delayed(wgauss_n_filtered_points)(X_chunk, G, h, weights_chunk, f_h)
                                    for X_chunk, weights_chunk in zip(*chunks))
    X, c, weights, dist = zip(*results)
    X = np.concatenate(X, axis=0)
    c = np.concatenate(c, axis=1)
    weights = np.concatenate(weights, axis=0)
    dist = np.concatenate(dist, axis=1)

    return X, c, weights, dist




[docs]
def shift_wakers_multiproc(G, X, h, d, c, mask, ncpu):
    """
    Shift walkers towards density ridges using the SCMS algorithm with multiprocessing.

    This function parallelizes the SCMS walker-shifting process for improved efficiency
    on large datasets. It divides the walkers into chunks and processes them concurrently
    across multiple CPUs.

    Parameters
    ----------
    G : ndarray
        Initial coordinates of the walkers, shape (m, D, 1), where `m` is the number of walkers
        and `D` is the dimensionality.

    X : ndarray
        Coordinates of the data points, shape (n, D, 1), where `n` is the number of data points.

    h : float
        Smoothing bandwidth for the Gaussian kernel.

    d : int
        Target dimensionality of the ridge subspace.

    c : ndarray
        Weighted Gaussian values computed for the data points and walkers, shape (m, n).

    mask : ndarray of bool
        Boolean mask indicating valid (True) data points for each walker. Shape is (m, n).

    ncpu : int
        Number of CPUs to use for parallel processing. If set to `None`, defaults to the number
        of available CPUs.

    Returns
    -------
    G_updated : ndarray
        Updated coordinates of the walkers after the SCMS shift, shape (m, D, 1).

    error : ndarray
        Convergence error for each walker, shape (m,). The error represents the displacement
        of each walker and is used to determine convergence.

    Notes
    -----
    - The walkers (`G`) are divided into chunks, and each chunk is processed independently
      on a separate CPU.
    - Internally, this function calls `shift_walkers` for each chunk, ensuring consistency
      with the SCMS algorithm.
    - Multiprocessing is particularly beneficial when the number of walkers or data points
      is large.

    Examples
    --------
    Perform a parallel SCMS shift for walkers:

    >>> import numpy as np
    >>> from crispy import scms
    >>> data = np.random.random((100, 3, 1))  # 3D data points
    >>> walkers = np.random.random((10, 3, 1))  # Initial walker positions
    >>> c = np.random.random((10, 100))  # Weighted Gaussian values
    >>> mask = np.random.choice([True, False], size=(10, 100))  # Boolean mask
    >>> h = 1.0
    >>> d = 1
    >>> ncpu = 4  # Use 4 CPUs
    >>> G_updated, error = scms.shift_wakers_multiproc(walkers, data, h, d, c, mask, ncpu)
    """
    ncpu = cpu_count() if ncpu is None else ncpu

    # Split GjList into chunks for parallel processing
    chunks = chunk_data(ncpu, [G, c, mask], len(G))

    results = Parallel(n_jobs=ncpu)(delayed(shift_walkers)(G_chunk, X, h, d, c_chunk, mask_chunk)
                                    for G_chunk, c_chunk, mask_chunk in zip(*chunks))
    GRes, errorRes = zip(*results)
    GRes, errorRes = np.concatenate(GRes, axis=0), np.concatenate(errorRes, axis=0)
    return GRes, errorRes




[docs]
def shift_walkers(G, X, h, d, c, mask):
    """
    Shift walkers towards density ridges using the Subspace Constrained Mean Shift (SCMS) algorithm.

    This function updates the positions of walkers (`G`) based on local density estimates
    from data points (`X`) and a Gaussian kernel with bandwidth `h`. The shift is constrained
    to the subspace defined by the eigenvectors of the Hessian matrix with the largest eigenvalues.

    Parameters
    ----------
    G : ndarray
        Coordinates of the walkers, shape (m, D, 1), where `m` is the number of walkers
        and `D` is the dimensionality.

    X : ndarray
        Coordinates of the data points, shape (n, D, 1), where `n` is the number of points.

    h : float
        Smoothing bandwidth for the Gaussian kernel.

    d : int
        Target dimensionality of the ridge subspace.

    c : ndarray
        Weighted Gaussian values computed for the data points and walkers, shape (m, n).

    mask : ndarray of bool
        Boolean mask indicating valid (True) data points for each walker. Shape is (m, n).

    Returns
    -------
    G_updated : ndarray
        Updated coordinates of the walkers after the SCMS shift, shape (m, D, 1).

    error : ndarray
        Convergence error for each walker, shape (m,). The error represents the displacement
        of each walker and is used to determine convergence.

    Notes
    -----
    - The SCMS algorithm shifts walkers towards regions of high density and projects their
      movement onto the subspace spanned by the eigenvectors of the Hessian matrix with the
      largest eigenvalues.
    - The convergence error is calculated as the magnitude of the shift relative to the
      density gradient.

    Examples
    --------
    Perform a single SCMS shift for walkers:

    >>> import numpy as np
    >>> from crispy import scms
    >>> data = np.random.random((100, 3, 1))  # 3D data points
    >>> walkers = np.random.random((10, 3, 1))  # Walker positions
    >>> c = np.random.random((10, 100))  # Weighted Gaussian values
    >>> mask = np.random.choice([True, False], size=(10, 100))  # Boolean mask
    >>> h = 1.0
    >>> d = 1
    >>> G_updated, error = scms.shift_walkers(walkers, data, h, d, c, mask)
    """
    m, D = G.shape[0], G.shape[1]
    n = X.shape[0]

    # Compute internally to make parallel processing easier. They take incredibly little space
    H = np.eye(D) * h**2
    Hinv = np.eye(D) / h**2

    # Compute mean probability
    pj = np.mean(c, axis=1)  # (m,)

    # Compute u for selected elements only
    mask_indices = np.argwhere(mask)  # (k, 2) where k is the number of True values in mask
    diff_selected = G[mask_indices[:, 0]] - X[mask_indices[:, 1]]  # (k, D, 1)

    # Compute u directly without creating the intermediate step
    u_diff = np.einsum('ij,njk->nik', Hinv, diff_selected)  # (k, D, 1) #/h**2 still needed?

    # Compute g for selected walker points using the mask
    c_selected = c[mask_indices[:, 0], mask_indices[:, 1]]  # (k,)
    g = np.zeros(G.shape)  # (m, D, 1)
    np.add.at(g, mask_indices[:, 0], -1 * c_selected[:, None, None] * u_diff / n)

    # Compute u_diff.T @ u_diff using einsum to avoid explicit transpose and matmul
    product = np.einsum('nik,njk->nij', u_diff, u_diff) - Hinv  # (k, D, D)

    # Update Hessian matrix
    Hess = np.zeros((m, D, D))  # (m, D, D)
    np.add.at(Hess, mask_indices[:, 0], c_selected[:, None, None] * product / n)

    # Expand dimensions for pj
    pj = pj[:, None, None]  # (m, 1, 1)

    # Compute Sigmainv
    Sigmainv = (-1 * Hess + np.einsum('mik,mil->mkl', g, g)/pj)/pj  # (m, D, D)

    # Compute the shift for each walker
    shift0 = G + np.einsum('ij,mjk->mik', H, g) / pj # (m, D, 1)

    # Eigen decomposition for Sigmainv
    EigVal, EigVec = np.linalg.eigh(Sigmainv) # (m, D), (m, D, D)

    # Get the eigenvectors with the largest eigenvalues
    V = EigVec[:, :, d:D]  # (m, D, D-d)

    # Update G for each walker
    G = np.einsum('mij,mjk->mik', np.einsum('mik,mjk->mij', V, V), (shift0 - G)) + G  # (m, D, 1)

    # Compute the error term
    tmp = np.einsum('mji,mjk->mik', V, g)  # (m, D, 1)
    error = np.sqrt(np.einsum('mik,mik->m', tmp, tmp) / np.einsum('mik,mik->m', g, g))  # (m,)

    return G, error




[docs]
def shift_particles(G, X, D, h, d, c, n, H, Hinv):
    """
    Shift walkers toward density ridges using the Subspace Constrained Mean Shift (SCMS) algorithm.

    This function updates the positions of walkers (`G`) based on local density estimates
    computed from data points (`X`) and projects their movement onto the subspace of
    interest, defined by eigenvectors of the Hessian matrix.

    Parameters
    ----------
    G : ndarray
        Initial coordinates of the walkers, shape (m, D, 1), where `m` is the number of walkers
        and `D` is the dimensionality.

    X : ndarray
        Coordinates of the data points, shape (n, D, 1), where `n` is the number of points.

    D : int
        Dimensionality of the data points.

    h : float
        Smoothing bandwidth of the Gaussian kernel.

    d : int
        Target dimensionality of the ridge subspace.

    c : ndarray
        Weighted Gaussian values computed for the data points and walkers, shape (m, n).

    n : int
        Number of data points (`n = X.shape[0]`).

    H : ndarray
        Covariance matrix for the Gaussian kernel, shape (D, D).

    Hinv : ndarray
        Inverse of the covariance matrix, shape (D, D).

    Returns
    -------
    G_updated : ndarray
        Updated coordinates of the walkers, shape (m, D, 1).

    error : ndarray
        Convergence error for each walker, shape (m,). The error represents the displacement
        of each walker and is used to determine convergence.

    Notes
    -----
    - The SCMS algorithm shifts walkers toward regions of high density by iteratively
      estimating gradients and projecting movements onto the ridge subspace.
    - The eigen decomposition of the Hessian matrix is used to constrain movement to the
      subspace defined by the largest eigenvalues.
    - The convergence error is computed as the displacement magnitude of each walker
      relative to the density gradient.

    Examples
    --------
    Perform a single SCMS shift for walkers:

    >>> import numpy as np
    >>> from crispy import scms
    >>> data = np.random.random((100, 3, 1))  # 3D data points
    >>> walkers = np.random.random((10, 3, 1))  # Walker positions
    >>> h = 1.0
    >>> d = 1
    >>> c = np.random.random((10, 100))  # Weighted Gaussian values
    >>> n = data.shape[0]
    >>> H = np.eye(3) * h**2  # Covariance matrix
    >>> Hinv = np.linalg.inv(H)  # Inverse covariance matrix
    >>> G_updated, error = scms.shift_particles(walkers, data, D=3, h=h, d=d, c=c, n=n, H=H, Hinv=Hinv)
    """
    # Compute mean probability
    pj = np.mean(c, axis=1)

    # Expand dimensions for broadcasting
    X_expanded = X[None, :, :, :] # (n, 1, D, 1)
    G_expanded = G[:, None, :, :] # (1, m, D, 1)

    # Compute u for all walker points
    u = np.matmul(Hinv, (G_expanded - X_expanded)) #/ h**2

    # Compute g for all walker points
    c_expanded = c[:, :, None, None]
    g = -1 * np.sum(c_expanded * u, axis=1) / n

    # Compute the Hessian matrix for all walker points
    u_T = np.transpose(u, axes=(0, 1, 3, 2))
    Hess = np.sum(c_expanded * (np.matmul(u, u_T) - Hinv), axis=1) / n

    # Expand dimensions for pj
    pj = pj[:, None, None]

    Sigmainv = -1 * Hess / pj + np.matmul(g, np.transpose(g, axes=(0, 2, 1))) / pj**2

    # Compute the shift for each walker
    shift0 = G + np.matmul(H, g) / pj

    # Eigen decomposition for Sigmainv
    EigVal, EigVec = np.linalg.eigh(Sigmainv)

    # Get the eigenvectors with the largest eigenvalues
    V = EigVec[:, :, d:D]

    # Compute VVT
    VVT = np.matmul(V, np.transpose(V, axes=(0, 2, 1)))

    # Update G for each walker
    G = np.matmul(VVT, (shift0 - G)) + G

    # Compute the error term
    tmp = np.matmul(np.transpose(V, axes=(0, 2, 1)), g)
    error = np.sqrt(np.sum(tmp**2, axis=(1, 2)) / np.sum(g**2, axis=(1, 2)))

    return G, error




[docs]
def euclidean_dist(X, G):
    """
    Compute the Euclidean distances and differences between data points and walkers.

    This function calculates pairwise Euclidean distances between points in `X` and `G` and
    returns both the distances and the differences in their coordinates.

    Parameters
    ----------
    X : ndarray
        Coordinates of the data points, shape (n, D, 1), where `n` is the number of points
        and `D` is the dimensionality.

    G : ndarray
        Coordinates of the walkers, shape (m, D, 1), where `m` is the number of walkers.

    Returns
    -------
    distances : ndarray
        Pairwise Euclidean distances between each point in `G` and each point in `X`,
        shape (m, n).

    diff : ndarray
        Pairwise coordinate differences between points in `G` and points in `X`,
        shape (m, n, D).

    Notes
    -----
    - This function is useful for calculating distances and displacements required
      in SCMS-based ridge detection.

    Examples
    --------
    Compute pairwise distances and differences:

    >>> import numpy as np
    >>> from crispy import scms
    >>> data = np.random.random((100, 3, 1))  # 3D data points
    >>> walkers = np.random.random((10, 3, 1))  # Walker positions
    >>> distances, diff = scms.euclidean_dist(data, walkers)
    >>> print(distances.shape)  # Should be (10, 100)
    >>> print(diff.shape)  # Should be (10, 100, 3)
    """
    X_squeezed = np.squeeze(X, axis=-1)
    G_squeezed = np.squeeze(G, axis=-1)

    # Compute differences for all combinations of G and X
    diff = G_squeezed[:, np.newaxis, :] - X_squeezed[np.newaxis, :, :]
    distances = np.linalg.norm(diff, axis=-1)

    return distances, diff




[docs]
def vectorized_gaussian(X, G, h):
    """
    Compute Gaussian kernel values for data points relative to walker positions.

    This function calculates the Gaussian kernel values for each pair of points in `X` and
    `G`, based on the Euclidean distances between them, and returns both the kernel values
    and the distances.

    Parameters
    ----------
    X : ndarray
        Coordinates of the data points, shape (n, D, 1), where `n` is the number of points
        and `D` is the dimensionality.

    G : ndarray
        Mean positions (walker coordinates) for the Gaussian kernel, shape (m, D, 1),
        where `m` is the number of walkers.

    h : float
        Smoothing bandwidth of the Gaussian kernel.

    Returns
    -------
    c : ndarray
        Gaussian kernel values for each pair of data point and walker, shape (m, n).

    distances : ndarray
        Pairwise Euclidean distances between each walker in `G` and each point in `X`,
        shape (m, n).

    Notes
    -----
    This function is optimized to handle pairwise distance calculations and kernel
    evaluations efficiently.

    Examples
    --------
    Compute Gaussian kernel values and distances:

    >>> import numpy as np
    >>> from crispy import scms
    >>> data = np.random.random((100, 3, 1))  # 3D data points
    >>> walkers = np.random.random((10, 3, 1))  # Walker positions
    >>> h = 1.0  # Bandwidth
    >>> c, distances = scms.vectorized_gaussian(data, walkers, h)
    >>> print(c.shape)  # Should be (10, 100)
    >>> print(distances.shape)  # Should be (10, 100)
    """
    # Calculate Euclidean distances between each walker in G and each point in X
    X_squeezed = np.squeeze(X, axis=-1)
    G_squeezed = np.squeeze(G, axis=-1)

    # Compute differences for all combinations of G and X
    diff = G_squeezed[:, np.newaxis, :] - X_squeezed[np.newaxis, :, :]

    # Compute the inverse covariance (assumes h is scalar)
    inv_cov = 1 / (h**2)

    # Calculate the exponent for the Gaussian function
    exponent = -0.5 * np.sum(diff**2 * inv_cov, axis=-1)

    # Compute the Gaussian exponential
    c = np.exp(exponent)

    return c