
Python extmath.norm function code examples


This article collects typical usage examples of the Python function sklearn.utils.extmath.norm. If you are wondering how exactly to call norm, how it behaves, or what real code that uses it looks like, the curated examples below may help.



A total of 19 code examples of the norm function are shown below; by default they are ordered by popularity.
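For orientation: extmath.norm computes the Euclidean norm of a vector or the Frobenius norm of a matrix, and the examples below use it mostly as a convergence or error measure. The function is no longer present in current scikit-learn releases, so the following minimal sketch (not taken from any of the projects quoted below; variable names are illustrative) reproduces its behaviour with plain NumPy:

import numpy as np
# from sklearn.utils.extmath import norm  # available only in older scikit-learn releases

X = np.arange(6, dtype=float).reshape(2, 3)

# extmath.norm(X) returned the Frobenius norm of a matrix (Euclidean norm of a
# vector), i.e. the same value as np.linalg.norm on the flattened array:
frobenius = np.linalg.norm(X.ravel())
print(frobenius)        # 7.416198487095663  (sqrt of 55)
# squared_norm(X) from the same module is simply this value squared:
print(frobenius ** 2)   # 55.0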

Example 1: test_norm_squared_norm

def test_norm_squared_norm():
    X = np.random.RandomState(42).randn(50, 63)
    X *= 100        # check stability
    X += 200

    assert_almost_equal(np.linalg.norm(X.ravel()), norm(X))
    assert_almost_equal(norm(X) ** 2, squared_norm(X), decimal=6)
    assert_almost_equal(np.linalg.norm(X), np.sqrt(squared_norm(X)), decimal=6)
Contributor: 93sam | Project: scikit-learn | Lines of code: 8 | Source: test_extmath.py


Example 2: test_norm_squared_norm

def test_norm_squared_norm():
    X = np.random.RandomState(42).randn(50, 63)
    X *= 100        # check stability
    X += 200

    assert_almost_equal(np.linalg.norm(X.ravel()), norm(X))
    assert_almost_equal(norm(X) ** 2, squared_norm(X), decimal=6)
    assert_almost_equal(np.linalg.norm(X), np.sqrt(squared_norm(X)), decimal=6)
    # Check the warning with an int array and np.dot potential overflow
    assert_warns_message(
                    UserWarning, 'Array type is integer, np.dot may '
                    'overflow. Data should be float type to avoid this issue',
                    squared_norm, X.astype(int))
Contributor: BasilBeirouti | Project: scikit-learn | Lines of code: 13 | Source: test_extmath.py
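Example 2 also asserts that squared_norm warns on integer input, because np.dot over a fixed-width integer dtype can silently wrap around. The following small illustration is not part of the quoted test; the array size, values, and dtype are chosen arbitrarily just to trigger the wrap-around:

import numpy as np

x = np.full(10, 50_000, dtype=np.int32)          # sum of squares = 2.5e10 > 2**31 - 1
print(np.dot(x, x))                              # wraps around: a wrong (possibly negative) value
print(np.dot(x.astype(float), x.astype(float)))  # 25000000000.0, the correct result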


Example 3: mean_shift

def mean_shift(X, bandwidth, n_seeds, kernel_function='gaussian', max_iterations=100, proximity_thresh=5):
    '''
    ---Parameters---
    X : data in form (samples, dims)
    bandwidth : radius of nearest neighbors
    n_seeds : number of seed points drawn at random from X
    kernel_function : can be "gaussian" or "flat" or your own kernel function
    proximity_thresh : minimum distance (in pixels) a new cluster must be away from previous ones

    ---Returns---
    cluster_centers : list of cluster center coordinates
    cluster_counts : how many pixels are within the neighborhood of each cluster
    '''

    import numpy as np
    from sklearn.neighbors import BallTree, NearestNeighbors
    from sklearn.utils import extmath
    from sklearn.metrics.pairwise import euclidean_distances
    from collections import defaultdict 

    if kernel_function == 'gaussian':
        kernel_update_function = gaussian_kernel
    elif kernel_function == 'flat':
        kernel_update_function = flat_kernel
    else:
        kernel_update_function = kernel_function


    n_points, n_features = X.shape
    stop_thresh = 1e-2 * bandwidth # when mean has converged                                                                                                               
    cluster_centers = []
    cluster_counts = [] 
    # ball_tree = BallTree(X)# to efficiently look up nearby points
    neighbors = NearestNeighbors(radius=bandwidth).fit(X)

    seeds = X[(np.random.uniform(0, X.shape[0], n_seeds)).astype(int)]
 
    # For each seed, climb gradient until convergence or max_iterations                                                                                                     
    for weighted_mean in seeds:
         completed_iterations = 0
         while True:
             points_within = X[neighbors.radius_neighbors([weighted_mean], bandwidth, return_distance=False)[0]]
             old_mean = weighted_mean  # save the old mean                                                                                                                  
             weighted_mean = kernel_update_function(old_mean, points_within, bandwidth)
             converged = extmath.norm(weighted_mean - old_mean) < stop_thresh
             if converged or completed_iterations == max_iterations:
                # Only add cluster if it's different enough from other centers
                if len(cluster_centers) > 0:
                    diff_from_prev = [np.linalg.norm(weighted_mean-cluster_centers[i], 2) for i in range(len(cluster_centers))]
                    if np.min(diff_from_prev) > proximity_thresh:
                        cluster_centers.append(weighted_mean)
                        cluster_counts.append(points_within.shape[0])
                else:
                    cluster_centers.append(weighted_mean)
                    cluster_counts.append(points_within.shape[0])
                break
             completed_iterations += 1
 
    return cluster_centers, cluster_counts
Contributor: MerDane | Project: pyKinectTools | Lines of code: 59 | Source: MeanShift.py


Example 4: test_logistic_derivative_lipschitz_constant

def test_logistic_derivative_lipschitz_constant():
    # Tests Lipschitz-continuity of the derivative of the logistic loss
    rng = check_random_state(42)
    grad_weight = 2.08e-1
    lipschitz_constant = _logistic_derivative_lipschitz_constant(
        X, mask, grad_weight)
    for _ in range(20):
        x_1 = rng.rand((w.shape[0] + 1)) * rng.randint(1000)
        x_2 = rng.rand((w.shape[0] + 1)) * rng.randint(1000)
        gradient_difference = extmath.norm(
            _logistic_data_loss_and_spatial_grad_derivative(
                X, y, x_1, mask, grad_weight)
            - _logistic_data_loss_and_spatial_grad_derivative(
                X, y, x_2, mask, grad_weight))
        point_difference = extmath.norm(x_1 - x_2)
        assert_true(
            gradient_difference <= lipschitz_constant * point_difference)
Contributor: CandyPythonFlow | Project: nilearn | Lines of code: 17 | Source: test_graph_net.py
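The test above verifies the defining inequality of a Lipschitz-continuous gradient, ||grad f(x_1) - grad f(x_2)|| <= L * ||x_1 - x_2||, for nilearn's logistic Graph-Net loss. As a rough, self-contained analogue (plain NumPy, not the nilearn code), the same check for the ordinary squared loss f(w) = 0.5 * ||Xw - y||^2 can be sketched as follows, using the spectral norm of X.T @ X as the Lipschitz constant:

import numpy as np

rng = np.random.RandomState(42)
X = rng.randn(30, 10)
y = rng.randn(30)

def grad(w):
    # gradient of f(w) = 0.5 * ||X @ w - y||^2
    return X.T @ (X @ w - y)

# The spectral norm (largest singular value) of X.T @ X is a Lipschitz
# constant for this gradient, since grad(w1) - grad(w2) = X.T @ X @ (w1 - w2).
lipschitz_constant = np.linalg.norm(X.T @ X, 2)

for _ in range(20):
    w_1 = rng.rand(10) * rng.randint(1000)
    w_2 = rng.rand(10) * rng.randint(1000)
    gradient_difference = np.linalg.norm(grad(w_1) - grad(w_2))
    point_difference = np.linalg.norm(w_1 - w_2)
    assert gradient_difference <= lipschitz_constant * point_difference + 1e-8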


Example 5: _pa

    def _pa(self, loss_t, x_t):
        denom = extmath.norm(x_t) ** 2.0
        # special case when L_2 norm of x_t is zero (followed libol
        # implementation)
        if denom == 0:
            return 1

        d = loss_t / denom
        return d
Contributor: jsouza | Project: pamtl | Lines of code: 9 | Source: partl_regression.py


Example 6: test__squared_loss_derivative_lipschitz_constant

def test__squared_loss_derivative_lipschitz_constant():
    # Tests Lipschitz-continuity of the derivative of _squared_loss loss
    # function
    rng = check_random_state(42)
    grad_weight = 2.08e-1
    lipschitz_constant = _squared_loss_derivative_lipschitz_constant(
        X, mask, grad_weight)
    for _ in range(20):
        x_1 = rng.rand(*w.shape) * rng.randint(1000)
        x_2 = rng.rand(*w.shape) * rng.randint(1000)
        gradient_difference = extmath.norm(
            _squared_loss_and_spatial_grad_derivative(X, y, x_1, mask,
                                                      grad_weight)
            - _squared_loss_and_spatial_grad_derivative(X, y, x_2, mask,
                                                        grad_weight))
        point_difference = extmath.norm(x_1 - x_2)
        assert_true(
            gradient_difference <= lipschitz_constant * point_difference)
Contributor: CandyPythonFlow | Project: nilearn | Lines of code: 18 | Source: test_graph_net.py


Example 7: _reorth

def _reorth(basis, target, rows=None, alpha=0.5):
    """Reorthogonalize a vector using iterated Gram-Schmidt

    Parameters
    ----------
    basis: ndarray, shape (n_features, n_basis)
        The matrix whose rows are a set of basis to reorthogonalize against

    target: ndarray, shape (n_features,)
        The target vector to be reorthogonalized

    rows: {array-like, None}, default None
        Indices of rows from basis to use. Use all if None

    alpha: float, default 0.5
        Parameter for determining whether to do a second reorthogonalization.

    Returns
    -------
    reorthed_target: ndarray, shape (n_features,)
        The reorthogonalized vector
    """
    if rows is not None:
        basis = basis[rows]
    norm_target = norm(target)

    norm_target_old = 0
    n_reorth = 0

    while norm_target < alpha * norm_target_old or n_reorth == 0:
        for row in basis:
            t = fast_dot(row, target)
            target = target - t * row

        norm_target_old = norm_target
        norm_target = norm(target)
        n_reorth += 1

        if n_reorth > 4:
            # target in span(basis) => accept target = 0
            target = np.zeros(basis.shape[0])
            break

    return target
Contributor: amueller | Project: pca | Lines of code: 44 | Source: tga.py
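_reorth above is a classical iterated ("twice is enough") Gram-Schmidt step: subtract from the target its projection onto every basis row and, if the vector shrank by more than the factor alpha, repeat, since heavy cancellation signals lost orthogonality. A minimal NumPy-only sketch of the same idea (hypothetical variable names, not the tga.py code) is:

import numpy as np

rng = np.random.RandomState(0)
Q, _ = np.linalg.qr(rng.randn(5, 3))   # 5-dimensional vectors, 3 orthonormal columns
basis = Q.T                            # rows are the orthonormal basis vectors
target = rng.randn(5)

for _ in range(2):                     # "twice is enough" iterated Gram-Schmidt
    target = target - basis.T @ (basis @ target)

print(np.abs(basis @ target).max())    # ~1e-16: target is orthogonal to every basis row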


Example 8: _bistochastic_normalize

def _bistochastic_normalize(X, max_iter=1000, tol=1e-5):
    """Normalize rows and columns of ``X`` simultaneously so that all
    rows sum to one constant and all columns sum to a different
    constant.

    """
    # According to paper, this can also be done more efficiently with
    # deviation reduction and balancing algorithms.
    X = make_nonnegative(X)
    X_scaled = X
    dist = None
    for _ in range(max_iter):
        X_new, _, _ = _scale_normalize(X_scaled)
        if issparse(X):
            dist = norm(X_scaled.data - X_new.data)
        else:
            dist = norm(X_scaled - X_new)
        X_scaled = X_new
        if dist is not None and dist < tol:
            break
    return X_scaled
Contributor: VirgileFritsch | Project: scikit-learn | Lines of code: 21 | Source: spectral.py


Example 9: test_tikhonov_regularization_vs_graph_net

def test_tikhonov_regularization_vs_graph_net():
    # Test for one of the extreme cases of Graph-Net: That is, with
    # l1_ratio = 0 (pure Smooth), we compare Graph-Net's performance
    # with the analytical solution for Tikhonov Regularization

    # XXX A small dataset here (this test is very lengthy)
    G = get_gradient_matrix(w.size, mask)
    optimal_model = np.dot(sp.linalg.pinv(
        np.dot(X.T, X) + y.size * np.dot(G.T, G)), np.dot(X.T, y))
    graph_net = BaseSpaceNet(
        mask=mask_, alphas=1. * X.shape[0], l1_ratios=0., max_iter=400,
        fit_intercept=False,
        screening_percentile=100., standardize=False)
    graph_net.fit(X_, y.copy())
    coef_ = graph_net.coef_[0]
    graph_net_perf = 0.5 / y.size * extmath.norm(
        np.dot(X, coef_) - y) ** 2\
        + 0.5 * extmath.norm(np.dot(G, coef_)) ** 2
    optimal_model_perf = 0.5 / y.size * extmath.norm(
        np.dot(X, optimal_model) - y) ** 2\
        + 0.5 * extmath.norm(np.dot(G, optimal_model)) ** 2
    assert_almost_equal(graph_net_perf, optimal_model_perf, decimal=1)
Contributor: CandyPythonFlow | Project: nilearn | Lines of code: 22 | Source: test_graph_net.py


Example 10: test_lasso_vs_graph_net

def test_lasso_vs_graph_net():
    # Test for one of the extreme cases of Graph-Net: That is, with
    # l1_ratio = 1 (pure Lasso), we compare Graph-Net's performance with
    # Scikit-Learn lasso
    lasso = Lasso(max_iter=100, tol=1e-8, normalize=False)
    graph_net = BaseSpaceNet(mask=mask, alphas=1. * X_.shape[0],
                             l1_ratios=1, is_classif=False,
                             penalty="graph-net", max_iter=100)
    lasso.fit(X_, y)
    graph_net.fit(X, y)
    lasso_perf = 0.5 / y.size * extmath.norm(np.dot(
        X_, lasso.coef_) - y) ** 2 + np.sum(np.abs(lasso.coef_))
    graph_net_perf = 0.5 * ((graph_net.predict(X) - y) ** 2).mean()
    np.testing.assert_almost_equal(graph_net_perf, lasso_perf, decimal=3)
Contributor: CandyPythonFlow | Project: nilearn | Lines of code: 14 | Source: test_space_net.py


Example 11: f_regression_nosparse

def f_regression_nosparse(X, y, center=True):
    """Univariate linear regression tests

    Quick linear model for testing the effect of a single regressor,
    sequentially for many regressors.

    This is done in 3 steps:
    1. the regressor of interest and the data are orthogonalized
       with respect to constant regressors
    2. the cross correlation between data and regressors is computed
    3. it is converted to an F score then to a p-value

    Parameters
    ----------
    X : {array-like, sparse matrix}  shape = (n_samples, n_features)
        The set of regressors that will be tested sequentially.

    y : array of shape(n_samples).
        The data matrix

    center : bool, default=True
        If True, X and y will be centered.

    Returns
    -------
    F : array, shape=(n_features,)
        F values of features.

    pval : array, shape=(n_features,)
        p-values of F-scores.
    """
    X, y = check_arrays(X, y, dtype=float)
    y = y.ravel()
    if center:
        y = y - np.mean(y)
        X = X.copy('F')  # faster in fortran
        X -= X.mean(axis=0)

    # compute the correlation
    corr = np.dot(y, X)
    # XXX could use corr /= row_norms(X.T) here, but the test doesn't pass
    corr /= np.asarray(np.sqrt((X ** 2).sum(axis=0))).ravel()
    corr /= norm(y)

    # convert to p-value
    degrees_of_freedom = y.size - (2 if center else 1)
    F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
    pv = stats.f.sf(F, 1, degrees_of_freedom)
    return F, pv
Contributor: CandyPythonFlow | Project: nilearn | Lines of code: 49 | Source: sklearn_f_regression_nosparse.py


Example 12: mean_shift

def mean_shift(X, bandwidth, seeds, kernel_update_function, max_iterations=10):
    n_points, n_features = X.shape
    stop_thresh = 1e-3 * bandwidth  # when mean has converged                                                                                                               
    cluster_centers = []
    ball_tree = BallTree(X)  # to efficiently look up nearby points

    # For each seed, climb gradient until convergence or max_iterations                                                                                                     
    for weighted_mean in seeds:
         completed_iterations = 0
         while True:
             points_within = X[ball_tree.query_radius([weighted_mean], bandwidth*3)[0]]
             old_mean = weighted_mean  # save the old mean                                                                                                                  
             weighted_mean = kernel_update_function(old_mean, points_within, bandwidth)
             converged = extmath.norm(weighted_mean - old_mean) < stop_thresh
             if converged or completed_iterations == max_iterations:
                 cluster_centers.append(weighted_mean)
                 break
             completed_iterations += 1

    return cluster_centers
Contributor: denizsokmen | Project: cvProjects | Lines of code: 20 | Source: part2.py


Example 13: _iter

def _iter(X,
          weighted_mean,
          kernel_update_function,
          bandwidth,
          ball_tree,
          stop_thresh,
          max_iter):
    """Return the cluster center and the within points visited while iterated from the seed
    to the centroid. This code has been isolated to be executed in parallel using JobLib."""
    visited_points = set()
    completed_iterations = 0
    while True:
        within_idx = ball_tree.query_radius([weighted_mean], bandwidth*3)[0]
        [visited_points.add(x) for x in within_idx]
        points_within = X[within_idx]
        old_mean = weighted_mean  # save the old mean
        weighted_mean = kernel_update_function(old_mean, points_within, bandwidth)
        converged = extmath.norm(weighted_mean - old_mean) < stop_thresh
        if converged or completed_iterations == max_iter:
            return weighted_mean, visited_points
        completed_iterations += 1
Contributor: arutaku | Project: mean_shift | Lines of code: 21 | Source: mean_shift.py


Example 14: mean_shift

def mean_shift(X, bandwidth=None, seeds=None, kernel="flat",
               max_cluster_radius=-1., max_iterations=300):
    """Perform MeanShift Clustering of data using the specified kernel

    Parameters
    ----------

    X : array [n_samples, n_features]
        Input points to be clustered

    bandwidth : float,
        Kernel bandwidth

    seeds: array [n_seeds, n_features], optional
        Points used as initial kernel locations
        If not set, then use every point as a seed (which may
        be very slow---consider using the `get_bin_seeds` function
        to create a reduced set of seeds).

    max_cluster_radius: float, default -1.
        Used only in post-processing.
        If negative, then each point is clustered into its nearest cluster.
        If positive, then those points that are not within `max_cluster_radius`
        of any cluster center are said to be 'orphans' that do not belong to
        any cluster. Orphans are given cluster label -1.

    Returns
    -------

    cluster_centers : array [n_clusters, n_features]
        Coordinates of cluster centers

    labels : array [n_samples]
        cluster labels for each point

    Notes
    -----
    See examples/plot_meanshift.py for an example.

    """

    if seeds is None:
        seeds = X
    elif len(seeds) == 0:
        raise ValueError("If a list of seeds is provided it cannot be empty.")

    if kernel not in KERNELS:
        valid_kernels = " ".join(KERNELS)
        raise ValueError("Kernel %s is not valid. Valid kernel choices are: %s" % (kernel, valid_kernels))

    # Set maximum neighbor query distance based on kernel
    if kernel in ["flat"]:
        query_distance = bandwidth
        kernel_update_function = flat_kernel_update
        print("Using flat kernel update")
    elif kernel in ["gaussian"]:
        query_distance = bandwidth * 3  # A bit arbitrary
        kernel_update_function = gaussian_kernel_update
        print("Using gaussian kernel update")
    else:
        raise ValueError("Kernel %s not implemented correctly" % kernel)

    n_points, n_features = X.shape
    stop_thresh = 1e-3 * bandwidth  # when mean has converged
    center_intensity_dict = {}
    ball_tree = BallTree(X)  # to efficiently look up nearby points

    # For each seed, climb gradient until convergence or max_iterations
    for weighted_mean in seeds:
        completed_iterations = 0
        while True:
            # Find mean of points within bandwidth
            points_within = X[ball_tree.query_radius([weighted_mean], query_distance)[0]]
            if len(points_within) == 0:
                break  # Depending on seeding strategy this condition may occur
            old_mean = weighted_mean  # save the old mean
            weighted_mean = kernel_update_function(old_mean, points_within, bandwidth)
            # If converged or at max_iterations, add the cluster
            if extmath.norm(weighted_mean - old_mean) < stop_thresh or \
                   completed_iterations == max_iterations:
                center_intensity_dict[tuple(weighted_mean)] = len(points_within)
                break
            completed_iterations += 1

    # POST PROCESSING: remove near duplicate points
    # If the distance between two kernels is less than the bandwidth,
    # then we have to remove one because it is a duplicate. Remove the
    # one with fewer points.
    print "%d clusters before removing duplicates " % len(center_intensity_dict)
    sorted_by_intensity = sorted(center_intensity_dict.items(),
                                 key=lambda tup: tup[1], reverse=True)
    sorted_centers = np.array([tup[0] for tup in sorted_by_intensity])
    unique = np.ones(len(sorted_centers), dtype=bool)
    cc_tree = BallTree(sorted_centers)
    for i, center in enumerate(sorted_centers):
        if unique[i]:
            neighbor_idxs = cc_tree.query_radius([center], bandwidth)[0]
            unique[neighbor_idxs] = 0
            unique[i] = 1  # leave the current point as unique
    cluster_centers = sorted_centers[unique]
#......... (part of this function body omitted) .........
Contributor: lemanou | Project: PCG_2016 | Lines of code: 101 | Source: clee_mean_shift.py


Example 15: variable_bw_mean_shift

def variable_bw_mean_shift(X, bandwidth_array, seeds=None, max_iterations=300):
    """Variable bandwidth mean shift with gaussian kernel

	Parameters
	----------
	X : array-like, shape=[n_samples, n_features]
		Input data.

	bandwidth_array : array[float], shape=[n_samples]
		Kernel bandwidth for each sample.

	seeds : array[float, float], shape=(n_seeds, n_features), optional
		Point used as initial kernel locations. Default is
		setting each point in input data as a seed.

	max_iterations : int, default 300
		Maximum number of iterations, per seed point, before the clustering
		operation terminates (for that seed point), if it has not converged yet.

	Returns
	-------
	cluster_centers : array, shape=[n_clusters, n_features]
		Coordinates of cluster centers.

	labels : array, shape=[n_samples]
		Cluster labels for each point.

	Notes
	-----
	Code adapted from scikit-learn library.

	"""

    if seeds is None:
        seeds = X

    n_points, n_features = X.shape
    stop_thresh = 1e-3 * np.mean(bandwidth_array)  # when mean has converged
    center_intensity_dict = {}
    cluster_centers = []
    ball_tree = BallTree(X)  # to efficiently look up nearby points

    def gaussian_kernel(x, points, bandwidth):
        distances = euclidean_distances(points, x)
        weights = np.exp(-1 * (distances ** 2 / bandwidth ** 2))
        return np.sum(points * weights, axis=0) / np.sum(weights)

        # For each seed, climb gradient until convergence or max_iterations

    for i, weighted_mean in enumerate(seeds):
        completed_iterations = 0
        while True:
            points_within = X[ball_tree.query_radius([weighted_mean], bandwidth_array[i])[0]]
            old_mean = weighted_mean  # save the old mean
            weighted_mean = gaussian_kernel(old_mean, points_within, bandwidth_array[i])
            converged = extmath.norm(weighted_mean - old_mean) < stop_thresh

            if converged or completed_iterations == max_iterations:
                if completed_iterations == max_iterations:
                    print("reached max iterations")
                cluster_centers.append(weighted_mean)
                center_intensity_dict[tuple(weighted_mean)] = len(points_within)
                break

            completed_iterations += 1

            # POST PROCESSING: remove near duplicate points
            # If the distance between two kernels is less than the bandwidth,
            # then we have to remove one because it is a duplicate. Remove the
            # one with fewer points.
    sorted_by_intensity = sorted(center_intensity_dict.items(), key=lambda tup: tup[1], reverse=True)
    sorted_centers = np.array([tup[0] for tup in sorted_by_intensity])
    unique = np.ones(len(sorted_centers), dtype=bool)
    ball_tree = BallTree(sorted_centers)

    for i, center in enumerate(sorted_centers):
        if unique[i]:
            neighbor_idxs = ball_tree.query_radius([center], np.mean(bandwidth_array))[0]
            unique[neighbor_idxs] = 0
            unique[i] = 1  # leave the current point as unique
    cluster_centers = sorted_centers[unique]

    # ASSIGN LABELS: a point belongs to the cluster that it is closest to
    nbrs = NearestNeighbors(n_neighbors=1, algorithm="ball_tree").fit(cluster_centers)
    labels = np.zeros(n_points, dtype=int)
    distances, idxs = nbrs.kneighbors(X)
    labels = idxs.flatten()

    return cluster_centers, labels
Contributor: rohanp | Project: LDFMap | Lines of code: 89 | Source: cluster.py


Example 16: discretize

def discretize(vectors, copy=True, max_svd_restarts=30, n_iter_max=20,
               random_state=None):
    """Search for a partition matrix (clustering) which is closest to the
    eigenvector embedding.

    Parameters
    ----------
    vectors : array-like, shape: (n_samples, n_clusters)
        The embedding space of the samples.

    copy : boolean, optional, default: True
        Whether to copy vectors, or perform in-place normalization.

    max_svd_restarts : int, optional, default: 30
        Maximum number of attempts to restart SVD if convergence fails

    n_iter_max : int, optional, default: 20
        Maximum number of iterations to attempt in rotation and partition
        matrix search if machine precision convergence is not reached

    random_state: int seed, RandomState instance, or None (default)
        A pseudo random number generator used for the initialization of the
        rotation matrix

    Returns
    -------
    labels : array of integers, shape: n_samples
        The labels of the clusters.

    References
    ----------

    - Multiclass spectral clustering, 2003
      Stella X. Yu, Jianbo Shi
      http://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf

    Notes
    -----

    The eigenvector embedding is used to iteratively search for the
    closest discrete partition.  First, the eigenvector embedding is
    normalized to the space of partition matrices. An optimal discrete
    partition matrix closest to this normalized embedding multiplied by
    an initial rotation is calculated.  Fixing this discrete partition
    matrix, an optimal rotation matrix is calculated.  These two
    calculations are performed until convergence.  The discrete partition
    matrix is returned as the clustering solution.  Used in spectral
    clustering, this method tends to be faster and more robust to random
    initialization than k-means.

    """

    from scipy.sparse import csc_matrix
    from scipy.linalg import LinAlgError

    random_state = check_random_state(random_state)

    vectors = as_float_array(vectors, copy=copy)

    eps = np.finfo(float).eps
    n_samples, n_components = vectors.shape

    # Normalize the eigenvectors to an equal length of a vector of ones.
    # Reorient the eigenvectors to point in the negative direction with respect
    # to the first element.  This may have to do with constraining the
    # eigenvectors to lie in a specific quadrant to make the discretization
    # search easier.
    norm_ones = np.sqrt(n_samples)
    for i in range(vectors.shape[1]):
        vectors[:, i] = (vectors[:, i] / norm(vectors[:, i])) \
            * norm_ones
        if vectors[0, i] != 0:
            vectors[:, i] = -1 * vectors[:, i] * np.sign(vectors[0, i])

    # Normalize the rows of the eigenvectors.  Samples should lie on the unit
    # hypersphere centered at the origin.  This transforms the samples in the
    # embedding space to the space of partition matrices.
    vectors = vectors / np.sqrt((vectors ** 2).sum(axis=1))[:, np.newaxis]

    svd_restarts = 0
    has_converged = False

    # If there is an exception we try to randomize and rerun SVD again
    # do this max_svd_restarts times.
    while (svd_restarts < max_svd_restarts) and not has_converged:

        # Initialize first column of rotation matrix with a row of the
        # eigenvectors
        rotation = np.zeros((n_components, n_components))
        rotation[:, 0] = vectors[random_state.randint(n_samples), :].T

        # To initialize the rest of the rotation matrix, find the rows
        # of the eigenvectors that are as orthogonal to each other as
        # possible
        c = np.zeros(n_samples)
        for j in range(1, n_components):
            # Accumulate c to ensure row is as orthogonal as possible to
            # previous picks as well as current one
            c += np.abs(np.dot(vectors, rotation[:, j - 1]))
            rotation[:, j] = vectors[c.argmin(), :].T
#......... (part of this function body omitted) .........
Contributor: arbelm2 | Project: cbio | Lines of code: 101 | Source: spectral.py


Example 17: mean_shift

def mean_shift(X, intensities=None, bandwidth=None, seeds=None,
               cluster_all=True, max_iterations=300, verbose=False, use_scipy=True):
    """mean_shift(X, intensities=None, bandwidth=None, seeds=None,
                  cluster_all=True, max_iterations=300, verbose=False, use_scipy=True)

    Mean shift algorithm

    Implementation taken from scikit-learn with two minor variants:

        - Use (by default) scipy KD-trees, which are faster in our case
        - weighted version of mean-shift using `intensities` as
          weights (i.e., we compute centers of mass rather than means)

    Parameters
    ----------

    X : array-like, shape=[n_samples, n_features]
        Input data.

    intensities : array-like, shape=[n_samples]
        Voxel intensities, used to weight the mean

    bandwidth : float
        Kernel bandwidth.

    seeds : array-like, shape=[n_seeds, n_features]
        Point used as initial kernel locations.

    use_scipy : bool
        If true use cKDTree from scipy.spatial, otherwise
        use NearestNeighbors from sklearn.neighbors

    Returns
    -------

    cluster_centers : array, shape=[n_clusters, n_features]
        Coordinates of cluster centers.

    labels : array, shape=[n_samples]
        Cluster labels for each point.

    volumes : array, shape=[n_clusters]
        Volume of each cluster (# of points in the cluster)

    masses : array, shape=[n_clusters]
        Mass of each cluster (sum of intensities of points in the cluster).

    trajectories : list
        MS trajectories for debugging purposes.
    """
    if seeds is None:
        seeds = X
    n_points, n_features = X.shape
    stop_thresh = 1e-3 * bandwidth  # when mean has converged
    center_volume_dict = {}
    center_mass_dict = {}
    # tee.log('Fitting NearestNeighbors on', n_points, 'points')
    if use_scipy:
        kdtree = cKDTree(X)
    else:
        nbrs = NearestNeighbors(radius=bandwidth).fit(X)

    # For each seed, climb gradient until convergence or max_iterations
    trajectories = {}  # for each seed, a list of points
    tee.log('Moving kernels for', len(seeds), 'seeds')
    pbar = pb.ProgressBar(widgets=['Moving %d seeds: ' % len(seeds), pb.Percentage()],
                          maxval=len(seeds)).start()
    for seed_no, my_mean in enumerate(seeds):
        completed_iterations = 0
        seed = my_mean
        trajectories[seed_no] = []
        while True:
            # Find mean of points within bandwidth
            if use_scipy:
                i_nbrs = kdtree.query_ball_point(my_mean, r=bandwidth)
            else:
                i_nbrs = nbrs.radius_neighbors([my_mean], bandwidth,
                                               return_distance=False)[0]
            points_within = X[i_nbrs]
            if len(points_within) == 0:
                break  # Depending on seeding strategy this condition may occur
            my_old_mean = my_mean  # save the old mean
            if intensities is None:
                my_mean = np.mean(points_within, axis=0)
            else:
                my_mean = np.average(points_within, axis=0, weights=intensities[i_nbrs])
            # If converged or at max_iterations, add the cluster
            if extmath.norm(my_mean - my_old_mean) < stop_thresh or completed_iterations == max_iterations:
                center_volume_dict[tuple(my_mean)] = len(points_within)
                center_mass_dict[tuple(my_mean)] = sum(intensities[i_nbrs])
                break
            completed_iterations += 1
            trajectories[seed_no].append(my_mean)
        if verbose:
            print('seed', seed, '-->', my_mean,
                  center_volume_dict[tuple(my_mean)], center_mass_dict[tuple(my_mean)], completed_iterations)

        pbar.update(seed_no+1)
    pbar.finish()
    # POST PROCESSING: remove near duplicate points
#......... (part of this function body omitted) .........
Contributor: paolo-f | Project: bcfind | Lines of code: 101 | Source: mscd.py


Example 18: _fit

    def _fit(self, X):
        """Fit the model on X

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        """
        self.nnzs = []
        X_orig = X.copy()
        if self.trim_proportion < 0 or self.trim_proportion > 0.5:
            raise ValueError('`trim_proportion` must be between 0 and 0.5,'
                             ' got %s.' % self.trim_proportion)
        
        lam = 1.0 / np.sqrt(np.max(X.shape))
        rng = check_random_state(self.random_state)
        X = check_array(X)
        self.obj = []
        n_samples, n_features = X.shape
        X = as_float_array(X, copy=self.copy)
        # Center data
        if self.centering == 'mean':
            self.center_ = np.mean(X, axis=0)
        elif self.centering == 'median':
            self.center_ = np.median(X, axis=0)
        else:
            raise ValueError("`centering` must be 'mean' or 'median', "
                             "got %s" % self.centering)
        X -= self.center_

        if self.n_components is None:
            n_components = X.shape[1]
        elif not 0 <= self.n_components <= n_features:
            raise ValueError("n_components=%r invalid for n_features=%d"
                             % (self.n_components, n_features))
        else:
            n_components = self.n_components

        self.components_ = np.empty((n_components, n_features))
        for k in range(n_components):
            # compute k'th principal component
            mu = rng.rand(n_features) - 0.5
            mu = mu / norm(mu)

            # initialize using a few EM iterations
            for i in range(3):
                dots = fast_dot(X, mu)
                mu = fast_dot(dots.T, X)
                mu = mu / norm(mu)

            # grassmann average
            for i in range(n_samples):
                prev_mu = mu
                dot_signs = np.sign(fast_dot(X, mu))
                mu = _trimmed_mean(X * dot_signs[:, np.newaxis],
                                   self.trim_proportion)
                mu = mu / norm(mu)

                
                if np.max(np.abs(mu - prev_mu)) < self.tol:
                    break

            # store the estimated vector and possibly re-orthonormalize
            if k > 0:
                mu = _reorth(self.components_[:k-1], mu)
                mu = mu / norm(mu)

            self.components_[k] = mu

            if k < n_components - 1:
                X = X - fast_dot(fast_dot(X, mu)[:, np.newaxis],
                                 mu[np.newaxis, :])
                L = X + self.center_
                S = X_orig - L
                
                o = norm_(L, 'nuc') + lam*np.sum(np.abs(S))
                #print('TGA Objective = ', o)
                self.obj.append(o)
                self.nnzs.append(np.sum(S > 0))
Contributor: amueller | Project: pca | Lines of code: 80 | Source: tga.py


Example 19: _paii

    def _paii(self, loss_t, x_t):
        return loss_t / (extmath.norm(x_t) ** 2.0) + 1.0 / 2.0 * self.C
Contributor: jsouza | Project: pamtl | Lines of code: 2 | Source: partl_regression.py



Note: The sklearn.utils.extmath.norm examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open source projects contributed by various developers; copyright of the source code remains with the original authors, and distribution or use should follow the corresponding project's license. Do not reproduce without permission.

