Python extmath.row_norms Function Code Examples


This article collects typical usage examples of the Python function sklearn.utils.extmath.row_norms, gathered from open-source projects. If you are wondering what row_norms does, what it is used for, or how to call it in practice, the curated examples below should help.



Twenty code examples of the row_norms function are shown below, sorted by popularity by default.
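
Before the examples, a minimal sketch of what row_norms computes; the input values here are made up for illustration:

import numpy as np
from scipy import sparse
from sklearn.utils.extmath import row_norms

X = np.arange(6, dtype=np.float64).reshape(2, 3)  # rows [0, 1, 2] and [3, 4, 5]

# Squared L2 norm of each row; avoids materializing X ** 2 for sparse input.
print(row_norms(X, squared=True))  # [ 5. 50.]
print(row_norms(X))                # [2.2360..., 7.0710...]

# CSR matrices are supported too, which many of the examples below rely on.
X_csr = sparse.csr_matrix(X)
print(row_norms(X_csr, squared=True))  # [ 5. 50.]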

Example 1: test_row_norms

def test_row_norms():
    X = np.random.RandomState(42).randn(100, 100)
    for dtype in (np.float32, np.float64):
        if dtype is np.float32:
            precision = 4
        else:
            precision = 5

        X = X.astype(dtype)
        sq_norm = (X ** 2).sum(axis=1)

        assert_array_almost_equal(sq_norm, row_norms(X, squared=True),
                                  precision)
        assert_array_almost_equal(np.sqrt(sq_norm), row_norms(X), precision)

        for csr_index_dtype in [np.int32, np.int64]:
            Xcsr = sparse.csr_matrix(X, dtype=dtype)
            # csr_matrix will use int32 indices by default,
            # up-casting those to int64 when necessary
            if csr_index_dtype is np.int64:
                Xcsr.indptr = Xcsr.indptr.astype(csr_index_dtype)
                Xcsr.indices = Xcsr.indices.astype(csr_index_dtype)
            assert Xcsr.indices.dtype == csr_index_dtype
            assert Xcsr.indptr.dtype == csr_index_dtype
            assert_array_almost_equal(sq_norm, row_norms(Xcsr, squared=True),
                                      precision)
            assert_array_almost_equal(np.sqrt(sq_norm), row_norms(Xcsr),
                                      precision)
Author: BasilBeirouti | Project: scikit-learn | Lines: 28 | Source: test_extmath.py


Example 2: test_row_norms

def test_row_norms():
    X = np.random.RandomState(42).randn(100, 100)
    sq_norm = (X ** 2).sum(axis=1)

    assert_array_almost_equal(sq_norm, row_norms(X, squared=True), 5)
    assert_array_almost_equal(np.sqrt(sq_norm), row_norms(X))

    Xcsr = sparse.csr_matrix(X, dtype=np.float32)
    assert_array_almost_equal(sq_norm, row_norms(Xcsr, squared=True), 5)
    assert_array_almost_equal(np.sqrt(sq_norm), row_norms(Xcsr))
Author: 93sam | Project: scikit-learn | Lines: 10 | Source: test_extmath.py


Example 3: euclidean_distances

def euclidean_distances(X, Y=None):
    if Y is None:  # as written, the advertised Y=None default would crash below
        Y = X
    YY = row_norms(Y, squared=True)[np.newaxis, :]
    if X is Y:  # shortcut in the common case euclidean_distances(X, X)
        XX = YY.T
    else:
        XX = row_norms(X, squared=True)[:, np.newaxis]

    distances = np.dot(X, Y.T)
    distances *= -2
    distances += XX
    distances += YY
    np.maximum(distances, 0, out=distances)

    return distances
Author: haoopeng | Project: MLAlgorithms | Lines: 14 | Source: different_kernels.py
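
The expansion used above is the identity ||x − y||² = ||x||² − 2·x·y + ||y||²; note that, as written, the function returns squared distances. A quick sanity check of the identity on made-up data (not part of the original project):

import numpy as np

rng = np.random.RandomState(0)
A, B = rng.randn(5, 3), rng.randn(4, 3)

# Expansion: ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2
fast = (A ** 2).sum(axis=1)[:, None] - 2 * A.dot(B.T) + (B ** 2).sum(axis=1)[None, :]

# Direct pairwise computation for reference.
direct = ((A[:, None, :] - B[None, :, :]) ** 2).sum(axis=-1)

assert np.allclose(fast, direct)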


Example 4: fit

    def fit(self, X, y):
        """Fit factorization machine to training data.

        Parameters
        ----------
        X : array-like or sparse, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape = [n_samples]
            Target values.

        Returns
        -------
        self : Estimator
            Returns self.
        """
        if self.degree > 3:
            raise ValueError("FMs with degree >3 not yet supported.")

        X, y = self._check_X_y(X, y)
        X = self._augment(X)
        n_features = X.shape[1]  # augmented
        X_col_norms = row_norms(X.T, squared=True)
        dataset = get_dataset(X, order="fortran")
        rng = check_random_state(self.random_state)
        loss_obj = self._get_loss(self.loss)

        if not (self.warm_start and hasattr(self, 'w_')):
            self.w_ = np.zeros(n_features, dtype=np.double)

        if self.fit_lower == 'explicit':
            n_orders = self.degree - 1
        else:
            n_orders = 1

        if not (self.warm_start and hasattr(self, 'P_')):
            self.P_ = 0.01 * rng.randn(n_orders, self.n_components, n_features)

        if not (self.warm_start and hasattr(self, 'lams_')):
            if self.init_lambdas == 'ones':
                self.lams_ = np.ones(self.n_components)
            elif self.init_lambdas == 'random_signs':
                self.lams_ = np.sign(rng.randn(self.n_components))
            else:
                raise ValueError("Lambdas must be initialized as ones "
                                 "(init_lambdas='ones') or as random "
                                 "+/- 1 (init_lambdas='random_signs').")

        y_pred = self._get_output(X)

        converged = _cd_direct_ho(self.P_, self.w_, dataset, X_col_norms, y,
                                  y_pred, self.lams_, self.degree, self.alpha,
                                  self.beta, self.fit_linear,
                                  self.fit_lower == 'explicit', loss_obj,
                                  self.max_iter, self.tol, self.verbose)
        if not converged:
            warnings.warn("Objective did not converge. Increase max_iter.")

        return self
Author: vene | Project: polylearn | Lines: 60 | Source: factorization_machine.py
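
One detail worth flagging in the fit above: row_norms(X.T, squared=True) is a compact way to get per-column squared norms, which the coordinate-descent solver needs. A small illustration on made-up data:

import numpy as np
from sklearn.utils.extmath import row_norms

X = np.array([[1., 2.], [3., 4.]])
col_norms = row_norms(X.T, squared=True)             # squared norm of each column
assert np.allclose(col_norms, (X ** 2).sum(axis=0))  # [10., 20.]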


Example 5: test_get_auto_step_size

def test_get_auto_step_size():
    X = np.array([[1, 2, 3], [2, 3, 4], [2, 3, 2]], dtype=np.float64)
    alpha = 1.2
    fit_intercept = False
    # sum the squares of the second sample because that's the largest
    max_squared_sum = 4 + 9 + 16
    max_squared_sum_ = row_norms(X, squared=True).max()
    assert_almost_equal(max_squared_sum, max_squared_sum_, decimal=4)

    for fit_intercept in (True, False):
        step_size_sqr = 1.0 / (max_squared_sum + alpha + int(fit_intercept))
        step_size_log = 4.0 / (max_squared_sum + 4.0 * alpha +
                               int(fit_intercept))

        step_size_sqr_ = get_auto_step_size(max_squared_sum_, alpha, "squared",
                                            fit_intercept)
        step_size_log_ = get_auto_step_size(max_squared_sum_, alpha, "log",
                                            fit_intercept)

        assert_almost_equal(step_size_sqr, step_size_sqr_, decimal=4)
        assert_almost_equal(step_size_log, step_size_log_, decimal=4)

    msg = 'Unknown loss function for SAG solver, got wrong instead of'
    assert_raise_message(ValueError, msg, get_auto_step_size,
                         max_squared_sum_, alpha, "wrong", fit_intercept)
Author: 1992huanghai | Project: scikit-learn | Lines: 25 | Source: test_sag.py
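
The expected values in this test follow from the SAG step-size rule 1 / L, where L is a Lipschitz constant of the per-sample gradient: L = max_i ||x_i||² + alpha for the squared loss, and (max_i ||x_i||²) / 4 + alpha for the log loss, plus the intercept term when one is fit. A standalone numpy sketch of the same arithmetic (not the sklearn helper itself):

import numpy as np
from sklearn.utils.extmath import row_norms

X = np.array([[1, 2, 3], [2, 3, 4], [2, 3, 2]], dtype=np.float64)
alpha, fit_intercept = 1.2, True

max_sq = row_norms(X, squared=True).max()  # 29.0 for this X

# Lipschitz constants, matching the formulas asserted in the test above.
L_squared = max_sq + alpha + int(fit_intercept)
L_log = 0.25 * (max_sq + 4.0 * alpha + int(fit_intercept))

step_squared = 1.0 / L_squared  # == step_size_sqr in the test
step_log = 1.0 / L_log          # == step_size_log in the test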


Example 6: compute_distances

    def compute_distances(self, x1, x2=None):
        """
        The method
        - extracts normalized continuous attributes and then uses `row_norms`
          and `safe_sparse_dot` to compute the distance as x^2 - 2xy + y^2
          (the trick from sklearn);
        - calls a function in Cython that adds the contributions of discrete
          columns
        """
        if self.normalize:
            x1 = x1 - self.means
            x1 /= np.sqrt(2 * self.vars)

        # adapted from sklearn.metric.euclidean_distances
        xx = row_norms(x1.T, squared=True)[:, np.newaxis]
        distances = safe_sparse_dot(x1.T, x1, dense_output=True)
        distances *= -2
        distances += xx
        distances += xx.T
        with np.errstate(invalid="ignore"):  # Nans are fixed below
            np.maximum(distances, 0, out=distances)
        distances.flat[::distances.shape[0] + 1] = 0.0

        fixer = _distance.fix_euclidean_cols_normalized if self.normalize \
            else _distance.fix_euclidean_cols
        fixer(distances, x1, self.means, self.vars)
        return np.sqrt(distances)
Author: acopar | Project: orange3 | Lines: 27 | Source: distance.py


Example 7: test_labels_assignment_and_inertia

def test_labels_assignment_and_inertia():
    # pure numpy implementation as easily auditable reference gold
    # implementation
    rng = np.random.RandomState(42)
    noisy_centers = centers + rng.normal(size=centers.shape)
    labels_gold = -np.ones(n_samples, dtype=int)  # np.int and np.infty are removed in modern numpy
    mindist = np.empty(n_samples)
    mindist.fill(np.inf)
    for center_id in range(n_clusters):
        dist = np.sum((X - noisy_centers[center_id]) ** 2, axis=1)
        labels_gold[dist < mindist] = center_id
        mindist = np.minimum(dist, mindist)
    inertia_gold = mindist.sum()
    assert_true((mindist >= 0.0).all())
    assert_true((labels_gold != -1).all())

    # perform label assignment using the dense array input
    x_squared_norms = (X ** 2).sum(axis=1)
    labels_array, inertia_array = _labels_inertia(
        X, x_squared_norms, noisy_centers)
    assert_array_almost_equal(inertia_array, inertia_gold)
    assert_array_equal(labels_array, labels_gold)

    # perform label assignment using the sparse CSR input
    x_squared_norms_from_csr = row_norms(X_csr, squared=True)
    labels_csr, inertia_csr = _labels_inertia(
        X_csr, x_squared_norms_from_csr, noisy_centers)
    assert_array_almost_equal(inertia_csr, inertia_gold)
    assert_array_equal(labels_csr, labels_gold)
Author: Lavanya-Basavaraju | Project: scikit-learn | Lines: 29 | Source: test_k_means.py
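
The reference loop above can also be written in vectorized form with the same row_norms expansion; a minimal, dense-only sketch of label assignment and inertia (illustrative only, not sklearn's _labels_inertia):

import numpy as np
from sklearn.utils.extmath import row_norms

def labels_inertia_ref(X, centers):
    # Pairwise squared distances via ||x||^2 - 2 x.c + ||c||^2.
    d2 = (row_norms(X, squared=True)[:, None]
          - 2 * X.dot(centers.T)
          + row_norms(centers, squared=True)[None, :])
    np.maximum(d2, 0, out=d2)  # clip tiny negatives caused by rounding
    labels = d2.argmin(axis=1)
    inertia = d2[np.arange(X.shape[0]), labels].sum()
    return labels, inertia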


Example 8: get_kpp_init

def get_kpp_init(X, n_clusters, random_state=None):
    # (the original reset random_state to None here, which silently discarded the argument)
    random_state = check_random_state(random_state)
    x_squared_norms = row_norms(X, squared=True)
    # k-means++ seeding via sklearn's private helper; returns n_clusters x D
    centers = sklearn.cluster.k_means_._k_init(X, n_clusters, random_state=random_state,
                                               x_squared_norms=x_squared_norms)
    W = np.transpose(centers)  # D x n_clusters
    W_tf = tf.constant(W)
    return centers, W, W_tf
Author: brando90 | Project: hbf_tensorflow_code | Lines: 8 | Source: initializations.py
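
_k_init is a private sklearn helper and its home (sklearn.cluster.k_means_) has moved between releases; newer scikit-learn versions (0.24+, to the best of my knowledge) expose the same k-means++ seeding publicly as sklearn.cluster.kmeans_plusplus. A sketch of the equivalent call under that assumption:

import numpy as np
from sklearn.cluster import kmeans_plusplus  # public API in recent scikit-learn

X = np.random.RandomState(0).randn(100, 5)
centers, indices = kmeans_plusplus(X, n_clusters=4, random_state=0)
# centers: (4, 5) array of seeds; indices: rows of X chosen as seeds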


Example 9: _kmeans_spark

def _kmeans_spark(X, n_clusters, max_iter=300, worker_nums=10, init='k-means++', random_state=None, tol=1e-4):
    from pyspark import SparkContext, SparkConf

    conf = SparkConf().setAppName('K-Means_Spark').setMaster('local[%d]'%worker_nums)
    sc = SparkContext(conf=conf)
    data = sc.parallelize(X)
    data.cache()

    random_state = check_random_state(random_state)

    best_labels, best_inertia, best_centers = None, None, None

    x_squared_norms = row_norms(X, squared=True)
    #  x_squared_norms = data.map(lambda x: (x*x).sum(axis=0)).collect()
    #  x_squared_norms = np.array(x_squared_norms, dtype='float64')

    centers = _init_centroids(X, n_clusters, init, random_state, x_squared_norms=x_squared_norms)

    bs = X.shape[0] // worker_nums  # integer chunk size; plain / yields a float in Python 3
    data_temp = []
    for i in range(worker_nums-1):
        data_temp.append(X[i*bs:(i+1)*bs])
    data_temp.append(X[(worker_nums-1)*bs:])
    # Note: the chunks may have unequal lengths, so keep them as a list of arrays
    # rather than forcing them into a (ragged) ndarray.
    data_temp = sc.parallelize(data_temp)
    data_temp.cache()


    for i in range(max_iter):
        centers_old = centers.copy()

        all_distances = data_temp.map(lambda x: euclidean_distances(centers, x, squared=True)).collect()
        temp_all_distances = all_distances[0]
        for j in range(1, worker_nums):  # j, not i: avoid shadowing the outer iteration index
            temp_all_distances = np.hstack((temp_all_distances, all_distances[j]))
        all_distances = temp_all_distances

        #  all_distances = data.map(lambda x: euclidean_distances(centers, x, squared=True)).collect()
        #  # reshape, from (1, n_samples, k) to (k, n_samples)
        #  all_distances = np.asarray(all_distances, dtype="float64").T[0]

        # Assignment, also called E-step of EM
        labels, inertia = _labels_inertia(X, x_squared_norms, centers, all_distances=all_distances)
        # re-computation of the centroids, also called M-step of EM
        centers = _centers(X, labels, n_clusters)

        if best_inertia is None or inertia < best_inertia:
            best_labels  = labels.copy()
            best_centers = centers.copy()
            best_inertia = inertia

        shift = squared_norm(centers_old - centers)
        if shift <= tol:
            break

    return best_centers, best_labels, best_inertia
Author: cyh24 | Project: PySparkML | Lines: 56 | Source: k_means_.py


Example 10: test_row_norms

def test_row_norms():
    X = np.random.RandomState(42).randn(100, 100)
    for dtype in (np.float32, np.float64):
        if dtype is np.float32:
            precision = 4
        else:
            precision = 5

        X = X.astype(dtype)
        sq_norm = (X ** 2).sum(axis=1)

        assert_array_almost_equal(sq_norm, row_norms(X, squared=True),
                                  precision)
        assert_array_almost_equal(np.sqrt(sq_norm), row_norms(X), precision)

        Xcsr = sparse.csr_matrix(X, dtype=dtype)
        assert_array_almost_equal(sq_norm, row_norms(Xcsr, squared=True),
                                  precision)
        assert_array_almost_equal(np.sqrt(sq_norm), row_norms(Xcsr), precision)
Author: antoinewdg | Project: scikit-learn | Lines: 19 | Source: test_extmath.py


Example 11: get_auto_step_size

def get_auto_step_size(X, alpha, loss, gamma=None, sample_weight=None):
    """Compute automatic step size for SAG solver
    Stepsize computed using the following objective:
        minimize_w  1 / n_samples * \sum_i loss(w^T x_i, y_i)
                    + alpha * 0.5 * ||w||^2_2
    Parameters
    ----------
    X : ndarray
        Array of samples x_i.
    alpha : float
        Constant that multiplies the l2 penalty term.
    loss : string, in {"log", "squared", "modified_huber", "smooth_hinge",
        "squared_hinge"}
        The loss function used in SAG solver.
    gamma : float, optional
        Smoothing parameter of the smooth hinge loss (only used when
        loss="smooth_hinge").
    sample_weight : ndarray, optional
        Per-sample weights; when given, the squared row norms are weighted
        accordingly.

    Returns
    -------
    step_size : float
        Step size used in SAG/SAGA solver.
    """
    if sample_weight is None:
        weighted_norms = row_norms(X, squared=True)
    else:
        weighted_norms = sample_weight * row_norms(X, squared=True)
    L = np.max(weighted_norms)
    n_samples = X.shape[0]

    if loss == 'log':
        # Lipschitz constant of the per-sample gradient for the log loss
        # (the step size returned below is its inverse)
        lipschitz_constant = 0.25 * L + alpha
    elif loss == 'squared':
        lipschitz_constant = L + alpha
    elif loss == 'modified_huber':
        lipschitz_constant = 2 * L + alpha
    elif loss == 'smooth_hinge':
        lipschitz_constant = L + gamma + alpha
    elif loss == 'squared_hinge':
        lipschitz_constant = 2 * L + alpha
    else:
        raise ValueError("`auto` stepsize is not available for the `%s` loss. "
                         "Please specify a stepsize." % loss)
    return 1.0 / lipschitz_constant
Author: casotto | Project: lightning | Lines: 42 | Source: sag.py


Example 12: prepare_data

    def prepare_data(x):
        if self.discrete.any():
            data = Cosine.discrete_to_indicators(x, self.discrete)
        else:
            data = x.copy()
        for col, mean in enumerate(self.means):
            column = data[:, col]
            column[np.isnan(column)] = mean
        if self.axis == 0:
            data = data.T
        data /= row_norms(data)[:, np.newaxis]
        return data
Author: acopar | Project: orange3 | Lines: 12 | Source: distance.py
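
The data /= row_norms(data)[:, np.newaxis] line above is the usual idiom for scaling every row to unit L2 norm; for dense input it matches sklearn.preprocessing.normalize. A quick check on made-up data (assuming no all-zero rows, which would divide by zero):

import numpy as np
from sklearn.preprocessing import normalize
from sklearn.utils.extmath import row_norms

data = np.array([[3., 4.], [1., 0.]])
unit = data / row_norms(data)[:, np.newaxis]
assert np.allclose(unit, normalize(data))  # normalize defaults to L2, axis=1
assert np.allclose(row_norms(unit), 1.0)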


Example 13: fit

    def fit(self, X):
        x_squared_norms = row_norms(X, squared=True)
        rng = np.random.RandomState(self.random_state)

        if self.init == "kmeans++":
            # Private function of sklearn.cluster.k_means_, to get the initial centers.
            init_centers = _k_init(X, self.n_clusters, x_squared_norms, rng)
        elif self.init == "random":
            # randint excludes the upper bound; random_integers is deprecated and
            # inclusive, so the original could index one past the end of X.
            random_samples = rng.randint(0, X.shape[0], size=self.n_clusters)
            init_centers = X[random_samples, :]
        else:
            raise ValueError("init should be either kmeans++ or random")

        # Assign initial labels. skip norm of x**2
        init_distances = np.sum(init_centers**2, axis=1) - 2 * np.dot(X, init_centers.T)
        init_labels = np.argmin(init_distances, axis=1)
        self.labels_ = init_labels

        self.centers_ = init_centers
        self.n_samples_ = np.zeros(self.n_clusters)

        # Count the number of samples in each cluster.
        for i in range(self.n_clusters):
            self.n_samples_[i] = np.sum(self.labels_ == i)

        for i, (sample, label) in enumerate(zip(X, self.labels_)):
            curr_label = label
            max_cost = np.inf
            while max_cost > 0:
                distances = x_squared_norms[i] - 2 * np.dot(sample, self.centers_.T) + np.sum(self.centers_**2, axis=1)

                curr_distance = distances[curr_label]
                other_distance = np.delete(distances, curr_label)
                curr_n_samples = self.n_samples_[curr_label]
                other_n_samples = np.delete(self.n_samples_, curr_label)
                cost = (curr_n_samples / (curr_n_samples - 1) * curr_distance) - (other_n_samples / (other_n_samples + 1) * other_distance)
                max_cost_ind = np.argmax(cost)
                max_cost = cost[max_cost_ind]

                if max_cost > 0:
                    # We deleted the label index from other_n_samples
                    if max_cost_ind > curr_label:
                        max_cost_ind += 1

                    # Reassign the clusters
                    self.labels_[i] = max_cost_ind

                    self.centers_[curr_label] = (curr_n_samples * self.centers_[curr_label] - sample) / (curr_n_samples - 1)
                    moved_n_samples = self.n_samples_[max_cost_ind]
                    self.centers_[max_cost_ind] = (moved_n_samples * self.centers_[max_cost_ind] + sample) / (moved_n_samples + 1)
                    self.n_samples_[curr_label] -= 1
                    self.n_samples_[max_cost_ind] += 1
                    curr_label = max_cost_ind
Author: MechCoder | Project: Hartigan | Lines: 53 | Source: naive_hartigan.py


Example 14: kmeans_subsample

def kmeans_subsample(X, n_clusters, random_state=None, n_local_trials=10):

    random_state = check_random_state(random_state)

    n_samples, n_features = X.shape
    x_squared_norms = row_norms(X, squared=True)
    centers = np.empty((n_clusters, n_features))

    # Pick first center randomly
    center_id = random_state.randint(n_samples)
    centers[0] = X[center_id]

    # Initialize list of closest distances and calculate current potential
    closest_dist_sq = euclidean_distances(centers[0].reshape(1, -1), X, Y_norm_squared=x_squared_norms, squared=True)
    current_pot = closest_dist_sq.sum()

    # Pick the remaining n_clusters-1 points
    for c in range(1, n_clusters):
        # Choose center candidates by sampling with probability proportional
        # to the squared distance to the closest existing center
        rand_vals = random_state.random_sample(n_local_trials) * current_pot
        candidate_ids = np.searchsorted(closest_dist_sq.cumsum(), rand_vals)

        # Compute distances to center candidates
        distance_to_candidates = euclidean_distances(X[candidate_ids], X, Y_norm_squared=x_squared_norms, squared=True)

        # Decide which candidate is the best
        best_candidate = None
        best_pot = None
        best_dist_sq = None
        for trial in range(n_local_trials):
            # Compute potential when including center candidate
            new_dist_sq = np.minimum(closest_dist_sq, distance_to_candidates[trial])
            new_pot = new_dist_sq.sum()

            # Store result if it is the best local trial so far
            if (best_candidate is None) or (new_pot < best_pot):
                best_candidate = candidate_ids[trial]
                best_pot = new_pot
                best_dist_sq = new_dist_sq

        # Permanently add best center candidate found in local tries
        centers[c] = X[best_candidate]
        current_pot = best_pot
        closest_dist_sq = best_dist_sq

    return centers
Author: eiriniar | Project: CellCnn | Lines: 47 | Source: downsample.py


Example 15: kmeanspp

def kmeanspp(X, k, seed):
    # That we need to do this is a bug in _init_centroids
    x_squared_norms = row_norms(X, squared=True)
    # Use k-means++ to initialise the centroids
    centroids = _init_centroids(X, k, 'k-means++', random_state=seed, x_squared_norms=x_squared_norms)
    # OK, we should just short-circuit and get these from k-means++...
    # quick and dirty solution
    nns = NearestNeighbors()
    nns.fit(X)
    centroid_candidatess = nns.radius_neighbors(X=centroids, radius=0, return_distance=False)
    # Account for "degenerate" solutions: several voxels at distance 0, each becoming a centroid
    centroids = set()
    for centroid_candidates in centroid_candidatess:
        centroid_candidates = set(centroid_candidates) - centroids
        if not centroid_candidates:
            raise Exception('Cannot get an unambiguous set of centers; '
                            'theoretically this cannot happen, so check for bugs')
        centroids.add(centroid_candidates.pop())
    return np.array(sorted(centroids))
Author: strawlab | Project: braincode | Lines: 19 | Source: stability.py


Example 16: _init_centroids

def _init_centroids(X, k, init, random_state, x_squared_norms=None):
    random_state = check_random_state(random_state)
    n_samples = X.shape[0]

    if x_squared_norms is None:
        x_squared_norms = row_norms(X, squared=True)

    if n_samples < k:
        raise ValueError("n_samples=%d should be larger than k=%d" % (n_samples, k))

    if init == 'k-means++':
        centers = _k_init(X, k, random_state=random_state,
                          x_squared_norms=x_squared_norms)
    elif init == 'random':
        seeds = random_state.permutation(n_samples)[:k]
        centers = X[seeds]
    else:
        # the original silently fell through here, leaving `centers` undefined
        raise ValueError("init should be 'k-means++' or 'random', got %r" % init)

    return centers
Author: cyh24 | Project: PySparkML | Lines: 19 | Source: k_means_.py


Example 17: test_get_auto_step_size

def test_get_auto_step_size():
    X = np.array([[1, 2, 3], [2, 3, 4], [2, 3, 2]], dtype=np.float64)
    alpha = 1.2
    fit_intercept = False
    # sum the squares of the second sample because that's the largest
    max_squared_sum = 4 + 9 + 16
    max_squared_sum_ = row_norms(X, squared=True).max()
    n_samples = X.shape[0]
    assert_almost_equal(max_squared_sum, max_squared_sum_, decimal=4)

    for saga in [True, False]:
        for fit_intercept in (True, False):
            if saga:
                L_sqr = (max_squared_sum + alpha + int(fit_intercept))
                L_log = (max_squared_sum + 4.0 * alpha +
                         int(fit_intercept)) / 4.0
                mun_sqr = min(2 * n_samples * alpha, L_sqr)
                mun_log = min(2 * n_samples * alpha, L_log)
                step_size_sqr = 1 / (2 * L_sqr + mun_sqr)
                step_size_log = 1 / (2 * L_log + mun_log)
            else:
                step_size_sqr = 1.0 / (max_squared_sum +
                                       alpha + int(fit_intercept))
                step_size_log = 4.0 / (max_squared_sum + 4.0 * alpha +
                                       int(fit_intercept))

            step_size_sqr_ = get_auto_step_size(max_squared_sum_, alpha,
                                                "squared",
                                                fit_intercept,
                                                n_samples=n_samples,
                                                is_saga=saga)
            step_size_log_ = get_auto_step_size(max_squared_sum_, alpha, "log",
                                                fit_intercept,
                                                n_samples=n_samples,
                                                is_saga=saga)

            assert_almost_equal(step_size_sqr, step_size_sqr_, decimal=4)
            assert_almost_equal(step_size_log, step_size_log_, decimal=4)

    msg = 'Unknown loss function for SAG solver, got wrong instead of'
    assert_raise_message(ValueError, msg, get_auto_step_size,
                         max_squared_sum_, alpha, "wrong", fit_intercept)
Author: AlexisMignon | Project: scikit-learn | Lines: 42 | Source: test_sag.py


Example 18: predict

    def predict(self, X):
        """Predict the closest cluster each sample in X belongs to.

        In the vector quantization literature, `cluster_centers_` is called
        the code book and each value returned by `predict` is the index of
        the closest code in the code book.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            New data to predict.

        Returns
        -------
        labels : array, shape [n_samples,]
            Index of the cluster each sample belongs to.
        """
        # check_is_fitted(self, 'cluster_centers_')

        X = self._check_test_data(X)
        x_squared_norms = row_norms(X, squared=True)
        return _labels_inertia(X, x_squared_norms, self.cluster_centers_)[0]
Author: AnilSener | Project: semiKmeans | Lines: 22 | Source: semiKMeans.py


Example 19: run_step

    def run_step(self, run_number, step_size, howlong):
        df_slot = self.get_input_slot('df')
        df_slot.update(run_number, buffer_created=True, buffer_updated=True)
        if df_slot.has_deleted():
            self.reset()
            df_slot.reset()
            df_slot.update(run_number)
        input_df = df_slot.data()
        columns = self.get_columns(input_df)
        if input_df is None or len(input_df) == 0:
            return self._return_run_step(self.state_blocked, steps_run=0)
        indices = df_slot.next_created(step_size)
        steps = indices_len(indices)
        step_size -= steps
        steps_run = steps
        if steps != 0:
            indices = fix_loc(indices)
            self._buffer.append(input_df.loc[indices])
            self._df = self._buffer.df()
            self._df.loc[indices, self.UPDATE_COLUMN] = run_number
        if step_size > 0 and df_slot.has_updated():
            indices = df_slot.next_updated(step_size, as_slice=False)
            steps = indices_len(indices)
            if steps != 0:
                steps_run += steps
                indices = fix_loc(indices)  # no need, but stick to the stereotype
                updated = self.filter_columns(input_df, indices)
                df = self.filter_columns(self._df, indices)
                norms = row_norms(updated - df)
                selected = (norms > (self._delta * self.get_scale()))
                indices = indices[selected]
                if selected.any():
                    logger.debug('updating at %d', run_number)
                    self._df.loc[indices, self._columns] = updated.loc[indices, self._columns]
                    self._df.loc[indices, self.UPDATE_COLUMN] = run_number
                else:
                    logger.debug('Not updating at %d', run_number)
        return self._return_run_step(df_slot.next_state(), steps_run=steps_run)
Author: jdfekete | Project: progressivis | Lines: 38 | Source: select_delta.py


Example 20: _kmeans_single

def _kmeans_single(X, n_clusters, max_iter=300, init='k-means++', random_state=None, tol=1e-4):
    random_state = check_random_state(random_state)

    best_labels, best_inertia, best_centers = None, None, None

    # init
    x_squared_norms = row_norms(X, squared=True)
    centers = _init_centroids(X, n_clusters, init, random_state, x_squared_norms=x_squared_norms)

    #  distances = np.zeros(shape=(X.shape[0],), dtype=np.float64)

    # iterations
    for i in range(max_iter):
        centers_old = centers.copy()
        # Assignment, also called E-step of EM
        labels, inertia = _labels_inertia(X, x_squared_norms, centers)

        # re-computation of the centroids, also called M-step of EM
        centers = _centers(X, labels, n_clusters)

        if best_inertia is None or inertia < best_inertia:
            best_labels  = labels.copy()
            best_centers = centers.copy()
            best_inertia = inertia

        shift = squared_norm(centers_old - centers)
        if shift <= tol:
            break

    if shift > 0:
        # rerun E-step in case of non-convergence so that predicted labels
        # match cluster centers
        best_labels, best_inertia = \
            _labels_inertia(X, x_squared_norms, best_centers)

    return best_centers, best_labels, best_inertia
Author: cyh24 | Project: PySparkML | Lines: 37 | Source: k_means_.py
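
Putting the pieces together, the E-step/M-step structure above boils down to a few lines of numpy; a compact, dense-only Lloyd's iteration sketch, without the k-means++ init and empty-cluster handling of the original:

import numpy as np
from sklearn.utils.extmath import row_norms

def lloyd_single(X, centers, max_iter=300, tol=1e-4):
    x_sq = row_norms(X, squared=True)
    for _ in range(max_iter):
        old = centers.copy()
        # E-step: assign each sample to its nearest center.
        d2 = (x_sq[:, None] - 2 * X.dot(centers.T)
              + row_norms(centers, squared=True)[None, :])
        labels = d2.argmin(axis=1)
        # M-step: move each center to the mean of its samples
        # (assumes no cluster ever becomes empty).
        centers = np.array([X[labels == k].mean(axis=0)
                            for k in range(centers.shape[0])])
        if ((old - centers) ** 2).sum() <= tol:
            break
    return centers, labels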



Note: the sklearn.utils.extmath.row_norms examples in this article were compiled by 纯净天空 from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors, and distribution and use are subject to each project's license. Do not repost without permission.

