• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python neighbors.NearestNeighbors类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sklearn.neighbors.NearestNeighbors的典型用法代码示例。如果您正苦于以下问题:Python NearestNeighbors类的具体用法?Python NearestNeighbors怎么用?Python NearestNeighbors使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了NearestNeighbors类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: __init__

class KDTrees:
    """Nearest-neighbour index over map positions.

    Wraps a ``NearestNeighbors`` estimator configured with the ball-tree
    algorithm and the haversine metric, and provides ``mapDistance``, a
    helper that measures the distance (or transit travel time) between
    two points.
    """

    def __init__(self, nb_neighbours, leaf_size):
        """Create the underlying estimator.

        Args:
            nb_neighbours: Number of neighbours returned per query.
            leaf_size: Leaf size forwarded to the ball tree.
        """
        self.nbrs = NearestNeighbors(n_neighbors=nb_neighbours, algorithm='ball_tree', metric='haversine', leaf_size=leaf_size)

    def mapDistance(self, x, y):
        """Compute distance in time between two points on the map.

        Args:
            x: First point. With more than two components the points are
                treated as plain vectors.
            y: Second point, same layout as ``x``.

        Returns:
            For points with more than two components, the squared Euclidean
            distance ``np.sum((x - y) ** 2)``. Otherwise the difference
            between the arrival and departure time values of the first leg
            of the first transit route returned by ``gmaps.directions``, or
            the sentinel ``1000000000`` when the response does not contain
            such a route.
        """
        if len(x) > 2:
            return np.sum((x - y) ** 2)

        # Order the pair so the point with the larger first coordinate is
        # always the origin; the same pair then always yields the same query.
        if x[0] < y[0]:
            x, y = y, x
        pos1 = str(x[0]) + ", " + str(x[1])
        pos2 = str(y[0]) + ", " + str(y[1])
        timestamp = datetime.now()
        # Seconds since the epoch for "today at 00:00" plus 32h minus 2h.
        # NOTE(review): presumably a fixed next-day departure time with a
        # hard-coded 2-hour timezone offset -- confirm against the caller.
        sec_to_add = 32 * 3600 + (timestamp - datetime(1970, 1, 1)).total_seconds() - 2*3600 - timestamp.hour * 3600 - timestamp.minute * 60 - timestamp.second
        traject = gmaps.directions(pos1, pos2, mode="transit", departure_time=timestamp.fromtimestamp(sec_to_add))
        try:
            leg = traject[0]["legs"][0]
            duration = leg["arrival_time"]["value"] - leg["departure_time"]["value"]
        except (IndexError, KeyError, TypeError):
            # No route, or a route without arrival/departure times: treat
            # the two points as effectively unreachable from each other.
            print('bug')
            return 1000000000
        # Report success only once the duration has really been extracted.
        print('ok')
        return duration

    def addPoints(self, points):
        """Fit the neighbour index on ``points``."""
        self.nbrs.fit(points)

    def getNeighbours(self, points):
        """Return the result of ``kneighbors`` for ``points``.

        The query result used to be computed and then discarded, so callers
        always received ``None``; it is now returned unchanged.
        """
        return self.nbrs.kneighbors(points)
开发者ID:AWehenkel,项目名称:Hive,代码行数:31,代码来源:KDTree.py


示例2: test_kernel_density_sampling

def test_kernel_density_sampling(n_samples=100, n_features=3):
    """Check that ``KernelDensity.sample`` draws plausible samples.

    For the kernels that support sampling, every drawn sample must lie
    close to a training point; for the other kernels ``sample`` must raise
    ``NotImplementedError``. Finally, a default ``sample()`` call must
    return a 2-D array.

    Args:
        n_samples: Number of training points to generate.
        n_features: Dimensionality of the training points.
    """
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features)

    bandwidth = 0.2

    for kernel in ['gaussian', 'tophat']:
        # draw samples from the density fitted with the current kernel
        kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X)
        samp = kde.sample(100)
        assert_equal(X.shape, samp.shape)

        # check that samples are in the right range: measure the distance
        # from each drawn sample to its closest training point. (Querying X
        # against itself, as before, always gives 0 and checks nothing.)
        nbrs = NearestNeighbors(n_neighbors=1).fit(X)
        dist, _ = nbrs.kneighbors(samp, return_distance=True)

        if kernel == 'tophat':
            assert np.all(dist < bandwidth)
        elif kernel == 'gaussian':
            # 5 standard deviations is safe for 100 samples, but there's a
            # very small chance this test could fail.
            assert np.all(dist < 5 * bandwidth)

    # check unsupported kernels
    for kernel in ['epanechnikov', 'exponential', 'linear', 'cosine']:
        kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X)
        assert_raises(NotImplementedError, kde.sample, 100)

    # non-regression test: used to return a scalar
    X = rng.randn(4, 1)
    kde = KernelDensity(kernel="gaussian").fit(X)
    assert_equal(kde.sample().shape, (1, 1))
开发者ID:BasilBeirouti,项目名称:scikit-learn,代码行数:32,代码来源:test_kde.py


示例3: find_k_neighbors

def find_k_neighbors(points, neighbor_number=5):
    """List, for every point, the indices of its nearest neighbours.

    Args:
        points: Array-like of points, one per row.
        neighbor_number: How many neighbours to report per point.

    Returns:
        A list with one ``[point_index, neighbour_indices]`` entry per point,
        where the point index and every neighbour index are strings.
    """
    from sklearn.neighbors import NearestNeighbors
    import numpy as np

    X = np.array(points)
    # Ask for one extra neighbour: the points are queried against the set
    # they were fitted on, so column 0 is dropped as the point itself.
    neighbors = NearestNeighbors(n_neighbors=neighbor_number + 1, algorithm='ball_tree').fit(X)
    indices = neighbors.kneighbors(X, return_distance=False)
    # enumerate() replaces the Python-2-only xrange(len(points)) loop.
    return [[str(point), [str(x) for x in row[1:]]] for point, row in enumerate(indices)]
开发者ID:vivekaxl,项目名称:LearnerActive,代码行数:7,代码来源:Helper.py


示例4: SMOTE

def SMOTE(minority_samples, N, k):
    """
    The SMOTE algorithm, please refer to: [JAIR'02]SMOTE - Synthetic Minority Over-sampling Technique
    minority_samples The minority sample array
    N Amount of SMOTE N%
    k Number of nearest neighbors

    When N is below 100 only the first ``int(N/100 * len(minority_samples))``
    samples are used as seeds and each seed is populated once.

    @return (N/100)*len(minority_samples) synthetic minority class samples
    """
    T = len(minority_samples) # number of minority samples
    if N < 100:
        # Must stay an integer: it is used as the bound of range() below.
        T = int(N * 1.0 / 100 * T)
        N = 100
    N = int(N * 1.0 / 100)

    neigh = NearestNeighbors(n_neighbors = k, radius=1.0, algorithm='auto', leaf_size=30, p=2)
    neigh = neigh.fit(minority_samples)

    synthetic_samples = []
    for i in range(T):
        # kneighbors expects a 2-D query, so present the sample as one row.
        target_sample = np.asarray(minority_samples[i]).reshape(1, -1)
        # NOTE(review): the query point is part of the fitted data, so it is
        # presumably among its own k neighbours -- confirm how populate()
        # handles that.
        nnarray = neigh.kneighbors(target_sample, k, return_distance=False)[0]
        populate(minority_samples, N, k, i, nnarray, synthetic_samples)

    return np.array(synthetic_samples, float)
开发者ID:hitalex,项目名称:CCDM2014-contest,代码行数:26,代码来源:SMOTE.py


示例5: random_forest_single_predict

def random_forest_single_predict(test_filename, name, feature_file, train_file, k):
    """Classify one named test sample and score it against its k neighbours.

    The sample called ``name`` is looked up in ``test_filename``, scored with
    a pickled classifier loaded from ``train_file``, and then compared with
    its ``k`` nearest neighbours among the features read from
    ``feature_file``. Names are expected to look like ``<label>_<suffix>``:
    the part before the first underscore is compared as the label.

    Returns a pair of one-element lists: ``classification_result`` holds 1
    when the label of the most common neighbour equals the label of ``name``
    (0 otherwise), and ``average_list`` holds the fraction of the ``k``
    neighbours sharing that label.
    """
    name_list, data = readfile_real_name(test_filename)
    print 'reading file...'
    test_data = data[name_list.index(name)]
    # SECURITY NOTE(review): unpickling executes arbitrary code; train_file
    # must come from a trusted source.
    with open(train_file, 'rb') as f:
        clf = cPickle.load(f)
    print 'done'
    # Class probabilities of the single test sample.
    result_rate = (clf.predict_proba(test_data))[0]
    class_name = clf.classes_
    print name
    # NOTE(review): get_num is defined elsewhere; presumably it turns each
    # class probability into a per-class sample count -- confirm.
    num = map(get_num, result_rate)
    # name_list is rebound here: from now on it names the rows of feature_list.
    name_list, feature_list = readfile_real_name_group(feature_file, class_name, num)
    neigh = NearestNeighbors()
    neigh.fit(feature_list)
    # Positional arguments are (X, n_neighbors, return_distance): only the
    # neighbour indices of the single query are kept.
    kneighbors_result_list = neigh.kneighbors(test_data, k, False)[0]
    print kneighbors_result_list
    for x in kneighbors_result_list:
        print name_list[x]
    classification_result = []
    average_list = []
    real_name = (name.split('_'))[0]
    # NOTE(review): this counts neighbour *indices*, not labels; indices for
    # one query are presumably distinct, making "most common" arbitrary --
    # confirm whether labels were meant to be counted instead.
    counter = Counter(kneighbors_result_list)
    if real_name == name_list[counter.most_common(1)[0][0]].split('_')[0]:
        classification_result.append(1)
    else:
        classification_result.append(0)
    # num is reused as the count of neighbours sharing the query's label.
    num = 0
    for i in kneighbors_result_list:
        if (name_list[i].split('_'))[0] == real_name:
            num += 1
    average_list.append((float)(num) / (float)(k))
    print classification_result, average_list
    return classification_result, average_list
开发者ID:YueDayu,项目名称:AdvancedDataStructureProj2,代码行数:33,代码来源:RF_predict.py


示例6: _set_widths_nearest_neighbor

 def _set_widths_nearest_neighbor(self):
     """Set each kernel's parameter from the distance to nearby centers.

     For every center the width is the mean distance to its
     ``self.n_neighbors`` nearest other centers, and the matching kernel in
     ``self.kernels`` receives ``self.p / width`` through ``set_param``.
     """
     # Nearest neighbors contain center itself, find one more.
     nbrs = NearestNeighbors(n_neighbors=self.n_neighbors+1, algorithm='ball_tree').fit(self.centers)
     # One batched query replaces a separate 1-D query per center.
     distances, _ = nbrs.kneighbors(self.centers)
     for i, row in enumerate(distances):
         # The row includes the zero distance to the center itself, so the
         # sum covers the real neighbours only and must be divided by
         # len(row) - 1. The previous len(row - 1) still evaluated to
         # len(row), which under-estimated every width.
         width = sum(row) / (len(row) - 1)
         self.kernels[i].set_param(self.p / width)
开发者ID:PetraVidnerova,项目名称:pyRBF,代码行数:7,代码来源:hidden_layer.py


示例7: resample

    def resample(self):
        """Under-sample ``self.x`` / ``self.y`` using a nearest-neighbour rule.

        A k-NN model with ``self.size_ngh`` neighbours is fitted on the whole
        data set. For every class key in ``self.ucd`` the neighbourhood of
        each of its samples is inspected and a list of row indices to discard
        is built; the surviving rows are then appended to the minority-class
        rows (class ``self.minc``).

        Returns:
            ``(underx, undery)``: the resampled features and labels.
        """

        # Start with the minority class
        underx = self.x[self.y == self.minc]
        undery = self.y[self.y == self.minc]

        # Import the k-NN classifier
        from sklearn.neighbors import NearestNeighbors

        # Create a k-NN to fit the whole data
        nn_obj = NearestNeighbors(n_neighbors=self.size_ngh)

        # Fit the whole dataset
        nn_obj.fit(self.x)

        idx_to_exclude = []
        # Loop over every class listed in self.ucd
        for key in self.ucd.keys():

            # Get the sample of the current class
            sub_samples_x = self.x[self.y == key]

            # Row positions (in self.x / self.y) of the current class samples
            idx_sub_sample = np.nonzero(self.y == key)[0]

            # Find the NN for the current class: one row of neighbour
            # indices (into self.x) per sample of the class
            nnhood_idx = nn_obj.kneighbors(sub_samples_x, return_distance=False)

            # True where a neighbour carries the same label as the current class
            nnhood_label = (self.y[nnhood_idx] == key)

            # True for the samples whose neighbours are NOT all of the
            # current class (negated AND over the neighbour axis)
            nnhood_bool = np.logical_not(np.all(nnhood_label, axis=1))

            # If the minority class remove the majority samples (as in politic!!!! ;))
            if key == self.minc:
                # Get the index to exclude
                # NOTE(review): the inner np.nonzero works on the sub-array
                # selected by nnhood_bool, yet its result indexes the full
                # nnhood_idx, and it picks the positions where nnhood_label
                # is True (same-class neighbours) -- verify this selects the
                # intended neighbours.
                idx_to_exclude += nnhood_idx[np.nonzero(nnhood_label[np.nonzero(nnhood_bool)])].tolist()
            else:
                # Exclude the samples of this class whose neighbourhood is
                # not purely of their own class
                idx_to_exclude += idx_sub_sample[np.nonzero(nnhood_bool)].tolist()

        # Create a vector with the sample to select: 1 = keep, 0 = discard
        sel_idx = np.ones(self.y.shape)
        sel_idx[idx_to_exclude] = 0

        # Get every row that was not excluded
        # NOTE(review): sel_idx starts as all ones, so minority-class rows
        # that were not excluded are selected here as well and end up twice
        # in the output (they are already in underx) -- confirm intent.
        sel_x = np.squeeze(self.x[np.nonzero(sel_idx), :])
        sel_y = self.y[np.nonzero(sel_idx)]

        underx = concatenate((underx, sel_x), axis=0)
        undery = concatenate((undery, sel_y), axis=0)

        if self.verbose:
            print("Under-sampling performed: " + str(Counter(undery)))

        return underx, undery
开发者ID:MGolubeva,项目名称:Ubalanced_classes,代码行数:60,代码来源:under_sampling.py


示例8: k_nearest_neighbors_scores

def k_nearest_neighbors_scores(k, eng_vec_dict, fr_vec_dict):
	"""Measure how well k-nearest-neighbour sets agree across two spaces.

	Two row-aligned matrices are built from the dictionaries (row ``i`` of
	both matrices corresponds to ``index_map[i]``). For every row the k
	nearest rows are computed in each space, and the recall is the share of
	the first space's neighbours that also appear in the second's.

	Args:
		k: Number of neighbours per point (the point itself is excluded).
		eng_vec_dict: Vectors of the first language.
		fr_vec_dict: Vectors of the second language.

	Returns:
		``(average_recall, knearest_map_en, knearest_map_fr)``. The maps go
		from a word to the list of ``index_map`` entries of its neighbours;
		for a word that appears on several rows only the first row is kept.
	"""
	eng_mat, fr_mat, index_map = build_parallel_mats_from_dicts(eng_vec_dict, fr_vec_dict, translation_dict)
	# k + 1 since we discard the top neighbor, which is itself
	neighbors_en = NearestNeighbors(n_neighbors=k+1, algorithm='ball_tree').fit(eng_mat)
	indices_en = neighbors_en.kneighbors(eng_mat, return_distance=False)
	neighbors_fr = NearestNeighbors(n_neighbors=k+1, algorithm='ball_tree').fit(fr_mat)
	indices_fr = neighbors_fr.kneighbors(fr_mat, return_distance=False)
	# since we built the matrices in parallel, we know now that indices map to each other,
	# so we simply check the overlap of those to calculate precision and recall.
	num_points = len(indices_en)
	total_recall = 0.
	knearest_map_en = dict()
	knearest_map_fr = dict()
	for i in range(num_points):
		w_en = index_map[i][0]
		w_fr = index_map[i][1]
		index_set_en = set(indices_en[i][1:]) # should be size k
		index_set_fr = set(indices_fr[i][1:]) # should be size k
		# Real lists rather than map(): on Python 3 map() would store
		# one-shot iterators in the returned dictionaries.
		if w_en not in knearest_map_en:
			knearest_map_en[w_en] = [index_map[z] for z in index_set_en]
		if w_fr not in knearest_map_fr:
			knearest_map_fr[w_fr] = [index_map[z] for z in index_set_fr]
		recall_count = len(index_set_fr & index_set_en)
		# precision = recall for this task
		total_recall += float(recall_count) / len(index_set_en)
	return (total_recall / num_points), knearest_map_en, knearest_map_fr
开发者ID:kiranvodrahalli,项目名称:hebb_vectors,代码行数:28,代码来源:analysis.py


示例9: _compute_tolerance_distance

    def _compute_tolerance_distance(self, sample, symbol):
        """Derive and persist the distance tolerance for a symbol.

        Every feature vector is compared with its nearest neighbours inside
        ``sample``; the mean neighbour distance of each vector is collected,
        the value at the 80% position of the sorted means is scaled by 1.3,
        and the result is written to the symbol's tolerance file.

        Args:
            sample (list of lists of int): feature vectors to base the
                                           tolerance on.
            symbol (String): name of the symbol the tolerance belongs to.

        Returns:
            The computed tolerance distance.
        """
        model = NearestNeighbors(n_neighbors=3, algorithm='ball_tree')
        model.fit(sample)
        distances, _ = model.kneighbors(sample)
        print(distances)

        # Column 0 of every row is dropped before averaging (the sample is
        # queried against itself).
        means = sorted(np.mean(np.delete(row, [0])) for row in distances)

        critical_index = math.ceil(0.8 * len(means)) - 1
        tolerance_distance = means[critical_index] * 1.3
        print("tolerance distance: %.16f" % tolerance_distance)

        target_path = Classifier._get_file_path(
            self.files[DISTANCE_TOLERANCE_FILE], symbol)
        with open(target_path, 'w') as handle:
            handle.write("%.16f\n" % tolerance_distance)

        return tolerance_distance
开发者ID:0mp,项目名称:io-touchpad,代码行数:33,代码来源:classifier.py


示例10: knn_find

def knn_find(train, test, k = 2):
    """Look up the k nearest training samples of every test sample.

    [Args]
    ----
    train: training data {array like, m x n, m samples, n features}
        one row per sample, one column per feature, e.g.
        [[18, 120, 167],    # age, weight, height
         [45, 180, 173],
         ...]

    test: query data, same layout as train

    k: number of neighbors to return for each query sample

    [Returns]
    -------
    distances: for each test sample, the distances to its k neighbors
        [[dist(test1, train_knn1), dist(test1, train_knn2), ...],
         [dist(test2, train_knn1), dist(test2, train_knn2), ...],
         ...]

    indices: for each test sample, the row indices (into train) of its
        k neighbors
        [[test1_train_knn1_index, test1_train_knn2_index, ...],
         [test2_train_knn1_index, test2_train_knn2_index, ...],
         ...]
    """
    # The kd-tree search is requested explicitly.
    model = NearestNeighbors(n_neighbors=k, algorithm="kd_tree")
    model.fit(train)
    distances, indices = model.kneighbors(test)
    return distances, indices
开发者ID:windse7en,项目名称:Angora,代码行数:31,代码来源:knn.py


示例11: createSyntheticSamples

def createSyntheticSamples(X,Y,nearestneigh,numNeighbors,majoritylabel,minoritylabel):
    """Build a SMOTE-style data set of majority rows plus synthetic minority rows.

    The minority samples are projected with ``pca``, neighbours are searched
    in that reduced space, and synthetic samples are interpolated in the
    original feature space between a minority sample and the neighbour(s)
    picked by ``chooseNeighbor``.

    Args:
        X: Feature matrix.
        Y: Labels, used by ``partitionSamples`` to split ``X``.
        nearestneigh: Number of neighbours searched per minority sample.
        numNeighbors: Number of neighbours chosen per minority sample; the
            synthetic block is reshaped to
            ``numNeighbors * len(Xminority)`` rows.
        majoritylabel: Label written for the majority rows.
        minoritylabel: Label written for the synthetic rows.

    Returns:
        ``(syth_X, syth_Y)``: the majority rows followed by the synthetic
        rows, and the matching label column.

    Raises:
        Exception: If the feature and label row counts disagree.
    """
    (Xminority, Xmajority) = partitionSamples(X, Y)
    numFeatures = Xminority.shape[1]
    Xreduced = pca(Xminority)
    numOrigMinority = len(Xminority)
    reducedNN = NearestNeighbors(n_neighbors=nearestneigh, algorithm='auto')
    reducedNN.fit(Xreduced)
    trylist = []
    # For each (minority) point...
    for i, row in enumerate(Xreduced):
        # kneighbors expects a 2-D query, so present the point as one row.
        query = np.asarray(row).reshape(1, -1)
        neighbor_index = reducedNN.kneighbors(query, return_distance=False)
        # randomly choose among the k nearest neighbors
        chosenNeighborsIndex = chooseNeighbor(neighbor_index, numNeighbors, i)
        chosenNeighbor = Xminority[chosenNeighborsIndex]
        # SMOTE interpolates from the original sample TOWARDS its neighbour:
        # x + r * (neighbour - x). The previous (x - neighbour) difference
        # pushed the synthetic point away from the neighbour instead.
        diff = chosenNeighbor - Xminority[i, :]
        # Multiply this difference by a number between 0 and 1
        r = random.uniform(0, 1)
        # Add it back to the original minority vector
        syth_sample = Xminority[i, :] + r * diff
        trylist.append(syth_sample.tolist())
    Xsyn = np.asarray(trylist).reshape(numNeighbors*numOrigMinority, numFeatures)
    maj_col = majoritylabel*np.ones([Xmajority.shape[0], 1])
    min_col = minoritylabel*np.ones([Xsyn.shape[0], 1])
    syth_Y = np.concatenate((maj_col, min_col), axis=0)
    syth_X = np.concatenate((Xmajority, Xsyn), axis=0)
    if(syth_X.shape[0] != syth_Y.shape[0]):
        raise Exception("dim mismatch between features matrix and response matrix")
    return (syth_X, syth_Y)
开发者ID:LiaoPan,项目名称:amazon_challenge,代码行数:34,代码来源:SMOTE.py


示例12: construct_A

def construct_A(X, k, binary=False):
    """Return the (1 + k)-nearest-neighbour graph of ``X``.

    With ``binary`` set, the graph is the connectivity graph; otherwise
    its entries are neighbour distances.
    """
    model = NearestNeighbors(n_neighbors=1 + k)
    model.fit(X)
    graph_mode = 'connectivity' if binary else 'distance'
    return model.kneighbors_graph(X, mode=graph_mode)
开发者ID:abhishekkrthakur,项目名称:LCE,代码行数:7,代码来源:lce.py


示例13: test_connectivity_popagation

def test_connectivity_popagation():
    """
    Ward clustering with a k-NN connectivity graph must fit without error:
    connectivity has to be propagated correctly while clusters are merged.
    """
    from sklearn.neighbors import NearestNeighbors

    # 15 points, many of them exact duplicates.
    points = [(0.014, 0.120), (0.014, 0.099), (0.014, 0.097)]
    points += [(0.017, 0.153)] * 2
    points += [(0.018, 0.153)] * 7
    points += [(0.018, 0.152), (0.018, 0.149), (0.018, 0.144)]
    X = np.array(points)

    knn = NearestNeighbors(n_neighbors=10)
    knn.fit(X)
    graph = knn.kneighbors_graph(X)
    model = Ward(n_clusters=4, connectivity=graph)
    # An IndexError here means the connectivity updates were not
    # propagated correctly.
    model.fit(X)
开发者ID:VirgileFritsch,项目名称:scikit-learn,代码行数:32,代码来源:test_hierarchical.py


示例14: predict_appliance

def predict_appliance(home, appliance, feature):
    """Predict 12 monthly values of ``appliance`` for ``home`` from similar homes.

    The feature list ``f`` and neighbour count ``k`` are read from a JSON
    file selected by ``appliance``, ``feature`` and a home id (``home`` when
    it has data for the appliance, otherwise the first home that does). The
    ``k`` nearest homes in that feature space are found among the other
    homes with the appliance, and each month's prediction is the sum of the
    neighbours' values divided by ``k``.

    Args:
        home: Id of the home to predict for (a row label of ``df``).
        appliance: Appliance name; column names are ``<appliance>_<month>``.
        feature: Feature-set name used in the JSON file name.

    Returns:
        A list of 12 values, one per month from 1 to 12.
    """
    if home in all_homes[appliance]:
        home_to_pick = home
    else:
        home_to_pick = all_homes[appliance][0]
    print(home_to_pick)

    feature_path = "../data/output/sensitivity-numfeatures-allhomes/%s_%s_%d.json" % (appliance, feature, home_to_pick)
    # Context manager so the file handle is closed right after parsing.
    with open(feature_path, "r") as feature_file:
        feature_dict = json.load(feature_file)
    f = feature_dict['f']
    k = feature_dict['k']
    nn = NearestNeighbors(n_neighbors=k)
    df_new = df.copy()
    df_new = df_new.ix[all_homes[appliance]]
    # The target home must not be its own neighbour.
    df_new = df_new.ix[~df_new.index.isin([home])]
    train = df_new[f].dropna()
    nn.fit(train)
    _, indices = nn.kneighbors(df.ix[home][f])
    # kneighbors returns row positions within the fitted frame, so map them
    # through the index of `train` (after dropna), not of df_new: once a row
    # has been dropped the two indexes no longer line up.
    nghbrs_list = train.index.values[indices[0]]

    out = []
    for month in range(1, 13):
        column = "%s_%d" % (appliance, month)
        # The old `len(nghbrs_list>1)` test was true for any non-empty
        # neighbour list, so this summing branch was the only one ever
        # taken; the unreachable alternative has been removed.
        out.append(df_new[[column]].ix[nghbrs_list].sum().values[0]/k)
    return out
开发者ID:nipunbatra,项目名称:Gemello,代码行数:27,代码来源:routes.py


示例15: ContentBased

class ContentBased(object):
    """
    Article recommendation model based on the most relevant tags of each article.
    The model vectorizes every article so that the similarity between articles can be computed.
    """
    def __init__(self, stop_words=None, token_pattern=None, metric='cosine', n_neighbors=5):
        # Fall back to the English stop-word list and to a pattern matching
        # words of three or more characters that start with a letter.
        words = stopwords.words("english") if stop_words is None else stop_words
        pattern = '(?u)\\b[a-zA-Z]\\w\\w+\\b' if token_pattern is None else token_pattern

        self.tfidf_vectorizer = TfidfVectorizer(stop_words=words, token_pattern=pattern)
        self.nearest_neigbors = NearestNeighbors(metric=metric, n_neighbors=n_neighbors, algorithm='brute')

    def fit(self, datos, columna_descripcion):
        """
        Train the model:
        1/ Vectorize every article (attribute extraction and weighting)
        2/ Fit the nearest-neighbour search on the vectorized articles
        """
        self.datos = datos
        tfidf_matrix = self.tfidf_vectorizer.fit_transform(datos[columna_descripcion])
        self.nearest_neigbors.fit(tfidf_matrix)

    def predict(self, descripcion):
        """
        Return the articles most similar to the given description
        """
        query = self.tfidf_vectorizer.transform(descripcion)
        if query.sum() != 0:
            indices = self.nearest_neigbors.kneighbors(query, return_distance=False)
            return self.datos.iloc[indices[0], :]
        # No known term in the description: empty frame with the same columns.
        return pd.DataFrame(columns=self.datos.columns)
开发者ID:pvalienteverde,项目名称:ElCuadernillo,代码行数:35,代码来源:ContendBased.py


示例16: test_method

def test_method(method, k = 10, tests=5):
    """Benchmark ``method`` against scikit-learn's neighbour search.

    ``tests`` rows are drawn from the module-level ``data`` (with a fixed
    random seed); for each one both ``method(row, k)`` and the reference
    search are timed and the overlap of their results is recorded.

    Returns a two-line string: the mean timings with the speed-up, then
    the mean overlap as a percentage.
    """
    from sklearn.neighbors import NearestNeighbors

    started = time.time()
    reference = NearestNeighbors(leaf_size=data.shape[0])
    reference.fit(data)

    overlap_total = 0.0
    reference_seconds = 0.0
    method_seconds = 0.0
    np.random.seed(0)

    for _ in range(tests):
        query = data[np.random.randint(data.shape[0])]

        started = time.time()
        candidate = method(query, k)
        method_seconds += time.time() - started

        started = time.time()
        expected = reference.kneighbors(query, n_neighbors=k, return_distance=False)
        reference_seconds += time.time() - started

        # Share of the reference neighbours that the method also returned.
        overlap_total += np.mean(np.in1d(expected, candidate))

    reference_seconds /= tests
    method_seconds /= tests

    timing_line = 'NN time: %1.10f method time: %1.10f speedup: %1.10f' % (
        reference_seconds, method_seconds, reference_seconds/method_seconds)
    overlap_line = '%1.2f%% overlap' % ((overlap_total/tests) * 100)
    return timing_line + '\n' + overlap_line
开发者ID:dgmp88,项目名称:NonMetricSpaceLib,代码行数:30,代码来源:lazy_test.py


示例17: move

    def move(self, event):
        """Select the points nearest to the drag origin while the mouse moves.

        Only acts when scikit-learn is available and the first mouse button
        is being dragged. The number of selected points grows with the drag
        distance, measured relative to the canvas diagonal and scaled by the
        first dimension of the active layer's shape.

        Args:
            event: Mouse event providing ``button``, ``is_dragging`` and
                ``pos``.
        """
        # add the knn scheme to decide selected region when moving mouse
        if not SKLEARN_INSTALLED:
            return
        if not (event.button == 1 and event.is_dragging):
            return

        layer = self.active_layer_artist.layer

        # TODO: support multiple datasets here
        data = get_map_data_scatter(layer,
                                    self.active_layer_artist.visual,
                                    self._vispy_widget)

        # calculate the threshold and call draw visual
        width = event.pos[0] - self.selection_origin[0]
        height = event.pos[1] - self.selection_origin[1]
        drag_distance = math.sqrt(width**2 + height**2)
        canvas_size = self._vispy_widget.canvas.size
        canvas_diag = math.sqrt(canvas_size[0]**2 + canvas_size[1]**2)

        mask = np.zeros(layer.shape)

        # neighbor num proportioned to mouse moving distance. It must be an
        # integer for NearestNeighbors, and cannot exceed the number of
        # fitted samples (a drag longer than the diagonal would ask for more).
        n_neighbors = int(drag_distance / canvas_diag * layer.shape[0])
        n_neighbors = min(n_neighbors, len(data))
        if n_neighbors >= 1:
            neigh = NearestNeighbors(n_neighbors=n_neighbors)
            neigh.fit(data)
            # kneighbors returns (distances, indices); keep the indices.
            select_index = neigh.kneighbors([self.selection_origin])[1]
            mask[select_index] = 1
        self.mark_selected(mask, layer)
开发者ID:PennyQ,项目名称:glue-3d-viewer,代码行数:28,代码来源:scatter_toolbar.py


示例18: _wpca_analysis

def _wpca_analysis(L, C, intensities):
    """
    Determine the eccentricity of each cluster using weighted PCA (See
    Jolliffe 2002, 14.2.1). The smallest normalized explained variance
    is small for flat or filiform objects.

    - L is a numpy matrix (one point on each row)
    - C is the sequence of cluster centers; each needs ``x``, ``y``, ``z``
      and ``name`` attributes and receives ``EVR`` and ``last_variance``
    - intensities are gray levels of each point

    No cluster assignment is used here: a ball of radius 10 around each
    center is used to find the cloud of points.
    """
    np.set_printoptions(threshold=50000)
    n_points, n_features = L.shape
    tee.log('WPCA - Fitting NearestNeighbors on', n_points, 'points')
    nbrs = NearestNeighbors(radius=10.0).fit(L)
    for i, c in enumerate(C):
        array_c = np.array([c.x, c.y, c.z])
        i_nbrs = nbrs.radius_neighbors([array_c], 10.0, return_distance=False)[0]
        points_within = L[i_nbrs]
        if len(points_within) < 64:  # too small set, there is no point in running PCA
            c.EVR = [0.499, 0.499, 0.002]
        else:
            w = np.sqrt(intensities[i_nbrs]/255.0)
            # Scale every point (row) by its weight. Broadcasting gives the
            # same result as multiplying by np.diag(w) without allocating a
            # dense n x n matrix.
            wX = np.multiply(w[:, np.newaxis], points_within)
            pca = sklearn.decomposition.PCA(n_components=3)
            # Only the explained variance is needed, so the data are fitted
            # but not transformed.
            pca.fit(wX)
            c.EVR = pca.explained_variance_ratio_
        c.last_variance = c.EVR[2]
        print('WPCA done on', i, '/', len(C), 'name=', c.name, 'EVR=', c.EVR)
开发者ID:paolo-f,项目名称:bcfind,代码行数:31,代码来源:mscd.py


示例19: main

def main():
    """Write a nearest-neighbour graph of the comments as a JSON file.

    Comments are vectorized as 1- and 2-gram counts and compared with the
    Jaccard metric. Every training comment is linked to its ``n_neighbors``
    nearest training comments (itself excluded) and every new comment to its
    ``n_neighbors`` nearest training comments. The graph is written to
    ``<n_neighbors>-nn.json`` next to this script, with ``nodes`` (name and
    group per comment) and ``links`` (source, target and a value of
    ``10 * (1 - distance)``).
    """
    vectorizer = CountVectorizer(ngram_range=(1,2),max_df=1.0, min_df=0.0)

    nei = NearestNeighbors(algorithm='brute', metric='jaccard')
    matrix = vectorizer.fit_transform(training_set).todense()
    new_matrix = vectorizer.transform(new_comments).todense()
    nei.fit(matrix)

    # Concatenate once instead of once per node.
    all_comments = training_set + new_comments
    all_groups = groups + new_groups
    nodes = [{'name': comment, 'group': all_groups[i]}
             for i, comment in enumerate(all_comments)]
    links = []
    n_train = len(matrix)

    # Training comments: position 0 is dropped as the comment itself.
    dists, ids = nei.kneighbors(matrix, n_neighbors=n_neighbors + 1)
    for i in range(n_train):
        for d, target in zip(dists[i][1:], ids[i][1:]):
            # int()/float() turn numpy scalars into JSON-serializable values.
            links.append({"source": i, "target": int(target), "value": 10*(1 - float(d))})

    # New comments are not part of the fitted set, so their closest
    # neighbour is a genuine one: keep the first n_neighbors entries, each
    # paired with its OWN distance (the value used to be taken from the
    # following neighbour).
    if len(new_comments):
        dists, ids = nei.kneighbors(new_matrix, n_neighbors=n_neighbors + 1)
        for i in range(len(new_comments)):
            for d, target in zip(dists[i][:n_neighbors], ids[i][:n_neighbors]):
                links.append({"source": n_train + i, "target": int(target), "value": 10*(1 - float(d))})

    jsondumped = json.dumps({'nodes':nodes, 'links':links}, indent=2)

    path =  '{0}/'.format(pathsplit(abspath(__file__))[0])
    # Context manager so the output is flushed and closed.
    with open(path + '{0}-nn.json'.format(n_neighbors), 'w') as jsonfile:
        jsonfile.write(jsondumped)
开发者ID:opoirion,项目名称:pres_ml,代码行数:31,代码来源:generate_json.py


示例20: removeRedundantFrames

	def removeRedundantFrames(self):
		"""Keep one representative key frame per colour-histogram cluster.

		A 256-bin colour histogram is computed for every key frame, the
		histograms in ``self.frameHistFeats`` are clustered into
		``int(sqrt(n))`` groups, and for every cluster center the key frame
		with the closest histogram is kept.

		Returns:
			The selected key frames in their original order, each frame
			at most once.
		"""
		n = len(self.keyframes)
		frames = np.zeros((n, 256))
		for i, kf in enumerate(self.keyframes):
			frames[i] = tools.getColorHist(kf).ravel()

		k = int(np.sqrt(n))
		kmeans = KMeans(n_clusters=k)
		print("Clustering frames into {0} code vectors.".format(k))
		# NOTE(review): clustering runs on self.frameHistFeats while the
		# neighbour search below runs on the histograms computed here;
		# presumably both hold the same features -- confirm, or cluster on
		# `frames` directly.
		kmeans.fit(self.frameHistFeats)

		NN = NearestNeighbors(n_neighbors=1)
		NN.fit(frames)
		# One batched query: row j holds the index of the frame closest to
		# cluster center j.
		nearest = NN.kneighbors(kmeans.cluster_centers_, return_distance=False)
		# Plain ints are needed to index the key-frame list, and the set
		# drops a frame that is the closest one to several centers.
		bestFrameIndices = sorted(set(int(idx) for idx in nearest[:, 0]))
		return [self.keyframes[i] for i in bestFrameIndices]
开发者ID:kaledj,项目名称:YTKeyframes,代码行数:25,代码来源:kfextractor.py



注:本文中的sklearn.neighbors.NearestNeighbors类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python ball_tree.BallTree类代码示例发布时间:2022-05-27
下一篇:
Python neighbors.NearestCentroid类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap