
Python preprocessing.Scaler Class Code Examples


This article collects typical usage examples of the Python class sklearn.preprocessing.Scaler. If you are wondering what the Scaler class does, how to use it, or want to see it in real code, the selected class code examples below should help.



Twenty code examples of the Scaler class are presented below, ordered by popularity.
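Before the examples, one note on the API: Scaler is the name this class carried in early scikit-learn releases; later releases renamed it StandardScaler, so running these examples on a current installation needs an import fallback. A minimal sketch of the fit/transform workflow the examples share (the toy data here is invented for illustration):

import numpy as np

try:
    from sklearn.preprocessing import Scaler  # early scikit-learn releases
except ImportError:
    from sklearn.preprocessing import StandardScaler as Scaler  # modern name

X_train = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
X_new = np.array([[2.0, 25.0]])

scaler = Scaler()                            # zero mean, unit variance per feature
X_train_std = scaler.fit_transform(X_train)  # fit the statistics on training data only
X_new_std = scaler.transform(X_new)          # reuse the same statistics on new data

print(X_train_std.mean(axis=0))  # ~[0. 0.]
print(X_train_std.std(axis=0))   # ~[1. 1.]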

Example 1: run_svm

def run_svm(svc, X):
    X = X.copy()
    scaler = Scaler()
    X = scaler.fit_transform(X)
    y_predict = svc.predict(X)
    
    return y_predict
Author: ajrichards, Project: cytostream, Lines: 7, Source: SupervisedLearning.py


Example 2: test_scaler_1d

def test_scaler_1d():
    """Test scaling of dataset along single axis"""
    rng = np.random.RandomState(0)
    X = rng.randn(5)
    X_orig_copy = X.copy()

    scaler = Scaler()
    X_scaled = scaler.fit(X).transform(X, copy=False)
    assert_array_almost_equal(X_scaled.mean(axis=0), 0.0)
    assert_array_almost_equal(X_scaled.std(axis=0), 1.0)

    # check inverse transform
    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert_array_almost_equal(X_scaled_back, X_orig_copy)

    # Test with 1D list
    X = [0., 1., 2, 0.4, 1.]
    scaler = Scaler()
    X_scaled = scaler.fit(X).transform(X, copy=False)
    assert_array_almost_equal(X_scaled.mean(axis=0), 0.0)
    assert_array_almost_equal(X_scaled.std(axis=0), 1.0)

    X_scaled = scale(X)
    assert_array_almost_equal(X_scaled.mean(axis=0), 0.0)
    assert_array_almost_equal(X_scaled.std(axis=0), 1.0)
Author: AlexLerman, Project: scikit-learn, Lines: 25, Source: test_preprocessing.py


Example 3: test_scaler_without_centering

def test_scaler_without_centering():
    rng = np.random.RandomState(42)
    X = rng.randn(4, 5)
    X[:, 0] = 0.0  # first feature is always zero

    scaler = Scaler(with_mean=False)
    X_scaled = scaler.fit(X).transform(X, copy=True)
    assert not np.any(np.isnan(X_scaled))

    assert_array_almost_equal(
        X_scaled.mean(axis=0), [0., -0.01,  2.24, -0.35, -0.78], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])
    # Check that X has not been copied
    assert X_scaled is not X

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert X_scaled_back is not X
    assert X_scaled_back is not X_scaled
    assert_array_almost_equal(X_scaled_back, X)

    X_scaled = scale(X, with_mean=False)
    assert not np.any(np.isnan(X_scaled))

    assert_array_almost_equal(
        X_scaled.mean(axis=0), [0., -0.01,  2.24, -0.35, -0.78], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])
    # Check that X has not been copied
    assert X_scaled is not X

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert X_scaled_back is not X
    assert X_scaled_back is not X_scaled
    assert_array_almost_equal(X_scaled_back, X)
Author: Yangqing, Project: scikit-learn, Lines: 33, Source: test_preprocessing.py


Example 4: _pre_fit

    def _pre_fit(self, X, y):
        random_state = check_random_state(self.random_state)

        if self.scale_y:
            self.y_scaler_ = Scaler(copy=True).fit(y)
            y = self.y_scaler_.transform(y)

        if self.metric == "precomputed":
            self.components_ = None
            n_components = X.shape[1]
        else:
            if self.init_components is None:
                if self.verbose: print "Selecting components..."
                self.components_ = select_components(X, y,
                                                     self.n_components,
                                                     random_state=random_state)
            else:
                self.components_ = self.init_components

            n_components = self.components_.shape[0]


        n_nonzero_coefs = self.n_nonzero_coefs
        if 0 < n_nonzero_coefs <= 1:
            n_nonzero_coefs = int(n_nonzero_coefs * n_components)
        n_nonzero_coefs = int(n_nonzero_coefs)

        if n_nonzero_coefs > n_components:
            raise AttributeError("n_nonzero_coefs cannot be bigger than "
                                 "n_components.")

        if self.verbose: print "Computing dictionary..."
        start = time.time()
        K = pairwise_kernels(X, self.components_, metric=self.metric,
                             filter_params=True, n_jobs=self.n_jobs,
                             **self._kernel_params())
        if self.verbose: print "Done in", time.time() - start, "seconds"

        if self.scale:
            if self.verbose: print "Scaling dictionary"
            start = time.time()
            copy = True if self.metric == "precomputed" else False
            self.scaler_ = Scaler(copy=copy)
            K = self.scaler_.fit_transform(K)
            if self.verbose: print "Done in", time.time() - start, "seconds"

        # FIXME: this allocates a lot of intermediary memory
        norms = np.sqrt(np.sum(K ** 2, axis=0))

        return n_nonzero_coefs, K, y, norms
Author: nagyistge, Project: lightning, Lines: 50, Source: kmp.py


Example 5: test_scale_sparse_with_mean_raise_exception

def test_scale_sparse_with_mean_raise_exception():
    rng = np.random.RandomState(42)
    X = rng.randn(4, 5)
    X_csr = sp.csr_matrix(X)

    # check scaling and fit with direct calls on sparse data
    assert_raises(ValueError, scale, X_csr, with_mean=True)
    assert_raises(ValueError, Scaler(with_mean=True).fit, X_csr)

    # check transform and inverse_transform after a fit on a dense array
    scaler = Scaler(with_mean=True).fit(X)
    assert_raises(ValueError, scaler.transform, X_csr)

    X_transformed_csr = sp.csr_matrix(scaler.transform(X))
    assert_raises(ValueError, scaler.inverse_transform, X_transformed_csr)
Author: AlexLerman, Project: scikit-learn, Lines: 15, Source: test_preprocessing.py


Example 6: data_to_kernels

def data_to_kernels(tr_data, te_data):
    scaler = Scaler(copy=False)
    scaler.fit_transform(tr_data)   # copy=False: tr_data is standardized in place
    #tr_data, mu, sigma = standardize(tr_data)
    tr_data = power_normalize(tr_data, 0.5)
    tr_data = L2_normalize(tr_data)

    #te_data, _, _ = standardize(te_data, mu, sigma)
    scaler.transform(te_data)
    te_data = power_normalize(te_data, 0.5)
    te_data = L2_normalize(te_data)

    tr_kernel = np.dot(tr_data, tr_data.T)
    te_kernel = np.dot(te_data, tr_data.T)

    return tr_kernel, te_kernel
Author: danoneata, Project: test, Lines: 16, Source: per_video.py
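The power_normalize and L2_normalize helpers are not part of the excerpt. In the Fisher-vector pipelines this code comes from, they usually denote a signed square root followed by row-wise L2 scaling; a hypothetical sketch under that assumption:

import numpy as np

def power_normalize(X, alpha=0.5):
    # Signed power normalization: sign(x) * |x| ** alpha, applied elementwise.
    return np.sign(X) * np.abs(X) ** alpha

def L2_normalize(X):
    # Scale each row to unit L2 norm, guarding against all-zero rows.
    norms = np.linalg.norm(X, axis=1, keepdims=True)
    return X / np.maximum(norms, 1e-12)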


Example 7: process_data

    def process_data(self):
        test = pandas.read_csv("test.csv")
        testMat = test.as_matrix()

        train = pandas.read_csv("train.csv")
        trainMat = train.as_matrix()
        trainResult = trainMat[:, 0]
        trainMat = trainMat[:, 1:]

        # trainInd = np.where(trainResult == 0)[0]
        # how_many = (trainResult == 1).sum() - len(trainInd)
        # np.random.shuffle(trainInd)
        # addedResult = trainResult[trainInd[:how_many],:]
        # addedData = trainMat[trainInd[:how_many],:]
        # trainResult = np.append(trainResult,addedResult)
        # trainMat = np.vstack((trainMat,addedData))

        cv = StratifiedKFold(trainResult, 2)
        # cv = KFold(n=trainResult.shape[0],k=2)
        reduceFeatures = ExtraTreesClassifier(
            compute_importances=True, random_state=1234, n_jobs=self.cpus, n_estimators=1000, criterion="gini"
        )
        reduceFeatures.fit(trainMat, trainResult)
        trainScaler = Scaler()

        self.cv_data = []
        self.cv_data_nonreduced = []
        for train, test in cv:
            X_train, X_test, Y_train, Y_test = (
                trainMat[train, :],
                trainMat[test, :],
                trainResult[train],
                trainResult[test],
            )
            X_train = trainScaler.fit_transform(X_train)
            X_test = trainScaler.transform(X_test)
            self.cv_data_nonreduced.append((X_train, X_test, Y_train, Y_test))
            X_train = reduceFeatures.transform(X_train)
            X_test = reduceFeatures.transform(X_test)
            self.cv_data.append((X_train, X_test, Y_train, Y_test))
        testMat = trainScaler.transform(testMat)
        self.testMat_nonreduced = testMat
        self.testMat = reduceFeatures.transform(testMat)
        allData = self.testMat, self.cv_data, self.testMat_nonreduced, self.cv_data_nonreduced
        data_handle = open("allData.pkl", "wb")
        pickle.dump(allData, data_handle)
        data_handle.close()
Author: JakeMick, Project: kaggle, Lines: 47, Source: holistic.py


Example 8: get_sl_test_data

def get_sl_test_data(fileEvents,fileLabels,includedChannels,useMeans=False,parentIndices=None):
    ## declare variables
    X = fileEvents[:, includedChannels].copy()
    scaler = Scaler()
    X = scaler.fit_transform(X)

    #if parentIndices != None:
    #    X = X[parentIndices,:]
    
    #X = (X - X.mean(axis=0)) / X.std(axis=0)

    if useMeans:
        clusterIds,X = get_mean_matrix(X,fileLabels)
        #X = (X - X.mean(axis=0)) / X.std(axis=0)
        return clusterIds,X
    
    return X
Author: ajrichards, Project: cytostream, Lines: 17, Source: SupervisedLearning.py


Example 9: test_center_kernel

def test_center_kernel():
    """Test that KernelCenterer is equivalent to Scaler in feature space"""
    X_fit = np.random.random((5, 4))
    scaler = Scaler(with_std=False)
    scaler.fit(X_fit)
    X_fit_centered = scaler.transform(X_fit)
    K_fit = np.dot(X_fit, X_fit.T)

    # center fit time matrix
    centerer = KernelCenterer()
    K_fit_centered = np.dot(X_fit_centered, X_fit_centered.T)
    K_fit_centered2 = centerer.fit_transform(K_fit)
    assert_array_almost_equal(K_fit_centered, K_fit_centered2)

    # center predict time matrix
    X_pred = np.random.random((2, 4))
    K_pred = np.dot(X_pred, X_fit.T)
    X_pred_centered = scaler.transform(X_pred)
    K_pred_centered = np.dot(X_pred_centered, X_fit_centered.T)
    K_pred_centered2 = centerer.transform(K_pred)
    assert_array_almost_equal(K_pred_centered, K_pred_centered2)
Author: Yangqing, Project: scikit-learn, Lines: 21, Source: test_preprocessing.py
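The equivalence this test exercises has a closed form: centering the data in feature space amounts to double-centering the Gram matrix, which is what KernelCenterer computes at fit time:

\tilde{K} = K - \mathbf{1}_n K - K \mathbf{1}_n + \mathbf{1}_n K \mathbf{1}_n

where \mathbf{1}_n is the n x n matrix whose every entry is 1/n.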


Example 10: run_svm_validation

def run_svm_validation(X1,y1,X2,y2,gammaRange=[0.5],cRange=[0.005],useLinear=False):
    #X_train,y_train,X_test,y_test = split_train_test(X1,y1,X2,y2)

    X = np.vstack((X1, X2))
    Y = np.hstack((y1, y2))

    scaler = Scaler()
    X = scaler.fit_transform(X)

    #if useLinear == True:
    #    svc = svm.SVC(kernel='linear')#class_weight={1: 10
    #    #    #    #svc = svm.SVC(kernel='poly',degree=3,C=1.0)
    #    svc.fit(X, Y)
    #    return svc

    C_range = 10.0 ** np.arange(-2, 9)
    gamma_range = 10.0 ** np.arange(-5, 4)
    param_grid = dict(gamma=gamma_range, C=C_range)

    grid = GridSearchCV(SVC(class_weight={1: 100}), param_grid=param_grid, cv=StratifiedKFold(y=Y,k=2))
    grid.fit(X, Y)

    print("The best classifier is: ", grid.best_estimator_)
    return grid.best_estimator_
Author: ajrichards, Project: cytostream, Lines: 24, Source: SupervisedLearning.py


Example 11: test_scaler_2d_arrays

def test_scaler_2d_arrays():
    """Test scaling of 2d array along first axis"""
    rng = np.random.RandomState(0)
    X = rng.randn(4, 5)
    X[:, 0] = 0.0  # first feature is always zero

    scaler = Scaler()
    X_scaled = scaler.fit(X).transform(X, copy=True)
    assert_false(np.any(np.isnan(X_scaled)))

    assert_array_almost_equal(X_scaled.mean(axis=0), 5 * [0.0])
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])
    # Check that X has not been copied
    assert_true(X_scaled is not X)

    # check inverse transform
    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert_true(X_scaled_back is not X)
    assert_true(X_scaled_back is not X_scaled)
    assert_array_almost_equal(X_scaled_back, X)

    X_scaled = scale(X, axis=1, with_std=False)
    assert_false(np.any(np.isnan(X_scaled)))
    assert_array_almost_equal(X_scaled.mean(axis=1), 4 * [0.0])
    X_scaled = scale(X, axis=1, with_std=True)
    assert_false(np.any(np.isnan(X_scaled)))
    assert_array_almost_equal(X_scaled.mean(axis=1), 4 * [0.0])
    assert_array_almost_equal(X_scaled.std(axis=1), 4 * [1.0])
    # Check that the data hasn't been modified
    assert_true(X_scaled is not X)

    X_scaled = scaler.fit(X).transform(X, copy=False)
    assert_false(np.any(np.isnan(X_scaled)))
    assert_array_almost_equal(X_scaled.mean(axis=0), 5 * [0.0])
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])
    # Check that X has not been copied
    assert_true(X_scaled is X)

    X = rng.randn(4, 5)
    X[:, 0] = 1.0  # first feature is a constant, non-zero feature
    scaler = Scaler()
    X_scaled = scaler.fit(X).transform(X, copy=True)
    assert_false(np.any(np.isnan(X_scaled)))
    assert_array_almost_equal(X_scaled.mean(axis=0), 5 * [0.0])
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])
    # Check that X has not been copied
    assert_true(X_scaled is not X)
Author: AlexLerman, Project: scikit-learn, Lines: 47, Source: test_preprocessing.py


Example 12: SVM_fit

def SVM_fit(X_in, y_in, X_out, gamma, C):    

    M = len(X_in[0])   #Number of features
    seed(time())
    
    #To prevent data snooping, breaks the input set into train, cross-validation and test sets, with sizes proportional to 8-1-1
    
    #First puts aside 10% of the data for the tests
    test_indices, train_indices = split_indices(len(X_in), int(round(0.1*len(X_in))))

    # random.shuffle cannot shuffle two sequences in unison; use a shared permutation
    perm = list(range(len(X_in)))
    shuffle(perm)
    X_in = [X_in[i] for i in perm]
    y_in = [y_in[i] for i in perm]
    
    X_test = [X_in[i] for i in test_indices]
    y_test = [y_in[i] for i in test_indices]
    X_in = [X_in[i] for i in train_indices]
    y_in = [y_in[i] for i in train_indices]
  
    
    #scale data first
    scaler = Scaler(copy=False) #in place modification
    #Normalizes the data and stores the mean and standard deviation as inner parameters.
    #To avoid data snooping, the scaler is fit on the held-out 10% split only,
    #then the same transformation is applied to the rest of the data.
    scaler.fit(X_test, y_test)
    X_in = scaler.transform(X_in)
    X_test = scaler.transform(X_test)
    X_out = scaler.transform(X_out) #uses the same transformation (same mean_ and std_) fit before

    std_test = X_test.std(axis=0)
    f_indices = [j for j in range(M) if std_test[j] > 1e-7]
    
    #Removes features with null variance
    X_in = [[X_in[i][j] for j in f_indices] for i in range(len(X_in))]
    X_test = [[X_test[i][j] for j in f_indices] for i in range(len(X_test))]
    X_out = [[X_out[i][j] for j in f_indices] for i in range(len(X_out))]

    M = len(f_indices)
    #Then fits an RBF SVM with the given C and gamma on the remaining data
    svc = svm.SVC(kernel='rbf', C=C, gamma=gamma, verbose=False, cache_size=4092, tol=1e-5)   
    svc.fit(X_in, y_in)      
        
    y_out = svc.predict(X_out)
    return y_out
Author: atul2512, Project: Quora, Lines: 42, Source: answer_classifier.py


Example 13: test_scaler_without_centering

def test_scaler_without_centering():
    rng = np.random.RandomState(42)
    X = rng.randn(4, 5)
    X[:, 0] = 0.0  # first feature is always zero
    X_csr = sp.csr_matrix(X)

    scaler = Scaler(with_mean=False).fit(X)
    X_scaled = scaler.transform(X, copy=True)
    assert_false(np.any(np.isnan(X_scaled)))

    scaler_csr = Scaler(with_mean=False).fit(X_csr)
    X_csr_scaled = scaler_csr.transform(X_csr, copy=True)
    assert_false(np.any(np.isnan(X_csr_scaled.data)))

    assert_equal(scaler.mean_, scaler_csr.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csr.std_)

    assert_array_almost_equal(
        X_scaled.mean(axis=0), [0., -0.01,  2.24, -0.35, -0.78], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])

    X_csr_scaled_mean, X_csr_scaled_std = mean_variance_axis0(X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_std, X_scaled.std(axis=0))

    # Check that X has not been modified (copy)
    assert_true(X_scaled is not X)
    assert_true(X_csr_scaled is not X_csr)

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert_true(X_scaled_back is not X)
    assert_true(X_scaled_back is not X_scaled)
    assert_array_almost_equal(X_scaled_back, X)

    X_csr_scaled_back = scaler_csr.inverse_transform(X_csr_scaled)
    assert_true(X_csr_scaled_back is not X_csr)
    assert_true(X_csr_scaled_back is not X_csr_scaled)
    assert_array_almost_equal(X_scaled_back, X)
Author: AlexLerman, Project: scikit-learn, Lines: 38, Source: test_preprocessing.py


Example 14: Logistic_train

def Logistic_train(X_in, y_in, X_out, cs, file_log=None):    
    if file_log:        
        file_log.writelines('# of Samples: {}, # of Features: {}\n'.format(len(X_in), len(X_in[0])))
    M = len(X_in[0])   #Number of features
    seed(time())
    
    #To prevent data snooping, breaks the input set into train, cross-validation and test sets, with sizes proportional to 8-1-1
    
    #First puts aside 10% of the data for the tests
    test_indices, train_indices = split_indices(len(X_in), int(round(0.1*len(X_in))))
    
    X_scaler = [X_in[i] for i in test_indices]
    y_scaler = [y_in[i] for i in test_indices]
    X_in = [X_in[i] for i in train_indices]
    y_in = [y_in[i] for i in train_indices]
    
    
    
    #scale data first
    scaler = Scaler(copy=False) #in place modification
    #Normalizes the data and stores the mean and standard deviation as inner parameters.
    #To avoid data snooping, the scaler is fit on the held-out 10% split only, then applied to the rest of the data.
    scaler.fit(X_scaler, y_scaler)  
    X_scaler = scaler.transform(X_scaler)
    X_in = scaler.transform(X_in)
    X_out = scaler.transform(X_out) #uses the same transformation (same mean_ and std_) fit before
    
    std_test = X_scaler.std(axis=0)
    f_indices = [j for j in range(M) if std_test[j] > 1e-7]
    
    #Removes features with null variance
    
    X_in = [[X_in[i][j] for j in f_indices] for i in range(len(X_in))]
    X_scaler = [[X_scaler[i][j] for j in f_indices] for i in range(len(X_scaler))]
    X_out = [[X_out[i][j] for j in f_indices] for i in range(len(X_out))]   
    
    M = len(X_in[0])
    #Then, on the remaining data, performs a ten-fold cross-validation over the regularization parameter C
    best_cv_accuracy = 0.
    best_c = 0.



    for c in cs:
        kfold = cross_validation.StratifiedKFold(y_in, k=10)
        lrc = LogisticRegression(C=c, tol=1e-5)
                            
        in_accuracy = 0.
        cv_accuracy = 0.
        for t_indices, cv_indices in kfold:
    
            X_train = array([X_in[i][:] for i in t_indices])
            y_train = [y_in[i] for i in t_indices]
            X_cv = array([X_in[i][:] for i in cv_indices])
            y_cv = [y_in[i] for i in cv_indices]            
            
            lrc.fit(X_train, y_train)
            in_accuracy += lrc.score(X_train, y_train)
            cv_accuracy += lrc.score(X_cv, y_cv)
              
        in_accuracy /= kfold.k
        cv_accuracy /= kfold.k
        
        if file_log:
            file_log.writelines('C: {}\n'.format(c))  
            file_log.writelines('\tEin= {}\n'.format(1. - in_accuracy))
            file_log.writelines('\tEcv= {}\n'.format(1. - cv_accuracy))

        if (cv_accuracy > best_cv_accuracy):
            best_c = c
            best_cv_accuracy = cv_accuracy
            
    #Now tests the out of sample error
    if file_log:        
        file_log.writelines('\nBEST result: E_cv={}, C={}\n'.format(1. - best_cv_accuracy, best_c)) 
    
    lrc = LogisticRegression(C=best_c, tol=1e-5)

    lrc.fit(X_in, y_in)
    if file_log:        
        file_log.writelines('Ein= {}\n'.format(1. - lrc.score(X_in, y_in)))
        file_log.writelines('Etest= {}\n'.format(1. - lrc.score(X_scaler, y_scaler)))     
        
    y_out = lrc.predict(X_out)
    return y_out
Author: atul2512, Project: Quora, Lines: 85, Source: answer_classifier.py


Example 15: SVM_train

def SVM_train(X_in, y_in, X_out, gammas, cs, file_log=None):    
    if file_log:        
        file_log.writelines('# of Samples: {}, # of Features: {}\n'.format(len(X_in), len(X_in[0])))
    M = len(X_in[0])   #Number of features
    seed(time())
    
    #To prevent data snooping, breaks the input set into train, cross-validation
    #and scale sets, with sizes proportional to 8-1-1
    
    #First puts aside 10% of the data for the tests
    scale_set_indices, train_indices = split_indices(len(X_in), int(round(0.1*len(X_in))))

#    shuffle(X_in, y_in)
    
    X_scale = [X_in[i] for i in scale_set_indices]
    y_scale = [y_in[i] for i in scale_set_indices]
    X_in = [X_in[i] for i in train_indices]
    y_in = [y_in[i] for i in train_indices]
        
    #Scale data first
    scaler = Scaler(copy=False)             #WARNING: copy=False => in place modification
    #Normalizes the data and stores the mean and standard deviation as inner parameters.
    #To avoid data snooping, normalization is computed on a separate subset only, and then applied to the data.
    scaler.fit(X_scale, y_scale)
    X_scale = scaler.transform(X_scale)
    X_in = scaler.transform(X_in)
    X_out = scaler.transform(X_out)         #uses the same transformation (same mean_ and std_) fit before
    
    std_test = X_scale.std(axis=0)
    f_indices = [j for j in range(M) if std_test[j] > 1e-7]
    
    #Removes features with null variance
    X_in = [[X_in[i][j] for j in f_indices] for i in range(len(X_in))]
    X_scale = [[X_scale[i][j] for j in f_indices] for i in range(len(X_scale))]
    X_out = [[X_out[i][j] for j in f_indices] for i in range(len(X_out))]
    
    
    if file_log:        
        file_log.writelines('Initial features :{}, Features used: {}\n'.format(M, len(X_in[0])))
    
    M = len(f_indices)
    best_cv_accuracy = 0.
    best_gamma = 0.
    best_c = 0.

     
    #Then, on the remaining data, performs a ten-fold cross-validation over the (C, gamma) grid
    for c in cs:
        for g in gammas:
            #Balanced cross validation (keeps the ratio of the two classes as
            #constant as possible across the k folds).
            kfold = cross_validation.StratifiedKFold(y_in, k=10)        
            svc = svm.SVC(kernel='rbf', C=c, gamma=g, verbose=False, cache_size=4092, tol=1e-5)
                                
            in_accuracy = 0.
            cv_accuracy = 0.
            for t_indices, cv_indices in kfold:
        
                X_train = array([X_in[i][:] for i in t_indices])
                y_train = [y_in[i] for i in t_indices]
                X_cv = array([X_in[i][:] for i in cv_indices])
                y_cv = [y_in[i] for i in cv_indices]                
                
                svc.fit(X_train, y_train)
                in_accuracy += svc.score(X_train, y_train)
                cv_accuracy += svc.score(X_cv, y_cv)
            
            in_accuracy /= kfold.k
            cv_accuracy /= kfold.k
            if file_log:        
                file_log.writelines('C:{}, gamma:{}\n'.format(c, g))           
                file_log.writelines('\tEin= {}\n'.format(1. - in_accuracy))
                file_log.writelines('\tEcv= {}\n'.format(1. - cv_accuracy))
    
            if (cv_accuracy > best_cv_accuracy):
                best_gamma = g
                best_c = c
                best_cv_accuracy = cv_accuracy
            
    if file_log:        
        file_log.writelines('\nBEST result: E_cv={}, C={}, gamma={}\n'.format(1. - best_cv_accuracy, best_c, best_gamma))
    
    
    svc = svm.SVC(kernel='rbf', C=best_c, gamma=best_gamma, verbose=False, cache_size=4092, tol=1e-5)

    svc.fit(X_in, y_in)
    if file_log:        
        file_log.writelines('Ein= {}\n'.format(1. - svc.score(X_in, y_in)))
        file_log.writelines('Etest= {}\n'.format(1. - svc.score(X_scale, y_scale)))      
        
    y_out = svc.predict(X_out)
#DEBUG:    output = ['{} {:+}\n'.format(id_out[i], int(y_scale[i])) for i in range(len(X_out))]
#DEBUG:    file_log.writelines('------------------------')    
    return y_out
Author: atul2512, Project: Quora, Lines: 94, Source: answer_classifier.py


Example 16: StratifiedKFold

if folding == "stratified":
    cv = StratifiedKFold(y, k=n_folds)
elif folding == "kfolding":
    cv = KFold(n=y.shape[0], k=n_folds)
elif folding == "leaveoneout":
    n_folds[0] = y.shape[0]
    cv = LeaveOneOut(n=y.shape[0])
else:
    print("unknown crossvalidation method!")


# -- classifier
clf = svm.SVC(kernel="linear", probability=True, C=svm_C)

# -- normalizer
scaler = Scaler()

# -- feature selection
fs = SelectPercentile(f_classif, percentile=fs_n)

print("INITIALIZE RESULTS")
if compute_predict:
    predict = np.zeros([n_splits, n_samples, n_dims, n_dims_tg]) ** np.nan
    predictg = np.zeros([n_splits, n_samplesg, n_dimsg, n_dimsg_tg, n_folds]) ** np.nan
else:
    predict = []
    predictg = []

if compute_probas:
    probas = np.zeros([n_splits, n_samples, n_dims, n_dims_tg, n_classes]) ** np.nan
    probasg = np.zeros([n_splits, n_samplesg, n_dimsg, n_dimsg_tg, n_classes, n_folds]) ** np.nan
Author: kingjr, Project: natmeg_arhus, Lines: 31, Source: skl_king.py


Example 17: StratifiedKFold

if folding == 'stratified':
    cv = StratifiedKFold(y, k=n_folds)
elif folding == 'kfolding':
    cv = KFold(n=y.shape[0], k=n_folds)
elif folding == 'leaveoneout':
    n_folds[0] = y.shape[0]
    cv = LeaveOneOut(n=y.shape[0])
else:
    print("unknown crossvalidation method!")


#-- classifier
clf = svm.SVC(kernel='linear', probability=True, C=svm_C)

#-- normalizer
scaler = Scaler()

#-- feature selection
fs = SelectPercentile(f_classif, percentile=fs_n)
#-- grid search
#parameters = {'svm__C': (1e-6,1e-3, 1e-1, .4)}
#clf       = GridSearchCV(svm, parameters,n_jobs=1)

#-- initialize results (note: x ** np.nan evaluates to NaN, so these arrays start NaN-filled)
predict = np.zeros([n_splits, n_samples, n_dims]) ** np.nan
probas = np.zeros([n_splits, n_samples, n_dims, n_classes]) ** np.nan
predictg = np.zeros([n_splits, n_samplesg, n_dimsg, n_folds]) ** np.nan
probasg = np.zeros([n_splits, n_samplesg, n_dimsg, n_classes, n_folds]) ** np.nan
coef = np.empty([n_splits, n_folds, n_dims, n_classes * (n_classes - 1) / 2, n_features]) ** 0
all_folds = np.zeros([n_splits, n_folds, n_samples]) ** np.nan
y_shfl = np.copy(y)
Author: SherazKhan, Project: natmeg_arhus, Lines: 31, Source: skl_svm.py


Example 18: Scaler

    k = 10
    records = data[:,1:]
    labels = data[:,0]
    n_train = 35000
    #n_val = n - n_train
    n_val = 7000
    trainset = records[:n_train,:]
    trainlabels = labels[:n_train]
    #valset = records[n_train:,:]
    #vallabels = labels[n_train:,:]
    valset = records[n_train:n_train+n_val,:]
    vallabels = labels[n_train:n_train+n_val]
    n,dim = trainset.shape

    # mean centering, stdev normalization and whitening
    scaler = Scaler()
    scaler.fit(trainset)
    trainset = scaler.transform(trainset)
    valset = scaler.transform(valset)
    pca = PCA(n_components=dim,whiten=True)
    pca.fit(trainset)
    trainset = pca.transform(trainset)
    valset = pca.transform(valset)

    config = Train_config()
    config.iterations = 10
    config.nonlinearity = 'tanh'
    config.batchsize = 50
    config.learning_rate = 0.2
    config.momentum = 0.7
    log = open('log.txt','w')
Author: hendrik-p, Project: neural_net, Lines: 31, Source: net.py


Example 19: main

def main():
	X = []
	Y = []
	featuresDB = Base(os.getcwd()+"\\Databases\\features.db")
	featuresDB.open()
	print "features open"
 
	for rec in featuresDB:
		vec = []
		vec.append(rec.f1)
		vec.append(rec.f3)
		vec.append(rec.f4)
		vec.append(rec.f5)
		vec.append(rec.f6)
		vec.append(rec.f7)
		vec.append(rec.f10)
		vec.append(rec.f11)
		vec.append(rec.f12)
		vec.append(rec.f13)
		vec.append(rec.f14)
		vec.append(rec.f15)
		vec.append(rec.f16)
		vec.append(rec.f17)
		vec.append(rec.f18)
		vec.append(rec.f19)
		vec.append(rec.f20)
		vec.append(rec.f21)
		vec.append(rec.f22)
		vec.append(rec.f23)
		X.append(vec)
		Y.append(rec.score)
	print "building classifier"	

	Y = np.array(Y)
	ybar = Y.mean()
	for i in range(len(Y)):
		if Y[i] < ybar:
			Y[i] = 1
		else:
			Y[i] = 2
	scaler = Scaler().fit(X)
	X = scaler.transform(X)

	X = np.array(X)
	Y = np.array(Y)

	skf = cross_validation.StratifiedKFold(Y,k=2)
	for train, test in skf:
		X_train, X_test = X[train], X[test]
		y_train, y_test = Y[train], Y[test]

	
	clf = ExtraTreesClassifier(n_estimators=8,max_depth=None,min_split=1,random_state=0,compute_importances=True)
	scores = cross_validation.cross_val_score(clf,X_train,y_train,cv=5)
	
	clf.fit_transform(X_train, y_train)
	print("Accuracy: %0.4f (+/- %0.2f)" % (scores.mean(), scores.std() / 2))
	print(clf.feature_importances_)

	y_pred = clf.predict(X_test)
	print(classification_report(y_test, y_pred))
	
	model = (scaler, clf)
	joblib.dump(model, 'AestheticModel\\aestheticModel.pkl')

	print("Done")
Author: Perchik, Project: RoD2, Lines: 66, Source: AestheticModel.py


Example 20: load_kernels

def load_kernels(
    dataset, tr_norms=['std', 'sqrt', 'L2'], te_norms=['std', 'sqrt', 'L2'],
    analytical_fim=False, pi_derivatives=False, sqrt_nr_descs=False,
    only_train=False, verbose=0, do_plot=False, outfile=None):

    tr_outfile = outfile % "train" if outfile is not None else outfile

    # Load sufficient statistics.
    samples, _ = dataset.get_data('train')
    tr_data, tr_counts, tr_labels = load_video_data(
        dataset, samples, outfile=tr_outfile, analytical_fim=analytical_fim,
        pi_derivatives=pi_derivatives, sqrt_nr_descs=sqrt_nr_descs, verbose=verbose)

    if verbose > 0:
        print "Train data: %dx%d" % tr_data.shape

    if do_plot:
        plot_fisher_vector(tr_data[0], 'before')

    scalers = []
    for norm in tr_norms:
        if norm == 'std':
            scaler = Scaler()
            tr_data = scaler.fit_transform(tr_data)
            scalers.append(scaler)
        elif norm == 'sqrt':
            tr_data = power_normalize(tr_data, 0.5)
        elif norm == 'sqrt_cnt':
            tr_data = approximate_signed_sqrt(
                tr_data, tr_counts, pi_derivatives=pi_derivatives)
        elif norm == 'L2':
            tr_data = L2_normalize(tr_data)
        if do_plot:
            plot_fisher_vector(tr_data[0], 'after_%s' % norm)

    tr_kernel = np.dot(tr_data, tr_data.T)

    if only_train:
        return tr_kernel, tr_labels, scalers, tr_data

    te_outfile = outfile % "test" if outfile is not None else outfile

    # Load sufficient statistics.
    samples, _ = dataset.get_data('test')
    te_data, te_counts, te_labels = load_video_data(
        dataset, samples, outfile=te_outfile, analytical_fim=analytical_fim,
        pi_derivatives=pi_derivatives, sqrt_nr_descs=sqrt_nr_descs, verbose=verbose)

    if verbose > 0:
        print "Test data: %dx%d" % te_data.shape

    ii = 0
    for norm in te_norms:
        if norm == 'std':
            te_data = scalers[ii].transform(te_data)
            ii += 1
        elif norm == 'sqrt':
            te_data = power_normalize(te_data, 0.5)
        elif norm == 'sqrt_cnt':
            te_data = approximate_signed_sqrt(
                te_data, te_counts, pi_derivatives=pi_derivatives)
        elif norm == 'L2':
            te_data = L2_normalize(te_data)

    te_kernel = np.dot(te_data, tr_data.T)

    return tr_kernel, tr_labels, te_kernel, te_labels
Author: danoneata, Project: approx_norm_fv, Lines: 67, Source: load_data.py
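A pattern that recurs across Examples 7, 12, 14 and 15 is fitting the scaler on one split and reusing its statistics everywhere else. On a modern scikit-learn the same discipline is usually expressed with a Pipeline, which refits the scaler inside each cross-validation fold automatically; a minimal sketch, assuming a current release where the class is named StandardScaler:

import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

rng = np.random.RandomState(0)
X, y = rng.randn(40, 5), rng.randint(0, 2, 40)

# The scaler is refit on the training portion of every fold, so no test-fold
# statistics leak into training -- the leakage the examples above avoid by hand.
model = make_pipeline(StandardScaler(), SVC(kernel="rbf", C=1.0, gamma=0.1))
print(cross_val_score(model, X, y, cv=5).mean())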



Note: the sklearn.preprocessing.Scaler class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors, and distribution or use should follow the corresponding project's license. Do not repost without permission.

