
Python preprocessing.Normalizer Class Code Examples


This article collects typical usage examples of the sklearn.preprocessing.Normalizer class in Python. If you have been wondering what the Normalizer class does, how to use it, or what working example code looks like, the curated class examples below should help.



The following presents 20 code examples of the Normalizer class, sorted by popularity by default.
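
Before the collected examples, here is a minimal, self-contained sketch (written for this article; the array values are made up for illustration) of what Normalizer does: it rescales each sample (row) to unit norm independently, unlike StandardScaler or MinMaxScaler, which operate per feature (column).

import numpy as np
from sklearn.preprocessing import Normalizer

# Two samples with four features each (illustrative values only).
X = np.array([[4.0, 1.0, 2.0, 2.0],
              [1.0, 3.0, 5.0, 1.0]])

# norm='l2' (the default) divides each row by its Euclidean length,
# so every output row ends up with unit L2 norm.
normalizer = Normalizer(norm='l2')
X_normalized = normalizer.fit_transform(X)

print(X_normalized)
print(np.linalg.norm(X_normalized, axis=1))  # both rows -> 1.0

Note that Normalizer is stateless: fit only validates the input, so the same instance can transform new data without refitting, which is why several examples below call transform directly.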

Example 1: normalize_test

def normalize_test():
    from sklearn.preprocessing import Normalizer
    # Normalizer expects a 2D array of shape (n_samples, n_features),
    # so wrap the values in an outer list to form a single sample.
    X = [[1, 2, 3, 4, 5, 2, 6, 8]]
    normalizer = Normalizer()
    X2 = normalizer.fit_transform(X)

    print(X2)
Author: swenker, Project: bigdata, Source: scikit_lab.py


Example 2: kfold

def kfold(agetext,k,model,nfeatures,check=False,k2 = None,max_df=0.9,min_df=3):
    out = []
    for i in range(k):
        print("iteration: " + str(i))
        agetext = shuffle(agetext)
        X = agetext["text"]
        X = X.tolist()
        label = agetext["agegroup"].tolist()
        vec = TfidfVectorizer(tokenizer = tokenize,token_pattern=r'(?u)\b\w\w+\b|^[_\W]+$',lowercase=False,max_features=nfeatures,max_df = max_df,min_df = min_df,use_idf=True,ngram_range=(1,2))
        docs = []
        for doc in X:
            docs.append(" ".join(doc))
        docs2 = [doc.replace("\t","").replace("\n","") for doc in docs]
        traindocs = docs2[:7999]
        X = vec.fit_transform(traindocs)
        testdocs = docs2[8000:9500]
        X_test = vec.transform(testdocs)
        tlabel = label[:7999]
        testl = label[8000:9500]
        if check:
            # LSA: reduce dimensionality with truncated SVD, then rescale
            # each sample to unit length before fitting the classifier.
            lsa = TruncatedSVD(k2, algorithm='arpack')
            normalizer = Normalizer(copy=False)
            X = lsa.fit_transform(X)
            X = normalizer.fit_transform(X)
            X_test = lsa.transform(X_test)
            X_test = normalizer.transform(X_test)
        model.fit(X,tlabel)
        pred = model.predict(X_test)
        out.append(round(accuracy_score(testl, pred),2))
    print(out)
    print(np.mean(out))
Author: hurelyyu, Project: CS_Master_UW, Source: TMClassCopy.py


Example 3: TfIdf

from itertools import chain

from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import Normalizer


# Feature is this project's own base class for feature extractors.
class TfIdf(Feature):
    def __init__(self):
        self.kbest = None
        self.vect = None
        self.truncated = None
        self.normalizer = None

    def train(self, reviews, labels):
        self.vect = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), stop_words='english')

        reviews_text = [' '.join(list(chain.from_iterable(review))) for review in reviews]
        tfidf_matrix = self.vect.fit_transform(reviews_text).toarray()

        self.truncated = TruncatedSVD(n_components=50)
        self.truncated.fit(tfidf_matrix, labels)

        trunc = self.truncated.transform(tfidf_matrix)
        self.normalizer = Normalizer()
        self.normalizer.fit(trunc)

        self.kbest = SelectKBest(f_classif, k=5)
        self.kbest.fit(self.normalizer.transform(trunc), labels)

    def score(self, data):
        reviews_text = ' '.join(list(chain.from_iterable(data)))
        tfidf_matrix = self.vect.transform([reviews_text]).toarray()

        trunc = self.truncated.transform(tfidf_matrix)

        return tuple(self.kbest.transform(self.normalizer.transform(trunc))[0, :])
Author: EdwardBetts, Project: Yulp, Source: tfidf.py


Example 4: kfold

def kfold(agetext,k,model,k2):
    import collections
    out = []
    for i in range(k):
        print("iteration: " + str(i))
        agetext = shuffle(agetext)
        datatb = agetext.iloc[:,1:]
        label = agetext["agegroup"].tolist()
        # cross_validation is sklearn's pre-0.18 module; in current releases
        # use sklearn.model_selection.train_test_split instead.
        X_train, X_test, y_train, y_test = cross_validation.train_test_split(
            datatb, label, test_size=0.15, random_state=i*6)
        data = X_train.values
        counter = collections.Counter(y_train)
        print(counter)
        testdata = X_test.values
        lsa = TruncatedSVD(k2, algorithm='arpack')
        normalizer = Normalizer(copy=False)
        X = lsa.fit_transform(data)
        X = normalizer.fit_transform(X)
        X_test = lsa.transform(testdata)
        X_test = normalizer.transform(X_test)
        model.fit(X, y_train)
        pred = model.predict(X_test)
        counter = collections.Counter(y_test)
        print(counter)
        counter = collections.Counter(pred)
        print(counter)
        out.append(round(accuracy_score(y_test, pred), 5))
    print(out)
    print(np.mean(out))
Author: hurelyyu, Project: CS_Master_UW, Source: AgeGroup.py


Example 5: preprocess

def preprocess(data, n_components, use_tf_idf=True):
    """
    Preprocess the data for clustering by running SVD and
    normalizing the results. This process is also known as
    LSA.

    arguments:
    data -- Dataset; if use_tf_idf is True the object must contain a
            tf_idf table alongside a raw frequencies dataframe.
    n_components -- int, the number of components to use for the SVD;
                    a minimum of 100 is recommended.
    use_tf_idf -- bool, whether to use the tf-idf frequencies for the
                  preprocessing.

    returns:
    e -- float, a measure of variance explained by the SVD.
    X -- np.array, an array with the data reduced to n_components.
    """
    # as_matrix() is deprecated in recent pandas; .to_numpy() is equivalent.
    if use_tf_idf:
        d = data.tf_idf.as_matrix()
    else:
        d = data.df.as_matrix()
    svd = TruncatedSVD(n_components=n_components)
    X = svd.fit_transform(d)
    norm = Normalizer()

    # Record a measure of explained variance
    e = svd.explained_variance_ratio_.sum()*100
    # Normalize the SVD-reduced matrix X, not the raw input d.
    return e, norm.fit_transform(X)
Author: marcomorucci, Project: Clustering-Constitutions, Source: analyze.py


Example 6: __init__

    def __init__(self, 
                 YTrain_file,
                 XTrain_file,
                 XTest_file,
                 output_path,
                 normalise,
                 C,
                 class_weight,
                 ):
        """
        Arguments:
      
        """
        self.YTrain = joblib.load(YTrain_file)
        XTrain = joblib.load(XTrain_file)
        self.XTrain = XTrain.reshape(np.size(XTrain, axis=0), -1)
       
        XTest = joblib.load(XTest_file)   
        self.XTest = XTest.reshape(np.size(XTest, axis=0), -1)

        self.output_path = output_path
    
        if normalise:
            normalizer = Normalizer(copy=False)
            # transform returns the normalized array; reassign it, since
            # in-place modification is not guaranteed even with copy=False.
            self.XTrain = normalizer.transform(self.XTrain)
            self.XTest = normalizer.transform(self.XTest)

        self.C = C
        if class_weight == 'none':
            class_weight = None
        self.class_weight = class_weight
Author: dchall88, Project: DIGITS, Source: svm_train_test.py


Example 7: getPcaFeatures

 def getPcaFeatures(self, images, components, image_size):
     imageDataset = self.getImagesAsDataset(images, image_size)
     norm = Normalizer()
     imageDataset = norm.fit_transform(imageDataset)
     pca = PCA(n_components=components)
     imageDataset = pca.fit_transform(imageDataset)
     return pca, norm, imageDataset
Author: tincho4t, Project: aaTP, Source: ImagesProcessor.py


Example 8: explore_k

def explore_k(svd_trans, k_range):
    '''
    Explores various values of k in KMeans

    Args:
        svd_trans: dense array with lsi transformed data
        k_range: the range of k-values to explore
    Returns:
        scores: list of inertia scores for each k value
    '''

    scores = []
    # spherical kmeans, so normalize
    normalizer = Normalizer()
    norm_data = normalizer.fit_transform(svd_trans)
    for k in k_range:
        km = KMeans(n_clusters=k, init='k-means++', max_iter=100, n_init=1,
                    verbose=2)
        km.fit(norm_data)
        scores.append(-1*km.score(norm_data))
    plt.plot(k_range, scores)
    plt.xlabel('# of clusters')
    plt.ylabel('Inertia')
    sns.despine(offset=5, trim=True)
    return scores
Author: lwoloszy, Project: albumpitch, Source: genres.py


Example 9: _normalize

    def _normalize(self, X, y, X_t):
        from sklearn.preprocessing import Normalizer
        NORM = Normalizer()

        X = NORM.fit_transform(X, y)
        X_t = NORM.transform(X_t)

        return X, X_t
Author: mikbuch, Project: pymri, Source: datasets.py


Example 10: kmeans

def kmeans(tfidf, svd, svd_trans, k=200, n_words=10):
    '''
    Performs k-means clustering on svd transformed data and plots it

    Args:
        tfidf: sklearn fitted TfidfVectorizer
        svd: sklearn fitted TruncatedSVD
        svd_trans: dense array with lsi transformed data
        k: the k in k-means
    Returns:
        km: the fitted KMean object
    '''

    # spherical kmeans, so normalize
    normalizer = Normalizer()
    norm_data = normalizer.fit_transform(svd_trans)
    km = KMeans(n_clusters=k, init='k-means++', max_iter=100, n_init=5,
                verbose=2)
    km.fit(norm_data)

    original_space_centroids = svd.inverse_transform(km.cluster_centers_)
    order_centroids = original_space_centroids.argsort()[:, ::-1]

    terms = tfidf.get_feature_names()
    terms = prettify(terms)
    terms = np.array(terms)
    fig = plt.figure(figsize=(10, 8))
    for i in range(10):
        print("Cluster {:d}:".format(i))
        for ind in order_centroids[i, :n_words]:
            print(' {:s}'.format(terms[ind]))
        print('\n')

        # Make a figure and axes with dimensions as desired.
        ax = fig.add_subplot(2, 5, i+1)
        ax.set_title('Cluster {:d}'.format(i+1))

        component = order_centroids[i]
        cmap = plt.cm.Purples
        mn = np.min(component[:n_words])
        mx = np.max(component[:n_words])
        norm = mpl.colors.Normalize(mn, mx)

        cb = mpl.colorbar.ColorbarBase(ax, cmap=cmap, norm=norm,
                                       orientation='vertical')
        # sorted_component = np.sort(component)
        colors = sns.color_palette('Purples', 9).as_hex()
        colors = np.repeat(colors[-1], n_words)

        cb.set_ticks(np.linspace(mn, mx, n_words+2)[1:-1])
        cb.ax.yaxis.set_tick_params(size=0)
        cb.ax.tick_params(labelsize=10)
        for color, tick in zip(colors, cb.ax.get_yticklabels()):
            tick.set_color(color)
            tick.set_fontsize(14)
        cb.set_ticklabels(np.array(terms)[order_centroids[i, :n_words][::-1]])
    plt.tight_layout()
    return km
Author: lwoloszy, Project: albumpitch, Source: genres.py


Example 11: readAndPreProcess

def readAndPreProcess():
	print("\n\n********** CS-412 HW5 Mini Project **********")
	print("************ Submitted by Sankul ************\n\n")
	print("Reading data, please ensure that the dataset is in same folder.")
	resp = pd.read_csv('responses.csv')
	print("Data reading complete!")
	print("Some stats reagarding data:")
	resp.describe()
	
	print("\nStarting pre-processing.....")
	
	print("\nFinding missing values:")
	print("Missing values found, removing them")
	emptyVals = resp.isnull().sum().sort_values(ascending=False)
	emptyPlot = emptyVals.plot(kind='barh', figsize = (20,35))
	plt.show()
	print("Empty values removed")
	
	print("\nChecking for NaN and infinite values in target column (Empathy):")
	if len(resp['Empathy']) - len(resp[np.isfinite(resp['Empathy'])]):
		print("Number of infinite or NaN values in Empathy column: ", len(resp['Empathy']) - len(resp[np.isfinite(resp['Empathy'])]))
		print("Removing them")
		resp = resp[np.isfinite(resp['Empathy'])]
		print("Infinite and NaN values removed")
		
	print("\nChecking for categorical features:")
	if resp.select_dtypes(include=[object]).shape[1] > 0:  # any non-numeric columns?
		print("Categorical features found. Removing them...")
		resp = resp.select_dtypes(exclude=[object])	
		print("Categorical features removed")
		
	print("\nReplacing NaN values with the mean value:")
	resp=resp.fillna(resp.mean()) 
	resp.isnull().sum()
	print("Values replaced")
	
	print("\nSeperating labels from data:")
	Y = resp['Empathy'].values
	X = resp.drop('Empathy',axis=1)
	print("Labels seperated")
	
	print("\nScaling, standardizing and normalizing the data:")
	scaler = MinMaxScaler(feature_range=(0, 1))
	rescaledX = scaler.fit_transform(X)
	
	scaler = StandardScaler().fit(rescaledX)
	standardizedX = scaler.transform(rescaledX)
	
	normalizer = Normalizer().fit(standardizedX)
	normalizedX = normalizer.transform(standardizedX)
	print("Scaling, standardizing and normalizing completed")
	
	print("\nFinal data looks like:")
	print(normalizedX.shape)
	print("Values inside look like:")
	print(normalizedX[0])
	
	return normalizedX,Y
Author: dark-shade, Project: CS-412-IML-HW5-Mini-Project, Source: hw5.py


Example 12: __init__

 def __init__(self, img_dir):
     self._imgdir = img_dir
     self._extractors = self.__get_extractors()
     self._normalizer = Normalizer()
     self._face_normalizer = Normalizer()
     self._estimator = NearestNeighbors(n_neighbors=3)
     self._face_estimator = NearestNeighbors(n_neighbors=3)
     self._imgnames = []
     self._face_imgnames = []
Author: xulesc, Project: general, Source: wally.py


Example 13: ScikitNormalizer

class ScikitNormalizer(object):
    def __init__(self):
        self.data_normalizer = Normalizer()

    def fit(self, data):
        self.data_normalizer.fit(data)

    def transform(self, data):
        return (self.data_normalizer.transform(data) + 1) / 2
Author: Falgunithakor, Project: SummerResearchDE-BPSO, Source: Normalizer.py


Example 14: test_ver2_syntetic_dataset

    def test_ver2_syntetic_dataset(self):

        self.ex = experiment.Experiment()
        self.ex.cf_matrix = load_sparse_data('syntetic_cf.dat')
        n = Normalizer(norm='l2', copy=True)
        self.ex.cf_matrix = n.transform(self.ex.cf_matrix) #normalized.
        self.ex.cb_prox = experiment.Experiment.load_data(PKL + 'cb_prox.pkl')
        self.ex.cf_prox = self.ex.cf_matrix * self.ex.cf_matrix.T
        self.ex.test_corr_sparsity(draw=True, interval=100)
Author: osmanbaskaya, Project: acm_mak, Source: unittest_experiment.py


Example 15: reduce_dimension

    def reduce_dimension(self, n_components=2):
        """ Return PCA transform of self.data, with n_components. """

        reducer = PCA(n_components=n_components)

        X = self.data.values

        norm = Normalizer()
        Xnorm = norm.fit_transform(X)

        return reducer.fit_transform(Xnorm)
Author: abshinn, Project: practice, Source: pima.py


Example 16: normalize

 def normalize(self, msi, norm="l1"):
     original_shape = msi.get_image().shape
     collapsed_image = collapse_image(msi.get_image())
     # temporarily save the mask, since the sklearn normalizer removes it
     is_masked_array = isinstance(msi.get_image(), np.ma.MaskedArray)
     if is_masked_array:
         mask = msi.get_image().mask
     normalizer = Normalizer(norm=norm)
     normalized_image = normalizer.transform(collapsed_image)
     if is_masked_array:
         normalized_image = np.ma.MaskedArray(normalized_image, mask=mask)
     msi.set_image(np.reshape(normalized_image, original_shape))
Author: 151706061, Project: MITK, Source: normalize.py


Example 17: test_pipeline

def test_pipeline():
    norm = Normalizer(norm='l1')
    norm_id = norm.what().id()
    assert norm_id == "Normalizer(norm='l1')"
    kmeans = KMeans(n_clusters=12)
    kmeans_id = kmeans.what().id()
    print(kmeans_id)
    assert kmeans_id == \
        "KMeans(algorithm='auto',init='k-means++',max_iter=300,n_clusters=12,n_init=10,random_state=None,tol=0.0001)"
    # noinspection PyTypeChecker
    pipeline_id = Pipeline((('norm', norm), ('kmeans', kmeans))).what().id()
    assert pipeline_id == "Pipeline(steps=(('norm',%s),('kmeans',%s)))" % (norm_id, kmeans_id)
Author: sdvillal, Project: whatami, Source: test_what_sklearn.py


Example 18: make_nn_regression

def make_nn_regression(n_samples=100, n_features=100, n_informative=10,
                       dense=False, noise=0.0, test_size=0,
                       normalize_x=True, normalize_y=True,
                       shuffle=True, random_state=None):

    X, y, w = _make_nn_regression(n_samples=n_samples,
                                  n_features=n_features,
                                  n_informative=n_informative,
                                  shuffle=shuffle,
                                  random_state=random_state)

    if dense:
        X = X.toarray()

    if test_size > 0:
        cv = ShuffleSplit(len(y), n_iter=1, random_state=random_state,
                          test_size=test_size, train_size=1-test_size)

        train, test = list(cv)[0]
        X_train, y_train = X[train], y[train]
        X_test, y_test = X[test], y[test]
        if not dense:
            X_train.sort_indices()
            X_test.sort_indices()
    else:
        X_train, y_train = X, y
        if not dense:
            X_train.sort_indices()
        X_test, y_test = None, None

    # Add noise
    if noise > 0.0:
        generator = check_random_state(random_state)
        y_train += generator.normal(scale=noise * np.std(y_train),
                                    size=y_train.shape)
        y_train = np.maximum(y_train, 0)

    if normalize_x:
        normalizer = Normalizer()
        X_train = normalizer.fit_transform(X_train)
        if X_test is not None:
            X_test = normalizer.transform(X_test)

    if normalize_y:
        scaler = MinMaxScaler()
        y_train = scaler.fit_transform(y_train.reshape(-1, 1)).ravel()
        if y_test is not None:
            y_test = scaler.transform(y_test.reshape(-1, 1)).ravel()

    if X_test is not None:
        return X_train, y_train, X_test, y_test, w
    else:
        return X_train, y_train, w
Author: RPGOne, Project: sebabulba, Source: samples_generator.py


Example 19: KNN

class KNN(Model):

    def __init__(self, X_train, y_train, X_val, y_val):
        super().__init__()
        self.normalizer = Normalizer()
        self.normalizer.fit(X_train)
        self.clf = neighbors.KNeighborsRegressor(n_neighbors=10, weights='distance', p=1)
        self.clf.fit(self.normalizer.transform(X_train), numpy.log(y_train))
        print("Result on validation data: ", self.evaluate(self.normalizer.transform(X_val), y_val))

    def guess(self, feature):
        return numpy.exp(self.clf.predict(self.normalizer.transform(feature)))
Author: codeaudit, Project: entity-embedding-rossmann, Source: models.py


Example 20: get_tf_idf_M

def get_tf_idf_M(M, tf="raw", idf="c", norm_samps=False):
    # tf options: "bin", "raw", "log", "dnorm" (only "raw" is implemented);
    # idf options: "c", "smooth", "max", "prob" (only "c" is implemented).
    # The original defaults were the option lists themselves, which left
    # tf_M and idf_v undefined when the function was called with defaults.
    N = len(M)
    if tf == "raw":
        tf_M = np.copy(M) #just the frequency of the word in a text
#    #TODO: check if dnorm is implemented OK
#    elif tf == "dnorm":
#        tf_M = 0.5 + 0.5*(M/(np.amax(M, axis=1).reshape((N,1))))
    if idf == "c":
        idf_v = []
        for i in range(M.shape[1]): #get the number of texts that contain a word words[i]
            idf_v.append(np.count_nonzero(M[:,i])) #count the non zero values in columns of matrix M
        idf_v = np.array(idf_v)
        idf_v = np.log(N/idf_v)
    tf_idf_M = tf_M*idf_v
    if norm_samps:
        # Rescale each document vector to unit norm.
        normalizer = Normalizer()
        tf_idf_M = normalizer.fit_transform(tf_idf_M)
#    np.savetxt("tf_idf_M_" + str(N) + ".txt", tf_idf_M , fmt="%s")
    return tf_idf_M
Author: RokIvansek, Project: Spectral-clustering-HW, Source: newsgroups20.py



Note: The sklearn.preprocessing.Normalizer class examples in this article were compiled from GitHub, MSDocs, and other source-code and documentation platforms; the snippets were selected from open-source projects contributed by many developers. Copyright of the source code belongs to the original authors; consult each project's license before distributing or reusing it, and do not reproduce this article without permission.

