
Python pipeline.FeatureUnion Class Code Examples


This article collects typical usage examples of the Python class sklearn.pipeline.FeatureUnion. If you have been wondering what FeatureUnion does, how to use it, or what real-world code built on it looks like, the curated examples below should help.



The following presents 20 code examples of the FeatureUnion class, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
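Before turning to the collected examples, here is a minimal, self-contained sketch of the pattern most of them share: a FeatureUnion concatenates the outputs of several transformers column-wise, and the fitted union can then feed a downstream estimator, either directly or as the first step of a Pipeline. The iris data is only a stand-in for any feature matrix, and the transformer choices mirror the PCA-plus-SelectKBest combination that recurs throughout the examples below.

from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)

# Concatenate 2 principal components with the 1 best original feature.
combined = FeatureUnion([
    ("pca", PCA(n_components=2)),
    ("univ_select", SelectKBest(k=1)),
])
X_features = combined.fit(X, y).transform(X)  # shape: (n_samples, 3)

# The same union can be embedded in a Pipeline and fit end to end;
# its sub-steps are then addressable as features__pca__n_components etc.
pipe = Pipeline([("features", combined), ("svm", SVC(kernel="linear"))])
pipe.fit(X, y)
print(pipe.score(X, y))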

Example 1: pca_kpca

def pca_kpca(train_data, labels):
    # make_union already returns a FeatureUnion, so wrapping its result in
    # FeatureUnion(...) again, as the original did, would fail at fit time;
    # build the union in one step instead.
    combined = make_union(PCA(), TruncatedSVD(), KernelPCA())
    # equivalent: FeatureUnion([('linear_pca', PCA()), ('svd', TruncatedSVD()), ('kernel_pca', KernelPCA())])
    combined.fit(train_data, labels)  # or: combined.fit_transform(train_data, labels)

    return combined
Author: kirk86 | Project: Task-1 | Lines: 7 | Source: misc.py


Example 2: test_feature_union

def test_feature_union(self):
    """Tests that combining multiple featurizers works as expected"""
    modules = ["bag-of-words", "entities"]
    modules_list, _ = modules_to_dictionary(modules)
    feature_union = FeatureUnion(modules_list)
    feature_union.fit(texts_entities, outcomes)
    feature_union.transform(["unknown"])
Author: cgoldammer | Project: simple_text_analysis | Lines: 7 | Source: tests.py


Example 3: testLogistic

def testLogistic(lbda=1.0, n_components=20, kbest=4):
    otto = load_otto()
    X = otto.data
    y = otto.target

    scaler = StandardScaler().fit(X)
    X = scaler.transform(X)

    pca = PCA(n_components=n_components)
    selection = SelectKBest(k=kbest)

    combined_features = FeatureUnion(
        [("pca", pca), ("univ_select", selection)]
    )
    X_features = combined_features.fit(X, y).transform(X)  # not strictly needed: the Pipeline refits the union

    logistic = LogisticRegression(C=1.0 / lbda)
    pipe = Pipeline(steps=[('features', combined_features), ('logistic', logistic)])
    pipe.fit(X, y)

    test_otto = load_testotto()
    testData = scaler.transform(test_otto.data)

    # save the prediction
    prediction = pipe.predict(testData)  # the original called predict_proba twice; hard labels are the likelier intent here
    proba = pipe.predict_proba(testData)
    save_submission(lbda, proba, prediction)
Author: Turf1013 | Project: Machine_Learning | Lines: 35 | Source: logistic_submission.py


Example 4: testSVC

def testSVC(lbda=1.0, n_components=20, kbest=4):
    otto = load_otto()
    X = otto.data
    y = otto.target

    scaler = StandardScaler().fit(X)
    X = scaler.transform(X)

    pca = PCA(n_components=n_components)
    selection = SelectKBest(k=kbest)

    combined_features = FeatureUnion(
        [("pca", pca), ("univ_select", selection)]
    )
    X_features = combined_features.fit(X, y).transform(X)  # not strictly needed: the Pipeline refits the union

    svc = SVC(C=1.0 / lbda, kernel='rbf', cache_size=400, probability=True)
    pipe = Pipeline(steps=[('features', combined_features), ('svc', svc)])
    pipe.fit(X, y)

    test_otto = load_testotto()
    testData = scaler.transform(test_otto.data)

    # save the prediction
    prediction = pipe.predict(testData)  # the original called predict_proba twice; hard labels are the likelier intent here
    proba = pipe.predict_proba(testData)
    save_submission(lbda, proba, prediction)
Author: Turf1013 | Project: Machine_Learning | Lines: 30 | Source: svc_submission.py


Example 5: best_estimator

def best_estimator(self, X, y):
    try:
        pca = PCA(n_components=2)
        selection = SelectKBest(k=2)
        combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
        X_features = combined_features.fit(X, y).transform(X)  # not strictly needed: the Pipeline refits the union
        regr = linear_model.LassoCV()
        pipeline = Pipeline([("features", combined_features), ("regression", regr)])

        # note: both branches currently search the same grid
        if 'batter' in self.player:
            param_grid = dict(features__pca__n_components=[1, 2, 3],
                              features__univ_select__k=[1, 2])
        else:
            param_grid = dict(features__pca__n_components=[1, 2, 3],
                              features__univ_select__k=[1, 2])

        grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=100)
        grid_search.fit(X, y)
        self.modelled = True
        return grid_search
    except ValueError as e:
        print(e)
        self.modelled = False
        return None
Author: emschorsch | Project: fanduel | Lines: 25 | Source: Model.py


Example 6: prediction

def prediction(train_df, test_df, MODEL):

    print("... start prediction")

    fu_obj = FeatureUnion(transformer_list=features.feature_list)

    train_X = fu_obj.fit_transform(train_df)
    train_y = train_df["Sales"].as_matrix()  # use .values on pandas >= 1.0, where as_matrix was removed

    clf = GridSearchCV(estimator=clf_dict[MODEL]["clf"],
                       param_grid=clf_dict[MODEL]["paramteters"],  # sic: key spelling matches the project's clf_dict
                       n_jobs=3, scoring=rmspe, verbose=1)
    clf.fit(train_X, train_y)
    print(clf.best_score_)
    index_sr = pd.Series(get_split_feature_list(fu_obj), name="Feature")
    if hasattr(clf.best_estimator_, "coef_"):
        coef_sr = pd.Series(clf.best_estimator_.coef_, name="Coef")
        coef_df = pd.concat([index_sr, coef_sr], axis=1).set_index("Feature")
        coeffile = SUBMISSION + "coef_%s.csv" % MODEL
        coef_df.to_csv(coeffile)
    if hasattr(clf.best_estimator_, "feature_importances_"):
        coef_sr = pd.Series(clf.best_estimator_.feature_importances_,
                            name="Importance")
        coef_df = pd.concat([index_sr, coef_sr], axis=1).set_index("Feature")
        coeffile = SUBMISSION + "importance_%s.csv" % MODEL
        coef_df.to_csv(coeffile)

    print("... start y_pred")
    test_X = fu_obj.transform(test_df)

    y_pred = clf.predict(test_X)
    pred_sr = pd.Series(y_pred, name="Sales", index=test_df["Id"])
    submissionfile = SUBMISSION + "submission_%s.csv" % MODEL
    pred_sr.to_csv(submissionfile, header=True, index_label="ID")
Author: guruttosekai2011 | Project: Rossmann_Store_Sales | Lines: 34 | Source: prediction.py


Example 7: trainItalianSexClassifier

def trainItalianSexClassifier(self):
    # get the correct labels from the dictionary in trainY and testY
    trainX = self.italianTrainData[0]
    trainY = self.getYlabels(self.italianTrainData[1], 'sex')

    combined_features = FeatureUnion(
        [("tfidf", TfidfVectorizer()),
         ("ngrams", TfidfVectorizer(ngram_range=(3, 3), analyzer="char")),
         ("counts", CountVectorizer()),
         ("latin", Latin())],
        transformer_weights={
            'latin': 1,
            'tfidf': 2,
            'ngrams': 2,
            'counts': 1,
        })

    X_features = combined_features.fit(trainX, trainY).transform(trainX)  # not strictly needed: the Pipeline refits the union
    classifier = svm.LinearSVC()
    pipeline = Pipeline([("features", combined_features), ("classifier", classifier)])
    pipeline.fit(trainX, trainY)

    return pipeline
Author: chrispool | Project: lfd | Lines: 25 | Source: classifiers.py


Example 8: fit

def fit(self, X, y=None):
    trans2 = Q2Transformer()
    trans3 = Q3Transformer()
    trans4 = Q4Transformer()
    combined_features = FeatureUnion([("Q2", trans2), ("Q3", trans3), ("Q4", trans4)])
    # the original assigned to self.fit, which shadows this method after the
    # first call; store the fitted union under its own attribute instead
    self.combined_features = combined_features.fit(X)
    return self
Author: FangMath | Project: MachineLearning_Mini_Project | Lines: 7 | Source: Models_ml.py


Example 9: best_estimator

def best_estimator(self, X, y):
    try:
        pca = PCA(n_components=2)
        selection = SelectKBest(k=2)
        combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
        X_features = combined_features.fit(X, y).transform(X)  # not strictly needed: the Pipeline refits the union
        regr = linear_model.LassoCV()
        pipeline = Pipeline([("features", combined_features), ("regression", regr)])

        if 'batter' in self.player:
            param_grid = dict(features__pca__n_components=[1],
                              features__univ_select__k=[1])
        else:
            param_grid = dict(features__pca__n_components=[1, 2, 3, 4],
                              features__univ_select__k=[1, 2, 3, 4])

        grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=0)
        grid_search.fit(X, y)
        self.modelled = True
        regr = grid_search
        # Ian: should do R2 on predicted points vs. points on a given day
        self.R2 = r2_score(self.target_matrix, regr.predict(self.feature_matrix))
        return regr
    except ValueError as e:
        print(e)
        self.modelled = False
        return None
Author: cole-maclean | Project: fanduel | Lines: 26 | Source: Model.py


Example 10: rbf_kernels

def rbf_kernels(env, n_samples=100000, gamma=[0.01, 0.1], n_components=100):
    """Represent observation samples using RBF-kernels.

    EXAMPLE
    -------
    >>> env = gym.make('MountainCar-v0')
    >>> rbf = rbf_kernels(env, n_components=100)
    >>> sample = env.observation_space.sample().reshape((1, env.observation_space.shape[0]))
    >>> rbf(sample).shape
    (1, 100)
    """
    observation_examples = np.array([env.observation_space.sample() for _ in range(n_samples)])

    # Fit feature scaler
    scaler = sklearn.preprocessing.StandardScaler()
    scaler.fit(observation_examples)

    # Fit feature extractor: one RBFSampler per gamma, splitting the component budget
    features = []
    for g in gamma:
        features.append(('gamma={}'.format(g), RBFSampler(n_components=n_components // len(gamma), gamma=g)))

    features = FeatureUnion(features)
    features.fit(scaler.transform(observation_examples))

    def _rbf_kernels(observation):
        return features.transform(scaler.transform(observation))

    return _rbf_kernels
Author: sotetsuk | Project: pyRLbook | Lines: 29 | Source: function_approximation.py


Example 11: concat_feature_extractors

def concat_feature_extractors(train_data, labels):
    # This dataset is way too high-dimensional. Better do PCA:
    pca = PCA(n_components=2)

    # Maybe some original features were good, too?
    selection = SelectKBest(k=1)

    # Build estimator from PCA and univariate selection:
    combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])

    # Use combined features to transform dataset:
    X_features = combined_features.fit(train_data, labels).transform(train_data)

    # Classify:
    svm = SVC(kernel="linear")
    svm.fit(X_features, labels)

    # Do grid search over k, n_components and C:
    pipeline = Pipeline([("features", combined_features), ("svm", svm)])

    param_grid = dict(features__pca__n_components=[1, 2, 3],
                      features__univ_select__k=[1, 2],
                      svm__C=[0.1, 1, 10])

    grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=10)
    grid_search.fit(train_data, labels)
    print(grid_search.best_estimator_)
Author: kirk86 | Project: Task-1 | Lines: 29 | Source: misc.py


Example 12: train_model

def train_model(trainset):
    word_vector = TfidfVectorizer(analyzer="word", ngram_range=(2, 2), binary=False,
                                  max_features=2000, min_df=1, decode_error="ignore")
    char_vector = TfidfVectorizer(analyzer="char", ngram_range=(2, 3), binary=False,
                                  max_features=2000, min_df=1, decode_error="ignore")
    vectorizer = FeatureUnion([("chars", char_vector), ("words", word_vector)])
    corpus = []
    classes = []

    for item in trainset:
        corpus.append(item['text'])
        classes.append(item['label'])

    print("Training instances:", 0.8 * len(classes))
    print("Testing instances:", 0.2 * len(classes))

    matrix = vectorizer.fit_transform(corpus)
    print("feature count:", len(vectorizer.get_feature_names()))
    print("training model")
    X = matrix.toarray()
    y = numpy.asarray(classes)
    model = LinearSVC()
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, test_size=0.2, random_state=0)
    y_pred = OneVsRestClassifier(model).fit(X_train, y_train).predict(X_test)

    # collect the labels confused with 'anonEdited' in either direction
    res1 = [i for i, j in enumerate(y_pred) if j == 'anonEdited']
    res2 = [i for i, j in enumerate(y_test) if j == 'anonEdited']
    reset = []
    for r in res1:
        if y_test[r] != "anonEdited":
            reset.append(y_test[r])
    for r in res2:
        if y_pred[r] != "anonEdited":
            reset.append(y_pred[r])

    with open(sys.argv[2], "w") as output:
        for suspect in reset:
            output.write(str(suspect) + "\n")

    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    pl.matshow(cm)
    pl.title('Confusion matrix')
    pl.colorbar()
    pl.ylabel('True label')
    pl.xlabel('Predicted label')
    pl.show()
    print(accuracy_score(y_test, y_pred))
Author: srini21 | Project: Amazon-deceptive-reviews | Lines: 60 | Source: anontesting.py


Example 13: test_feature_union_feature_names

def test_feature_union_feature_names():
    word_vect = CountVectorizer(analyzer="word")
    char_vect = CountVectorizer(analyzer="char_wb", ngram_range=(3, 3))
    ft = FeatureUnion([("chars", char_vect), ("words", word_vect)])
    ft.fit(JUNK_FOOD_DOCS)
    feature_names = ft.get_feature_names()
    for feat in feature_names:
        assert_true("chars__" in feat or "words__" in feat)
    assert_equal(len(feature_names), 35)
Author: Givonaldo | Project: scikit-learn | Lines: 9 | Source: test_pipeline.py


Example 14: convert_testdata

def convert_testdata(test_gray_data, feature_rule=f.feature_transformer_rule):

    data_df = f.make_test_df(test_gray_data)
    fu = FeatureUnion(transformer_list=feature_rule)
    Std = preprocessing.StandardScaler()

    X_test = fu.fit_transform(data_df)
    #X_test = Std.fit_transform(X_test)

    return X_test
Author: haisland0909 | Project: Denoising-Dirty-Documents | Lines: 10 | Source: repredict.py


Example 15: get_pca_transformer

def get_pca_transformer(train_x, train_y, n_components=-1):
    if n_components == -1:
        n_components = int(np.ceil(np.sqrt(train_x.shape[1])))

    pca = PCA(n_components=n_components)
    selection = SelectKBest(k=n_components // 2)  # integer division: SelectKBest needs an int k

    combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])

    return combined_features.fit(train_x, train_y)
Author: challenging | Project: kaggle | Lines: 10 | Source: pre_loader.py


Example 16: fit_logreg

 def fit_logreg(self):
     tokenize_sense = CachedFitTransform(Pipeline([
         ('tokenize', Map(compose(tokenize, normalize_special, unescape))),
         ('normalize', MapTokens(normalize_elongations)),
     ]), self.memory)
     features = FeatureUnion([
         # ('w2v_doc', ToCorporas(Pipeline([
         #     ('tokenize', MapCorporas(tokenize_sense)),
         #     ('feature', MergeSliceCorporas(Doc2VecTransform(CachedFitTransform(Doc2Vec(
         #         dm=0, dbow_words=1, size=100, window=10, hs=0, negative=5, sample=1e-3, min_count=1, iter=20,
         #         workers=16
         #     ), self.memory)))),
         # ]).fit([self.train_docs, self.unsup_docs[:10**6], self.val_docs, self.test_docs]))),
         # ('w2v_word_avg', Pipeline([
         #     ('tokenize', tokenize_sense),
         #     ('feature', Word2VecAverage(CachedFitTransform(Word2Vec(
         #         sg=1, size=100, window=10, hs=0, negative=5, sample=1e-3, min_count=1, iter=20, workers=16
         #     ), self.memory))),
         # ]).fit(self.unsup_docs[:10**6])),
         # ('w2v_word_avg_google', Pipeline([
         #     ('tokenize', tokenize_sense),
         #     ('feature', Word2VecAverage(joblib.load('data/google/GoogleNews-vectors-negative300.pickle'))),
         # ])),
         # ('w2v_word_norm_avg', Pipeline([
         #     ('tokenize', tokenize_sense),
         #     ('feature', Word2VecNormAverage(CachedFitTransform(Word2Vec(
         #         sg=1, size=100, window=10, hs=0, negative=5, sample=1e-3, min_count=1, iter=20, workers=16
         #     ), self.memory))),
         # ]).fit(self.unsup_docs[:10**6])),
         ('w2v_word_norm_avg_google', Pipeline([
             ('tokenize', tokenize_sense),
             ('feature', Word2VecNormAverage(joblib.load('data/google/GoogleNews-vectors-negative300.pickle'))),
         ])),
         # ('w2v_word_max', Pipeline([
         #     ('tokenize', tokenize_sense),
         #     ('feature', Word2VecMax(CachedFitTransform(Word2Vec(
         #         sg=1, size=100, window=10, hs=0, negative=5, sample=1e-3, min_count=1, iter=20, workers=16
         #     ), self.memory))),
         # ]).fit(self.unsup_docs[:10**6])),
         # ('w2v_word_max_google', Pipeline([
         #     ('tokenize', tokenize_sense),
         #     ('feature', Word2VecMax(joblib.load('data/google/GoogleNews-vectors-negative300.pickle'))),
         # ])),
         # ('w2v_word_inv', ToCorporas(Pipeline([
         #     ('tokenize', MapCorporas(tokenize_sense)),
         #     ('feature', MergeSliceCorporas(Word2VecInverse(CachedFitTransform(Word2Vec(
         #         sg=1, size=100, window=10, hs=0, negative=5, sample=0, min_count=1, iter=20, workers=16
         #     ), self.memory)))),
         # ]).fit([self.train_docs, self.unsup_docs[:10**5], self.val_docs, self.test_docs]))),
     ])
     classifier = LogisticRegression()
     with temp_log_level({'gensim.models.word2vec': logging.INFO}):
         classifier.fit(features.transform(self.train_docs), self.train_labels())
     estimator = Pipeline([('features', features), ('classifier', classifier)])
     return 'logreg({})'.format(','.join(name for name, _ in features.transformer_list)), estimator
Author: meshiguge | Project: senti | Lines: 55 | Source: senti_models.py


Example 17: test_feature_union

def test_feature_union():
    # basic sanity check for feature union
    iris = load_iris()
    X = iris.data
    X -= X.mean(axis=0)
    y = iris.target
    pca = RandomizedPCA(n_components=2, random_state=0)
    select = SelectKBest(k=1)
    fs = FeatureUnion([("pca", pca), ("select", select)])
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    assert_equal(X_transformed.shape, (X.shape[0], 3))

    # check if it does the expected thing
    assert_array_almost_equal(X_transformed[:, :-1], pca.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
            select.fit_transform(X, y).ravel())

    # test if it also works for sparse input
    # We use a different pca object to control the random_state stream
    fs = FeatureUnion([("pca", pca), ("select", select)])
    X_sp = sparse.csr_matrix(X)
    X_sp_transformed = fs.fit_transform(X_sp, y)
    assert_array_almost_equal(X_transformed, X_sp_transformed.toarray())

    # test setting parameters
    fs.set_params(select__k=2)
    assert_equal(fs.fit_transform(X, y).shape, (X.shape[0], 4))
Author: cdeil | Project: scikit-learn | Lines: 28 | Source: test_pipeline.py


Example 18: pca

def pca(x, y, test_x, n_features=-1):
    if n_features == -1:
        n_features = int(np.ceil(np.sqrt(x.shape[1])))

    pca = PCA(n_components=n_features)
    selection = SelectKBest(k=n_features // 2)  # integer division: SelectKBest needs an int k

    combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
    combined_features.fit(x, y)

    return combined_features.transform(x), combined_features.transform(test_x)
Author: challenging | Project: kaggle | Lines: 11 | Source: feature_engineering.py


Example 19: cv_score

def cv_score(train_df, MODEL):
    print("... start cross validation")

    fu_obj = FeatureUnion(transformer_list=features.feature_list)

    train_X = fu_obj.fit_transform(train_df)
    train_y = train_df["Sales"].as_matrix()  # use .values on pandas >= 1.0, where as_matrix was removed
    clf = GridSearchCV(estimator=clf_dict[MODEL]["clf"],
                       param_grid=clf_dict[MODEL]["paramteters"],  # sic: key spelling matches the project's clf_dict
                       n_jobs=-1, scoring=rmspe, cv=None)
    print(cross_val_score(clf, train_X, train_y, scoring=rmspe, cv=5, n_jobs=3))
Author: guruttosekai2011 | Project: Rossmann_Store_Sales | Lines: 11 | Source: prediction.py


Example 20: set_traindata

def set_traindata(df, key):

    fu = FeatureUnion(transformer_list=f.feature_transformer_rule)
    Std = preprocessing.StandardScaler()

    X = fu.fit_transform(df)
    y = np.concatenate(df["label"].apply(lambda x: x.flatten()))

    X = Std.fit_transform(X)

    return (X, y)
Author: haisland0909 | Project: Denoising-Dirty-Documents | Lines: 11 | Source: classify.py



Note: the sklearn.pipeline.FeatureUnion class examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors, and distribution and use should follow each project's License. Please do not repost without permission.

