• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python pipeline.make_union函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了 Python 中 sklearn.pipeline.make_union 函数的典型用法代码示例。如果您正苦于以下问题：Python make_union 函数的具体用法？Python make_union 怎么用？Python make_union 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了make_union函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test_make_union_kwargs

def test_make_union_kwargs():
    """Check that make_union accepts ``n_jobs`` and rejects other keywords."""
    decomposer = PCA(svd_solver='full')
    dummy = Transf()

    union_with_jobs = make_union(decomposer, dummy, n_jobs=3)
    plain_union = make_union(decomposer, dummy)

    # n_jobs must not alter the generated transformer list
    assert_equal(union_with_jobs.transformer_list, plain_union.transformer_list)
    assert_equal(3, union_with_jobs.n_jobs)

    # invalid keyword parameters should raise an error message
    unsupported = {'transformer_weights': {'pca': 10, 'Transf': 1}}
    assert_raise_message(
        TypeError,
        'Unknown keyword arguments: "transformer_weights"',
        make_union, decomposer, dummy, **unsupported
    )
开发者ID:lebigot,项目名称:scikit-learn,代码行数:12,代码来源:test_pipeline.py


示例2: get_results

def get_results(dataset):
    """Compare regression scores for several ways of handling missing values.

    Parameters
    ----------
    dataset : object exposing ``data`` and ``target`` attributes
        ``data`` is indexed as a 2-D array (``shape[0]`` samples,
        ``shape[1]`` features).

    Returns
    -------
    tuple of four ``(mean, std)`` pairs
        Cross-validation score statistics, in order: full data, zero
        imputation, mean imputation, chained imputation.

    Notes
    -----
    Relies on a module-level ``rng`` for shuffling and feature selection
    (presumably a ``numpy.random.RandomState`` -- confirm at the definition).
    """
    X_full, y_full = dataset.data, dataset.target
    n_samples = X_full.shape[0]
    n_features = X_full.shape[1]

    # Estimate the score on the entire dataset, with no missing values
    estimator = RandomForestRegressor(random_state=0, n_estimators=100)
    full_scores = cross_val_score(estimator, X_full, y_full,
                                  scoring='neg_mean_squared_error')

    # Add missing values in 75% of the lines
    missing_rate = 0.75
    n_missing_samples = int(np.floor(n_samples * missing_rate))
    # ``np.bool`` was only an alias of the builtin and has been removed from
    # NumPy; the builtin ``bool`` gives the same dtype on every version.
    missing_samples = np.hstack((np.zeros(n_samples - n_missing_samples,
                                          dtype=bool),
                                 np.ones(n_missing_samples,
                                         dtype=bool)))
    rng.shuffle(missing_samples)
    missing_features = rng.randint(0, n_features, n_missing_samples)

    # Estimate the score after replacing missing values by 0
    X_missing = X_full.copy()
    X_missing[np.where(missing_samples)[0], missing_features] = 0
    y_missing = y_full.copy()
    estimator = RandomForestRegressor(random_state=0, n_estimators=100)
    zero_impute_scores = cross_val_score(estimator, X_missing, y_missing,
                                         scoring='neg_mean_squared_error')

    # Estimate the score after imputation (mean strategy) of the missing values
    X_missing = X_full.copy()
    X_missing[np.where(missing_samples)[0], missing_features] = 0
    y_missing = y_full.copy()
    estimator = make_pipeline(
        make_union(SimpleImputer(missing_values=0, strategy="mean"),
                   MissingIndicator(missing_values=0)),
        RandomForestRegressor(random_state=0, n_estimators=100))
    mean_impute_scores = cross_val_score(estimator, X_missing, y_missing,
                                         scoring='neg_mean_squared_error')

    # Estimate the score after chained imputation of the missing values
    estimator = make_pipeline(
        make_union(ChainedImputer(missing_values=0, random_state=0),
                   MissingIndicator(missing_values=0)),
        RandomForestRegressor(random_state=0, n_estimators=100))
    chained_impute_scores = cross_val_score(estimator, X_missing, y_missing,
                                            scoring='neg_mean_squared_error')

    return ((full_scores.mean(), full_scores.std()),
            (zero_impute_scores.mean(), zero_impute_scores.std()),
            (mean_impute_scores.mean(), mean_impute_scores.std()),
            (chained_impute_scores.mean(), chained_impute_scores.std()))
开发者ID:lebigot,项目名称:scikit-learn,代码行数:51,代码来源:plot_missing_values.py


示例3: __init__

 def __init__(self, training_values=None, training_targets=None):
     """Set up the vectorizer and classifier, fitting when data is supplied.

     ``self.fit`` is called only when both ``training_values`` and
     ``training_targets`` are given (neither is ``None``).
     """
     # Set using parameter_search. TODO: review after updating
     # corpus.
     svc_options = dict(C=1, loss='squared_hinge', multi_class='ovr',
                        class_weight='balanced', tol=1e-6)

     self.vectorizer = make_union(TfidfVectorizer(), PostTransformer())
     self.classifier = svm.LinearSVC(**svc_options)

     nothing_to_fit = training_values is None or training_targets is None
     if not nothing_to_fit:
         self.fit(training_values, training_targets)
开发者ID:franciscocorrales,项目名称:LearnProgrammingBot,代码行数:7,代码来源:main.py


示例4: PipelineTelstra

def PipelineTelstra(Classifier):
    """Build the feature-union + classifier pipeline.

    ``Classifier`` is called with no arguments to create the final step.
    The 'location' column goes through a one-hot encoder; the remaining
    columns each go through a ``DictVectorizer``.
    """
    location_branch = make_pipeline(
        DataSpliterTrans(cols='location', transp=True),
        preprocessing.OneHotEncoder(handle_unknown='ignore'),
    )

    # Order matters: it fixes the column order of the concatenated features.
    dict_columns = ('event_type', 'severity_type', 'resource_type',
                    'volume', 'log_feature')
    dict_branches = [
        make_pipeline(DataSpliterTrans(cols=column, matrix=True),
                      DictVectorizer())
        for column in dict_columns
    ]

    features = make_union(location_branch, *dict_branches)
    pipeline = make_pipeline(features, Classifier())
    print('pipeline done.')
    return pipeline
开发者ID:diazcelsa,项目名称:kaggle,代码行数:32,代码来源:data_modifier.py


示例5: __init__

    def __init__(self, transforms):
        """Create the feature pipeline and the logistic-regression classifier.

        Each entry of ``transforms`` is called with no arguments to obtain a
        transformer; the results are combined with ``make_union``.
        """
        self.transforms = transforms

        feature_union = make_union(*(factory() for factory in transforms))
        self.pipeline = make_pipeline(feature_union)
        self.classifier = LogisticRegression(penalty="l1", class_weight="auto")
开发者ID:willferreira,项目名称:mscproject,代码行数:7,代码来源:lr_predictors.py


示例6: preprocess

    def preprocess(self,any_set,is_train):
        """Vectorize ``any_set`` with TF-IDF features of two text columns.

        When ``is_train`` is true, a new union of two TF-IDF pipelines (one
        selecting 'title', one selecting 'description') is built, stored on
        ``self.pipeline``, fitted on ``any_set`` and its transform returned.
        Otherwise the previously stored ``self.pipeline`` is used to transform
        ``any_set``; this requires an earlier call with ``is_train`` true.
        """
        if not is_train:
            return self.pipeline.transform(any_set)

        # Raw strings keep the regexes readable and avoid the invalid
        # escape sequence the old non-raw '\w+|[,.?!;]' literal contained.
        # Only 'match_word1' is used below; the others are kept as a catalogue.
        dico_pattern = {
            'match_lowercase_only': r'\b[a-z]+\b',
            'match_word': r'\w{2,}',
            'match_word1': r'(?u)\b\w+\b',
            'match_word_punct': r'\w+|[,.?!;]',
            'match_NNP': r'\b[A-Z][a-z]+\b|\b[A-Z]+\b',
            'match_punct': "[,.?!;'-]",
        }

        # Both columns use exactly the same vectorizer configuration.
        tfidf_options = dict(
            lowercase=True, stop_words='english',
            token_pattern=dico_pattern["match_word1"],
            ngram_range=(1, 2), max_df=1.0, min_df=2, max_features=None,
            vocabulary=None, binary=True, norm='l2',
            use_idf=True, smooth_idf=True, sublinear_tf=True,
        )
        tfv_title = TfidfVectorizer(**tfidf_options)
        tfv_desc = TfidfVectorizer(**tfidf_options)

        title_pipe = make_pipeline(ColumnSelector(key='title'), tfv_title)
        desc_pipe = make_pipeline(ColumnSelector(key='description'), tfv_desc)
        self.pipeline = make_union(title_pipe, desc_pipe)

        return self.pipeline.fit_transform(any_set)
开发者ID:Cadene,项目名称:DataScienceGame,代码行数:28,代码来源:Predictor.py


示例7: test_make_union

def test_make_union():
    """make_union names each step after its lower-cased class name."""
    decomposer = PCA()
    dummy = TransfT()
    union = make_union(decomposer, dummy)

    step_names = tuple(name for name, _ in union.transformer_list)
    step_objects = tuple(step for _, step in union.transformer_list)

    assert_equal(step_names, ("pca", "transft"))
    assert_equal(step_objects, (decomposer, dummy))
开发者ID:Givonaldo,项目名称:scikit-learn,代码行数:7,代码来源:test_pipeline.py


示例8: get_extra_features

def get_extra_features(args):
    """Fit the feature pipeline and return extended train/test matrices.

    The pipeline selects features with an extra-trees model, standardizes
    them and concatenates PCA, FastICA and KMeans transforms. The target
    column is picked with ``args.yix``. The fitted pipeline and the kept
    feature indices are written under the module-level ``save_dir``.

    Returns
    -------
    (X_train_ext, X_test_ext)
        Pipeline output stacked horizontally with the first 500 selected
        raw columns, for train and test data respectively.
    """
    selector_forest = ExtraTreesClassifier(n_estimators=2000,
                                           criterion='entropy',
                                           max_features='sqrt',
                                           max_depth=6,
                                           min_samples_split=8,
                                           n_jobs=-1,
                                           bootstrap=True,
                                           oob_score=True,
                                           verbose=1,
                                           class_weight='balanced')
    decomposers = (
        PCA(n_components=200),
        FastICA(n_components=200, max_iter=1000),
        KMeans(n_clusters=200, n_init=20, max_iter=1000),
    )

    pipeline = make_pipeline(selectKFromModel(selector_forest, k=1000),
                             StandardScaler(),
                             make_union(*decomposers))

    X_train = np.load('feature/1_100/X_train.npy')
    y_train = np.load('feature/1_100/y_train.npy')
    X_test = np.load('feature/1_100/X_test.npy')

    pipeline.fit(X_train, y_train[:, args.yix])
    selector = pipeline.steps[0][1]
    sel_ixs = selector.indices[:500]

    def _extend(matrix):
        # transformed features first, then the selected raw columns
        return np.hstack((pipeline.transform(matrix), matrix[:, sel_ixs]))

    X_train_ext = _extend(X_train)
    X_test_ext = _extend(X_test)

    pipe_file = path.join(save_dir, 'pipe.pkl')
    with open(pipe_file, 'wb') as f_pipe:
        pickle.dump(pipeline, f_pipe)

    np.save(path.join(save_dir, 'selix.npy'), sel_ixs)
    return X_train_ext, X_test_ext
开发者ID:jingxiang-li,项目名称:kaggle-yelp,代码行数:33,代码来源:feature_selection.py


示例9: test_make_union

def test_make_union():
    """make_union should auto-name its steps and keep the given objects."""
    full_pca = PCA(svd_solver='full')
    dummy = Transf()
    union = make_union(full_pca, dummy)

    # transpose the (name, transformer) pairs into two tuples
    unzipped = list(zip(*union.transformer_list))
    names, transformers = unzipped

    assert_equal(names, ("pca", "transf"))
    assert_equal(transformers, (full_pca, dummy))
开发者ID:dsquareindia,项目名称:scikit-learn,代码行数:7,代码来源:test_pipeline.py


示例10: get_pipeline

def get_pipeline(fsmethods, clfmethod):
    """Return a sklearn Pipeline chaining feature selection with `clfmethod`.

    A list of feature-selection methods is joined with `make_union`; a single
    object exposing a `transform` attribute is used directly as the first step.

    Parameters
    ----------
    fsmethods: list of estimators, or a single transformer
        Estimators to be joined in a union, or one object with a
        `transform` attribute.

    clfmethod: classifier
        The last step of the pipeline; it may be any type of estimator
        (transformer, classifier, etc.).

    Returns
    -------
    pipe
        The result of `make_pipeline(<feature step>, clfmethod)`.

    Raises
    ------
    ValueError
        If `fsmethods` is neither a list nor an object with a `transform`
        attribute.
    """
    if isinstance(fsmethods, list):
        feat_union = make_union(*fsmethods)
    elif hasattr(fsmethods, 'transform'):
        feat_union = fsmethods
    else:
        raise ValueError('fsmethods expected to be either a list or a transformer method')

    # Every path above either sets a feature step or raises, so the former
    # "no feature step" fallback could never run and has been dropped.
    return make_pipeline(feat_union, clfmethod)
开发者ID:Neurita,项目名称:darwin,代码行数:32,代码来源:sklearn_utils.py


示例11: test_missing_indicator_with_imputer

def test_missing_indicator_with_imputer(X, missing_values, X_trans_exp):
    """Union of imputed values and missing-indicator must match expectation."""
    imputer = SimpleImputer(missing_values=missing_values,
                            strategy='most_frequent')
    indicator = MissingIndicator(missing_values=missing_values)

    union = make_union(imputer, indicator)
    assert_array_equal(union.fit_transform(X), X_trans_exp)
开发者ID:psorianom,项目名称:scikit-learn,代码行数:7,代码来源:test_impute.py


示例12: __init__

    def __init__(self, classifier="sgd", classifier_args=None, lowercase=True,
                 text_replacements=None, map_to_synsets=False, binary=False,
                 min_df=0, ngram=1, stopwords=None, limit_train=None,
                 map_to_lex=False, duplicates=False):
        """Assemble the text feature pipeline and instantiate the classifier.

        ``classifier`` is a key into the module-level ``_valid_classifiers``
        mapping; ``classifier_args`` (if given) is expanded as keyword
        arguments when the classifier is created. Synset and lexicon
        extractors are added to the union only when the matching flag is set.
        """
        self.limit_train = limit_train
        self.duplicates = duplicates

        # Text preparation steps, applied before feature extraction
        steps = [ExtractText(lowercase)]
        if text_replacements:
            steps.append(ReplaceText(text_replacements))

        # Options shared by every extractor; plain text also gets stopwords
        shared_options = dict(binary=binary, min_df=min_df, ngram=ngram)
        text_options = dict(shared_options, stopwords=stopwords)

        extractors = [build_text_extraction(**text_options)]
        if map_to_synsets:
            extractors.append(build_synset_extraction(**shared_options))
        if map_to_lex:
            extractors.append(build_lex_extraction(**shared_options))
        steps.append(make_union(*extractors))

        # Building classifier
        estimator_factory = _valid_classifiers[classifier]
        estimator_kwargs = {} if classifier_args is None else classifier_args
        estimator = estimator_factory(**estimator_kwargs)

        self.pipeline = make_pipeline(*steps)
        self.classifier = estimator
开发者ID:jthang,项目名称:KaggleLab,代码行数:29,代码来源:predictor.py


示例13: pca_kpca

def pca_kpca(train_data, labels):
    """Fit a union of PCA, TruncatedSVD and KernelPCA on the training data.

    Parameters
    ----------
    train_data : data passed to ``fit`` as the samples.
    labels : targets passed to ``fit`` alongside ``train_data``.

    Returns
    -------
    The fitted union object returned by ``make_union``.
    """
    # make_union already returns the union with auto-generated step names.
    # Wrapping that result in FeatureUnion(...) again treated the union itself
    # as a list of (name, transformer) pairs and failed at fit time.
    combined = make_union(PCA(), TruncatedSVD(), KernelPCA())
    combined.fit(train_data, labels)  # or combined.fit_transform(train_data, labels)

    return combined
开发者ID:kirk86,项目名称:Task-1,代码行数:7,代码来源:misc.py


示例14: __init__

    def __init__(self, **config):
        """Build the vectorization pipeline and classifier from ``config``.

        Every name in the module-level ``_configuration_options`` must be
        present in ``config``; ``config["classifier"]`` must be a key of
        ``_valid_classifiers``. A ``ValueError`` is raised otherwise.
        """
        # Validate options are present (report the first one missing)
        absent = [name for name in _configuration_options
                  if name not in config]
        if absent:
            raise ValueError(
                "Missing configuration option {!r}".format(absent[0]))

        # Feature extraction: sparse features are turned into a dense
        # feature through a classifier, then joined with the dense ones
        sparse_spec = parse_features(config["sparse_features"])
        sparse_branch = make_pipeline(Vectorizer(sparse_spec, sparse=True),
                                      ClassifierAsFeature())
        dense_spec = parse_features(config["dense_features"])
        dense_branch = Vectorizer(dense_spec, sparse=False)
        vectorization = make_union(sparse_branch, dense_branch)

        # Classifier
        try:
            classifier_factory = _valid_classifiers[config["classifier"]]
        except KeyError:
            raise ValueError(
                "Unknown classification algorithm {!r}".format(
                    config["classifier"]))
        estimator = classifier_factory(**config["classifier_args"])

        self.pipeline = make_pipeline(vectorization, StandardScaler())
        self.classifier = estimator
开发者ID:52nlp,项目名称:iepy,代码行数:25,代码来源:relation_extraction_classifier.py


示例15: fit

 def fit(self, X, y):
     """Fit one model per subject split, then a model stacked on top of them.

     The last column of ``X`` is split off and handed to ``subject_splitter``
     as ``sids``; it is not used as a feature. Returns ``self``.
     """
     # Filthy hack
     sids = X[:, -1]
     features = X[:, :-1]

     all_pipelines = []
     for X_s, y_s in subject_splitter(features, y, sids):
         per_subject = make_pipeline(LogisticRegressionCV())
         all_pipelines.append(per_subject.fit(X_s, y_s))

     wrapped = [FeatureUnionWrapper(fitted) for fitted in all_pipelines]
     stacked = make_pipeline(make_union(*wrapped), LogisticRegressionCV())
     self.clf_ = stacked.fit(features, y)
     return self
开发者ID:kastnerkyle,项目名称:kaggle-decmeg2014,代码行数:8,代码来源:minimal_clf.py


示例16: get_scores_for_imputer

def get_scores_for_imputer(imputer, X_missing, y_missing):
    """Cross-validate the module-level REGRESSOR behind the given imputer.

    The imputer output is concatenated with a missing-value indicator
    (missing values encoded as 0) before reaching the regressor. Returns
    the scores produced by ``cross_val_score`` with ``cv=N_SPLITS``.
    """
    features = make_union(imputer, MissingIndicator(missing_values=0))
    estimator = make_pipeline(features, REGRESSOR)
    return cross_val_score(estimator, X_missing, y_missing,
                           scoring='neg_mean_squared_error',
                           cv=N_SPLITS)
开发者ID:allefpablo,项目名称:scikit-learn,代码行数:8,代码来源:plot_missing_values.py


示例17: make_pipe

def make_pipe(classifier):
    """Return a pipeline: token counts + hand-written features -> classifier."""
    token_counts = CountVectorizer()
    feature_functions = (
        longest_run_of_capital_letters_feature,
        percent_character_feature,
        percent_character_combinations,
        longest_run_of_character_feature,
        character_combinations_binary,
    )
    custom_features = FunctionFeaturizer(*feature_functions)

    language_featurizer = make_union(token_counts, custom_features)
    return make_pipeline(language_featurizer, classifier)
开发者ID:jdhiggins,项目名称:programming-language-classifier,代码行数:9,代码来源:feature_vectorizer.py


示例18: _create_feature_union

def _create_feature_union(features):
    """
    Build a union with one extractor -> vectorizer pipeline per feature.
    Each "feature" is a 3-tuple: (name, feature_extractor, vectorizer);
    the name is not used here.
    """
    branches = []
    for _name, extractor, vectorizer in features:
        branches.append(make_pipeline(extractor, vectorizer))
    return make_union(*branches)
开发者ID:rolando-contribute,项目名称:TeamHG-Memex-Formasaurus,代码行数:9,代码来源:model.py


示例19: __init__

    def __init__(self, transforms, n_estimators=2000, criterion='gini', min_samples_leaf=2, n_jobs=-1):
        """Create the feature pipeline and the random-forest classifier.

        Parameters
        ----------
        transforms : iterable of callables
            Each is called with no arguments to obtain a transformer; the
            results are combined with ``make_union``.
        n_estimators, criterion, min_samples_leaf, n_jobs
            Stored on the instance and forwarded to
            ``RandomForestClassifier``.
        """
        self.transforms = transforms
        self.n_estimators = n_estimators
        self.criterion = criterion
        self.min_samples_leaf = min_samples_leaf
        self.n_jobs = n_jobs

        union = make_union(*[t() for t in transforms])
        self.pipeline = make_pipeline(union)

        # Forward n_jobs instead of hard-coding -1 so the argument is honoured
        # (the default is still -1). Keywords are used throughout because
        # newer scikit-learn releases reject positional hyper-parameters.
        self.classifier = RandomForestClassifier(n_estimators=n_estimators,
                                                 criterion=criterion,
                                                 min_samples_leaf=min_samples_leaf,
                                                 n_jobs=n_jobs)
开发者ID:paris5020,项目名称:athene_system,代码行数:12,代码来源:rf_predictors.py


示例20: create_input_transformer

def create_input_transformer(fields, vec_name):
    """Return a union with one select -> vectorize -> scale branch per field.

    Each entry of ``fields`` must provide the keys 'name' and 'scale'; the
    field type is looked up through ``processed_db.get_field_type``.
    """
    def _branch(field):
        name, scale = field['name'], field['scale']
        kind = processed_db.get_field_type(name)
        return make_pipeline(
            ItemSelector(name),          # select the correct column
            Vectorizer(vec_name, kind),  # vectorize (depending on str/numeric input)
            Scaler(scale),               # scale column based on user input
        )

    return make_union(*[_branch(field) for field in fields])
开发者ID:Lilykos,项目名称:clusterix,代码行数:15,代码来源:utils.py



注:本文中的sklearn.pipeline.make_union函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python pipeline.FeatureUnion类代码示例发布时间:2022-05-27
下一篇:
Python pipeline.make_pipeline函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap