• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python tree.DecisionTreeClassifier类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sklearn.tree.DecisionTreeClassifier的典型用法代码示例。如果您正苦于以下问题:Python DecisionTreeClassifier类的具体用法?Python DecisionTreeClassifier怎么用?Python DecisionTreeClassifier使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了DecisionTreeClassifier类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: decision_tree_entropy

def decision_tree_entropy(training_data):
    clf = DecisionTreeClassifier(criterion="entropy",random_state=0)
    clf.fit(training_data[0], training_data[1])
    #with open("/media/deeksha/e/Deeksha/Dropbox/Coursework/MachineLearning/HW3/entropy.dot", 'w') as f:
    #    f = tree.export_graphviz(clf, out_file=f)
    print "entropy:Number of Nodes", clf.tree_.node_count
    return clf
开发者ID:deekshachugh,项目名称:MachineLearning,代码行数:7,代码来源:DecisionTreeUsingGiniand+Entropy.py


示例2: __init__

class Transformer:
    def __init__(self, use_PCA=True):
        self._clf = DecisionTreeClassifier(min_samples_leaf=10)
        self._idx = None
        self._scaler = StandardScaler()
        self._trans = PCA('mle')
        self._use_PCA = use_PCA

    def fit(self, X, y):
        X = np.array(X)
        self._clf.fit(X, y)

        self._idx = filter(lambda x: self._clf.feature_importances_[x] > 0, \
                range(len(self._clf.feature_importances_)))

        new_set = [X[i][self._idx] for i in xrange(len(X))]

#        new_set = self._scaler.fit_transform(new_set)

        if self._use_PCA:
            new_set = self._trans.fit_transform(new_set)
        return new_set

    def transform(self, features):
        features = features[self._idx]
#        features = self._scaler.transform(features.astype(float))
        if self._use_PCA:
            features = self._trans.transform(features)
        return features
开发者ID:ItsLastDay,项目名称:Opinion-mining-from-reviews,代码行数:29,代码来源:solution.py


示例3: quize1

def quize1(data):
# 1. Select count of neighbors.Загрузите выборку из файла titanic.csv с помощью пакета Pandas.
# 2.Оставьте в выборке четыре признака: класс пассажира (Pclass), цену билета (Fare), возраст пассажира (Age) и его пол (Sex).
# 3.Обратите внимание, что признак Sex имеет строковые значения.
# 4.Выделите целевую переменную — она записана в столбце Survived.
# 5.В данных есть пропущенные значения — например, для некоторых пассажиров неизвестен их возраст.
# 6.Такие записи при чтении их в pandas принимают значение nan.
# Найдите все объекты, у которых есть пропущенные признаки, и удалите их из выборки.
# Обучите решающее дерево с параметром random_state=241 и остальными параметрами по умолчанию.
# Вычислите важности признаков и найдите два признака с
# наибольшей важностью. Их названия будут ответами для данной задачи
# (в качестве ответа укажите названия признаков через запятую или пробел, порядок не важен).
    dataF = data[['Pclass', 'Fare', 'Age', 'Sex','Survived']]
    dataF = dataF.dropna()
    Y = dataF['Survived']
    dataF = dataF[['Pclass', 'Fare', 'Age', 'Sex']]
    clf = DecisionTreeClassifier(random_state=241)
    dataF.loc[dataF['Sex'] != 'male', 'Sex'] = 0
    dataF.loc[dataF['Sex'] == 'male', 'Sex'] = 1
    print (dataF)
    clf.fit(dataF, Y)
    importances = clf.feature_importances_
    print(importances)
    # d = zip(dataF.columns, clf.feature_importanc_)
    # print(d)
    return
开发者ID:BlinJin,项目名称:Machine-Learning,代码行数:26,代码来源:decision_trees.py


示例4: evaluateDecisionTree

def evaluateDecisionTree(train_x,train_y,test_x,test_y):
    clf = DecisionTreeClassifier(criterion='entropy',min_samples_leaf=5,max_depth=20)
    clf.fit(train_x,train_y)
    p = clf.predict_proba(test_x)[:,1]
    auc = roc_auc_score(test_y,p)
    plotAUC(test_y,clf.predict_proba(test_x)[:,1],'DT')
    return auc
开发者ID:ds-ga-1001-final,项目名称:project,代码行数:7,代码来源:decision_tree.py


示例5: decision_tree

def decision_tree(train_bow,train_labels,test_bow,test_labels,bow_indexes):
    print("Training decision tree")
    dt_classifier=DecisionTreeClassifier()

    dt_classifier.fit(train_bow,train_labels)
    print("Testing decision tree")
    test(dt_classifier,"dt",test_bow,test_labels,bow_indexes)
开发者ID:wangk1,项目名称:research,代码行数:7,代码来源:classifiers_func.py


示例6: train_adaboost

def train_adaboost(features, labels, learning_rate, n_lab, n_runs, n_estim, n_samples):
    uniqLabels = np.unique(labels)
    print 'Taking ', str(n_lab), ' labels'
    uniqLabels = uniqLabels[:n_lab]
    used_labels = uniqLabels
    pbar = start_progressbar(len(uniqLabels), 'training adaboost for %i labels' %len(uniqLabels))
    allLearners = []
    for yy ,targetLab in enumerate(uniqLabels):
        runs=[]
        for rrr in xrange(n_runs):
            #import ipdb;ipdb.set_trace()
            feats,labs = get_binary_sets(features, labels, targetLab, n_samples)
            #print 'fitting stump'
            #import ipdb;ipdb.set_trace()
            baseClf = DecisionTreeClassifier(max_depth=4, min_samples_leaf=10, min_samples_split=10)
            baseClf.fit(feats, labs)
            ada_real = AdaBoostClassifier( base_estimator=baseClf, learning_rate=learning_rate,
                                      n_estimators=n_estim,
                                      algorithm="SAMME.R")
            #import ipdb;ipdb.set_trace()
            runs.append(ada_real.fit(feats, labs))
        allLearners.append(runs)
        update_progressbar(pbar, yy)
    end_progressbar(pbar)
    
    return allLearners, used_labels
开发者ID:aarslan,项目名称:action_rec,代码行数:26,代码来源:classifier_wrappers.py


示例7: test_importances

def test_importances():
    """Check variable importances."""
    X, y = datasets.make_classification(n_samples=2000,
                                        n_features=10,
                                        n_informative=3,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=0)

    for name, Tree in CLF_TREES.items():
        clf = Tree(random_state=0)

        clf.fit(X, y)
        importances = clf.feature_importances_
        n_important = np.sum(importances > 0.1)

        assert_equal(importances.shape[0], 10, "Failed with {0}".format(name))
        assert_equal(n_important, 3, "Failed with {0}".format(name))

        X_new = clf.transform(X, threshold="mean")
        assert_less(0, X_new.shape[1], "Failed with {0}".format(name))
        assert_less(X_new.shape[1], X.shape[1], "Failed with {0}".format(name))

    # Check on iris that importances are the same for all builders
    clf = DecisionTreeClassifier(random_state=0)
    clf.fit(iris.data, iris.target)
    clf2 = DecisionTreeClassifier(random_state=0,
                                  max_leaf_nodes=len(iris.data))
    clf2.fit(iris.data, iris.target)

    assert_array_equal(clf.feature_importances_,
                       clf2.feature_importances_)
开发者ID:Carol-Hu,项目名称:scikit-learn,代码行数:33,代码来源:test_tree.py


示例8: MultEstimator

class MultEstimator(BaseEstimator):
    def __init__(self, categories):
        self.categories = categories

    def fit(self, X, y, **params):
        self.models = {_: None for _ in self.categories}
        self.tot_model = DecisionTreeClassifier(max_depth=8, min_samples_leaf=100)
        categ = X[:, -1]
        data = X[:, :-1]
        self.tot_model.fit(data, y)
        for c in self.models.keys():
            mask = categ == c
            m = DecisionTreeClassifier(max_depth=8, min_samples_leaf=100)
            m.fit(data[mask], y[mask])
            self.models[c] = m

    def predict(self, X):
        categ = X[:, -1]
        data = X[:, :-1]
        p = self.tot_model.predict(data)
        for c in self.models.keys():
            mask = categ == c
            if mask.any():
                p[mask] = self.models[c].predict(data[mask])
        return p

    def predict_proba(self, X):
        categ = X[:, -1]
        data = X[:, :-1]
        p = self.tot_model.predict_proba(data)
        for c in self.models.keys():
            mask = categ == c
            if mask.any():
                p[mask] = self.models[c].predict_proba(data[mask])
        return p
开发者ID:alfiya400,项目名称:kaggle-avitoDuplicatesDetection,代码行数:35,代码来源:model.py


示例9: main

def main(percentage):
    """Given a percentage for splitting the dataset, fit the training set and apply the rest as a test set."""
    df = pd.read_csv('cellStrength.log')
    df.drop('SSID', 1, inplace=True)
    processed = preprocess(df)
    location_col = processed[0].shape[1]-4

    hash_to_location = {y:x for x,y in processed[1].items()}

    df2, targets = encode_target(processed[0], location_col)
    msk = np.random.rand(len(df)) < percentage
    test = df2[~msk].copy()
    train = df2[msk].copy()

    open('golden.csv', 'w').write(','.join([hash_to_location[p] for p in test['Target'].tolist()]) + '\n' )

    test.drop(186, 1, inplace=True)
    test.drop('Target', 1, inplace=True)

    features = list(df2.columns[:location_col]) + list(df2.columns[location_col+1:-1])

    y = train['Target']
    X = train[features]

    dt = DecisionTreeClassifier(min_samples_split=3, random_state=99)
    try:
        dt.fit(X, y)
    except ValueError:
        return
    predictions = dt.predict(test).tolist()
    open('golden.csv', 'a').write(','.join([hash_to_location[p] for p in predictions]))

    # get_code(dt, features, targets)
    return get_accuracy('golden.csv')
开发者ID:elahi-arman,项目名称:Python,代码行数:34,代码来源:router_association.py


示例10: programmer_2

def programmer_2():
    datafile = 'data/model.xls'
    data = pd.read_excel(datafile)
    data = data.as_matrix()
    shuffle(data)  # 随机打乱数据

    # 设置训练数据比8:2
    p = 0.8
    train = data[:int(len(data) * p), :]
    test = data[int(len(data) * p):, :]

    # 构建CART决策树模型
    treefile = 'tmp/tree.pkl'
    tree = DecisionTreeClassifier()
    tree.fit(train[:, :3], train[:, 3])

    joblib.dump(tree, treefile)

    cm_plot(train[:, 3], tree.predict(train[:, :3])).show()  # 显示混淆矩阵可视化结果
    # 注意到Scikit-Learn使用predict方法直接给出预测结果。

    fpr, tpr, thresholds = roc_curve(
        test[:, 3], tree.predict_proba(test[:, :3])[:, 1], pos_label=1)
    plt.plot(fpr, tpr, linewidth=2, label='ROC of CART', color='green')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    # 设定边界范围
    plt.ylim(0, 1.05)
    plt.xlim(0, 1.05)
    plt.legend(loc=4)
    plt.show()
    print(thresholds)
开发者ID:Ctipsy,项目名称:python_data_analysis_and_mining_action,代码行数:32,代码来源:code.py


示例11: test_graphviz_errors

def test_graphviz_errors():
    # Check for errors of export_graphviz
    clf = DecisionTreeClassifier(max_depth=3, min_samples_split=2)

    # Check not-fitted decision tree error
    out = StringIO()
    assert_raises(NotFittedError, export_graphviz, clf, out)

    clf.fit(X, y)

    # Check if it errors when length of feature_names
    # mismatches with number of features
    message = ("Length of feature_names, "
               "1 does not match number of features, 2")
    assert_raise_message(ValueError, message, export_graphviz, clf, None,
                         feature_names=["a"])

    message = ("Length of feature_names, "
               "3 does not match number of features, 2")
    assert_raise_message(ValueError, message, export_graphviz, clf, None,
                         feature_names=["a", "b", "c"])

    # Check class_names error
    out = StringIO()
    assert_raises(IndexError, export_graphviz, clf, out, class_names=[])

    # Check precision error
    out = StringIO()
    assert_raises_regex(ValueError, "should be greater or equal",
                        export_graphviz, clf, out, precision=-1)
    assert_raises_regex(ValueError, "should be an integer",
                        export_graphviz, clf, out, precision="1")
开发者ID:Lavanya-Basavaraju,项目名称:scikit-learn,代码行数:32,代码来源:test_export.py


示例12: decision_trees

def decision_trees(features, labels):
    classifier = DecisionTreeClassifier(random_state=0, criterion="entropy")
    classifier.fit(features, labels)
    scores = cross_validation.cross_val_score(
        classifier, features, labels, cv=10, score_func=metrics.precision_recall_fscore_support
    )
    print_table("Decision Trees", numpy.around(numpy.mean(scores, axis=0), 2))
开发者ID:pelluch,项目名称:data-mining,代码行数:7,代码来源:main.py


示例13: text_learning_experiment

def text_learning_experiment(words_to_remove=[]):
    from_sara  = open("../text_learning/from_sara.txt", "r")
    from_chris = open("../text_learning/from_chris.txt", "r")
    word_data, authors = vectorize_emails(from_sara, from_chris, max_emails=300, words_to_remove=words_to_remove)
    features_train, features_test, labels_train, labels_test = \
        cross_validation.train_test_split(word_data, authors, test_size=0.1, random_state=42)
    vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5,
                                 stop_words='english')
    features_train = vectorizer.fit_transform(features_train)
    features_test  = vectorizer.transform(features_test).toarray()

    features_train = features_train[:150].toarray()
    labels_train   = labels_train[:150]

    clf = DecisionTreeClassifier()
    clf.fit(features_train, labels_train)
    predict_train = clf.predict(features_train)
    predict_test = clf.predict(features_test)
    print "train acc:", accuracy_score(labels_train, predict_train)
    print "test acc: ", accuracy_score(labels_test, predict_test)
    feature_index = np.argmax(clf.feature_importances_)
    feature_importance = clf.feature_importances_[feature_index]
    feature_name = vectorizer.get_feature_names()[feature_index]
    print "Most important feature, and relative importance:", feature_name, ":", feature_importance
    return feature_name, feature_importance
开发者ID:andrei-iusan,项目名称:ud120-projects,代码行数:25,代码来源:poi_id.py


示例14: train_dtc

def train_dtc(X, y):
    """
    Create and train the Decision Tree Classifier.
    """
    dtc = DecisionTreeClassifier()
    dtc.fit(X, y)
    return dtc
开发者ID:texaspandaa,项目名称:Text-Mining,代码行数:7,代码来源:1.py


示例15: decision_tree_prediction

def decision_tree_prediction(features_train, labels_train, features_test, ids):

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(features_train, labels_train, random_state=1301, stratify=labels_train, test_size=0.3)

    clf = DecisionTreeClassifier(criterion='gini',
                                 min_samples_split=10,
                                 max_depth=10,
                                 max_leaf_nodes=16,
                                 max_features=2)


    #clf_acc = clf.fit(X_train, y_train)
    # print(clf.best_estimator_)
    #feature_importance = clf.feature_importances_
    #print (feature_importance)

    #pred = clf_acc.predict_proba(X_test)[:,1]
    #print (y_test, pred)
    # acc = accuracy_score(y_test, pred)
    # print ("Acc {}".format(acc))

    clf = clf.fit(features_train, labels_train)

    pred = clf.predict_proba(features_test)[:,1]

    predictions_file = open("data/canivel_decision_tree.csv", "wb")
    predictions_file_object = csv.writer(predictions_file)
    predictions_file_object.writerow(["ID", "TARGET"])
    predictions_file_object.writerows(zip(ids, pred))
    predictions_file.close()
开发者ID:canivel,项目名称:Kaggle-Santander,代码行数:30,代码来源:regular_classifiers.py


示例16: main

def main():
    data = run_game()

    clf = DecisionTreeClassifier(criterion='entropy')

    game_data = [[i[0], i[1]] for i in data]
    profits = [i[2] for i in data]

    clf.fit(game_data, profits)

    with open('tree.dot', 'w') as dotfile:
        export_graphviz(
            clf,
            dotfile,
            feature_names=['coin', 'bet']
        )

    predictions_lose1 = [clf.predict([0, 0]) for x in xrange(100)]
    predictions_lose2 = [clf.predict([0, 1]) for x in xrange(100)]
    predictions_win = [clf.predict([1, 1]) for x in xrange(100)]

    print 'All these profit predictions should be zero:'
    print predictions_lose1
    print 'Accuracy was', calculate_accuracy(predictions_lose1, np.array([0]))

    print 'All these profit predictions should be zero:'
    print predictions_lose2
    print 'Accuracy was', calculate_accuracy(predictions_lose2, np.array([0]))

    print 'All these profit predictions should be two:'
    print predictions_win
    print 'Accuracy was', calculate_accuracy(predictions_win, np.array([2]))
开发者ID:kimmobrunfeldt,项目名称:machine-learning,代码行数:32,代码来源:main.py


示例17: buildTree

def buildTree(options, treefile, dataFile = None):
    dt = loadTree(treefile)
    if dt is not None:
        return dt
    if dataFile is None:
        raise ValueError("No data file specified")

    dt = DecisionTreeClassifier(min_samples_split=20, random_state=99)
    files = []
    featureFrames = []
    targetFrames = []
    if os.path.isdir(dataFile):
        files = getFiles(dataFile, ".csv")
    else:
        files.append(dataFile)
    for _file in files:
        print("Loading data %s" % _file)
        (featureValues, targetValues, features, df) = loadData(_file, options)
        featureFrames.append(featureValues)
        targetFrames.append(targetValues)
    dt.fit(pd.concat(featureFrames), pd.concat(targetFrames))
    saveTree(treefile, dt)
    print("Building graph")
    visualize_tree(treefile, dt, features)
    return dt
开发者ID:gbrian,项目名称:naive-machine-learning,代码行数:25,代码来源:searchml.py


示例18: main

def main(args):
    exec "import main.pandas_talib.sig_%s as conf" % args.signame
    build.work2(20, 'sp500Top50', args.signame)
    df = base.get_merged(conf.__name__, yeod.get_sp500Top50())
    df.to_csv("ta.csv")

    tree = DecisionTreeClassifier() 
    
    feat_names = base.get_feat_names(df)

    dfTrain = df[(df.date>='1970-01-01') & (df.date <='2009-12-31')]
    npTrainFeat = dfTrain.loc[:,feat_names].values.copy()
    npTrainLabel = dfTrain.loc[:,"label5"].values.copy()
    npTrainLabel[npTrainLabel >  1.0] = 1
    npTrainLabel[npTrainLabel <  1.0] = 0

    tree.fit(npTrainFeat, npTrainLabel)
    joblib.dump(tree, "tree.pkl", compress = 3)
    
    dfTest = df[(df.date>='2010-01-01') & (df.date <='2099-12-31')]
    npTestFeat = dfTest.loc[:, feat_names].values.copy()
    
    npPred = tree.predict_proba(npTestFeat)

    dfTest.loc[:,"pred"] = npPred[:,1]
    
    print dfTest['pred'].head()

    dfPos = dfTest[ dfTest['pred'] > 0.55 ]
    print 1.0 * len(dfPos[dfPos['label5']>1])  / len(dfPos)
    print 1.0 * len(dfTest[dfTest['label5']>1])  / len(dfTest)
开发者ID:hongbin0908,项目名称:pytrade,代码行数:31,代码来源:check_sig2.py


示例19: test_graphviz_errors

def test_graphviz_errors():
    """Check for errors of export_graphviz"""
    clf = DecisionTreeClassifier(max_depth=3, min_samples_split=1)
    clf.fit(X, y)

    out = StringIO()
    assert_raises(IndexError, export_graphviz, clf, out, feature_names=[])
开发者ID:Hydroinformatics-UNESCO-IHE,项目名称:scikit-learn,代码行数:7,代码来源:test_export.py


示例20: train

	def train(self, X, Y):
		N, D = X.shape
		for t in xrange(self.boostrap_sample):
			sampleX, sampleY = self.get_sample(X, Y)
			clf = DecisionTreeClassifier(criterion="entropy", max_depth = 1)
			clf.fit(sampleX, sampleY)
			self.weak_clfs.append(clf)
开发者ID:MUforever,项目名称:Machine-Learning,代码行数:7,代码来源:Bagging.py



注:本文中的sklearn.tree.DecisionTreeClassifier类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python tree.DecisionTreeRegressor类代码示例发布时间:2022-05-27
下一篇:
Python tree.export_graphviz函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap