• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python preprocessing.LabelBinarizer类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sklearn.preprocessing.LabelBinarizer的典型用法代码示例。如果您正苦于以下问题:Python LabelBinarizer类的具体用法?Python LabelBinarizer怎么用?Python LabelBinarizer使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了LabelBinarizer类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: BinaryRelevanceClassifier

class BinaryRelevanceClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, Y):
        # binarize labels
        self.bl = LabelBinarizer()
        Y = self.bl.fit_transform(Y)
        self.classes_ = self.bl.classes_

        # create an estimator for each label
        self.estimators_ = []
        for i in xrange(self.bl.classes_.shape[0]):
            estimator = clone(self.estimator)
            estimator.fit(X, Y[:, i])
            self.estimators_.append(estimator)

    def predict(self, X):
        self._check_is_fitted()

        X = np.atleast_2d(X)
        Y = np.empty((X.shape[0], self.classes_.shape[0]))
        for i, estimator in enumerate(self.estimators_):
            Y[:, i] = estimator.predict(X).T

        return self.bl.inverse_transform(Y)

    def _check_is_fitted(self):
        if not hasattr(self, "estimators_"):
            raise ValueError("The object hasn't been fitted yet!")
开发者ID:sunnyrjuneja,项目名称:ai_tidbits,代码行数:30,代码来源:binary_relevance_classifier.py


示例2: GBClassifier

class GBClassifier(_BaseGB, ClassifierMixin):

    def __init__(self, estimator, n_estimators=100,
                 step_size="line_search", learning_rate=0.1,
                 loss="squared_hinge", subsample=1.0,
                 callback=None, random_state=None):
        self.estimator = estimator
        self.n_estimators = n_estimators
        self.step_size = step_size
        self.learning_rate = learning_rate
        self.loss = loss
        self.subsample = subsample
        self.callback = callback
        self.random_state = random_state

    def _get_loss(self):
        losses = dict(squared_hinge=_SquaredHingeLoss(),
                      log=_LogLoss())
        return losses[self.loss]

    def fit(self, X, y):
        self._lb = LabelBinarizer(neg_label=-1)
        Y = self._lb.fit_transform(y)
        return super(GBClassifier, self).fit(X, Y)

    def predict(self, X):
        pred = self.decision_function(X)
        return self._lb.inverse_transform(pred)
开发者ID:0x0all,项目名称:ivalice,代码行数:28,代码来源:gradient_boosting.py


示例3: test_multinomial_loss_ground_truth

def test_multinomial_loss_ground_truth():
    # n_samples, n_features, n_classes = 4, 2, 3
    n_classes = 3
    X = np.array([[1.1, 2.2], [2.2, -4.4], [3.3, -2.2], [1.1, 1.1]])
    y = np.array([0, 1, 2, 0])
    lbin = LabelBinarizer()
    Y_bin = lbin.fit_transform(y)

    weights = np.array([[0.1, 0.2, 0.3], [1.1, 1.2, -1.3]])
    intercept = np.array([1., 0, -.2])
    sample_weights = np.array([0.8, 1, 1, 0.8])

    prediction = np.dot(X, weights) + intercept
    logsumexp_prediction = logsumexp(prediction, axis=1)
    p = prediction - logsumexp_prediction[:, np.newaxis]
    loss_1 = -(sample_weights[:, np.newaxis] * p * Y_bin).sum()
    diff = sample_weights[:, np.newaxis] * (np.exp(p) - Y_bin)
    grad_1 = np.dot(X.T, diff)

    weights_intercept = np.vstack((weights, intercept)).T.ravel()
    loss_2, grad_2, _ = _multinomial_loss_grad(weights_intercept, X, Y_bin,
                                               0.0, sample_weights)
    grad_2 = grad_2.reshape(n_classes, -1)
    grad_2 = grad_2[:, :-1].T

    assert_almost_equal(loss_1, loss_2)
    assert_array_almost_equal(grad_1, grad_2)

    # ground truth
    loss_gt = 11.680360354325961
    grad_gt = np.array([[-0.557487, -1.619151, +2.176638],
                        [-0.903942, +5.258745, -4.354803]])
    assert_almost_equal(loss_1, loss_gt)
    assert_array_almost_equal(grad_1, grad_gt)
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:34,代码来源:test_sag.py


示例4: Encoding

def Encoding(data, general_matrix=None):
    encoder = LabelBinarizer()
    count = 0
    # encoding
    for i in range(data.shape[1]):
        if type(data[0, i]) == str:
            count += 1
            col = data[:, i]
            unique = np.unique(col if general_matrix is None else general_matrix[:, i])

            try:
                encoder.fit(unique)
            except:
                pass

            new_col = encoder.transform(col)

            # split at i and i + 1
            before, removed, after = np.hsplit(data, [i, i + 1])
            # concatenate
            data = np.concatenate((before, new_col, after), axis=1)
            before, removed, after = np.hsplit(general_matrix, [i, i + 1])
            general_matrix = np.concatenate((before, encoder.transform(general_matrix[:, i]), after), axis=1)

    print "count : %d" % count
    # return data
    return data
开发者ID:nhanloukiala,项目名称:AppsOfDataAnalysis,代码行数:27,代码来源:cyber_attack_classification.py


示例5: display_image_predictions

def display_image_predictions(features, labels, predictions):
    n_classes = 10
    label_names = _load_label_names()
    label_binarizer = LabelBinarizer()
    label_binarizer.fit(range(n_classes))
    label_ids = label_binarizer.inverse_transform(np.array(labels))

    fig, axies = plt.subplots(nrows=4, ncols=2)
    fig.tight_layout()
    fig.suptitle('Softmax Predictions', fontsize=20, y=1.1)

    n_predictions = 3
    margin = 0.05
    ind = np.arange(n_predictions)
    width = (1. - 2. * margin) / n_predictions

    for image_i, (feature, label_id, pred_indicies, pred_values) in enumerate(zip(features, label_ids, predictions.indices, predictions.values)):
        pred_names = [label_names[pred_i] for pred_i in pred_indicies]
        correct_name = label_names[label_id]

        axies[image_i][0].imshow(feature*255)
        axies[image_i][0].set_title(correct_name)
        axies[image_i][0].set_axis_off()

        axies[image_i][1].barh(ind + margin, pred_values[::-1], width)
        axies[image_i][1].set_yticks(ind + margin)
        axies[image_i][1].set_yticklabels(pred_names[::-1])
        axies[image_i][1].set_xticks([0, 0.5, 1.0])
开发者ID:lpalum,项目名称:machine-learning,代码行数:28,代码来源:helper.py


示例6: transform

 def transform(self, data_dict):
     listOfUnits = ["kilogram", "kg", "gram", "[GMgmkK]?Hz", "liter", "ml",
             "cup", "cm", "foot", "inch", "meter", "mg", "gallon", "milliliter", "[MGTmgtKk]B"]
     regex = "[\d]+\.[\d]+(" + "[\b/,-]|".join(listOfUnits) + ")"
     data = data_dict[self.key].str.extract(regex, flags = re.IGNORECASE, expand=False).str.lower()
     lb = LabelBinarizer()
     return lb.fit_transform(data.fillna(""))
开发者ID:zero0nee,项目名称:UnspscClassifier,代码行数:7,代码来源:transformers.py


示例7: bio_classification_report

def bio_classification_report(y_true, y_pred):

    lb = LabelBinarizer()
    y_true_combined = 1 - lb.fit_transform(list(chain.from_iterable(y_true)))
    y_pred_combined = list(chain.from_iterable(y_pred))

    tagset = set(lb.classes_) - {'O'}
    tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}
    print 'True sum %d Pred sum %d Len %d' %(sum(y_true_combined), sum(y_pred_combined), len(y_pred_combined))
    print "AUC\tP-R: %.4f\tROC: %.4f" % (average_precision_score(y_true_combined, y_pred_combined, average=None),
        roc_auc_score(y_true_combined, y_pred_combined, average=None))
    #plt.figure()
    #fpr, tpr, thr = roc_curve(y_true_combined, y_pred_combined)
    #area = auc(fpr, tpr)
    #plt.plot(fpr, tpr, label='{area:.3f}'.format( area=area))
    #plt.legend(loc=4)
    #plt.savefig('sub3.jpg')

    return classification_report(
        1 - y_true_combined,
        [0 if v > 0.1 else 1 for v in y_pred_combined],
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset,
    )
开发者ID:lmxl,项目名称:hal,代码行数:25,代码来源:base_learner.py


示例8: CategoricalToNumerical

class CategoricalToNumerical(object):

    def __init__(self, dimensionality_reducer=None, verify=True):
        pass
        """Takes in a dimensionality reducer in order to convert categorical features into numerical.
        """
        if dimensionality_reducer is None:
            dimensionality_reducer = RandomizedPCA(1)
        self.dimensionality_reducer = dimensionality_reducer
        self.verify = verify
        self.binarizer = LabelBinarizer()

    def fit(self, X, y=None):
        self._verify(X, self.verify)
        binarized = self.binarizer.fit_transform(X)
        self.dimensionality_reducer.fit(binarized)

    def transform(self, X):
        self._verify(X, False)
        binarized = self.binarizer.transform(X)
        result = self.dimensionality_reducer.transform(binarized).flatten()
        assert X.shape == result.shape
        return result

    def fit_transform(self, X, y=None):
        self.fit(X)
        return self.transform(X)

    def _verify(self, X, verify):
        if verify:
            assert is_categorical(X)
        else:
            assert isinstance(X, np.ndarray)
            assert len(X.shape) == 1
开发者ID:Diviyan-Kalainathan,项目名称:causal-humans,代码行数:34,代码来源:convert.py


示例9: iris_demo

def iris_demo():
    # load the iris dataset
    iris = load_iris()
    X = iris['data']
    y_labels = iris['target']

    lb = LabelBinarizer()
    y = lb.fit_transform(y_labels)

    # split into training, validation and test datasets
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.25,
                                                        random_state=RANDOM_STATE)

    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train,
                                                          test_size=0.25,
                                                          random_state=RANDOM_STATE)

    # train the neural net
    print("Building logistic regression classifier to classify iris data")
    nn = pynn.ArtificialNeuralNet([X_train.shape[1], 20, y_train.shape[1]])
    print("Training")
    nn.fit(X_train, y_train, X_valid, y_valid,
           batch_size=20, n_epochs=20, learning_rate=0.05,
           random_state=RANDOM_STATE)

    y_pred = nn.predict(X_test)

    print("iris accuracy: {}%".format(
        accuracy_score(y_test.argmax(1), y_pred.argmax(1)) * 100))
开发者ID:benjamin-croker,项目名称:pynn,代码行数:30,代码来源:example.py


示例10: get_dataset2

def get_dataset2(test_fraction):
    """
   @:param: test_fraction used to split train and test
   Vectorizes the features and labels into categorical values and randomly splits into train and test set
   :return: X_train, X_test, y_train, y_test
   """
    data = []
    with open('labels.csv', 'r') as datafile:
        csv_reader = csv.reader(datafile, delimiter=',', quotechar='|')
        for row in csv_reader:
            data.append(row)

    data = numpy.asarray(data)
    X = data[:, 0:data.shape[1]-1]
    y = data[:, data.shape[1]-1]

    # X,y = get_tabledata()

    vec = DictVectorizer()
    feature_dict = [dict(enumerate(x)) for x in X.tolist()]
    X = vec.fit_transform(feature_dict).toarray()
    joblib.dump(vec, 'vectorizer.pkl')

    lb = LabelBinarizer()
    y = lb.fit_transform(y)
    joblib.dump(lb, 'binarizer.pkl')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_fraction)
    return X_train, X_test, y_train, y_test
开发者ID:spatial-computing,项目名称:strabo-learning-ocr-transformation-hpc,代码行数:28,代码来源:Data.py


示例11: bio_classification_report

def bio_classification_report(y_true, y_pred):
    """Evaluates entity extraction accuracy.

    Classification report for a list of BIO-encoded sequences.
    It computes token-level metrics and discards "O" labels.
    Note that it requires scikit-learn 0.15+ (or a version from github master)
    to calculate averages properly!
    Taken from https://github.com/scrapinghub/python-crfsuite/blob/master/examples/CoNLL%202002.ipynb
    """
    from sklearn.preprocessing import LabelBinarizer
    from itertools import chain
    from sklearn.metrics import classification_report

    lb = LabelBinarizer()
    y_true_combined = lb.fit_transform(list(chain.from_iterable(y_true)))
    y_pred_combined = lb.transform(list(chain.from_iterable(y_pred)))

    tagset = set(lb.classes_) - {'O'}
    tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}

    return classification_report(
            y_true_combined,
            y_pred_combined,
            labels=[class_indices[cls] for cls in tagset],
            target_names=tagset,
    )
开发者ID:dhpollack,项目名称:rasa_nlu,代码行数:27,代码来源:crf_entity_extractor.py


示例12: scorer_auc

def scorer_auc(y_true, y_pred):
    from sklearn.metrics import roc_auc_score
    from sklearn.preprocessing import LabelBinarizer
    """Dedicated to 2class probabilistic outputs"""
    le = LabelBinarizer()
    y_true = le.fit_transform(y_true)
    return roc_auc_score(y_true, y_pred)
开发者ID:SherazKhan,项目名称:Paris_orientation-decoding,代码行数:7,代码来源:base.py


示例13: binarize_label_columns

def binarize_label_columns(df, columns, two_classes_as='single'):
    '''
    Inputs:
        df: Pandas dataframe object.
        columns: Columns to binarize.
        tow_classes_as: How to handle two classes, as 'single' or 'multiple' columns.
    Returns a tuple with the following items:
        df: Pandas dataframe object with new columns.
        binlabel_names: Names of the newly created binary variables.
        lb_objects: a dictionary with columns as keys and sklear.LabelBinarizer 
        objects as values.
    '''
    binlabel_names = []
    lb_objects = {}
    for col in columns:
        if len(df[col].unique()) > 1: 
            rows_notnull = df[col].notnull() # Use only valid feature observations
            lb = LabelBinarizer()
            binclass = lb.fit_transform(df[col][rows_notnull]) # Fit & transform on valid observations
            if len(lb.classes_) == 2 and two_classes_as == 'multiple':
                binclass = np.hstack((1 - binclass, binclass))
            lb_objects[col] = lb
            if len(lb.classes_) > 2 or two_classes_as == 'multiple':
                col_binlabel_names = [col+'_'+str(c) for c in lb.classes_]
                binlabel_names += col_binlabel_names # Names for the binarized classes
                for n in col_binlabel_names: df[n] = np.NaN # Initialize columns
                df.loc[rows_notnull, col_binlabel_names] = binclass # Merge binarized data
            elif two_classes_as == 'single': 
                binlabel_names.append(col+'_bin') # Names for the binarized classes
                df[col+'_bin'] = np.NaN # Initialize columns
                df.loc[rows_notnull, col+'_bin'] = binclass # Merge binarized data
    return df, binlabel_names, lb_objects
开发者ID:jonathan1296,项目名称:Data-Science-Lectures,代码行数:32,代码来源:start_here.py


示例14: one_hot_encoding

def one_hot_encoding(y_train, y_test):
    labelBinarizer = LabelBinarizer()
    labelBinarizer.fit(y_train)

    y_train_one_hot = labelBinarizer.transform(y_train)
    y_test_one_hot = labelBinarizer.transform(y_test)
    return y_train_one_hot, y_test_one_hot
开发者ID:dzungcamlang,项目名称:Traffic-Signs,代码行数:7,代码来源:util.py


示例15: full_matrix

def full_matrix(dropped):
    # create initial matrix
    print('starting with m0')
    lb = LabelBinarizer(sparse_output=True)
    # m = lb.fit_transform(dropped.restaurant_id)
    m = lb.fit_transform(dropped.user_name)
    print(m.shape)
    # build matrix
    # making nan its own category for categorical
    print("adding categorical to matrix")
    m = add_categorical_to_matrix(m, dropped, ['review_stars', 'user_name', 'restaurant_stars', 'restaurant_attributes_ages_allowed', 'restaurant_attributes_alcohol', 'restaurant_attributes_attire', 'restaurant_attributes_byob_corkage', 'restaurant_attributes_noise_level', 'restaurant_attributes_smoking', 'restaurant_attributes_wifi', 'restaurant_city',  'restaurant_hours_friday_close', 'restaurant_hours_friday_open', 'restaurant_hours_monday_close', 'restaurant_hours_monday_open', 'restaurant_hours_saturday_close', 'restaurant_hours_saturday_open', 'restaurant_hours_sunday_close', 'restaurant_hours_sunday_open', 'restaurant_hours_thursday_close', 'restaurant_hours_thursday_open', 'restaurant_hours_tuesday_close', 'restaurant_hours_tuesday_open', 'restaurant_hours_wednesday_close', 'restaurant_hours_wednesday_open', 'restaurant_ambience', 'restaurant_music', 'restaurant_parking', 'restaurant_street', 'restaurant_zipcode',  'inspection_year', 'inspection_month', 'inspection_day', 'inspection_dayofweek', 'inspection_quarter',])
    print(m.shape)

    print("adding bool to matrix")
    m = add_categorical_to_matrix(m, dropped, ['restaurant_attributes_accepts_credit_cards', 'restaurant_attributes_byob', 'restaurant_attributes_caters', 'restaurant_attributes_coat_check', 'restaurant_attributes_corkage', 'restaurant_attributes_delivery', 'restaurant_attributes_dietary_restrictions_dairy_free', 'restaurant_attributes_dietary_restrictions_gluten_free', 'restaurant_attributes_dietary_restrictions_halal', 'restaurant_attributes_dietary_restrictions_kosher', 'restaurant_attributes_dietary_restrictions_soy_free', 'restaurant_attributes_dietary_restrictions_vegan', 'restaurant_attributes_dietary_restrictions_vegetarian', 'restaurant_attributes_dogs_allowed', 'restaurant_attributes_drive_thr', 'restaurant_attributes_good_for_dancing', 'restaurant_attributes_good_for_groups', 'restaurant_attributes_good_for_breakfast', 'restaurant_attributes_good_for_brunch', 'restaurant_attributes_good_for_dessert', 'restaurant_attributes_good_for_dinner', 'restaurant_attributes_good_for_latenight', 'restaurant_attributes_good_for_lunch', 'restaurant_attributes_good_for_kids', 'restaurant_attributes_happy_hour', 'restaurant_attributes_has_tv', 'restaurant_attributes_open_24_hours', 'restaurant_attributes_order_at_counter', 'restaurant_attributes_outdoor_seating',  'restaurant_attributes_payment_types_amex', 'restaurant_attributes_payment_types_cash_only', 'restaurant_attributes_payment_types_discover', 'restaurant_attributes_payment_types_mastercard', 'restaurant_attributes_payment_types_visa', 'restaurant_attributes_take_out',  'restaurant_attributes_takes_reservations', 'restaurant_attributes_waiter_service', 'restaurant_attributes_wheelchair_accessible', ])
    print(m.shape)

    m = add_numerical_to_matrix(m, dropped, ['review_votes_cool', 'review_votes_funny', 'review_votes_useful', 'user_average_stars', 'user_compliments_cool', 'user_compliments_cute', 'user_compliments_funny', 'user_compliments_hot', 'user_compliments_list', 'user_compliments_more', 'user_compliments_note', 'user_compliments_photos', 'user_compliments_plain', 'user_compliments_profile', 'user_compliments_writer', 'user_fans', 'user_review_count', 'user_votes_cool', 'user_votes_funny', 'user_votes_useful', 'restaurant_attributes_price_range', 'restaurant_latitude', 'restaurant_longitude', 'restaurant_review_count', 'checkin_counts', 'review_delta', 'previous_inspection_delta', 'polarity', 'subjectivity', 'neg', 'neu', 'pos', 'compound', 'user_yelping_since_delta','manager', 'supervisor', 'training', 'safety', 'disease', 'ill', 'sick', 'poisoning', 'hygiene', 'raw', 'undercooked', 'cold', 'clean', 'sanitary', 'wash', 'jaundice', 'yellow', 'hazard', 'inspection', 'violation', 'gloves', 'hairnet', 'nails', 'jewelry', 'sneeze', 'cough', 'runny', 'illegal', 'rotten', 'dirty', 'mouse', 'cockroach', 'contaminated', 'gross', 'disgusting', 'stink', 'old', 'parasite', 'reheat', 'frozen', 'broken', 'drip', 'bathroom', 'toilet', 'leak', 'trash', 'dark', 'lights', 'dust', 'puddle', 'pesticide', 'bugs', 'mold'])
    print(m.shape)

    print("adding restaurant categories to matrix")
    cats = ['restaurant_category_1', 'restaurant_category_2', 'restaurant_category_3', 'restaurant_category_4', 'restaurant_category_5', 'restaurant_category_6', 'restaurant_category_7']
    m = special_categories_to_matrix(m, dropped, cats)
    print(m.shape)

    print("adding restaurant neighborhoods to matrix")
    cats = ['restaurant_neighborhood_1', 'restaurant_neighborhood_2', 'restaurant_neighborhood_3']
    m = special_categories_to_matrix(m, dropped, cats)
    print(m.shape)

    print("matrix shape of {}".format(m.shape))
    joblib.dump(m, 'pickle_jar/full_matrix')
开发者ID:potatochip,项目名称:kojak,代码行数:32,代码来源:blue_pill.py


示例16: conv_demo

def conv_demo():
    # load the digits dataset
    digits = load_digits()
    X = digits['data']
    y_labels = digits['target']

    lb = LabelBinarizer()
    y = lb.fit_transform(y_labels)

    # split into training, validation and test datasets
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.25,
                                                        random_state=RANDOM_STATE)

    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train,
                                                          test_size=0.25,
                                                          random_state=RANDOM_STATE)

    # train the neural net
    print("Building neural net to classify digits")
    conv_net = pynn.ConvNet(digits['images'][0].shape, 1, y.shape[1],
                            random_state=RANDOM_STATE)
    print("Training")
    conv_net.fit(X_train, y_train, X_valid, y_valid,
                 batch_size=20, n_epochs=20, learning_rate=0.05)

    y_pred = conv_net.predict(X_test)

    print("digits accuracy: {}%".format(
        accuracy_score(y_test.argmax(1), y_pred.argmax(1)) * 100))
开发者ID:benjamin-croker,项目名称:pynn,代码行数:30,代码来源:example.py


示例17: just_categorical

def just_categorical(dropped):

    # create initial matrix
    print('starting with m0')
    lb = LabelBinarizer(sparse_output=True)
    m = lb.fit_transform(dropped.restaurant_id)
    print(m.shape)

    # build matrix
    # making nan its own category for categorical
    print("adding categorical to matrix")
    m = add_categorical_to_matrix(m, dropped, ['review_stars', 'user_name', 'restaurant_stars', 'restaurant_attributes_ages_allowed', 'restaurant_attributes_alcohol', 'restaurant_attributes_attire', 'restaurant_attributes_byob_corkage', 'restaurant_attributes_noise_level', 'restaurant_attributes_smoking', 'restaurant_attributes_wifi', 'restaurant_city',  'restaurant_hours_friday_close', 'restaurant_hours_friday_open', 'restaurant_hours_monday_close', 'restaurant_hours_monday_open', 'restaurant_hours_saturday_close', 'restaurant_hours_saturday_open', 'restaurant_hours_sunday_close', 'restaurant_hours_sunday_open', 'restaurant_hours_thursday_close', 'restaurant_hours_thursday_open', 'restaurant_hours_tuesday_close', 'restaurant_hours_tuesday_open', 'restaurant_hours_wednesday_close', 'restaurant_hours_wednesday_open', 'restaurant_ambience', 'restaurant_music', 'restaurant_parking', 'restaurant_street', 'restaurant_zipcode',  'inspection_year', 'inspection_month', 'inspection_day', 'inspection_dayofweek', 'inspection_quarter',])
    print(m.shape)

    print("adding bool to matrix")
    m = add_categorical_to_matrix(m, dropped, ['restaurant_attributes_accepts_credit_cards', 'restaurant_attributes_byob', 'restaurant_attributes_caters', 'restaurant_attributes_coat_check', 'restaurant_attributes_corkage', 'restaurant_attributes_delivery', 'restaurant_attributes_dietary_restrictions_dairy_free', 'restaurant_attributes_dietary_restrictions_gluten_free', 'restaurant_attributes_dietary_restrictions_halal', 'restaurant_attributes_dietary_restrictions_kosher', 'restaurant_attributes_dietary_restrictions_soy_free', 'restaurant_attributes_dietary_restrictions_vegan', 'restaurant_attributes_dietary_restrictions_vegetarian', 'restaurant_attributes_dogs_allowed', 'restaurant_attributes_drive_thr', 'restaurant_attributes_good_for_dancing', 'restaurant_attributes_good_for_groups', 'restaurant_attributes_good_for_breakfast', 'restaurant_attributes_good_for_brunch', 'restaurant_attributes_good_for_dessert', 'restaurant_attributes_good_for_dinner', 'restaurant_attributes_good_for_latenight', 'restaurant_attributes_good_for_lunch', 'restaurant_attributes_good_for_kids', 'restaurant_attributes_happy_hour', 'restaurant_attributes_has_tv', 'restaurant_attributes_open_24_hours', 'restaurant_attributes_order_at_counter', 'restaurant_attributes_outdoor_seating',  'restaurant_attributes_payment_types_amex', 'restaurant_attributes_payment_types_cash_only', 'restaurant_attributes_payment_types_discover', 'restaurant_attributes_payment_types_mastercard', 'restaurant_attributes_payment_types_visa', 'restaurant_attributes_take_out',  'restaurant_attributes_takes_reservations', 'restaurant_attributes_waiter_service', 'restaurant_attributes_wheelchair_accessible', ])
    print(m.shape)

    print("adding restaurant categories to matrix")
    cats = ['restaurant_category_1', 'restaurant_category_2', 'restaurant_category_3', 'restaurant_category_4', 'restaurant_category_5', 'restaurant_category_6', 'restaurant_category_7']
    m = special_categories_to_matrix(m, dropped, cats)
    print(m.shape)

    print("adding restaurant neighborhoods to matrix")
    cats = ['restaurant_neighborhood_1', 'restaurant_neighborhood_2', 'restaurant_neighborhood_3']
    m = special_categories_to_matrix(m, dropped, cats)
    print(m.shape)

    print("matrix shape of {}".format(m.shape))
    joblib.dump(m, 'pickle_jar/categorical_matrix')
开发者ID:potatochip,项目名称:kojak,代码行数:30,代码来源:blue_pill.py


示例18: BaseSGD

class BaseSGD(object):
    def _get_loss(self):
        losses = {
            "modified_huber": ModifiedHuber(),
            "hinge": Hinge(1.0),
            "perceptron": Hinge(0.0),
            "log": Log(),
            "sparse_log": SparseLog(),
            "squared": SquaredLoss(),
            "huber": Huber(self.epsilon),
            "epsilon_insensitive": EpsilonInsensitive(self.epsilon),
        }
        return losses[self.loss]

    def _get_learning_rate(self):
        learning_rates = {"constant": 1, "pegasos": 2, "invscaling": 3}
        return learning_rates[self.learning_rate]

    def _set_label_transformers(self, y):
        if self.multiclass == "natural":
            self.label_encoder_ = LabelEncoder()
            y = self.label_encoder_.fit_transform(y).astype(np.float64)

        self.label_binarizer_ = LabelBinarizer(neg_label=-1, pos_label=1)
        self.label_binarizer_.fit(y)
        self.classes_ = self.label_binarizer_.classes_.astype(np.int32)
        n_classes = len(self.label_binarizer_.classes_)
        n_vectors = 1 if n_classes <= 2 else n_classes
        return n_classes, n_vectors
开发者ID:Raz0r,项目名称:lightning,代码行数:29,代码来源:sgd.py


示例19: binarize_seqfeature

def binarize_seqfeature(X):
    """
    Binarizes the sequence features into 1s and 0s.
    
    Parameters:
    ===========
    - X: (pandas DataFrame) the sequence feature matrix without drug resistance values.
    
    Returns:
    ========
    - binarized:     (pandas DataFrame) a binarized sequence feature matrix with columns corresponding to particular amino acids at each position.
    - binarizers:    (dictionary) a dictionary of binarizer objects for each position.
    """
    binarized = pd.DataFrame()
    binarizers = dict()
    for col in X.columns:
        lb = LabelBinarizer()
        binarized_cols = lb.fit_transform(X[col])
        if len(lb.classes_) == 2:
            binarized[col] = pd.Series(binarized_cols[:, 0])
        else:
            for i, c in enumerate(lb.classes_):
                binarized[col + "_" + c] = binarized_cols[:, i]
        binarizers[col] = lb

    return binarized, binarizers
开发者ID:nickleshill,项目名称:systems-microbiology-hiv,代码行数:26,代码来源:custom_funcs.py


示例20: run

def run():
    # Load and preprocess data
    label_to_unique_instance = load_data()
    X, Y = preprocess_data(label_to_unique_instance)

    # Encode labels
    label_binarizer = LabelBinarizer()
    transformed_Y = label_binarizer.fit_transform(Y)

    # Cross validation
    cross_validation_iterator = StratifiedShuffleSplit(Y, n_iter=1, test_size=0.4, random_state=0)
    for train_index, test_index in cross_validation_iterator:
        break

    # Init model
    model = init_model(raw_feature_dim=X.shape[-1], unique_lable_num=len(label_binarizer.classes_))

    # Training procedure
    model.fit(X[train_index], transformed_Y[train_index],
              batch_size=BATCH_SIZE, nb_epoch=MAXIMUM_EPOCH_NUM,
              validation_data=(X[test_index], transformed_Y[test_index]),
              callbacks=[TensorBoard(log_dir="/tmp/Sequence Classification")],
              verbose=2)

    print("All done!")
开发者ID:nixingyang,项目名称:Miscellaneous-Projects,代码行数:25,代码来源:solution.py



注:本文中的sklearn.preprocessing.LabelBinarizer类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python preprocessing.LabelEncoder类代码示例发布时间:2022-05-27
下一篇:
Python preprocessing.KernelCenterer类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap