# LightGBM quick-start example script.
import lightgbm as lgb
import numpy as np
import scipy
import h5py
# --- Build a training Dataset ----------------------------------------
# A Dataset can be constructed directly from a LightGBM binary file...
train_data = lgb.Dataset('train.svm.bin')

# ...or from an in-memory numpy matrix with binary labels. This second
# construction rebinds train_data, replacing the file-based one above
# (tutorial demonstration of both entry points).
data = np.random.rand(500, 10)             # 500 samples, 10 features
label = np.random.randint(2, size=500)     # binary target in {0, 1}
train_data = lgb.Dataset(data, label=label)
# --- Alternative ways to create a Dataset (disabled example code) -----
# NOTE: this used to be "commented out" with a bare ''' ... ''' string
# literal, which is an anti-pattern (it allocates and discards a string
# at import time and confuses tooling); kept as real comments instead.
#
# From a SciPy CSR sparse matrix:
#   csr = scipy.sparse.csr_matrix((dat, (row, col)))
#   train_data = lgb.Dataset(csr)
#
# Streaming data out of HDF5 via the lgb.Sequence interface:
#   class HDFSequence(lgb.Sequence):
#       def __init__(self, hdf_dataset, batch_size):
#           self.data = hdf_dataset
#           self.batch_size = batch_size
#
#       def __getitem__(self, idx):
#           return self.data[idx]
#
#       def __len__(self):
#           return len(self.data)
#
#   f = h5py.File('train.hdf5', 'r')
#   train_data = lgb.Dataset(HDFSequence(f['X'], 8192), label=f['Y'][:])
#
# Convert a text-format dataset to LightGBM binary format on disk:
#   train_data = lgb.Dataset('train.svm.txt')
#   train_data.save_binary('train.bin')
#
# Build a validation set that shares bin mappers with the training set:
#   validation_data = train_data.create_valid('validation.svm')
#   # validation_data = lgb.Dataset('validation.svm', reference=train_data)
#
# Name features and mark categorical columns:
#   train_data = lgb.Dataset(data, label=label,
#                            feature_name=['c1', 'c2', 'c3'],
#                            categorical_feature=['c3'])
#
# Attach per-sample weights:
#   w = np.random.rand(500, )
#   train_data = lgb.Dataset(data, label=label, weight=w)
# Booster configuration: modest tree complexity, binary-classification
# objective, AUC as the evaluation metric.
param = {
    'num_leaves': 31,
    'objective': 'binary',
    'metric': 'auc',
}
# --- Train, persist, and reload a model -------------------------------
num_round = 10

# BUG FIX: `validation_data` was referenced here without ever being
# defined in live code (its only definition sits inside the disabled
# example block above), which raises NameError. Build a small held-out
# set so training and evaluation actually run.
val_X = np.random.rand(100, 10)
val_y = np.random.randint(2, size=100)
validation_data = lgb.Dataset(val_X, label=val_y, reference=train_data)

bst = lgb.train(param, train_data, num_round, valid_sets=[validation_data])

bst.save_model('model.txt')                 # text-format model on disk
json_model = bst.dump_model()               # in-memory JSON representation
bst = lgb.Booster(model_file='model.txt')   # reload the saved model
# 5-fold cross-validation with the same parameters; the per-round metric
# history it returns is discarded here (tutorial demonstration only).
lgb.cv(param, train_data, num_round, nfold=5)
# --- Training with early stopping -------------------------------------
# BUG FIX: `valid_sets` was referenced here but never assigned anywhere
# in the file, raising NameError. Early stopping needs at least one
# validation set to monitor, so build one in place.
es_val_X = np.random.rand(100, 10)
es_val_y = np.random.randint(2, size=100)
valid_sets = [lgb.Dataset(es_val_X, label=es_val_y, reference=train_data)]

bst = lgb.train(
    param,
    train_data,
    num_round,
    valid_sets=valid_sets,
    callbacks=[lgb.early_stopping(stopping_rounds=5)],
)
# Persist only up to the best iteration found by early stopping.
bst.save_model('model.txt', num_iteration=bst.best_iteration)
# Score 7 fresh random samples with the trained booster. With
# objective='binary', predict() returns one float per row — presumably
# the positive-class probability by default (confirm against the
# LightGBM predict docs if raw scores are needed).
data = np.random.rand(7, 10)
ypred = bst.predict(data)
# end of example