import os
 import time
import optuna
 import pandas as pd
 from catboost import CatBoostRegressor
 from sklearn.metrics import r2_score, mean_squared_error
 from sklearn.model_selection import train_test_split
# Build the feature matrix (every column except 'label', 'b1' and 'b2') and the
# target vector, then hold out 20% of the rows for validation.
# NOTE(review): `data` is not defined in this part of the file; it is presumably
# a pandas DataFrame loaded earlier -- confirm.
features = data.drop(['label', 'b1', 'b2'], axis=1).values
labels = data['label'].values
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)
def epoch_time(start_time, end_time):
    """Return the time elapsed between two timestamps.

    Args:
        start_time: Timestamp taken before the measured work.
        end_time: Timestamp taken after the measured work.

    Returns:
        A ``(minutes, seconds)`` tuple; both values describe the same
        duration, the first being the second divided by 60.
    """
    seconds = end_time - start_time
    return seconds / 60, seconds
def objective(trial):
    """Train a CatBoost regressor with trial-suggested hyperparameters.

    Args:
        trial: Optuna trial object used to sample the hyperparameters.

    Returns:
        Mean squared error of the fitted model's predictions for ``X_test``
        measured against ``y_test``.
    """
    # Custom parameter search space (sampled in a fixed order).
    params = {
        'depth': trial.suggest_int('depth', 1, 16),
        'border_count': trial.suggest_int('border_count', 1, 222),
        'l2_leaf_reg': trial.suggest_int('l2_leaf_reg', 1, 222),
        # suggest_float is the non-deprecated replacement for suggest_uniform
        # and samples from the same range.
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.9),
        'iterations': trial.suggest_int('iterations', 1, 100),
    }
    estimator = CatBoostRegressor(
        loss_function='RMSE',
        random_seed=22,
        verbose=0,
        **params,
    )
    estimator.fit(X_train, y_train)
    val_pred = estimator.predict(X_test)
    return mean_squared_error(y_test, val_pred)
# Run optimize.
# Set n_trials and/or timeout (in sec) for optimization by Optuna.
#
# Random search is the active sampler; pass
# sampler=optuna.samplers.TPESampler() instead to run TPE.
study = optuna.create_study(
    sampler=optuna.samplers.RandomSampler(), direction='minimize'
)

# Time the whole search.
start_time = time.time()
study.optimize(objective, n_trials=10)
end_time = time.time()

elapsed_mins, elapsed_secs = epoch_time(start_time, end_time)
print('elapsed_secs:', elapsed_secs)
print('Best value:', study.best_trial.value)
import os
 import time
import pandas as pd
 from catboost import CatBoostRegressor
 from hyperopt import fmin, hp, partial, Trials, tpe,rand
 from sklearn.metrics import r2_score, mean_squared_error
 from sklearn.model_selection import train_test_split
# Custom hyperopt parameter space.
# hp.randint(label, low, high) samples integers from [low, high); the explicit
# lower bound of 1 keeps 0 from ever being proposed for depth, l2_leaf_reg or
# border_count. hp.choice is given an explicit list of the candidate values.
space = {
    "iterations": hp.choice("iterations", list(range(1, 101))),
    "depth": hp.randint("depth", 1, 17),
    "l2_leaf_reg": hp.randint("l2_leaf_reg", 1, 223),
    "border_count": hp.randint("border_count", 1, 223),
    'learning_rate': hp.uniform('learning_rate', 0.001, 0.9),
}
# Build the feature matrix (every column except 'label', 'b1' and 'b2') and the
# target vector, then hold out 20% of the rows for validation.
# NOTE(review): `data` is not defined in this part of the file; it is presumably
# a pandas DataFrame loaded earlier -- confirm.
features = data.drop(['label', 'b1', 'b2'], axis=1).values
labels = data['label'].values
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)
def epoch_time(start_time, end_time):
    """Compute how long a timed section took.

    Args:
        start_time: Timestamp recorded when the section started.
        end_time: Timestamp recorded when the section finished.

    Returns:
        Tuple ``(elapsed_mins, elapsed_secs)`` where ``elapsed_secs`` is
        ``end_time - start_time`` and ``elapsed_mins`` is that value / 60.
    """
    duration = end_time - start_time
    minutes = duration / 60
    return minutes, duration
# Automated parameter tuning and training.
def cat_factory(argsDict):
    """Train a CatBoost regressor with the given parameters and score it.

    Args:
        argsDict: Mapping with the keys 'learning_rate', 'iterations',
            'l2_leaf_reg', 'border_count' and 'depth'.

    Returns:
        Mean squared error of the fitted model's predictions for ``X_test``
        measured against ``y_test``.
    """
    estimator = CatBoostRegressor(
        loss_function='RMSE',
        random_seed=22,
        learning_rate=argsDict['learning_rate'],
        iterations=argsDict['iterations'],
        l2_leaf_reg=argsDict['l2_leaf_reg'],
        border_count=argsDict['border_count'],
        depth=argsDict['depth'],
        verbose=0,
    )
    estimator.fit(X_train, y_train)
    val_pred = estimator.predict(X_test)
    return mean_squared_error(y_test, val_pred)
from hyperopt import space_eval

# Search algorithm. Random search is the active choice; assign tpe.suggest
# here instead to run TPE.
algo = rand.suggest

# Container that records every evaluation.
trials = Trials()

# Start the automated parameter search and time it.
start_time = time.time()
best = fmin(cat_factory, space, algo=algo, max_evals=10, trials=trials)
end_time = time.time()
elapsed_mins, elapsed_secs = epoch_time(start_time, end_time)
print('elapsed_secs:', elapsed_secs)

# Walk through the result of every evaluation and collect its loss together
# with the hyperparameter values that were actually used.
columns = ['value', 'learning_rate', 'iterations', 'depth', 'l2_leaf_reg', 'border_count']
rows = []
for one in trials:
    # misc['vals'] maps each label to a one-element list. For hp.choice that
    # element is the index of the chosen option, so space_eval is used to
    # resolve the raw record into the real parameter values.
    raw = {label: values[0] for label, values in one['misc']['vals'].items()}
    params = space_eval(space, raw)
    rows.append([one['result']['loss']] + [params[name] for name in columns[1:]])
parameters = pd.DataFrame(rows, columns=columns)

# Pick the evaluation with the smallest loss. The loss is a mean squared
# error, which is never negative, so no abs() is needed before idxmin().
best = parameters.loc[parameters['value'].idxmin()]
print("best: {}".format(best))