Problems setting up a conditional search space in hyperopt
I fully admit that I may be setting up the conditional space wrong, but for some reason I just can't get this to work at all. I'm trying to use hyperopt to tune a logistic regression model, and depending on the solver there are some other parameters that also need to be explored. If you choose the liblinear solver you can choose a penalty, and depending on the penalty you can also choose dual. When I try to run hyperopt on this search space, it keeps throwing an error because it passes in the entire dictionary, as shown below. Any ideas?

The error I get is: ValueError: Logistic Regression supports only liblinear, newton-cg, lbfgs and sag solvers, got {'solver': 'sag'}

This format worked when setting up a random forest search space, so I'm at a loss.
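Each branch of the hp.choice('solver', ...) below evaluates to a nested dictionary, so LogisticRegression(**params) ends up receiving solver={'solver': 'sag'} instead of solver='sag', which is exactly what the ValueError reports. One possible workaround, sketched here with a hypothetical flatten_params helper that is not part of the original code, is to merge the sampled nested dictionaries into plain keyword arguments before building the estimator:

def flatten_params(params):
    # Hypothetical helper: recursively merge nested dicts produced by
    # hp.choice branches into a single flat kwargs dict.
    flat = {}
    for key, value in params.items():
        if isinstance(value, dict):
            flat.update(flatten_params(value))
        else:
            flat[key] = value
    return flat

# flatten_params({'C': 1.0, 'solver': {'solver': 'sag'}})
# -> {'C': 1.0, 'solver': 'sag'}, which LogisticRegression(**kwargs) accepts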
import numpy as np
import scipy as sp
import pandas as pd
pd.options.display.max_columns = None
pd.options.display.max_rows = None
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(style="white")
import pyodbc
import statsmodels as sm
from pandasql import sqldf
import math
from tqdm import tqdm
import pickle
from sklearn.preprocessing import RobustScaler, OneHotEncoder, MinMaxScaler
from sklearn.utils import shuffle
from sklearn.cross_validation import KFold, StratifiedKFold, cross_val_score, cross_val_predict, train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold as StratifiedKFoldIt
#from sklearn.grid_search import GridSearchCV, RandomizedSearchCV
from sklearn.feature_selection import RFECV, VarianceThreshold, SelectFromModel, SelectKBest
from sklearn.decomposition import PCA, IncrementalPCA, FactorAnalysis
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, AdaBoostClassifier, BaggingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV, SGDClassifier
from sklearn.metrics import precision_recall_curve, precision_score, recall_score, accuracy_score, classification_report, confusion_matrix, f1_score, log_loss
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN
from imblearn.under_sampling import RandomUnderSampler, ClusterCentroids, NearMiss, NeighbourhoodCleaningRule, OneSidedSelection
#import lightgbm as lgbm
from xgboost.sklearn import XGBClassifier
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
space4lr = {
    'C': hp.uniform('C', .0001, 100.0),
    'solver': hp.choice('solver', [
        {'solver': 'newton-cg'},
        {'solver': 'lbfgs'},
        {'solver': 'sag'},
        {'solver': 'liblinear', 'penalty': hp.choice('penalty', [
            {'penalty': 'l1'},
            {'penalty': 'l2', 'dual': hp.choice('dual', [True, False])}])},
    ]),
    'fit_intercept': hp.choice('fit_intercept', ['True', 'False']),
    'class_weight': hp.choice('class_weight', ['balanced', None]),
    'max_iter': 50000,
    'random_state': 84,
    'n_jobs': 8
}
lab = 0
results = pd.DataFrame()
for i in feature_elims:
    target = 'Binary_over_3'
    alt_targets = ['year2_PER', 'year2_GP', 'year2_Min', 'year2_EFF', 'year2_WS/40', 'year2_Pts/Poss', 'Round', 'GRZ_Pick',
                   'GRZ_Player_Rating', 'Binary_over_2', 'Binary_over_3', 'Binary_over_4', 'Binary_5', 'Draft_Strength']
    #alt_targets.remove(target)
    nondata_columns = ['display_name', 'player_global_id', 'season', 'season_', 'team_global_id', 'birth_date', 'Draft_Day']
    nondata_columns.extend(alt_targets)
    AGG_SET_CART_PERC = sqldf("""SELECT * FROM AGG_SET_PLAYED_ADJ_SOS_Jan1 t1
        LEFT JOIN RANKINGS t2 ON t1.[player_global_id] = t2.[player_global_id]
        LEFT JOIN Phys_Training t3 ON t1.[player_global_id] = t3.[player_global_id]""")
    AGG_SET_CART_PERC['HS_RSCI'] = AGG_SET_CART_PERC['HS_RSCI'].fillna(110)
    AGG_SET_CART_PERC['HS_Avg_Rank'] = AGG_SET_CART_PERC['HS_Avg_Rank'].fillna(1)
    AGG_SET_CART_PERC['HS_years_ranked'] = AGG_SET_CART_PERC['HS_years_ranked'].fillna(0)
    AGG_SET_CART_PERC = shuffle(AGG_SET_CART_PERC, random_state=8675309)

    rus = RandomUnderSampler(random_state=8675309)
    ros = RandomOverSampler(random_state=8675309)
    rs = RobustScaler()

    X = AGG_SET_CART_PERC
    y = X[target]
    X = pd.DataFrame(X.drop(nondata_columns, axis=1))
    position = pd.get_dummies(X['position'])
    for idx, row in position.iterrows():
        if row['F/C'] == 1:
            row['F'] = 1
            row['C'] = 1
        if row['G/F'] == 1:
            row['G'] = 1
            row['F'] = 1
    position = position.drop(['F/C', 'G/F'], axis=1)
    X = pd.concat([X, position], axis=1).drop(['position'], axis=1)
    X = rs.fit_transform(X, y=None)
    X = i.transform(X)
    def hyperopt_train_test(params):
        clf = LogisticRegression(**params)
        #cvs = cross_val_score(xgbc, X, y, scoring='recall', cv=skf).mean()
        skf = StratifiedKFold(y, n_folds=6, shuffle=False, random_state=1)
        metrics = []
        tuning_met = []
        accuracy = []
        precision = []
        recall = []
        f1 = []
        log = []
        for i, (train, test) in enumerate(skf):
            X_train = X[train]
            y_train = y[train]
            X_test = X[test]
            y_test = y[test]
            X_train, y_train = ros.fit_sample(X_train, y_train)
            X_train, y_train = rus.fit_sample(X_train, y_train)
            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)
            #tuning_met.append(precision_score(y_test, y_pred))
            tuning_met.append((((precision_score(y_test, y_pred)) * 4) + recall_score(y_test, y_pred)) / 5)
            accuracy.append(accuracy_score(y_test, y_pred))
            precision.append(precision_score(y_test, y_pred))
            recall.append(recall_score(y_test, y_pred))
            f1.append(f1_score(y_test, y_pred))
            log.append(log_loss(y_test, y_pred))
        metrics.append(sum(tuning_met) / len(tuning_met))
        metrics.append(sum(accuracy) / len(accuracy))
        metrics.append(sum(precision) / len(precision))
        metrics.append(sum(recall) / len(recall))
        metrics.append(sum(f1) / len(f1))
        metrics.append(sum(log) / len(log))
        return metrics
    best = 0
    count = 0

    def f(params):
        global best, count, results, lab, met
        met = hyperopt_train_test(params.copy())
        met.append(params)
        met.append(featureset_labels[lab])
        acc = met[0]
        results = results.append([met])
        if acc > best:
            print(featureset_labels[lab], 'new best:', acc, 'Accuracy:', met[1], 'Precision:', met[2], 'Recall:', met[3], 'using', params, """
            """)
            best = acc
            #if results.empty is False & results.count() >= lab:
            #    results.drop(results.index[lab])
            #results = results.append([met])
        else:
            print(acc, featureset_labels[lab], count)
        count = count + 1
        return {'loss': -acc, 'status': STATUS_OK}

    trials = Trials()
    best = fmin(f, space4lr, algo=tpe.suggest, max_evals=1000, trials=trials)
    print(featureset_labels[lab], ' best:')
    print(best, """
    """)
    lab = lab + 1
1 Answer
I'm answering rather late, but I ran into this question just yesterday. I think the code snippet below will help you.
space = hp.choice('classifier', [
    {
        'model': LogisticRegression,
        'param': {
            'hyper_param_groups': hp.choice('hyper_param_groups', [
                {
                    'penalty': hp.choice('penalty_block1', ['l2']),
                    'solver': hp.choice('solver_block1', ['newton-cg', 'sag', 'saga', 'lbfgs']),
                    'multi_class': hp.choice('multi_class', ['ovr', 'multinomial']),
                },
                {
                    'penalty': hp.choice('penalty_block2', ['l2']),
                    'solver': hp.choice('solver_block2', ['liblinear']),
                    'multi_class': hp.choice('multi_class_block2', ['ovr']),
                },
                {
                    'penalty': hp.choice('penalty_block3', ['l1']),
                    'solver': hp.choice('solver_block3', ['saga']),
                    'multi_class': hp.choice('multi_class_block3', ['ovr', 'multinomial']),
                },
            ]),
            'dual': hp.choice('dual', [False]),
            'class_weight': hp.choice('class_weight', ['balanced', None]),
            'random_state': hp.choice('random_state', [10, 267]),
            'max_iter': hp.choice('max_iter', [100, 500]),
            'verbose': hp.choice('verbose', [0])
        }
    }
])
And here is how to use it inside the callable (objective) method:
penalty = args['param']['hyper_param_groups']['penalty']
solver = args['param']['hyper_param_groups']['solver']
multi_class = args['param']['hyper_param_groups']['multi_class']
dual = args['param']['dual']
class_weight = args['param']['class_weight']
random_state = args['param']['random_state']
max_iter = args['param']['max_iter']
verbose = args['param']['verbose']
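For completeness, here is a minimal sketch of an objective function built around that space. The synthetic data from make_classification, the f1 scoring, the fold count, and the max_evals value are illustrative assumptions, not part of the original answer:

from hyperopt import fmin, tpe, Trials, STATUS_OK
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score

# Toy data purely for illustration.
X, y = make_classification(n_samples=500, n_features=20, random_state=0)

def objective(args):
    # Unpack the sampled hyper-parameter block into plain keyword arguments.
    hyper = args['param']['hyper_param_groups']
    clf = args['model'](
        penalty=hyper['penalty'],
        solver=hyper['solver'],
        multi_class=hyper['multi_class'],
        dual=args['param']['dual'],
        class_weight=args['param']['class_weight'],
        random_state=args['param']['random_state'],
        max_iter=args['param']['max_iter'],
        verbose=args['param']['verbose'],
    )
    score = cross_val_score(clf, X, y, scoring='f1', cv=5).mean()
    # hyperopt minimizes the loss, so negate the score.
    return {'loss': -score, 'status': STATUS_OK}

trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, max_evals=50, trials=trials)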