Проблема с простым конвейером Neuraxle (StandardScaler -> LinearSVC)
Я не могу понять, почему этот конвейер Neuraxle не работает.
Я просто хочу масштабировать данные и применить LinearSVC.
Что я делаю не так?
Вот что я пытаюсь сделать:
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from neuraxle.hyperparams.distributions import RandInt
from neuraxle.hyperparams.space import HyperparameterSpace
from neuraxle.metaopt.auto_ml import AutoML, InMemoryHyperparamsRepository, \
ValidationSplitter
from neuraxle.metaopt.callbacks import MetricCallback, ScoringCallback
from neuraxle.pipeline import Pipeline
from neuraxle.steps.sklearn import SKLearnWrapper, RidgeModelStacking
# Minimal reproduction: AutoML search over a StandardScaler -> LinearSVC pipeline.
# Synthetic inputs: 100 samples with 3 integer features drawn from [0, 100).
DATA_INPUTS = np.random.randint(0, 100, (100, 3))
# One integer label per sample, drawn from {0, 1, 2}.
EXPECTED_OUTPUTS = np.random.randint(0, 3, 100)
# Two wrapped scikit-learn steps: scale the inputs, then fit the linear SVM.
p = Pipeline([
SKLearnWrapper(StandardScaler()),
# Only the SVM's C is given a search space; it is sampled by RandInt(0, 10000).
SKLearnWrapper(LinearSVC(),
HyperparameterSpace({'C': RandInt(0, 10000)})),
])
# Hyperparameter search: 10 trials of 10 epochs each over the pipeline above.
auto_ml = AutoML(
p,
# NOTE(review): presumably holds out 20% of the data for validation -- confirm
# against the ValidationSplitter documentation.
validation_splitter=ValidationSplitter(0.20),
refit_trial=True,
n_trials=10,
epochs=10,
cache_folder_when_no_handle='cache',
# Main score is mean squared error (lower is better), even though the labels
# are class indices.
scoring_callback=ScoringCallback(mean_squared_error,
higher_score_is_better=False),
# The same metric is also reported under the name 'mse' (see the
# "mse train" / "mse validation" lines in the output).
callbacks=[MetricCallback('mse', metric_function=mean_squared_error,
higher_score_is_better=False)],
hyperparams_repository=InMemoryHyperparamsRepository(
cache_folder='cache')
)
# This call raises during epoch 2 of the first trial: StandardScaler receives a
# 1D array instead of the original 2D inputs (see the traceback below).
random_search = auto_ml.fit(DATA_INPUTS, EXPECTED_OUTPUTS)
Вывод:
new trial:
{
"SKLearnWrapper_LinearSVC": {
"C": 7794
}
}
trial 1/10
fitting trial 1/10 split 1/1
hyperparams: {
"SKLearnWrapper_LinearSVC__C": 7794,
"SKLearnWrapper_LinearSVC__class_weight": null,
"SKLearnWrapper_LinearSVC__dual": true,
"SKLearnWrapper_LinearSVC__fit_intercept": true,
"SKLearnWrapper_LinearSVC__intercept_scaling": 1,
"SKLearnWrapper_LinearSVC__loss": "squared_hinge",
"SKLearnWrapper_LinearSVC__max_iter": 1000,
"SKLearnWrapper_LinearSVC__multi_class": "ovr",
"SKLearnWrapper_LinearSVC__penalty": "l2",
"SKLearnWrapper_LinearSVC__random_state": null,
"SKLearnWrapper_LinearSVC__tol": 0.0001,
"SKLearnWrapper_LinearSVC__verbose": 0,
"SKLearnWrapper_StandardScaler__copy": true,
"SKLearnWrapper_StandardScaler__with_mean": true,
"SKLearnWrapper_StandardScaler__with_std": true
}
epoch 1/10
main train: 1.475
main validation: 0.9
mse train: 1.475
mse validation: 0.9
epoch 2/10
<neuraxle.metaopt.trial.Trial object at 0x7f764b20e190>
Traceback (most recent call last):
File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/auto_ml.py", line 660, in _fit_data_container
repo_trial_split = self._execute_trial(
File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/trial.py", line 243, in __exit__
raise exc_val
File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/auto_ml.py", line 660, in _fit_data_container
repo_trial_split = self._execute_trial(
File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/auto_ml.py", line 725, in _execute_trial
self.print_func('success trial {} score: {}'.format(
File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/trial.py", line 489, in __exit__
raise exc_val
File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/auto_ml.py", line 716, in _execute_trial
repo_trial_split = self.trainer.fit_trial_split(
File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/auto_ml.py", line 484, in fit_trial_split
trial_split = trial_split.fit_trial_split(train_data_container, context)
File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/trial.py", line 294, in fit_trial_split
self.pipeline = self.pipeline.handle_fit(train_data_container, context)
File "/home/alxkolm/projects/Neuraxle/neuraxle/base.py", line 983, in handle_fit
new_self = self._fit_data_container(data_container, context)
File "/home/alxkolm/projects/Neuraxle/neuraxle/pipeline.py", line 173, in _fit_data_container
step, data_container = step.handle_fit_transform(data_container, context)
File "/home/alxkolm/projects/Neuraxle/neuraxle/base.py", line 1002, in handle_fit_transform
new_self, data_container = self._fit_transform_data_container(data_container, context)
File "/home/alxkolm/projects/Neuraxle/neuraxle/base.py", line 1106, in _fit_transform_data_container
new_self, out = self.fit_transform(data_container.data_inputs, data_container.expected_outputs)
File "/home/alxkolm/projects/Neuraxle/neuraxle/steps/sklearn.py", line 60, in fit_transform
out = self.wrapped_sklearn_predictor.fit_transform(data_inputs, expected_outputs)
File "/home/alxkolm/projects/ttoy/.venv38/lib/python3.8/site-packages/sklearn/base.py", line 556, in fit_transform
return self.fit(X, y, **fit_params).transform(X)
File "/home/alxkolm/projects/ttoy/.venv38/lib/python3.8/site-packages/sklearn/preprocessing/data.py", line 639, in fit
return self.partial_fit(X, y)
File "/home/alxkolm/projects/ttoy/.venv38/lib/python3.8/site-packages/sklearn/preprocessing/data.py", line 661, in partial_fit
X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
File "/home/alxkolm/projects/ttoy/.venv38/lib/python3.8/site-packages/sklearn/utils/validation.py", line 517, in check_array
raise ValueError(
ValueError: Expected 2D array, got 1D array instead:
array=[2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
2. 2. 2. 2. 2. 2. 2. 2.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/alxkolm/projects/ttoy/trainbox/case.py", line 39, in <module>
random_search = auto_ml.fit(DATA_INPUTS, EXPECTED_OUTPUTS)
File "/home/alxkolm/projects/Neuraxle/neuraxle/base.py", line 3144, in fit
new_self = self.handle_fit(data_container, context)
File "/home/alxkolm/projects/Neuraxle/neuraxle/base.py", line 983, in handle_fit
new_self = self._fit_data_container(data_container, context)
File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/auto_ml.py", line 674, in _fit_data_container
self._get_trial_split_description(repo_trial, repo_trial_split, validation_splits, trial_number)))
UnboundLocalError: local variable 'repo_trial_split' referenced before assignment
1 ответ
Решение
Я исправил вашу проблему здесь: https://github.com/Neuraxio/Neuraxle/pull/333
По сути, цикл AutoML использовал один и тот же объект DataContainer для каждой эпохи, а ваш конвейер изменял входные данные внутри этого общего объекта. Я добавил поверхностное копирование (shallow copy) контейнера данных перед каждой эпохой. Я проверил ваш код модульным тестом, и теперь он работает нормально.