Проблема с простым конвейером Neuraxle (StandardScaler -> LinearSVC)

Я не могу понять, почему этот конвейер Neuraxle не работает.
Я просто хочу масштабировать данные и применить LinearSVC.
Что я делаю не так?

Вот что я пытаюсь сделать:

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

from neuraxle.hyperparams.distributions import RandInt
from neuraxle.hyperparams.space import HyperparameterSpace
from neuraxle.metaopt.auto_ml import AutoML, InMemoryHyperparamsRepository, \
    ValidationSplitter
from neuraxle.metaopt.callbacks import MetricCallback, ScoringCallback
from neuraxle.pipeline import Pipeline
from neuraxle.steps.sklearn import SKLearnWrapper, RidgeModelStacking

# Minimal reproduction: random integer data is pushed through a
# StandardScaler -> LinearSVC pipeline that is tuned by Neuraxle's AutoML.

# 100 samples x 3 integer features drawn from [0, 100).
DATA_INPUTS = np.random.randint(0, 100, (100, 3))
# 100 integer class labels drawn from {0, 1, 2}.
EXPECTED_OUTPUTS = np.random.randint(0, 3, 100)

# Two wrapped scikit-learn steps: standardize the features, then fit a linear
# SVM whose `C` is declared as a searchable integer hyperparameter.
p = Pipeline([
    SKLearnWrapper(StandardScaler()),
    SKLearnWrapper(LinearSVC(),
                   HyperparameterSpace({'C': RandInt(0, 10000)})),
])

# Hyperparameter search configuration: 10 trials of 10 epochs each, scored by
# mean squared error where a lower value is better.
# NOTE(review): ValidationSplitter(0.20) presumably holds out 20% of the
# samples for validation (the 80-element array in the traceback is consistent
# with that) - confirm against the Neuraxle docs.
# NOTE(review): mean_squared_error is used as the score although the targets
# are class labels - confirm this is the intended metric.
auto_ml = AutoML(
    p,
    validation_splitter=ValidationSplitter(0.20),
    refit_trial=True,
    n_trials=10,
    epochs=10,
    cache_folder_when_no_handle='cache',
    scoring_callback=ScoringCallback(mean_squared_error,
                                     higher_score_is_better=False),
    # Additionally report the same MSE under the name 'mse' for each epoch.
    callbacks=[MetricCallback('mse', metric_function=mean_squared_error,
                              higher_score_is_better=False)],
    # Trial results are kept by an in-memory repository.
    hyperparams_repository=InMemoryHyperparamsRepository(
        cache_folder='cache')
)

# This is the call that raises in the traceback quoted after the script.
random_search = auto_ml.fit(DATA_INPUTS, EXPECTED_OUTPUTS)

Вывод:

new trial:
{
    "SKLearnWrapper_LinearSVC": {
        "C": 7794
    }
}

trial 1/10
fitting trial 1/10 split 1/1
hyperparams: {
    "SKLearnWrapper_LinearSVC__C": 7794,
    "SKLearnWrapper_LinearSVC__class_weight": null,
    "SKLearnWrapper_LinearSVC__dual": true,
    "SKLearnWrapper_LinearSVC__fit_intercept": true,
    "SKLearnWrapper_LinearSVC__intercept_scaling": 1,
    "SKLearnWrapper_LinearSVC__loss": "squared_hinge",
    "SKLearnWrapper_LinearSVC__max_iter": 1000,
    "SKLearnWrapper_LinearSVC__multi_class": "ovr",
    "SKLearnWrapper_LinearSVC__penalty": "l2",
    "SKLearnWrapper_LinearSVC__random_state": null,
    "SKLearnWrapper_LinearSVC__tol": 0.0001,
    "SKLearnWrapper_LinearSVC__verbose": 0,
    "SKLearnWrapper_StandardScaler__copy": true,
    "SKLearnWrapper_StandardScaler__with_mean": true,
    "SKLearnWrapper_StandardScaler__with_std": true
}


epoch 1/10
main train: 1.475
main validation: 0.9
mse train: 1.475
mse validation: 0.9

epoch 2/10
<neuraxle.metaopt.trial.Trial object at 0x7f764b20e190>
Traceback (most recent call last):
  File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/auto_ml.py", line 660, in _fit_data_container
    repo_trial_split = self._execute_trial(
  File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/trial.py", line 243, in __exit__
    raise exc_val
  File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/auto_ml.py", line 660, in _fit_data_container
    repo_trial_split = self._execute_trial(
  File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/auto_ml.py", line 725, in _execute_trial
    self.print_func('success trial {} score: {}'.format(
  File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/trial.py", line 489, in __exit__
    raise exc_val
  File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/auto_ml.py", line 716, in _execute_trial
    repo_trial_split = self.trainer.fit_trial_split(
  File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/auto_ml.py", line 484, in fit_trial_split
    trial_split = trial_split.fit_trial_split(train_data_container, context)
  File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/trial.py", line 294, in fit_trial_split
    self.pipeline = self.pipeline.handle_fit(train_data_container, context)
  File "/home/alxkolm/projects/Neuraxle/neuraxle/base.py", line 983, in handle_fit
    new_self = self._fit_data_container(data_container, context)
  File "/home/alxkolm/projects/Neuraxle/neuraxle/pipeline.py", line 173, in _fit_data_container
    step, data_container = step.handle_fit_transform(data_container, context)
  File "/home/alxkolm/projects/Neuraxle/neuraxle/base.py", line 1002, in handle_fit_transform
    new_self, data_container = self._fit_transform_data_container(data_container, context)
  File "/home/alxkolm/projects/Neuraxle/neuraxle/base.py", line 1106, in _fit_transform_data_container
    new_self, out = self.fit_transform(data_container.data_inputs, data_container.expected_outputs)
  File "/home/alxkolm/projects/Neuraxle/neuraxle/steps/sklearn.py", line 60, in fit_transform
    out = self.wrapped_sklearn_predictor.fit_transform(data_inputs, expected_outputs)
  File "/home/alxkolm/projects/ttoy/.venv38/lib/python3.8/site-packages/sklearn/base.py", line 556, in fit_transform
    return self.fit(X, y, **fit_params).transform(X)
  File "/home/alxkolm/projects/ttoy/.venv38/lib/python3.8/site-packages/sklearn/preprocessing/data.py", line 639, in fit
    return self.partial_fit(X, y)
  File "/home/alxkolm/projects/ttoy/.venv38/lib/python3.8/site-packages/sklearn/preprocessing/data.py", line 661, in partial_fit
    X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy,
  File "/home/alxkolm/projects/ttoy/.venv38/lib/python3.8/site-packages/sklearn/utils/validation.py", line 517, in check_array
    raise ValueError(
ValueError: Expected 2D array, got 1D array instead:
array=[2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/alxkolm/projects/ttoy/trainbox/case.py", line 39, in <module>
    random_search = auto_ml.fit(DATA_INPUTS, EXPECTED_OUTPUTS)
  File "/home/alxkolm/projects/Neuraxle/neuraxle/base.py", line 3144, in fit
    new_self = self.handle_fit(data_container, context)
  File "/home/alxkolm/projects/Neuraxle/neuraxle/base.py", line 983, in handle_fit
    new_self = self._fit_data_container(data_container, context)
  File "/home/alxkolm/projects/Neuraxle/neuraxle/metaopt/auto_ml.py", line 674, in _fit_data_container
    self._get_trial_split_description(repo_trial, repo_trial_split, validation_splits, trial_number)))
UnboundLocalError: local variable 'repo_trial_split' referenced before assignment

1 ответ

Решение

Я исправил вашу проблему здесь: https://github.com/Neuraxio/Neuraxle/pull/333

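По сути, цикл AutoML использовал один и тот же объект DataContainer для каждой эпохи, а ваш конвейер изменял входные данные внутри этого объекта по ссылке. Поэтому во второй эпохе StandardScaler получал уже не исходную двумерную матрицу признаков, а одномерный массив — отсюда ошибка «Expected 2D array, got 1D array instead». Я добавил поверхностное копирование (shallow copy) контейнера перед каждой эпохой. Я проверил ваш код модульным тестом, и теперь он работает нормально.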

Другие вопросы по тегам