Ошибка памяти после сбоя в liblinear svm
После предупреждения о том, что liblinear не сошёлся, выводится приведённая ниже трассировка ошибки. Я пытаюсь понять, на что указывает эта ошибка и как от неё защититься.
SVM взят из scikit-learn; вот код, который его настраивает:
svc = LinearSVC(class_weight='balanced',verbose=1,max_iter=2000)
train_sizes, train_scores, valid_scores = learning_curve(svc,xtscale,np.ravel(ytran),
train_sizes=[10000,20000,30000],scoring=make_scorer(accuracy_score),n_jobs=4,verbose=2)
Трассировка ошибки:
.C:\Python27\lib\site-packages\sklearn\svm\base.py:924: ConvergenceWarning: Liblinear failed to conv
erge, increase the number of iterations.
"the number of iterations.", ConvergenceWarning)
............[CV] ................................ no parameters to be set -13.2min
.........Traceback (most recent call last):
File "C:\MachineLearning\SFCrime\crime.py", line 59, in <module>
train_sizes=[10000,20000,30000],scoring=make_scorer(accuracy_score),n_jobs=4,verbose=2)
File "C:\Python27\lib\site-packages\sklearn\learning_curve.py", line 153, in learning_curve
for train, test in cv for n_train_samples in train_sizes_abs)
File "C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.py", line 812, in __call__
self.retrieve()
File "C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.py", line 762, in retrieve
raise exception
sklearn.externals.joblib.my_exceptions.JoblibMemoryError: JoblibMemoryError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
C:\MachineLearning\SFCrime\crime.py in <module>()
54 # param_grid = {'C': [0.5, 1, 10]}
55 # gs = grid_search.GridSearchCV(svc, param_grid,n_jobs=4,verbose=1)
56 # gs.fit(testData[:,0:-2],np.ravel(testData[:,-1]))
57 #print gs.best_estimator_
58 train_sizes, train_scores, valid_scores = learning_curve(svc,xtscale,np.ravel(ytran)
,
---> 59 train_sizes=[10000,20000,30000],scoring=make_scorer(accuracy_score),n_jobs=4
,verbose=2)
60 #svc.fit(testData[:,0:7],np.ravel(testData[:,7]))
61 #valData = xydecider[np.random.randint(0,xydecider.shape[0],10000)]
62 #print svc.predict(xtransf.transform(np.matrix([2015,7,14,8,35, -122.3935620,37.7782
485])))
63
...........................................................................
C:\Python27\lib\site-packages\sklearn\learning_curve.py in learning_curve(estimator=LinearSVC(C=1.0,
class_weight='balanced', dual=T...', random_state=None, tol=0.0001,
verbose=1), X=array([[ 0.00000000e+00, 1.73165030e+00, -4....603501e-02, 3.21082024e-02,
-1.99147226e-02]]), y=array([ 1, 2, 2, ..., 3, 5, 13]), train_sizes=[10000, 20000, 30000], cv=[
(array([107733, 192190, 212425, ..., 878046, 878047, 878048]), array([ 0, 1, 2, ..., 4
65259, 466404, 486091])), (array([ 0, 1, 2, ..., 878046, 878047, 878048]), array([1077
33, 192190, 212425, ..., 718561, 718572, 718573])), (array([ 0, 1, 2, ..., 718561, 718
572, 718573]), array([316490, 337880, 481804, ..., 878046, 878047, 878048]))], scoring=make_scorer(a
ccuracy_score), exploit_incremental_learning=False, n_jobs=4, pre_dispatch='all', verbose=2)
148 scorer, verbose) for train, test in cv)
149 else:
150 out = parallel(delayed(_fit_and_score)(
151 clone(estimator), X, y, scorer, train[:n_train_samples], test,
152 verbose, parameters=None, fit_params=None, return_train_score=True)
--> 153 for train, test in cv for n_train_samples in train_sizes_abs)
cv = [(array([107733, 192190, 212425, ..., 878046, 878047, 878048]), array([ 0, 1,
2, ..., 465259, 466404, 486091])), (array([ 0, 1, 2, ..., 878046, 878047, 878048]
), array([107733, 192190, 212425, ..., 718561, 718572, 718573])), (array([ 0, 1, 2, ..
., 718561, 718572, 718573]), array([316490, 337880, 481804, ..., 878046, 878047, 878048]))]
154 out = np.array(out)[:, :2]
155 n_cv_folds = out.shape[0] // n_unique_ticks
156 out = out.reshape(n_cv_folds, n_unique_ticks, 2)
157
...........................................................................
C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=
4), iterable=<generator object <genexpr>>)
807 if pre_dispatch == "all" or n_jobs == 1:
808 # The iterable was consumed all at once by the above for loop.
809 # No need to wait for async callbacks to trigger to
810 # consumption.
811 self._iterating = False
--> 812 self.retrieve()
self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=4)>
813 # Make sure that we get a last message telling us we are done
814 elapsed_time = time.time() - self._start_time
815 self._print('Done %3i out of %3i | elapsed: %s finished',
816 (len(self._output), len(self._output),
---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
MemoryError Wed Jan 13 12:56:08 2016
PID: 5784 Python 2.7.10: C:\Python27\python.exe
...........................................................................
C:\Python27\lib\site-packages\sklearn\externals\joblib\parallel.pyc in __call__(self=<sklearn.extern
als.joblib.parallel.BatchedCalls object>)
67 def __init__(self, iterator_slice):
68 self.items = list(iterator_slice)
69 self._size = len(self.items)
70
71 def __call__(self):
---> 72 return [func(*args, **kwargs) for func, args, kwargs in self.items]
73
74 def __len__(self):
75 return self._size
76
...........................................................................
C:\Python27\lib\site-packages\sklearn\cross_validation.pyc in _fit_and_score(estimator=LinearSVC(C=1
.0, class_weight='balanced', dual=T...', random_state=None, tol=0.0001,
verbose=1), X=memmap([[ 0.00000000e+00, 1.73165030e+00, -4...603501e-02, 3.21082024e-02,
-1.99147226e-02]]), y=memmap([ 1, 2, 2, ..., 3, 5, 13]), scorer=make_scorer(accuracy_score), tr
ain=array([107733, 192190, 212425, ..., 309456, 309457, 309460]), test=memmap([ 0, 1,
2, ..., 465259, 466404, 486091]), verbose=2, parameters=None, fit_params={}, return_train_score=True
, return_parameters=False, error_score='raise')
1519 if parameters is not None:
1520 estimator.set_params(**parameters)
1521
1522 start_time = time.time()
1523
-> 1524 X_train, y_train = _safe_split(estimator, X, y, train)
1525 X_test, y_test = _safe_split(estimator, X, y, test, train)
1526
1527 try:
1528 if y_train is None:
...........................................................................
C:\Python27\lib\site-packages\sklearn\cross_validation.pyc in _safe_split(estimator=LinearSVC(C=1.0,
class_weight='balanced', dual=T...', random_state=None, tol=0.0001,
verbose=1), X=memmap([[ 0.00000000e+00, 1.73165030e+00, -4...603501e-02, 3.21082024e-02,
-1.99147226e-02]]), y=memmap([ 1, 2, 2, ..., 3, 5, 13]), indices=array([107733, 192190, 212425,
..., 309456, 309457, 309460]), train_indices=None)
1586 if train_indices is None:
1587 X_subset = X[np.ix_(indices, indices)]
1588 else:
1589 X_subset = X[np.ix_(indices, train_indices)]
1590 else:
-> 1591 X_subset = safe_indexing(X, indices)
1592
1593 if y is not None:
1594 y_subset = safe_indexing(y, indices)
1595 else:
...........................................................................
C:\Python27\lib\site-packages\sklearn\utils\__init__.pyc in safe_indexing(X=memmap([[ 0.00000000e+0
0, 1.73165030e+00, -4...603501e-02, 3.21082024e-02, -1.99147226e-02]]), indices=array([107733,
192190, 212425, ..., 309456, 309457, 309460]))
158 return X.copy().iloc[indices]
159 elif hasattr(X, "shape"):
160 if hasattr(X, 'take') and (hasattr(indices, 'dtype') and
161 indices.dtype.kind == 'i'):
162 # This is often substantially faster than X[indices]
--> 163 return X.take(indices, axis=0)
164 else:
165 return X[indices]
166 else:
167 return [X[idx] for idx in indices]
MemoryError:
______________
_____________________________________________________________
1 ответ
Попробуйте увеличить число итераций (параметр max_iter). Возможно, SVM требуется больше итераций для сходимости, чем вы разрешили. Кроме того, если liblinear не может сойтись на ваших данных, я бы попробовал использовать другой алгоритм — sklearn.svm.SVC.