Керас Ручная перекрестная проверка - точность не соответствует
Я бегу LSTM для классификации текста. Использование перекрестной проверки для получения ярлыков для каждого отзыва.
def k_fold_cross_validation(X, K):
for k in range(K):
training = [x for i, x in enumerate(X) if i % K != k]
validation = [x for i, x in enumerate(X) if i % K == k]
yield training, validation
data_x = k_fold_cross_validation(cleaned_reviews, K=10)
data_y = k_fold_cross_validation(ratings, K=10)
y_final = []
for i in range(10):
print('Model', i+1 ,'of 10')
X_train, X_test = next(data_x)
y_train, y_test = next(data_y)
max_features = 20000
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(list(X_train))
list_tokenized_train = tokenizer.texts_to_sequences(X_train)
list_tokenized_test = tokenizer.texts_to_sequences(X_test)
maxlen = 100
X_t = pad_sequences(list_tokenized_train, maxlen=maxlen)
X_te = pad_sequences(list_tokenized_test, maxlen=maxlen)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)
inp = Input(shape=(maxlen, ))
embed_size = 128
x = Embedding(max_features, embed_size)(inp)
x = LSTM(200, return_sequences=True,name='lstm_layer')(x)
x = GlobalMaxPool1D()(x)
x = Dropout(0.1)(x)
x = Dense(120, activation="relu")(x)
x = Dropout(0.1)(x)
x = Dense(60, activation="relu")(x)
x = Dropout(0.1)(x)
x = Dense(2, activation="sigmoid")(x)
model = Model(inputs=inp, outputs=x)
print("start compiling...")
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print("start fitting...")
model.fit(X_t,y_train, epochs=2, batch_size=32, validation_split=0.1)
print("start evaluating")
y_pred = model.predict(X_te, batch_size=1024)
y_classes = y_pred.argmax(axis=-1)
y_final += y_classes.tolist()
accScore = metrics.accuracy_score(y_test,y_classes)
lbl = [0,1]
precision = metrics.precision_score(y_test,y_classes,average=None,labels=lbl)
recall = metrics.recall_score(y_test,y_classes,average=None,labels=lbl)
f1Score = metrics.f1_score(y_test,y_classes,average=None,labels=lbl)
print("\nAcurracy: ",accScore,"\n")
for i in range(len(lbl)):
print("Precision of %s class: %f" %(lbl[i],precision[i]))
print("Recall of %s class: %f" %(lbl[i],recall[i]))
print("F1-Score of %s class: %f" %(lbl[i],f1Score[i]),"\n")
Все 10 моделей дают точность около 80%, но когда я пытаюсь объединить результаты всех этих моделей для проверки точности по всему набору данных, она падает до ~53%. Я думаю, что я делаю что-то не так, собирая все прогнозируемые метки в y_final, который используется для сравнения с начальными рейтингами для получения метрик производительности.
import copy
y_preds = copy.deepcopy(y_final)
accScore = metrics.accuracy_score(ratings, y_preds)
lbl = [0,1]
precision = metrics.precision_score(ratings, y_preds, average=None, labels=lbl)
recall = metrics.recall_score(ratings, y_preds, average=None, labels=lbl)
f1Score = metrics.f1_score(ratings, y_preds, average=None, labels=lbl)
print("\nOverall Acurracy: ",accScore,"\n")
for i in range(len(lbl)):
print("Precision of %s class: %f" %(lbl[i],precision[i]))
print("Recall of %s class: %f" %(lbl[i],recall[i]))
print("F1-Score of %s class: %f" %(lbl[i],f1Score[i]),"\n")
Какие-либо советы, предложения или исправления?