Titanic Dataset / TensorFlow input_fn() error and setup

I am trying my best to implement a DNN with TensorFlow. I was able to load the files and parse them so that all of the data in the Pandas DataFrame is non-categorical; however, when I tried to feed my data into the TensorFlow DNN, I was not sure how to move forward. Any help is appreciated.

#==============================================================================
# 
# Import statements
# 
#==============================================================================
import tensorflow as tf
import pandas as pd
from pandas import Series, DataFrame
from sklearn.preprocessing import LabelEncoder

import tempfile


Columns = ['PassengerId', 'Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Embarked']
#==============================================================================
# 
# read data from given file and return a pandas dataset
# 
#==============================================================================
def readData(FileName):
    titanic_df = pd.read_csv(FileName)
    titanic_df = titanic_df.dropna()
    return titanic_df



def processData(data):
    gender = LabelEncoder()
    gender.fit(['male', 'female'])
    data["Sex"]= gender.transform(data["Sex"])

    Embarked = LabelEncoder()
    Embarked.fit(['S', 'C','Q'])
    data["Embarked"] = Embarked.transform(data["Embarked"])


    return data

def input_fn(data): 
    Continuous_cols = {k: tf.constant(data[k].values) for k in Columns}
    label = tf.constant(data['Survived'].values)
    return dict(Continuous_cols.items()),label


def train_input_fn(train_data):
    return input_fn(train_data)

def eval_input_fn(test_data):
    return input_fn(test_data)    




def main():
    test_data = readData('test.csv')
    train_data = readData('train.csv')
    Col_To_Drop =['Name','Ticket','Cabin','Fare']  
    for col in Col_To_Drop:
        test_data = test_data.drop(col,1)
        train_data = train_data.drop(col,1)


    test_data = processData(test_data)
    train_data = processData(train_data)

    Columns = list(train_data.columns.values)
    print(Columns)
    print(test_data.info())
    print(train_data.info())


    print(train_data.head())

    model_dir = tempfile.mkdtemp()
    features = []
    for c in Columns:
        features.append(tf.contrib.layers.real_valued_column(str(c)))


    m = tf.contrib.learn.LinearClassifier(feature_columns=features,
        model_dir=model_dir)
    m.fit(input_fn=lambda: train_input_fn(train_data), steps=200)
    results = m.evaluate(input_fn=lambda: eval_input_fn(test_data), steps=1)
    for key in sorted(results):
        print("%s: %s" % (key, results[key]))



if __name__ == "__main__":
    main()

------------------------Output---------------------


['PassengerId', 'Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Embarked']
<class 'pandas.core.frame.DataFrame'>
Int64Index: 87 entries, 12 to 414
Data columns (total 7 columns):
PassengerId    87 non-null int64
Pclass         87 non-null int64
Sex            87 non-null int64
Age            87 non-null float64
SibSp          87 non-null int64
Parch          87 non-null int64
Embarked       87 non-null int64
dtypes: float64(1), int64(6)
memory usage: 5.4 KB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 183 entries, 1 to 889
Data columns (total 8 columns):
PassengerId    183 non-null int64
Survived       183 non-null int64
Pclass         183 non-null int64
Sex            183 non-null int64
Age            183 non-null float64
SibSp          183 non-null int64
Parch          183 non-null int64
Embarked       183 non-null int64
dtypes: float64(1), int64(7)
memory usage: 12.9 KB
None
    PassengerId  Survived  Pclass  Sex   Age  SibSp  Parch  Embarked
1             2         1       1    0  38.0      1      0         0
3             4         1       1    0  35.0      1      0         2
6             7         0       1    1  54.0      0      0         2
10           11         1       3    0   4.0      1      1         2
11           12         1       1    0  58.0      0      0         2
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:From C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\head.py:615: scalar_summary (from tensorflow.python.ops.logging_ops) is deprecated and will be removed after 2016-11-30.
Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
2017-07-23 17:48:29.224365: W d:\nwani\l\tensorflow_1498062690615\work\tensorflow-1.1.0\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE instructions, but these are available on your machine and could speed up CPU computations.
2017-07-23 17:48:29.224819: W d:\nwani\l\tensorflow_1498062690615\work\tensorflow-1.1.0\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE2 instructions, but these are available on your machine and could speed up CPU computations.
2017-07-23 17:48:29.225278: W d:\nwani\l\tensorflow_1498062690615\work\tensorflow-1.1.0\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE3 instructions, but these are available on your machine and could speed up CPU computations.
2017-07-23 17:48:29.225877: W d:\nwani\l\tensorflow_1498062690615\work\tensorflow-1.1.0\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations.
2017-07-23 17:48:29.226233: W d:\nwani\l\tensorflow_1498062690615\work\tensorflow-1.1.0\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.
2017-07-23 17:48:29.226681: W d:\nwani\l\tensorflow_1498062690615\work\tensorflow-1.1.0\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.
2017-07-23 17:48:29.227119: W d:\nwani\l\tensorflow_1498062690615\work\tensorflow-1.1.0\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations.
2017-07-23 17:48:29.227623: W d:\nwani\l\tensorflow_1498062690615\work\tensorflow-1.1.0\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speed up CPU computations.







Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2393, in get_loc
    return self._engine.get_loc(key)
  File "pandas\_libs\index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5239)
  File "pandas\_libs\index.pyx", line 154, in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5085)
  File "pandas\_libs\hashtable_class_helper.pxi", line 1207, in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20405)
  File "pandas\_libs\hashtable_class_helper.pxi", line 1215, in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20359)
KeyError: 'Survived'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\jamil\Desktop\Code\Python\TensorFlow\titanic\titanic.py", line 90, in <module>
    main()
  File "C:\Users\jamil\Desktop\Code\Python\TensorFlow\titanic\titanic.py", line 83, in main
    results = m.evaluate(input_fn=lambda: eval_input_fn(test_data), steps=1)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 281, in new_func
    return func(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py", line 518, in evaluate
    log_progress=log_progress)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py", line 801, in _evaluate_model
    features, labels = input_fn()
  File "C:\Users\jamil\Desktop\Code\Python\TensorFlow\titanic\titanic.py", line 83, in <lambda>
    results = m.evaluate(input_fn=lambda: eval_input_fn(test_data), steps=1)
  File "C:\Users\jamil\Desktop\Code\Python\TensorFlow\titanic\titanic.py", line 49, in eval_input_fn
    return input_fn(test_data)
  File "C:\Users\jamil\Desktop\Code\Python\TensorFlow\titanic\titanic.py", line 40, in input_fn
    Continuous_cols = {k: tf.constant(data[k].values) for k in Columns}
  File "C:\Users\jamil\Desktop\Code\Python\TensorFlow\titanic\titanic.py", line 40, in <dictcomp>
    Continuous_cols = {k: tf.constant(data[k].values) for k in Columns}
  File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2062, in __getitem__
    return self._getitem_column(key)
  File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2069, in _getitem_column
    return self._get_item_cache(key)
  File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py", line 1534, in _get_item_cache
    values = self._data.get(item)
  File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals.py", line 3590, in get
    loc = self.items.get_loc(item)
  File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2395, in get_loc
    return self._engine.get_loc(self._maybe_cast_indexer(key))
  File "pandas\_libs\index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5239)
  File "pandas\_libs\index.pyx", line 154, in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5085)
  File "pandas\_libs\hashtable_class_helper.pxi", line 1207, in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20405)
  File "pandas\_libs\hashtable_class_helper.pxi", line 1215, in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20359)
KeyError: 'Survived'

1 Answer

The problem comes from these lines:

m.fit(input_fn=train_input_fn(train_data), steps=200)
results = m.evaluate(input_fn=eval_input_fn(test_data), steps=1)

The input_fn argument is expected to be a function that takes no arguments and returns a tuple, whereas you are calling the train_input_fn() / eval_input_fn() functions and passing in their return values (tuples).

The simplest way to fix this is to wrap train_input_fn(train_data) and eval_input_fn(test_data) in lambda expressions, as follows:

m.fit(input_fn=lambda: train_input_fn(train_data), steps=200)
results = m.evaluate(input_fn=lambda: eval_input_fn(test_data), steps=1)

The expression lambda: train_input_fn(train_data) is a function that takes no arguments and, when called, returns the value of train_input_fn(train_data), which is a tuple.
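
If you prefer not to use lambda, functools.partial from the Python standard library builds the same kind of zero-argument callable. A minimal sketch, assuming the same train_input_fn / eval_input_fn and DataFrames as above:

import functools

# functools.partial(f, x) returns a callable that, when invoked with no
# arguments, calls f(x) -- which is exactly the contract input_fn expects.
m.fit(input_fn=functools.partial(train_input_fn, train_data), steps=200)
results = m.evaluate(input_fn=functools.partial(eval_input_fn, test_data), steps=1)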
