Проблема прогнозирования с использованием байесовской модели на Python
Я использую простой набор данных «Титаник», чтобы с помощью байесовской сети предсказывать выживание пассажиров (столбец Survived). Структуру сети мне удаётся получить с помощью обучения структуры, но когда я передаю тестовый набор данных в метод predict байесовской модели, возникает ошибка KeyError (ошибка ключа словаря), хотя, как мне кажется, данные я передаю корректные. См. документацию по байесовской модели: https://pgmpy.org/_modules/pgmpy/models/BayesianModel.html
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD, DiscreteFactor
from pgmpy.inference import BeliefPropagation
from pgmpy.inference import VariableElimination
from pgmpy.estimators import MaximumLikelihoodEstimator,BayesianEstimator,ConstraintBasedEstimator,HillClimbSearch, BicScore,K2Score,ExhaustiveSearch
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
# Directed edges of the Bayesian network, written as (parent, child) pairs.
# 'Survived' is the node to be predicted; its parents are 'Pclass' and 'Sex'.
BN_Model = BayesianModel([
    ('Embarked', 'Fare'),
    ('Fare', 'Pclass'),
    ('Parch', 'Age'),
    ('Parch', 'Fare'),
    ('Parch', 'SibSp'),
    ('Parch', 'Sex'),
    ('Pclass', 'Survived'),
    ('Pclass', 'Age'),
    ('Sex', 'Survived'),
    ('SibSp', 'Fare'),
    ('SibSp', 'Sex'),
])

# Draw the network structure for visual inspection.
nx.draw_networkx(BN_Model, with_labels=True)
plt.show()

# Estimate the CPD of every node from the training frame
# (`train` and `test` are DataFrames loaded elsewhere).
BN_Model.fit(train, estimator=MaximumLikelihoodEstimator)

# Impute missing fares in the test frame with the test-set mean.
test['Fare'] = test['Fare'].fillna(test['Fare'].mean())

# NOTE: this is the call that fails with the KeyError shown in the traceback.
# During inference each evidence value is looked up among the state names the
# model holds for that variable; 'Fare' is continuous, so a test value such as
# 7.8292 that is evidently absent from those states cannot be resolved.
# Binning 'Fare' with the same bin edges in `train` and `test` before
# fit()/predict() would avoid the lookup failure.
x = BN_Model.predict(test[['Embarked', 'Fare', 'Parch', 'Pclass', 'Sex', 'SibSp']])
---------------------------------------------------------------------------
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
"""
Traceback (most recent call last):
File "/opt/conda/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py", line 418, in _process_worker
r = call_item()
File "/opt/conda/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py", line 272, in __call__
return self.fn(*self.args, **self.kwargs)
File "/opt/conda/lib/python3.6/site-packages/joblib/_parallel_backends.py", line 608, in __call__
return self.func(*args, **kwargs)
File "/opt/conda/lib/python3.6/site-packages/joblib/parallel.py", line 256, in __call__
for func, args, kwargs in self.items]
File "/opt/conda/lib/python3.6/site-packages/joblib/parallel.py", line 256, in <listcomp>
for func, args, kwargs in self.items]
File "/opt/conda/lib/python3.6/site-packages/pgmpy/inference/ExactInference.py", line 370, in map_query
show_progress=show_progress,
File "/opt/conda/lib/python3.6/site-packages/pgmpy/inference/ExactInference.py", line 157, in _variable_elimination
working_factors = self._get_working_factors(evidence)
File "/opt/conda/lib/python3.6/site-packages/pgmpy/inference/ExactInference.py", line 44, in _get_working_factors
[(evidence_var, evidence[evidence_var])], inplace=False
File "/opt/conda/lib/python3.6/site-packages/pgmpy/factors/discrete/DiscreteFactor.py", line 428, in reduce
(var, self.get_state_no(var, state_name)) for var, state_name in values
File "/opt/conda/lib/python3.6/site-packages/pgmpy/factors/discrete/DiscreteFactor.py", line 428, in <listcomp>
(var, self.get_state_no(var, state_name)) for var, state_name in values
File "/opt/conda/lib/python3.6/site-packages/pgmpy/utils/state_name.py", line 74, in get_state_no
return self.name_to_no[var][state_name]
KeyError: 7.8292
"""
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-105-37e427dce88d> in <module>
----> 1 x=BN_Model.predict(test[['Embarked','Fare' ,'Parch', 'Pclass', 'Sex', 'SibSp']])
2
3
/opt/conda/lib/python3.6/site-packages/pgmpy/models/BayesianModel.py in predict(self, data, n_jobs)
592 )
593 for index, data_point in tqdm(
--> 594 data_unique.iterrows(), total=data_unique.shape[0]
595 )
596 )
/opt/conda/lib/python3.6/site-packages/joblib/parallel.py in __call__(self, iterable)
1015
1016 with self._backend.retrieval_context():
-> 1017 self.retrieve()
1018 # Make sure that we get a last message telling us we are done
1019 elapsed_time = time.time() - self._start_time
/opt/conda/lib/python3.6/site-packages/joblib/parallel.py in retrieve(self)
907 try:
908 if getattr(self._backend, 'supports_timeout', False):
--> 909 self._output.extend(job.get(timeout=self.timeout))
910 else:
911 self._output.extend(job.get())
/opt/conda/lib/python3.6/site-packages/joblib/_parallel_backends.py in wrap_future_result(future, timeout)
560 AsyncResults.get from multiprocessing."""
561 try:
--> 562 return future.result(timeout=timeout)
563 except LokyTimeoutError:
564 raise TimeoutError()
/opt/conda/lib/python3.6/concurrent/futures/_base.py in result(self, timeout)
430 raise CancelledError()
431 elif self._state == FINISHED:
--> 432 return self.__get_result()
433 else:
434 raise TimeoutError()
/opt/conda/lib/python3.6/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
KeyError: 7.8292
'''
2 ответа
Для задач предсказания лучше использовать библиотеку scikit-learn (sklearn). Хотя pgmpy и реализует байесовские методы, она предназначена для других целей, нежели описанная вами задача.
Для предсказания я бы использовал следующие библиотеки:
# The PyPI distribution is named "scikit-learn"; "sklearn" is only the import name.
pip install scikit-learn
pip install df2onehot
pip install classeval
Пример построения прогнозов:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

import classeval
import df2onehot

# Load the Titanic example frame and split the target column off it
# (pop returns the 'Survived' column and removes it from X in one step).
X = df2onehot.import_example()
y = X.pop('Survived')

# One-hot encode the remaining columns; y_min=2 is forwarded to df2onehot
# and only the 'onehot' entry of its result is kept.
encoded = df2onehot.df2onehot(X, y_min=2)
X = encoded['onehot']

# Hold out part of the data for evaluation (default split proportions).
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Multinomial Naive Bayes is used because every feature is one-hot by now.
model = MultinomialNB()
model.fit(X_train, y_train)

# Hard class labels and per-class probabilities for the held-out rows.
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)

# Evaluate and plot the results.
# NOTE(review): column 0 of predict_proba belongs to the first entry of
# model.classes_ - confirm this is the column classeval.eval expects.
y_true_bool = y_test.values.astype(bool)
y_pred_bool = y_pred.astype(bool)
results = classeval.eval(y_true_bool, y_pred_bool, y_proba[:,0])
classeval.plot(results)
Однако раз вы упоминаете и вероятностный вывод (inference),
попробуйте использовать для него bnlearn.
Установка в отдельной среде conda:
# Create a dedicated conda environment named BNLEARN with Python 3.6 and switch to it.
conda create -n BNLEARN python=3.6
conda activate BNLEARN
# Install pgmpy from the "ankurankan" conda channel.
conda install -c ankurankan pgmpy
# Leave and re-enter the environment
# (presumably so the fresh install is picked up - confirm whether this is required).
conda deactivate
conda activate BNLEARN
# Install bnlearn itself with pip inside the activated environment.
pip install bnlearn
Теперь можно выполнить вероятностный вывод о выживании следующим образом:
import bnlearn

# Load the Titanic example dataset, which contains mixed variable types.
df_raw = bnlearn.import_example(data='titanic')

# Pre-process the raw frame; df2onehot returns two frames
# (presumably a one-hot and a numerically encoded version - see the bnlearn docs).
dfhot, dfnum = bnlearn.df2onehot(df_raw)

# Structure learning: derive the DAG from the encoded frame.
DAG = bnlearn.structure_learning.fit(dfnum)

# Plot the learned structure.
G = bnlearn.plot(DAG)

# Parameter learning: fit the CPDs on `dfnum`, the same encoded frame used
# for structure learning (no plain `df` exists in this script).
model = bnlearn.parameter_learning.fit(DAG, dfnum)

# Print the learned CPDs.
bnlearn.print_CPD(model)

# Inference: query 'Survived' given the evidence Sex=0 and Pclass=1.
q = bnlearn.inference.fit(model, variables=['Survived'], evidence={'Sex':0, 'Pclass':1})
print(q.values)
print(q.variables)
Более подробную информацию можно найти здесь: https://erdogant.github.io/bnlearn.