We consider semi-Markov control models with Borel state and action spaces, possibly unbounded costs, and holding times with a generalized exponential distribution with unknown mean θ. Assuming that such a distribution does not depend on the state-action pairs, we introduce a Bayesian estimation procedure for θ, which, combined with a variant of the vanishing discount factor approach, yields average cost optimal policies.
Journal article record. Names are in "Last, First" form, accents are brace-wrapped as BibTeX special characters, proper nouns in the title are case-protected, and the page range uses a double hyphen.
@article{bwmeta1.element.bwnjournal-article-doi-10_4064-am42-2-7,
  author   = {Minj{\'a}rez-Sosa, J. Adolfo and Montoya, Jos{\'e} A.},
  title    = {{Bayesian} estimation of the mean holding time in average semi-{Markov} control processes},
  journal  = {Applicationes Mathematicae},
  volume   = {42},
  year     = {2015},
  pages    = {205--218},
  zbl      = {1336.60173},
  language = {en},
  url      = {http://dml.mathdoc.fr/item/bwmeta1.element.bwnjournal-article-doi-10_4064-am42-2-7},
}
J. Adolfo Minjárez-Sosa; José A. Montoya. Bayesian estimation of the mean holding time in average semi-Markov control processes. Applicationes Mathematicae, Tome 42 (2015) pp. 205-218. http://gdmltest.u-ga.fr/item/bwmeta1.element.bwnjournal-article-doi-10_4064-am42-2-7/