We express Gittins indices for multi-armed bandit problems as
Laurent expansions around discount factor 1. The coefficients of these
expansions are then used to characterize stationary optimal policies when the
optimality criteria are sensitive-discount optimality (otherwise known as
Blackwell optimality), average-reward optimality and average-overtaking
optimality. We also obtain bounds and derive optimality conditions for policies
of a type that continue playing the same bandit as long as the state of that
bandit remains in prescribed sets.
@article{1034968239,
author = {Katehakis, Michael N. and Rothblum, Uriel G.},
title = {Finite state multi-armed bandit problems: sensitive-discount,
average-reward and average-overtaking optimality},
journal = {Ann. Appl. Probab.},
volume = {6},
number = {1},
year = {1996},
pages = {1024-1034},
language = {en},
url = {http://dml.mathdoc.fr/item/1034968239}
}
Katehakis, Michael N.; Rothblum, Uriel G. Finite state multi-armed bandit problems: sensitive-discount,
average-reward and average-overtaking optimality. Ann. Appl. Probab. 6 (1996), no. 1, pp. 1024-1034. http://gdmltest.u-ga.fr/item/1034968239/