@article(lfd:argall:2009, author = {B. D. Argall and S. Chernova and M. Veloso and B. Browning}, year = {2009}, title = {A survey of robot learning from demonstration}, journal = {Robotics and Autonomous Systems}, volume = {57}, number = {5}, pages = {469--483}, doi = {10.1016/j.robot.2008.10.024})
@article(barto-sm:hrl, author = {A. Barto and S. Mahadevan}, year = {2003}, title = {Recent Advances in Hierarchical Reinforcement Learning}, journal = {Discrete Event Dynamic Systems}, volume = {13}, pages = {41--77}, doi = {10.1023/A:1022140919877})
@article(nac:Bhatnagar09, author = {S. Bhatnagar and R. Sutton and M. Ghavamzadeh and M. Lee}, year = {2009}, title = {Natural Actor-Critic Algorithms}, journal = {Automatica}, volume = {45}, number = {11}, pages = {2471--2482}, doi = {10.1016/j.automatica.2009.07.008})
@incollection(cim08, author = {A. Cimatti and M. Pistore and P. Traverso}, year = {2008}, title = {Automated planning}, editor = {F. van Harmelen and V. Lifschitz and B. Porter}, booktitle = {Handbook of Knowledge Representation}, publisher = {Elsevier}, doi = {10.1016/S1574-6526(07)03022-2})
@phdthesis(erdo08, author = {S. T. Erdo\u{g}an}, year = {2008}, title = {A Library of General-Purpose Action Descriptions}, school = {University of Texas at Austin})
@inproceedings(griffith2013policy, author = {S. Griffith and K. Subramanian and J. Scholz and C. L. Isbell and A. L. Thomaz}, year = {2013}, title = {Policy shaping: Integrating human feedback with reinforcement learning}, booktitle = {Advances in Neural Information Processing Systems (NeurIPS)}, pages = {2625--2633})
@article(hanheide2015robot, author = {M. Hanheide and M. G{\"o}belbecker and G. S. Horn}, year = {2015}, title = {Robot task planning and explanation in open and uncertain worlds}, journal = {Artificial Intelligence}, doi = {10.1016/j.artint.2015.08.008})
@article(helmert2006fast, author = {M. Helmert}, year = {2006}, title = {The {F}ast {D}ownward planning system}, journal = {Journal of Artificial Intelligence Research}, volume = {26}, pages = {191--246}, doi = {10.1613/jair.1705})
@inproceedings(hogg2010learning, author = {C. Hogg and U. Kuter and H. Mu{\~n}oz-Avila}, year = {2010}, title = {Learning Methods to Generate Good Plans: Integrating HTN Learning and Reinforcement Learning}, booktitle = {AAAI Conference on Artificial Intelligence (AAAI)})
@article(inclezan2016modular, author = {D. Inclezan and M. Gelfond}, year = {2016}, title = {Modular action language {ALM}}, journal = {Theory and Practice of Logic Programming}, volume = {16}, number = {2}, pages = {189--235})
@inproceedings(yang:iros:2019, author = {Y. Jiang and F. Yang and S. Zhang and P. Stone}, year = {2019}, title = {Task-Motion Planning with Reinforcement Learning for Adaptable Mobile Service Robots}, booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)})
@inproceedings(khandelwal2014planning, author = {P. Khandelwal and F. Yang and M. Leonetti and V. Lifschitz and P. Stone}, year = {2014}, title = {Planning in Action Language $\mathcal{BC}$ while Learning Action Costs for Mobile Robots}, booktitle = {International Conference on Automated Planning and Scheduling (ICAPS)})
@article(khandelwal2017bwibots, author = {P. Khandelwal and S. Zhang and J. Sinapov and M. Leonetti and J. Thomason and F. Yang and I. Gori and M. Svetlik and P. Khante and V. Lifschitz}, year = {2017}, title = {BWIBots: A platform for bridging the gap between AI and human--robot interaction research}, journal = {The International Journal of Robotics Research}, volume = {36}, number = {5--7}, pages = {635--659})
@inproceedings(knox2009interactively, author = {W. B. Knox and P. Stone}, year = {2009}, title = {Interactively shaping agents via human reinforcement: The TAMER framework}, booktitle = {Proceedings of the Fifth International Conference on Knowledge Capture (K-CAP)}, organization = {ACM}, pages = {9--16}, doi = {10.1145/1597735.1597738})
@inproceedings(knox2010combining, author = {W. B. Knox and P. Stone}, year = {2010}, title = {Combining manual feedback with subsequent MDP reward signals for reinforcement learning}, booktitle = {Proceedings of the 9th International Conference on Autonomous Agents and Multiagent Systems (AAMAS)}, organization = {International Foundation for Autonomous Agents and Multiagent Systems}, pages = {5--12})
@inproceedings(knox2012reinforcement, author = {W. B. Knox and P. Stone}, year = {2012}, title = {Reinforcement learning from simultaneous human and MDP reward}, booktitle = {Proceedings of the 11th International Conference on Autonomous Agents and Multiagent Systems (AAMAS)}, organization = {International Foundation for Autonomous Agents and Multiagent Systems}, pages = {475--482})
@inproceedings(lee13, author = {J. Lee and V. Lifschitz and F. Yang}, year = {2013}, title = {{Action Language $\mathcal{BC}$: A Preliminary Report}}, booktitle = {International Joint Conference on Artificial Intelligence (IJCAI)})
@inproceedings(leonetti2012automatic, author = {M. Leonetti and L. Iocchi and F. Patrizi}, year = {2012}, title = {Automatic generation and learning of finite-state controllers}, booktitle = {International Conference on Artificial Intelligence: Methodology, Systems, and Applications}, organization = {Springer}, pages = {135--144})
@article(leonetti2016synthesis, author = {M. Leonetti and L. Iocchi and P. Stone}, year = {2016}, title = {A synthesis of automated planning and reinforcement learning for efficient, robust decision-making}, journal = {Artificial Intelligence}, volume = {241}, pages = {103--130}, doi = {10.1016/j.artint.2016.07.004})
@inproceedings(lif06, author = {V. Lifschitz and W. Ren}, year = {2006}, title = {A modular action description language}, booktitle = {AAAI Conference on Artificial Intelligence (AAAI)}, pages = {853--859})
@inproceedings(lyu2018sdrl, author = {D. Lyu and F. Yang and B. Liu and S. Gustafson}, year = {2019}, title = {SDRL: Interpretable and Data-efficient Deep Reinforcement Learning Leveraging Symbolic Planning}, booktitle = {AAAI Conference on Artificial Intelligence (AAAI)})
@inproceedings(macglashan2017interactive, author = {J. MacGlashan and M. K. Ho and R. Loftin and B. Peng and G. Wang and D. L. Roberts and M. E. Taylor and M. L. Littman}, year = {2017}, title = {Interactive Learning from Policy-Dependent Human Feedback}, booktitle = {International Conference on Machine Learning (ICML)})
@inproceedings(macglashan2016convergent, author = {J. MacGlashan and M. L. Littman and D. L. Roberts and R. Loftin and B. Peng and M. E. Taylor}, year = {2016}, title = {Convergent Actor Critic by Humans}, booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)})
@article(mcc87, author = {J. McCarthy}, year = {1987}, title = {Generality in {A}rtificial {I}ntelligence}, journal = {Communications of the ACM (CACM)}, doi = {10.1145/33447.33448})
@article(dqn:nature:2015, author = {V. Mnih and K. Kavukcuoglu and D. Silver and A. A. Rusu and J. Veness and M. G. Bellemare and A. Graves and M. Riedmiller and A. K. Fidjeland and G. Ostrovski and others}, year = {2015}, title = {Human-level control through deep reinforcement learning}, journal = {Nature}, volume = {518}, number = {7540}, pages = {529--533}, doi = {10.1038/nature14236})
@inproceedings(ng2000algorithms, author = {A. Y. Ng and S. J. Russell}, year = {2000}, title = {Algorithms for inverse reinforcement learning}, booktitle = {International Conference on Machine Learning (ICML)}, pages = {663--670})
@inproceedings(parr1998reinforcement, author = {R. Parr and S. J. Russell}, year = {1998}, title = {Reinforcement learning with hierarchies of machines}, booktitle = {Advances in Neural Information Processing Systems (NeurIPS)}, pages = {1043--1049})
@article(nac:peters2008, author = {J. Peters and S. Schaal}, year = {2008}, title = {Natural actor-critic}, journal = {Neurocomputing}, volume = {71}, number = {7}, pages = {1180--1190}, doi = {10.1016/j.neucom.2007.11.026})
@inproceedings(rosenthal2011learning, author = {S. Rosenthal and M. M. Veloso and A. K. Dey}, year = {2011}, title = {Learning Accuracy and Availability of Humans Who Help Mobile Robots}, booktitle = {AAAI Conference on Artificial Intelligence (AAAI)})
@techreport(rosenthal2012human, author = {S. L. Rosenthal}, year = {2012}, title = {Human-centered planning for effective task autonomy}, type = {Technical Report}, institution = {School of Computer Science, Carnegie Mellon University}, address = {Pittsburgh, PA})
@inproceedings(Ryan02usingabstract, author = {M. R. K. Ryan}, year = {2002}, title = {Using abstract models of behaviours to automatically generate reinforcement learning hierarchies}, booktitle = {Proceedings of the 19th International Conference on Machine Learning (ICML)}, publisher = {Morgan Kaufmann}, pages = {522--529})
@inproceedings(Ryan98rl-tops:an, author = {M. R. K. Ryan and M. D. Pendrith}, year = {1998}, title = {{RL-TOPs}: An Architecture for Modularity and Re-Use in Reinforcement Learning}, booktitle = {Proceedings of the Fifteenth International Conference on Machine Learning (ICML)}, publisher = {Morgan Kaufmann}, pages = {481--487})
@inproceedings(trpo:schulman2015, author = {J. Schulman and S. Levine and P. Abbeel and M. Jordan and P. Moritz}, year = {2015}, title = {Trust region policy optimization}, booktitle = {Proceedings of the 32nd International Conference on Machine Learning (ICML)}, pages = {1889--1897})
@article(schulman2015high, author = {J. Schulman and P. Moritz and S. Levine and M. Jordan and P. Abbeel}, year = {2015}, title = {High-dimensional continuous control using generalized advantage estimation}, journal = {arXiv preprint arXiv:1506.02438})
@inproceedings(rlearning:schwartz, author = {A. Schwartz}, year = {1993}, title = {A Reinforcement Learning Method for Maximizing Undiscounted Rewards}, booktitle = {International Conference on Machine Learning (ICML)}, publisher = {Morgan Kaufmann}, address = {San Francisco, CA}, doi = {10.1016/B978-1-55860-307-3.50045-9})
@book(sutton2018reinforcement, author = {R. S. Sutton and A. G. Barto}, year = {2018}, title = {Reinforcement learning: An introduction}, publisher = {MIT Press})
@article(sutton1999between, author = {R. S. Sutton and D. Precup and S. Singh}, year = {1999}, title = {Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning}, journal = {Artificial Intelligence}, volume = {112}, number = {1--2}, pages = {181--211}, doi = {10.1016/S0004-3702(99)00052-1})
@article(thomaz2008teachable, author = {A. L. Thomaz and C. Breazeal}, year = {2008}, title = {Teachable robots: Understanding human teaching behavior to build more effective robot learners}, journal = {Artificial Intelligence}, volume = {172}, number = {6--7}, pages = {716--737}, doi = {10.1016/j.artint.2007.09.009})
@inproceedings(thomaz2006reinforcement, author = {A. L. Thomaz and C. Breazeal}, year = {2006}, title = {Reinforcement learning with human teachers: Evidence of feedback and guidance with implications for learning performance}, booktitle = {AAAI Conference on Artificial Intelligence (AAAI)}, volume = {6}, address = {Boston, MA}, pages = {1000--1005})
@article(tsividis2017human, author = {P. A. Tsividis and T. Pouncy and J. L. Xu and J. B. Tenenbaum and S. J. Gershman}, year = {2017}, title = {Human learning in Atari})
@article(reinforce:williams1992, author = {R. J. Williams}, year = {1992}, title = {Simple statistical gradient-following algorithms for connectionist reinforcement learning}, journal = {Machine Learning}, volume = {8}, number = {3--4}, pages = {229--256}, doi = {10.1023/A:1022672621406})
@inproceedings(yang:peorl:2018, author = {F. Yang and D. Lyu and B. Liu and S. Gustafson}, year = {2018}, title = {{PEORL}: Integrating Symbolic Planning and Hierarchical Reinforcement Learning for Robust Decision-Making}, booktitle = {International Joint Conference on Artificial Intelligence (IJCAI)}, doi = {10.24963/ijcai.2018/675})