@article(lfd:argall:2009, author = {B. D. Argall and S. Chernova and M. Veloso and B. Browning}, year = {2009}, title = {A survey of robot learning from demonstration}, journal = {Robotics and Autonomous Systems}, volume = {57}, number = {5}, pages = {469--483}, doi = {10.1016/j.robot.2008.10.024})
@article(barto-sm:hrl, author = {A. Barto and S. Mahadevan}, year = {2003}, title = {Recent Advances in Hierarchical Reinforcement Learning}, journal = {Discrete Event Dynamic Systems}, volume = {13}, pages = {41--77}, doi = {10.1023/A:1022140919877})
@article(nac:Bhatnagar09, author = {S. Bhatnagar and R. Sutton and M. Ghavamzadeh and M. Lee}, year = {2009}, title = {Natural Actor-Critic Algorithms}, journal = {Automatica}, volume = {45}, number = {11}, pages = {2471--2482}, doi = {10.1016/j.automatica.2009.07.008})
@incollection(cim08, author = {A. Cimatti and M. Pistore and P. Traverso}, year = {2008}, title = {Automated planning}, editor = {F. van Harmelen and V. Lifschitz and B. Porter}, booktitle = {Handbook of Knowledge Representation}, publisher = {Elsevier}, doi = {10.1016/S1574-6526(07)03022-2})
@phdthesis(erdo08, author = {S. T. Erdo\u{g}an}, year = {2008}, title = {A Library of General-Purpose Action Descriptions}, school = {University of Texas at Austin})
@inproceedings(griffith2013policy, author = {S. Griffith and K. Subramanian and J. Scholz and C. L. Isbell and A. L. Thomaz}, year = {2013}, title = {Policy shaping: Integrating human feedback with reinforcement learning}, booktitle = {Advances in Neural Information Processing Systems (NeurIPS)}, pages = {2625--2633})
@article(hanheide2015robot, author = {M. Hanheide and M. G{\"o}belbecker and G. S. Horn}, year = {2015}, title = {Robot task planning and explanation in open and uncertain worlds}, journal = {Artificial Intelligence}, doi = {10.1016/j.artint.2015.08.008})
@article(helmert2006fast, author = {M. Helmert}, year = {2006}, title = {The {F}ast {D}ownward planning system}, journal = {Journal of Artificial Intelligence Research}, volume = {26}, pages = {191--246}, doi = {10.1613/jair.1705})
@inproceedings(hogg2010learning, author = {C. Hogg and U. Kuter and H. Mu{\~n}oz-Avila}, year = {2010}, title = {Learning Methods to Generate Good Plans: Integrating HTN Learning and Reinforcement Learning}, booktitle = {AAAI Conference on Artificial Intelligence (AAAI)})
@article(inclezan2016modular, author = {D. Inclezan and M. Gelfond}, year = {2016}, title = {Modular action language {ALM}}, journal = {Theory and Practice of Logic Programming}, volume = {16}, number = {2}, pages = {189--235})
@inproceedings(yang:iros:2019, author = {Y. Jiang and F. Yang and S. Zhang and P. Stone}, year = {2019}, title = {Task-Motion Planning with Reinforcement Learning for Adaptable Mobile Service Robots}, booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)})
@inproceedings(khandelwal2014planning, author = {P. Khandelwal and F. Yang and M. Leonetti and V. Lifschitz and P. Stone}, year = {2014}, title = {Planning in Action Language $\mathcal{BC}$ while Learning Action Costs for Mobile Robots}, booktitle = {International Conference on Automated Planning and Scheduling (ICAPS)})
@article(khandelwal2017bwibots, author = {P. Khandelwal and S. Zhang and J. Sinapov and M. Leonetti and J. Thomason and F. Yang and I. Gori and M. Svetlik and P. Khante and V. Lifschitz}, year = {2017}, title = {BWIBots: A platform for bridging the gap between AI and human--robot interaction research}, journal = {The International Journal of Robotics Research}, volume = {36}, number = {5--7}, pages = {635--659})
@inproceedings(knox2009interactively, author = {W. B. Knox and P. Stone}, year = {2009}, title = {Interactively shaping agents via human reinforcement: The TAMER framework}, booktitle = {Proceedings of the Fifth International Conference on Knowledge Capture (K-CAP)}, organization = {ACM}, pages = {9--16}, doi = {10.1145/1597735.1597738})
@inproceedings(knox2010combining, author = {W. B. Knox and P. Stone}, year = {2010}, title = {Combining manual feedback with subsequent MDP reward signals for reinforcement learning}, booktitle = {Proceedings of the 9th International Conference on Autonomous Agents and Multiagent Systems (AAMAS)}, organization = {International Foundation for Autonomous Agents and Multiagent Systems}, pages = {5--12})
@inproceedings(knox2012reinforcement, author = {W. B. Knox and P. Stone}, year = {2012}, title = {Reinforcement learning from simultaneous human and MDP reward}, booktitle = {Proceedings of the 11th International Conference on Autonomous Agents and Multiagent Systems (AAMAS)}, organization = {International Foundation for Autonomous Agents and Multiagent Systems}, pages = {475--482})
@inproceedings(lee13, author = {J. Lee and V. Lifschitz and F. Yang}, year = {2013}, title = {{Action Language $\mathcal{BC}$: A Preliminary Report}}, booktitle = {International Joint Conference on Artificial Intelligence (IJCAI)})
@inproceedings(leonetti2012automatic, author = {M. Leonetti and L. Iocchi and F. Patrizi}, year = {2012}, title = {Automatic generation and learning of finite-state controllers}, booktitle = {International Conference on Artificial Intelligence: Methodology, Systems, and Applications}, organization = {Springer}, pages = {135--144})
@article(leonetti2016synthesis, author = {M. Leonetti and L. Iocchi and P. Stone}, year = {2016}, title = {A synthesis of automated planning and reinforcement learning for efficient, robust decision-making}, journal = {Artificial Intelligence}, volume = {241}, pages = {103--130}, doi = {10.1016/j.artint.2016.07.004})
@inproceedings(lif06, author = {V. Lifschitz and W. Ren}, year = {2006}, title = {A modular action description language}, booktitle = {AAAI Conference on Artificial Intelligence (AAAI)}, pages = {853--859})
@inproceedings(lyu2018sdrl, author = {D. Lyu and F. Yang and B. Liu and S. Gustafson}, year = {2019}, title = {SDRL: Interpretable and Data-efficient Deep Reinforcement Learning Leveraging Symbolic Planning}, booktitle = {AAAI Conference on Artificial Intelligence (AAAI)})
@inproceedings(macglashan2017interactive, author = {J. MacGlashan and M. K. Ho and R. Loftin and B. Peng and G. Wang and D. L. Roberts and M. E. Taylor and M. L. Littman}, year = {2017}, title = {Interactive Learning from Policy-Dependent Human Feedback}, booktitle = {International Conference on Machine Learning (ICML)})
@inproceedings(macglashan2016convergent, author = {J. MacGlashan and M. L. Littman and D. L. Roberts and R. Loftin and B. Peng and M. E. Taylor}, year = {2016}, title = {Convergent Actor Critic by Humans}, booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)})
@article(mcc87, author = {J. McCarthy}, year = {1987}, title = {Generality in {A}rtificial {I}ntelligence}, journal = {Communications of the ACM (CACM)}, doi = {10.1145/33447.33448})
@article(dqn:nature:2015, author = {V. Mnih and K. Kavukcuoglu and D. Silver and A. A. Rusu and J. Veness and M. G. Bellemare and A. Graves and M. Riedmiller and A. K. Fidjeland and G. Ostrovski and others}, year = {2015}, title = {Human-level control through deep reinforcement learning}, journal = {Nature}, volume = {518}, number = {7540}, pages = {529--533}, doi = {10.1038/nature14236})
@inproceedings(ng2000algorithms, author = {A. Y. Ng and S. J. Russell}, year = {2000}, title = {Algorithms for inverse reinforcement learning}, booktitle = {International Conference on Machine Learning (ICML)}, pages = {663--670})
@inproceedings(parr1998reinforcement, author = {R. Parr and S. J. Russell}, year = {1998}, title = {Reinforcement learning with hierarchies of machines}, booktitle = {Advances in Neural Information Processing Systems (NeurIPS)}, pages = {1043--1049})
@article(nac:peters2008, author = {J. Peters and S. Schaal}, year = {2008}, title = {Natural actor-critic}, journal = {Neurocomputing}, volume = {71}, number = {7}, pages = {1180--1190}, doi = {10.1016/j.neucom.2007.11.026})
@inproceedings(rosenthal2011learning, author = {S. Rosenthal and M. M. Veloso and A. K. Dey}, year = {2011}, title = {Learning Accuracy and Availability of Humans Who Help Mobile Robots}, booktitle = {AAAI Conference on Artificial Intelligence (AAAI)})
@techreport(rosenthal2012human, author = {S. L. Rosenthal}, year = {2012}, title = {Human-centered planning for effective task autonomy}, type = {Technical Report}, institution = {School of Computer Science, Carnegie Mellon University}, address = {Pittsburgh, PA})
@inproceedings(Ryan02usingabstract, author = {M. R. K. Ryan}, year = {2002}, title = {Using abstract models of behaviours to automatically generate reinforcement learning hierarchies}, booktitle = {Proceedings of the 19th International Conference on Machine Learning (ICML)}, publisher = {Morgan Kaufmann}, pages = {522--529})
@inproceedings(Ryan98rl-tops:an, author = {M. R. K. Ryan and M. D. Pendrith}, year = {1998}, title = {{RL-TOPs}: An Architecture for Modularity and Re-Use in Reinforcement Learning}, booktitle = {Proceedings of the Fifteenth International Conference on Machine Learning (ICML)}, publisher = {Morgan Kaufmann}, pages = {481--487})
@inproceedings(trpo:schulman2015, author = {J. Schulman and S. Levine and P. Abbeel and M. Jordan and P. Moritz}, year = {2015}, title = {Trust region policy optimization}, booktitle = {Proceedings of the 32nd International Conference on Machine Learning (ICML)}, pages = {1889--1897})
@article(schulman2015high, author = {J. Schulman and P. Moritz and S. Levine and M. Jordan and P. Abbeel}, year = {2015}, title = {High-dimensional continuous control using generalized advantage estimation}, journal = {arXiv preprint arXiv:1506.02438})
@inproceedings(rlearning:schwartz, author = {A. Schwartz}, year = {1993}, title = {A Reinforcement Learning Method for Maximizing Undiscounted Rewards}, booktitle = {International Conference on Machine Learning (ICML)}, publisher = {Morgan Kaufmann}, address = {San Francisco, CA}, doi = {10.1016/B978-1-55860-307-3.50045-9})
@book(sutton2018reinforcement, author = {R. S. Sutton and A. G. Barto}, year = {2018}, title = {Reinforcement learning: An introduction}, publisher = {MIT Press})
@article(sutton1999between, author = {R. S. Sutton and D. Precup and S. Singh}, year = {1999}, title = {Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning}, journal = {Artificial Intelligence}, volume = {112}, number = {1--2}, pages = {181--211}, doi = {10.1016/S0004-3702(99)00052-1})
@article(thomaz2008teachable, author = {A. L. Thomaz and C. Breazeal}, year = {2008}, title = {Teachable robots: Understanding human teaching behavior to build more effective robot learners}, journal = {Artificial Intelligence}, volume = {172}, number = {6--7}, pages = {716--737}, doi = {10.1016/j.artint.2007.09.009})
@inproceedings(thomaz2006reinforcement, author = {A. L. Thomaz and C. Breazeal}, year = {2006}, title = {Reinforcement learning with human teachers: Evidence of feedback and guidance with implications for learning performance}, booktitle = {AAAI Conference on Artificial Intelligence (AAAI)}, volume = {6}, address = {Boston, MA}, pages = {1000--1005})
@article(tsividis2017human, author = {P. A. Tsividis and T. Pouncy and J. L. Xu and J. B. Tenenbaum and S. J. Gershman}, year = {2017}, title = {Human learning in Atari})
@article(reinforce:williams1992, author = {R. J. Williams}, year = {1992}, title = {Simple statistical gradient-following algorithms for connectionist reinforcement learning}, journal = {Machine Learning}, volume = {8}, number = {3--4}, pages = {229--256}, doi = {10.1023/A:1022672621406})
@inproceedings(yang:peorl:2018, author = {F. Yang and D. Lyu and B. Liu and S. Gustafson}, year = {2018}, title = {{PEORL}: Integrating Symbolic Planning and Hierarchical Reinforcement Learning for Robust Decision-Making}, booktitle = {International Joint Conference on Artificial Intelligence (IJCAI)}, doi = {10.24963/ijcai.2018/675})