

{"id":38333,"date":"2020-03-05T00:00:19","date_gmt":"2020-03-04T16:00:19","guid":{"rendered":"https:\/\/case.ntu.edu.tw\/blog\/?p=38333"},"modified":"2022-01-03T14:44:37","modified_gmt":"2022-01-03T06:44:37","slug":"%e7%b5%a6ai%e4%b8%80%e5%80%8b%e5%ae%89%e5%85%a8%e3%80%81%e8%88%92%e9%81%a9%e7%9a%84%e5%ad%b8%e7%bf%92%e7%92%b0%e5%a2%83","status":"publish","type":"post","link":"https:\/\/case.ntu.edu.tw\/blog\/?p=38333","title":{"rendered":"\u7d66AI\u4e00\u500b\u5b89\u5168\u3001\u8212\u9069\u7684\u5b78\u7fd2\u74b0\u5883"},"content":{"rendered":"<div class=\"single-post-media clr\">\n<div class=\"post-thumbnail\"><img fetchpriority=\"high\" decoding=\"async\" src=\"https:\/\/highscope.ch.ntu.edu.tw\/wordpress\/wp-content\/uploads\/2020\/03\/safety-gym-0.png\" alt=\"\u7d66AI\u4e00\u500b\u5b89\u5168\u3001\u8212\u9069\u7684\u5b78\u7fd2\u74b0\u5883\" width=\"500\" height=\"329\" \/><\/div>\n<\/div>\n<div class=\"entry clr\">\n<div class=\"pf-content\">\n<p><strong>\u64b0\u6587\uff0f\u8a31\u5b88\u5091<\/strong><\/p>\n<p><strong>\u74b0\u5883\u63a2\u7d22\u7684\u98a8\u96aa<\/strong><\/p>\n<p><a href=\"http:\/\/highscope.ch.ntu.edu.tw\/wordpress\/?p=81532\">\u5f37\u5316\u5b78\u7fd2\uff08Reinforcement Learning\uff09<\/a>\u7684\u9748\u611f\u4f86\u81ea\u65bc\u5fc3\u7406\u5b78\u4e2d\u7684\u884c\u70ba\u4e3b\u7fa9\uff0c\u8a8d\u70ba\u884c\u70ba\u662f\u751f\u7269\u8207\u74b0\u5883\u4e92\u52d5\u7684\u7d50\u679c\uff0c\u4e26\u80fd\u900f\u904e\u734e\u52f5\u6216\u61f2\u7f70\u800c\u5f37\u5316\u3002\u5957\u7528\u65bcAI\u4e4b\u4e0a\uff0c\u4fbf\u662f\u5229\u7528\u9069\u7576\u7684\u734e\u52f5\u51fd\u6578\uff08reward function\uff09\uff0c\u5f37\u5316\u667a\u6167\u9ad4\u5728\u53cd\u8986\u8a66\u932f\u7684\u63a2\u7d22\u904e\u7a0b\u4e2d\u6240\u8868\u73fe\u51fa\u6709\u52a9\u65bc\u9054\u6210\u4efb\u52d9\u7684\u300c\u826f\u597d\u300d\u884c\u70ba\uff0c\u4e26\u964d\u4f4e\u7121\u6548\u884c\u70ba\u7684\u51fa\u73fe\u983b\u7387\u3002\u7136\u800c\u63a2\u7d22\u672c\u8eab\u5c31\u662f\u4e00\u4ef6\u6709\u98a8\u96aa\u7684\u4e8b\uff0c\u5982\u4f55\u9632\u6b62\u667a\u6167\u9ad4\u5728\u904e\u7a0b\u4e2d\u50b7\u5bb3\u5468\u906d\u4eba\u4e8b\u7269\u7684\u5b89\u5168\uff0c\u4e26\u6c92\u6709\u60f3\u50cf\u4e2d\u7684\u7c21\u55ae\u3002<\/p>\n<p>\u8209\u4f8b\u4f86\u8aaa\uff0c\u82e5\u81ea\u52d5\u5316\u5de5\u5ee0\u60f3\u4ee5\u5f37\u5316\u5b78\u7fd2<a href=\"http:\/\/highscope.ch.ntu.edu.tw\/wordpress\/?p=79841\">\u8a13\u7df4\u6a5f\u5668\u4eba\u624b\u81c2<\/a>\u4f86\u7d44\u88dd\u96f6\u4ef6\u3002\u4e00\u958b\u59cb\u6642\uff0c\u6a5f\u5668\u4eba\u53ef\u80fd\u6703\u8a66\u8457\u96a8\u6a5f\u6416\u52d5\u624b\u81c2\uff0c\u4f46\u9019\u6a23\u5c31\u6709\u53ef\u80fd\u50b7\u5bb3\u5230\u5468\u906d\u7684\u5de5\u4f5c\u4eba\u54e1\u3002\u96d6\u7136\u5728\u9019\u6a23\u7684\u60c5\u5883\u4e2d\uff0c\u6211\u5011\u53ef\u4ee5\u7dca\u6025\u505c\u6a5f\u6216\u5c07\u6a5f\u5668\u624b\u81c2\u5468\u570d\u6de8\u7a7a\u4f86\u9632\u6b62\u53ef\u80fd\u7684\u5371\u96aa\uff0c\u7136\u800c\u96a8\u8457AI\u7684\u61c9\u7528\u5834\u57df\u6108\u8da8\u591a\u6a23\uff0c\u9019\u985e\u7684\u9810\u9632\u63aa\u65bd\u4e0d\u4e00\u5b9a\u53ef\u884c\uff0c\u4e5f\u5c31\u5fc5\u9808\u8003\u616e\u5176\u4ed6\u7684\u5b89\u5168\u63a2\u7d22\u7b56\u7565\u3002<\/p>\n<p><strong>\u53d7\u9650\u7684\u5f37\u5316\u5b78\u7fd2<\/strong><\/p>\n<p>\u7136\u800c\u4e00\u822c\u7684\u5f37\u5316\u5b78\u7fd2\u5f88\u96e3\u505a\u5230\u9019\u4e00\u9ede\uff0c\u56e0\u70ba\u9019\u610f\u5473\u8457\u5728\u8a2d\u8a08\u734e\u52f5\u51fd\u6578\u6642\uff0c\u9700\u8981\u540c\u6642\u8003\u616e\u4efb\u52d9\u9054\u6210\u6548\u7387\u8207\u5b89\u5168\u8981\u6c42\u9019\u5169\u500b\u57fa\u672c\u4e0a\u4e92\u65a5\u7684\u76ee\u6a19\uff1bOpenAI\u8a8d\u70ba\u300c\u53d7\u9650\u7684\u5f37\u5316\u5b78\u7fd2\u300d\uff08Constrained Reinforcement Learning\uff09\u6703\u662f\u66f4\u597d\u7684\u9078\u64c7\u3002\u53d7\u9650\u8207\u4e00\u822c\u7684\u5f37\u5316\u5b78\u7fd2\u985e\u4f3c\uff0c\u53ea\u662f\u9664\u4e86\u734e\u52f5\u51fd\u6578\u5916\uff0c\u53e6\u5916\u589e\u52a0\u4e86\u6210\u672c\u51fd\u6578\uff08cost function\uff09\u4f86\u9650\u5236\u667a\u6167\u9ad4\u3002\u5047\u8a2d\u6211\u5011\u60f3\u8b93\u81ea\u99d5\u8eca\u5f9eA\u9ede\u958b\u5230B\u9ede\uff0c\u9664\u4e86\u4ee5\u734e\u52f5\u51fd\u6578\u4f86\u9f13\u52f5\u667a\u6167\u9ad4\u7528\u6700\u5c11\u7684\u6642\u9593\u5b8c\u6210\u4efb\u52d9\u5916\uff0c\u540c\u6642\u4e5f\u4ee5\u6210\u672c\u51fd\u6578\u7d04\u675f\u81ea\u99d5\u8eca\u7684\u99d5\u99db\u884c\u70ba\u5fc5\u9808\u7b26\u5408\u4ea4\u901a\u898f\u5247\u3002\u5982\u6b64\u4e00\u4f86\uff0c\u958b\u767c\u8005\u5c31\u4e0d\u9700\u8981\u5728\u6548\u7387\u8207\u5b89\u5168\u5169\u8005\u9593\u505a\u51fa\u53d6\u6368\uff0c\u800c\u662f\u53ef\u4ee5\u9078\u64c7\u60f3\u8981\u7684\u7d50\u679c\uff0c\u8b93\u6f14\u7b97\u6cd5\u81ea\u4e3b\u63a2\u7d22\u3002<\/p>\n<p>\u9664\u6b64\u4e4b\u5916\uff0c\u53d7\u9650\u7684\u5f37\u5316\u5b78\u7fd2\u9084\u6709\u53e6\u4e00\u9805\u512a\u9ede\u3002\u4ee5\u4e0a\u8ff0\u540c\u6a23\u7684\u4f8b\u5b50\u70ba\u4f8b\uff0c\u5047\u8a2d\u6211\u5011\u7684\u734e\u61f2\u6a5f\u5236\u662f\u6bcf\u6b21\u667a\u6167\u9ad4\u5230\u9054\u76ee\u7684\u5730\u6642\u90fd\u6703\u6536\u5230\u4e00\u7b46\u8eca\u99ac\u8cbb\uff08\u8207\u62b5\u9054\u6642\u9593\u6210\u53cd\u6bd4\uff09\uff0c\u4f46\u5728\u9014\u4e2d\u82e5\u8207\u5176\u4ed6\u8eca\u8f1b\u767c\u751f\u78b0\u649e\u4e5f\u9700\u652f\u4ed8\u4e00\u7b46\u7f70\u6b3e\u3002\u5728\u4e00\u822c\u7684\u5f37\u5316\u5b78\u7fd2\u4e2d\uff0c\u78b0\u649e\u7f70\u6b3e\u901a\u5e38\u662f\u56fa\u5b9a\u7684\u3002\u9019\u6a23\u7684\u8a2d\u8a08\u2500\u2500\u5c24\u5176\u7576\u5b8c\u6210\u4efb\u52d9\u7684\u734e\u52f5\u9060\u8d85\u904e\u6240\u8a2d\u5b9a\u7684\u56fa\u5b9a\u7f70\u6b3e\u6642\u2500\u2500\u5f88\u5bb9\u6613\u4f7f\u5f97\u667a\u6167\u9ad4\u53ea\u70ba\u4e86\u734e\u52f5\u800c\u5b8c\u5168\u4e0d\u7406\u6703\u8eca\u798d\u7684\u98a8\u96aa\uff1b\u76f8\u5c0d\u7684\uff0c\u5728\u53d7\u9650\u7684\u5f37\u5316\u5b78\u7fd2\u4e2d\u6211\u5011\u53ef\u4ee5\u5728\u4e00\u958b\u59cb\u8a13\u7df4\u6642\u9078\u64c7\u4e00\u500b\u53ef\u63a5\u53d7\u7684\u78b0\u649e\u7387\u4f5c\u70ba\u6700\u4f4e\u7684\u5b89\u5168\u6a19\u6e96\uff0c\u4e00\u65e6\u667a\u6167\u9ad4\u78b0\u649e\u6b21\u6578\u8d85\u904e\u8a72\u6a19\u6e96\u4fbf\u5927\u5e45\u63d0\u9ad8\u7f70\u6b3e\uff0c\u4f7f\u5f97\u4efb\u52d9\u734e\u52f5\u8b8a\u5f97\u6c92\u6709\u8a98\u56e0\uff0c\u667a\u6167\u9ad4\u4e5f\u5c31\u4e0d\u6703\u56e0\u70ba\u8caa\u5feb\u800c\u7121\u8996\u5176\u4ed6\u7528\u8def\u4eba\u7684\u5b89\u5168\u3002<\/p>\n<p><strong>\u5b89\u5168\u7684\u5b78\u7fd2\u74b0\u5883<\/strong><\/p>\n<p>\u5373\u4f7f\u5982\u6b64\uff0c\u5728\u53d7\u9650\u5f37\u5316\u5b78\u7fd2AI\u8a13\u7df4\u5b8c\u6210\u524d\uff0c\u4ecd\u7136\u9700\u8981\u4e00\u500b\uff08\u5c0d\u5468\u906d\u5176\u4ed6\u4eba\uff09\u5b89\u5168\u7684\u5b78\u7fd2\u74b0\u5883\u3002Safety Gym\u4fbf\u662fOpenAI\u5c08\u70ba\u5f37\u5316\u5b78\u7fd2AI\u6240\u8a2d\u8a08\u7684\u4e00\u7d44\u5de5\u5177\uff0c\u63d0\u4f9b\u5404\u7a2e\u5b78\u7fd2\u74b0\u5883\u4e26\u5099\u6709\u4e0d\u540c\u7684\u96e3\u5ea6\u8207\u8907\u96dc\u5ea6\u3002\u76ee\u524d\u5957\u4ef6\u4e2d\u9810\u8a2d\u6709Point\u3001Car\u548cDoggo\u4e09\u7a2e\u6a5f\u5668\u4eba\uff0cAI\u9700\u8981\u5b78\u7fd2\u63a7\u5236\u5404\u985e\u6a5f\u5668\u4eba\u4e26\u5b8c\u6210\u4e09\u7a2e\u9810\u8a2d\u4efb\u52d9\u4e4b\u4e00\uff0c\u6bcf\u7a2e\u4efb\u52d9\u5404\u6709\u5169\u7a2e\u96e3\u5ea6\u3002<\/p>\n<div id=\"attachment_83607\" class=\"wp-caption aligncenter\"><img decoding=\"async\" class=\"wp-image-83607 size-full\" src=\"https:\/\/highscope.ch.ntu.edu.tw\/wordpress\/wp-content\/uploads\/2020\/03\/safety-gym-1.png\" alt=\"\" width=\"960\" height=\"720\" \/><\/p>\n<p class=\"wp-caption-text\">\u4ee5Doggo\u6a5f\u5668\u4eba\u70ba\u4f8b\uff0c\u7531\u5de6\u81f3\u53f3\u7684\u4efb\u52d9\u5206\u5225\u662f\u62b5\u9054\u6307\u5b9a\u5730\u9ede\u3001\u6309\u4e0b\u6307\u5b9a\u6309\u9215\u548c\u62d6\u62c9\u7269\u4ef6\u81f3\u6307\u5b9a\u5730\u9ede\uff08\u7da0\u8272\u5713\u690e\uff09\u3002\u85cd\u8272\u5713\u70ba\u6307\u5b9a\u4e0d\u5f97\u89f8\u78b0\u7684\u5340\u584a\uff0c\u7576\u6a5f\u5668\u4eba\u4e0d\u614e\u89f8\u78b0\u4fbf\u6703\u51fa\u73fe\u5982\u5217\u4e8c\u7684\u7d05\u8272\u8b66\u793a\u71c8\u3002\uff08\u5716\u7247\u4f86\u6e90\uff1aOpenAI, Safety Gym, 2019.\uff09<\/p>\n<\/div>\n<p>\u958b\u767c\u5718\u968a\u5728Safety Gym\u4e0a\u8a66\u884c\u5404\u7a2e\u5f37\u5316\u5b78\u7fd2\u6a21\u578b\uff0c\u4e26\u7e6a\u88fd\u51fa\u5982\u4e0b\u5716\u7684\u5b78\u7fd2\u66f2\u7dda\uff0c\u5efa\u7acb\u5404\u6a21\u578b\u7684\u57fa\u6e96\u6548\u80fd\u3002<\/p>\n<div id=\"attachment_83608\" class=\"wp-caption aligncenter\"><img decoding=\"async\" class=\"wp-image-83608 size-full\" src=\"https:\/\/highscope.ch.ntu.edu.tw\/wordpress\/wp-content\/uploads\/2020\/03\/%E5%AD%B8%E7%BF%92%E6%9B%B2%E7%B7%9A.png\" alt=\"\" width=\"1276\" height=\"587\" \/><\/p>\n<p class=\"wp-caption-text\">\u91dd\u5c0d\u300c\u64cd\u4f5cDoggo\u6a5f\u5668\u4eba\u79fb\u52d5\u81f3\u6307\u5b9a\u5730\u9ede\u300d\u9019\u9805\u4efb\u52d9\uff0c\u5404\u7a2e\uff08\u5305\u542b\u4e00\u822c\u8207\u53d7\u9650\uff09\u5f37\u5316\u5b78\u7fd2\u6a21\u578b\u7684\u5b78\u7fd2\u66f2\u7dda\u3002\uff08\u5716\u7247\u4f86\u6e90\uff1aOpenAI, Safety Gym, 2019.\uff09<\/p>\n<\/div>\n<p>\u9664\u4e86\u900f\u904e\u9019\u6a23\u7684\u958b\u6e90\u6846\u67b6\uff0c\u4efb\u4f55AI\u958b\u767c\u4eba\u54e1\u90fd\u80fd\u5920\u8f15\u9b06\u5730\u5728AI\u7684\u5b89\u5168\u6027\u4e0a\u9032\u884c\u5354\u4f5c\u5916\uff0cOpenAI\u7684\u6700\u7d42\u76ee\u7684\u662f\u5e0c\u671b\u5c07Safety Gym\u63a8\u5ee3\u70ba\u6240\u6709\u5f37\u5316\u5b78\u7fd2AI\u7684\u5b89\u5168\u6a19\u6e96\u6e2c\u8a66\uff0c\u5c31\u50cf\u6211\u5011\u5df2\u7d93\u53ef\u4ee5\u4ee5\u7279\u5b9a\u4efb\u52d9\u4f86\u91cf\u5316\u7cfb\u7d71\u7684\u6e96\u78ba\u5ea6\u8207\u8868\u73fe\u4e00\u6a23\u3002<\/p>\n<p><strong>\u00a0<\/strong><\/p>\n<p><strong>\u7de8\u8b6f\u4f86\u6e90<\/strong><\/p>\n<p>OpenAI,\u00a0<a href=\"https:\/\/openai.com\/blog\/safety-gym\/\">Safety Gym<\/a>, 2019<\/p>\n<p><strong>\u53c3\u8003\u8cc7\u6599<\/strong><\/p>\n<ol>\n<li>A. Ray et al.,\u00a0<a href=\"https:\/\/cdn.openai.com\/safexp-short.pdf\">\u201cBenchmarking Safe Exploration in Deep Reinforcement Learning\u201d<\/a>,\u00a0<em>openai.com<\/em>, 2019<\/li>\n<li>J. Achiam et al.,\u00a0<a href=\"https:\/\/arxiv.org\/abs\/1705.10528\">Constrained Policy Optimization<\/a>,\u00a0<em>arXiv.org<\/em>, 2017<\/li>\n<\/ol>\n<p>(\u672c\u6587\u7531\u6559\u80b2\u90e8\u88dc\u52a9\u300cAI\u5831\u5831\u2500AI\u79d1\u666e\u63a8\u5ee3\u8a08\u756b\u300d\u57f7\u884c\u5718\u968a\u7de8\u8b6f)<\/p>\n<\/div>\n<\/div>\n","protected":false},"excerpt":{"rendered":"<p>\u9019\u4e0d\u662f\u4ec0\u9ebc\u4f4f\u5546\u5ee3\u544a\uff0c\u800c\u662fOpenAI\u5c08\u70ba\u5f37\u5316\u5b78\u7fd2AI\u958b\u767c\u7684\u5b78\u7fd2\u5de5\u5177\u2500\u2500Safety Gym\uff0c\u8b93AI\u80fd\u5728\u932f\u8aa4\u4e2d\u5b78\u7fd2\uff0c\u537b\u53c8\u4e0d\u6703\u5371\u53ca\u5468\u906d\u5176\u4ed6\u4eba\u7684\u5b89\u5168\u3002<\/p>\n","protected":false},"author":21,"featured_media":38334,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[3772,3293],"tags":[5670,4989,5671,1846,5513],"aioseo_notices":[],"jetpack_featured_media_url":"https:\/\/case.ntu.edu.tw\/blog\/wp-content\/uploads\/2022\/01\/safety-gym-0.png","_links":{"self":[{"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=\/wp\/v2\/posts\/38333"}],"collection":[{"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=\/wp\/v2\/users\/21"}],"replies":[{"embeddable":true,"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=38333"}],"version-history":[{"count":1,"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=\/wp\/v2\/posts\/38333\/revisions"}],"predecessor-version":[{"id":38335,"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=\/wp\/v2\/posts\/38333\/revisions\/38335"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=\/wp\/v2\/media\/38334"}],"wp:attachment":[{"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=38333"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=38333"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=38333"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}