

{"id":39105,"date":"2018-10-01T00:00:27","date_gmt":"2018-09-30T16:00:27","guid":{"rendered":"https:\/\/case.ntu.edu.tw\/blog\/?p=39105"},"modified":"2022-01-05T00:54:16","modified_gmt":"2022-01-04T16:54:16","slug":"%e6%a9%9f%e5%99%a8%e4%ba%ba%ef%bc%9a%e4%bb%80%e9%ba%bc%ef%bc%9f%e5%a4%a7%e8%81%b2%e9%bb%9e%e6%88%91%e8%81%bd%e4%b8%8d%e8%a6%8b%ef%bc%81","status":"publish","type":"post","link":"https:\/\/case.ntu.edu.tw\/blog\/?p=39105","title":{"rendered":"\u6a5f\u5668\u4eba\uff1a\u4ec0\u9ebc\uff1f\u5927\u8072\u9ede\u6211\u807d\u4e0d\u898b\uff01"},"content":{"rendered":"<div class=\"single-post-media clr\">\n<div class=\"post-thumbnail\"><img fetchpriority=\"high\" decoding=\"async\" src=\"https:\/\/highscope.ch.ntu.edu.tw\/wordpress\/wp-content\/uploads\/2018\/10\/louder-main.jpg\" alt=\"\u6a5f\u5668\u4eba\uff1a\u4ec0\u9ebc\uff1f\u5927\u8072\u9ede\u6211\u807d\u4e0d\u898b\uff01\" width=\"500\" height=\"333\" \/><\/div>\n<\/div>\n<div class=\"entry clr\">\n<div class=\"pf-content\">\n<p><strong>\u7de8\u8b6f\uff0f\u81fa\u5927\u96fb\u6a5f\u7cfb \u5433\u5955\u8431<\/strong><\/p>\n<p><strong>\u96de\u5c3e\u9152\u6703\u6548\u61c9<\/strong><\/p>\n<p>\u97f3\u8a0a\u5206\u96e2\uff08Speech 
Separation\uff09\u4e00\u76f4\u662f\u8a9e\u97f3\u8fa8\u8b58\u6280\u8853\u4e9f\u5f85\u514b\u670d\u7684\u4e00\u5927\u554f\u984c\u3002\u5728\u904e\u53bb\uff0c\u7814\u7a76\u4eba\u54e1\u5229\u7528\u76e3\u7763\u5f0f\u6a5f\u5668\u5b78\u7fd2\uff0c\u589e\u52a0AI\u5c0d\u74b0\u5883\u566a\u97f3\u7684\u9451\u5225\u5ea6\uff0c\u4e26\u63d0\u5347\u76ee\u6a19\u8a9e\u97f3\u7684\u97f3\u8a0a\u54c1\u8cea\uff0c\u9032\u800c\u9054\u5230\u964d\u4f4e\u80cc\u666f\u96dc\u97f3\u3001\u5206\u96e2\u4e0d\u540c\u8b1b\u8005\u8a9e\u97f3\u7684\u76ee\u7684[2]\uff1b\u6216\u8005\uff0c\u5229\u7528\u6df1\u5ea6\u5b78\u7fd2\uff0c\u8a13\u7df4AI\u85c9\u7531\u8a9e\u97f3\u7279\u5fb5\uff08\u983b\u8b5c\u4e0a\u4e0d\u540c\u6642\u983b\u5340\u584a\u9593\u7684\u5c0d\u6bd4\uff09\uff0c\u5c07\u97f3\u8a0a\u5206\u7fa4\uff08clustering\uff09\u3002\u7576\u770b\u5230\u904e\u53bb\u8a13\u7df4\u96c6\u4e2d\u4e0d\u66fe\u51fa\u73fe\u7684\u97f3\u8a0a\u6642\uff0c\u85c9K-means\u5206\u7fa4\u6cd5\u5c07\u5176\u4e2d\u7684\u7d44\u6210\u6b78\u985e\u3002\u9019\u6a23\u7684\u65b9\u6cd5\u5728\u50c5\u6709\u4e8c\u81f3\u4e09\u4eba\u540c\u6642\u8aaa\u8a71\u7684\u60c5\u5883\u4e0b\uff0c\u5df2\u53d6\u5f97\u4e0d\u932f\u7684\u97f3\u8a0a\u5206\u96e2\u6548\u679c[3]\u3002\u7136\u800c\u9019\u4e9b\u7814\u7a76\u591a\u8457\u91cd\u5728\u5982\u4f55\u5c07\u76ee\u6a19\u8a9e\u97f3\u62bd\u96e2\u51fa\u4f86\u4e26\u512a\u5316\uff0c\u81f3\u65bc\u5982\u4f55\u5c07\u4e0d\u540c\u4f86\u6e90\u7684\u8a9e\u97f3\u5c0d\u61c9\u5230\u8aaa\u8a71\u8005\u8eab\u4e0a\uff0c\u4ecd\u6709\u6539\u5584\u7a7a\u9593\u3002<\/p>\n<p>\u53e6\u4e00\u65b9\u9762\uff0c\u4eba\u8166\u8207\u751f\u4ff1\u4f86\u9019\u6a23\u7684\u807d\u89ba\u9078\u64c7\u80fd\u529b\uff1a\u5728\u5435\u96dc\u74b0\u5883\uff08\u4f8b\u5982\uff1a\u96de\u5c3e\u9152\u6703\uff09\u4e2d\uff0c\u82e5\u6211\u5011\u5c07\u6ce8\u610f\u529b\u96c6\u4e2d\u5728\u7279\u5b9a\u8072\u97f3\u4f86\u6e90\uff0c\u4f8b\u5982\u6ce8\u8996\u8aaa\u8a71\u8005\u7684\u81c9\u90e8\uff0c\u4fbf\u80fd\u81ea\u52d5\u5ffd\u7565\u5176\u4ed6\u4e0d\u76f8\u5e72\u7684\u8072\u97f3\uf
f0c\u807d\u898b\u5c0d\u65b9\u7684\u8aaa\u8a71\u5167\u5bb9\uff0c\u7a31\u4f5c\u300c\u96de\u5c3e\u9152\u6703\u6548\u61c9\u300d\uff08The Cocktail Party Effect\uff09\u3002\u5176\u4e2d\u7684\u4e00\u5927\u95dc\u9375\uff0c\u5728\u65bc\u5f71\u50cf\u8207\u8a9e\u97f3\u7684\u7d50\u5408\u3002<\/p>\n<p><strong>\u97f3\u8a0a\u7d50\u5408\u5f71\u50cf<\/strong><\/p>\n<p>\u4ee5\u6b64\u70ba\u51fa\u767c\u9ede\uff0cGoogle\u958b\u767c\u51fa\u65b0\u7684\u97f3\u8a0a\u5206\u96e2\u6280\u8853\u4fbf\u662f\u7531\u5f71\u7247\u8457\u624b\uff0c\u9996\u5148\u5728\u4e00\u5e40\u5e40\u5f71\u683c\u4e2d\u9032\u884c\u4eba\u81c9\u8207\u53e3\u90e8\u52d5\u4f5c\u8fa8\u8b58\uff0c\u591a\u4e86\u8996\u89ba\u8cc7\u8a0a\u5f8c\uff0c\u518d\u5c07\u5206\u96e2\u7684\u8a9e\u97f3\u8207\u4eba\u7269\u5f71\u50cf\u4f5c\u9023\u7d50\u3002\u4e00\u500b\u9700\u8981\u514b\u670d\u7684\u56f0\u96e3\uff0c\u4e5f\u662f\u8fd1\u5e74\u4f86\u8a31\u591a\u8996\u97f3\u8a0a\uff08Audio-visual\uff0c AV\uff09\u5206\u96e2\u6280\u8853\u5171\u6709\u7684\u7f3a\u9677\u2500\u4ed6\u5011\u662f\u300cspeaker-dependent\u300d\uff0c\u610f\u5373\uff1a\u8fa8\u8b58\u5c0d\u8c61\u5fc5\u9808\u66fe\u7d93\u51fa\u73fe\u5728\u8a13\u7df4\u8cc7\u6599\u4e2d\uff0c\u7cfb\u7d71\u65b9\u53ef\u8b58\u5225\u3002\u800c\u9019\u5f80\u5f80\u53d6\u6c7a\u65bc\u8a13\u7df4\u8cc7\u6599\u96c6\u7684\u898f\u6a21\u8207\u5167\u5bb9\u591a\u6a23\u6027\u3002<\/p>\n<p>\u7814\u7a76\u4eba\u54e1\u65bc\u662f\u8490\u96c6\u4e86\u8fd1\u4e09\u5341\u842c\u90e8\u77ed\u8b1b\u5f71\u7247\uff08\u4f8b\u5982\uff1aTED Talks\uff09\uff0c\u5305\u542b\u4e0d\u540c\u8a9e\u8a00\u3001\u4e0d\u540c\u5e74\u9f61\u5c64\u7684\u8b1b\u8005\uff0c\u4e26\u64f7\u53d6\u5176\u4e2d\u97f3\u8a0a\u55ae\u7d14\u3001\u4eba\u50cf\u6e05\u6670\u7684\u7247\u6bb5\uff0c\u532f\u6574\u70ba\u300c<a 
href=\"https:\/\/looking-to-listen.github.io\/\">AVSpeech<\/a>\u300d\u8cc7\u6599\u96c6\uff0c\u7528\u4ee5\u8a13\u7df4AI\u91dd\u5c0d\u4e0d\u540c\u8aaa\u8a71\u8005\u5206\u96e2\u51fa\u7368\u7acb\u97f3\u8ecc\u3002\u5728\u300c\u5f88\u591a\u4eba\u540c\u6642\u8b1b\u8a71\u300d\u8207\u300c\u74b0\u5883\u5435\u96dc\u300d\u7b49\u60c5\u6cc1\u4e0b\uff0c\u8996\u9700\u8981\u5f37\u5316\u7279\u5b9a\u97f3\u8ecc\uff0c\u4ee5\u9054\u5230\u6d88\u9664\u96dc\u97f3\uff08\u5305\u542b\u4ed6\u4eba\u5c0d\u8a71\u8207\u80cc\u666f\u566a\u97f3\uff09\u7684\u76ee\u7684\u3002<\/p>\n<div id=\"attachment_79644\" class=\"wp-caption alignnone\"><img decoding=\"async\" class=\"size-full wp-image-79644\" src=\"https:\/\/highscope.ch.ntu.edu.tw\/wordpress\/wp-content\/uploads\/2018\/10\/louder-fig1.png\" alt=\"\" width=\"1236\" height=\"324\" \/>\n<p class=\"wp-caption-text\">\u5c07\u5f71\u7247\u4e2d\u7684\u5f71\u50cf\u8207\u97f3\u8a0a\u5206\u96e2\u5f8c\u518d\u914d\u5c0d\uff08\u4f86\u6e90\uff1aA. Ephrat et al., 2018.\uff09<\/p>\n<\/div>\n<p><strong>\u6a21\u578b\u67b6\u69cb<\/strong><\/p>\n<div id=\"attachment_79645\" class=\"wp-caption alignnone\"><img decoding=\"async\" class=\"size-full wp-image-79645\" src=\"https:\/\/highscope.ch.ntu.edu.tw\/wordpress\/wp-content\/uploads\/2018\/10\/louder-fig2.png\" alt=\"\" width=\"1071\" height=\"443\" \/>\n<p class=\"wp-caption-text\">Google\u65b0\u8a9e\u97f3\u8fa8\u8b58AI\u80cc\u5f8c\u7684\u795e\u7d93\u7db2\u8def\u67b6\u69cb\uff08\u4f86\u6e90\uff1aI. Mosseri et al., 2018. 
\uff09<\/p>\n<\/div>\n<ul>\n<li>\u8f38\u5165\uff1a\u5f71\u50cf+\u97f3\u8a0a<\/li>\n<\/ul>\n<p>\u8f38\u5165\u7684\u8cc7\u8a0a\u5305\u542b\u5f71\u7247\u7684\u5f71\u683c\u8207\u5c0d\u61c9\u7684\u97f3\u8ecc\uff0c\u5f71\u7247\u4e2d\u53ef\u80fd\u6709\u8d85\u904e\u4e00\u500b\u4eba\u540c\u6642\u8aaa\u8a71\uff0c\u6216\u6709\u5176\u4ed6\u566a\u97f3\u5f62\u6210\u5e72\u64fe\u3002\u9996\u5148\u85c9\u7531\u9810\u5148\u8a13\u7df4\u597d\u7684\u81c9\u90e8\u8fa8\u8b58\u6a21\u578b\uff0c\u70ba\u6bcf\u5e40\u5f71\u683c\u4e2d\u7684\u4eba\u81c9\uff0c\u4f9d\u64da\u7279\u5fb5\u8ce6\u4e88\u4e00\u5411\u91cf\u8868\u793a\u3002\u97f3\u8ecc\u5247\u662f\u9032\u884c\u77ed\u6642\u8ddd\u5085\u7acb\u8449\u8f49\u63db\uff08Short-time Fourier Transform\uff0cSTFT\uff09\uff0c\u4ee5\u8907\u6578\u5f62\u5f0f\u8868\u793a\u3002<\/p>\n<ul>\n<li>\u6a21\u578b\uff1a\u795e\u7d93\u7db2\u8def\u67b6\u69cb<\/li>\n<\/ul>\n<p>\u5c07\u4e0d\u540c\u4eba\u7269\u81c9\u90e8\u8fa8\u8b58\u7684\u7d50\u679c\u8f38\u5165\u591a\u5c64\u5377\u7a4d\u795e\u7d93\u7db2\u8def\uff08Convolutional Neural Networks\uff0c CNN\uff09\uff0c\u5f97\u5230\u8996\u8a0a\u7279\u5fb5\u3002\u63a5\u8457\u878d\u5408\u8996\u8a0a\u8207\u97f3\u8a0a\u7279\u5fb5\uff0c\u901a\u904e\u96d9\u5411\u9577\u77ed\u671f\u8a18\u61b6\uff08Bidirectional LSTM\uff09\u985e\u795e\u7d93\u7db2\u8def\u8207\u5168\u9023\u63a5\u5c64\uff08Fully connected 
layers\uff09\uff0c\u7522\u751f\u53ef\u7528\u65bc\u97f3\u8a0a\u5206\u96e2\u7684\u8f38\u51fa\u3002<\/p>\n<ul>\n<li>\u8f38\u51fa\uff1a\u983b\u8b5c\u906e\u7f69<\/li>\n<\/ul>\n<p>\u6a21\u578b\u91dd\u5c0d\u5f71\u7247\u4e2d\u7684\u6bcf\u500b\u4eba\u7269\u4ee5\u53ca\u80cc\u666f\u96dc\u97f3\uff0c\u8f38\u51fa\u5c0d\u61c9\u7684\u983b\u8b5c\u906e\u7f69\u3002\u5c07\u906e\u7f69\u8207\u8f38\u5165\u7aef\u5145\u6eff\u566a\u97f3\u7684\u983b\u8b5c\u76f8\u4e58\uff0c\u518d\u9032\u884c\u9006\u5085\u7acb\u8449\u8f49\u63db\u5f8c\uff0c\u4fbf\u53ef\u5f97\u5230\u8a72\u4eba\u7269\u4e7e\u6de8\u7684\u97f3\u8a0a\u3002\u5f9e\u6bcf\u500b\u4eba\u7368\u7acb\u7684\u97f3\u8ecc\uff0c\u6211\u5011\u53ef\u4ee5\u91cd\u7d44\u4e00\u6bb5\u5f71\u7247\uff0c\u52a0\u5f37\u7279\u5b9a\u89d2\u8272\u7684\u8a71\u8a9e\uff0c\u4e26\u964d\u4f4e\u5176\u4ed6\u5e72\u64fe\uff0c\u4f7f\u5f97\u91cd\u8981\u5c0d\u8a71\u66f4\u52a0\u6e05\u6670\u3002<\/p>\n<p><strong>\u61c9\u7528<\/strong><\/p>\n<p>\u89c0\u770bYouTube\u5f71\u7247\u6642\uff0c\u756b\u9762\u4e0b\u65b9\u5f80\u5f80\u6709\u7cfb\u7d71\u81ea\u52d5\u751f\u6210\u7684\u5b57\u5e55\u53ef\u642d\u914d\u4f7f\u7528\uff0c\u7136\u800c\u5728\u4e92\u52d5\u8f03\u71b1\u70c8\u7684\u7bc0\u76ee\u4e2d\uff0c\u7cfb\u7d71\u5bb9\u6613\u5c07\u4e0d\u540c\u8aaa\u8a71\u8005\u7684\u8a71\u8a9e\u6df7\u96dc\u5728\u4e00\u8d77\uff0c\u7522\u751f\u96e3\u4ee5\u7406\u89e3\uff0c\u751a\u81f3\u932f\u8aa4\u767e\u51fa\u7684\u5b57\u5e55\u3002\u800c\u826f\u597d\u7684\u97f3\u8a0a\u5206\u96e2\uff0c\u53ef\u671b\u5e6b\u52a9\u7cfb\u7d71\u5728\u591a\u4eba\u5c0d\u8a71\u7684\u60c5\u5883\u4e0b\uff0c\u6e05\u695a\u5206\u8fa8\u6bcf\u4f4d\u5c0d\u8a71\u8005\u7684\u8a9e\u97f3\uff0c\u63d0\u5347\u5b57\u5e55\u6b63\u78ba\u7387\u3002\u9664\u6b64\u4e4b\u5916\uff0c\u4e5f\u53ef\u61c9\u7528\u65bc\u8996\u8a0a\u6703\u8b70\uff0c\u8b93\u8207\u6703\u8005\u80fd\u66f4\u6e05\u6670\u5730\u807d\u898b\u767c\u8a00\u8005\u7684\u8072\u97f3\u3002\u7576\u7136\uff0c\u4e5f\u662f\u6700\u91cd\u8981\u7684\u76ee\u7684\uff0c\u667a\u80fd\u52a9\u7406\u5f97\u4ee5\u572
8\u5435\u96dc\u74b0\u5883\u4e2d\uff0c\u66f4\u8f15\u6613\u5730\u63a5\u6536\u4f7f\u7528\u8005\u6240\u4e0b\u7684\u6307\u4ee4\u3002<\/p>\n<p>&nbsp;<\/p>\n<p><strong>\u53c3\u8003\u8cc7\u6599<\/strong><\/p>\n<ol>\n<li>A. Ephrat, I. Mosseri, O. Lang, T. Dekel, K. Wilson, A. Hassidim, W. Freeman and M. Rubinstein, \u201c<a href=\"https:\/\/arxiv.org\/abs\/1804.03619\">Looking to listen at the cocktail party<\/a>\u201d,\u00a0<em>ACM Transactions on Graphics<\/em>, vol. 37, no. 4, pp. 1-11, 2018.<\/li>\n<li>D. Wang and J. Chen, \u201c<a href=\"https:\/\/arxiv.org\/abs\/1708.07524\">Supervised Speech Separation Based on Deep Learning: An Overview<\/a>\u201d,\u00a0<em>IEEE\/ACM Transactions on Audio, Speech, and Language Processing<\/em>, vol. 26, no. 10, pp. 1702-1726, 2018.<\/li>\n<li>J. Hershey, Z. Chen, J. Le Roux and S. Watanabe, \u201c<a href=\"https:\/\/ieeexplore.ieee.org\/document\/7471631\/authors#authors\">Deep clustering: Discriminative embeddings for segmentation and separation<\/a>\u201d, in\u00a0<em>2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)<\/em>, Shanghai, China, 2016, pp. 31-35.<\/li>\n<li>I. Mosseri and O. 
Lang, \u201c<a href=\"https:\/\/ai.googleblog.com\/2018\/04\/looking-to-listen-audio-visual-speech.html\">Looking to Listen: Audio-Visual Speech Separation<\/a>\u201d, Google AI Blog, 2018.<\/li>\n<\/ol>\n<\/div>\n<\/div>\n","protected":false},"excerpt":{"rendered":"<p>\u4eba\u985e\u6709\u500b\u8207\u751f\u4ff1\u4f86\u7684\u80fd\u529b\uff1a\u80fd\u5728\u5435\u96dc\u7684\u74b0\u5883\u4e2d\uff0c\u5206\u8fa8\u51fa\u54ea\u53e5\u8a71\u662f\u54ea\u500b\u4eba\u8aaa\u7684\uff0c\u4e26\u5c07\u4e0d\u60f3\u95dc\u6ce8\u7684\u90e8\u4efd\u81ea\u52d5\u300c\u6d88\u97f3\u300d\uff0c\u964d\u4f4e\u8a0a\u606f\u63a5\u6536\u7684\u932f\u8aa4\u7387\u3002\u73fe\u5728\uff0c\u96fb\u8166\u4e5f\u5c07\u5177\u5099\u9019\u6a23\u7684\u80fd\u529b\u3002Google \u65b0\u767c\u8868\u7684\u97f3\u8a0a\u5206\u96e2\u6280\u8853\uff0c\u85c9\u7531\u5206\u6790\u8aaa\u8a71\u8005\u53e3\u90e8\u52d5\u4f5c\u5f71\u50cf\u8207\u8072\u97f3\u7279\u5fb5\uff0c\u8fa8\u5225\u8072\u97f3\u662f\u5f9e\u8ab0\u7684\u53e3\u4e2d\u767c\u51fa\uff0c\u9032\u800c\u5c07\u756b\u9762\u4e2d\u6bcf\u500b\u4eba\u7269\u6240\u8aaa\u7684\u8a71\u5206\u96e2\u70ba\u7368\u7acb\u97f3\u8ecc\u3002\u5982\u6b64\u4e00\u4f86\uff0c\u667a\u80fd\u52a9\u7406\u4e5f\u80fd\u5728\u5435\u96dc\u74b0\u5883\u4e2d\uff0c\u5206\u8fa8\u51fa\u81ea\u5df1\u7684\u4f7f\u7528\u8005\u6240\u4e0b\u7684\u6307\u4ee4\u3002<\/p>\n","protected":false},"author":21,"featured_media":39106,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[3772,3293],"tags":[4073,6185,6186],"aioseo_notices":[],"jetpack_featured_media_url":"https:\/\/case.ntu.edu.tw\/blog\/wp-content\/uploads\/2022\/01\/louder-main.jpg","_links":{"self":[{"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=\/wp\/v2\/posts\/39105"}],"collection":[{"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"em
beddable":true,"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=\/wp\/v2\/users\/21"}],"replies":[{"embeddable":true,"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=39105"}],"version-history":[{"count":1,"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=\/wp\/v2\/posts\/39105\/revisions"}],"predecessor-version":[{"id":39107,"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=\/wp\/v2\/posts\/39105\/revisions\/39107"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=\/wp\/v2\/media\/39106"}],"wp:attachment":[{"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=39105"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=39105"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/case.ntu.edu.tw\/blog\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=39105"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}