\u5bf9\u96c6\u5408X\u8fdb\u884c\u5206\u6790\u540e\u5f97\u5230\u5982\u4e0b\u7ed3\u679c\uff1a\u968f\u673a\u53d8\u91cfx\u670d\u4ece\u81ea\u7531\u5ea6\u4e3atk\u7684\u5361\u65b9\u5206\u5e03\uff0c\u5176\u4e2dtk\u8fd1\u4f3c\u7b49\u4e8e\u8fc7\u6ee4\u76ee\u6807\u6570\uff0c\u4e0e\u6570\u636e\u7ef4\u5ea6\u7b49\u53c2\u6570\u65e0\u5173\u3002<\/strong>\u5361\u65b9\u5206\u5e03\u662f\u4e00\u79cd\u7279\u6b8a\u7684Gamma\u5206\u5e03\uff0c\u6b64\u65f6Gamma\u5206\u5e03\u7684\u5f62\u72b6\u53c2\u6570\u03b1=tk\/2\u3001\u5c3a\u5ea6\u53c2\u6570\u03b2=2\u3002<\/p>\n<\/p>\n
Figure 2\uff1a\u4e0a\u90e8\u622a\u56fe\u4e3a\u641c\u7d22\u7b97\u6cd5\u7684\u4e00\u5f20\u6267\u884c\u622a\u56fe\uff0c\u6570\u636e\u5e93\u5927\u5c0f209\u4e07\uff0c\u67e5\u8be2\u91cf21504\uff0c\u6570\u636e\u7ef4\u5ea696\uff0c\u6570\u636e\u7c7b\u578bhalf\uff0cTopK\u4e3a128\uff0c\u5faa\u73af16\u6b21\u7ed3\u675f\u3002\u7b2c1\u30012\u5217\u662f\u8ddd\u79bb\u8ba1\u7b97\u4e0e\u8fc7\u6ee4\u51fd\u6570\u7684\u6267\u884c\u65f6\u95f4\uff08\u5355\u4f4d\uff1ams\uff09\u548c\u76f8\u5e94\u7b97\u529b\uff08\u5355\u4f4d\uff1aTFLOPS\uff09\uff1b\u7b2c3\u30014\u30015\u5217\u5206\u522b\u662f\u7ed3\u679c\u89e3\u6790\u51fd\u6570\u3001\u6392\u5e8f\u51fd\u6570\u3001\u9608\u503c\u66f4\u65b0\u51fd\u6570\u7684\u6267\u884c\u65f6\u95f4\uff1b\u7b2c6\u5217\u662f\u5206\u6bb5\u5927\u5c0f\uff1b\u7b2c7\u30018\u5217\u662f\u8fc7\u6ee4\u7ed3\u679c\u6570\u7684\u5747\u503c\u548c\u6807\u51c6\u5dee\uff1b\u7b2c9\u300110\u5217\u662f\u7531\u5747\u503c\u3001\u6807\u51c6\u5dee\u8ba1\u7b97\u51fa\u7684Gamma\u5206\u5e03\u7684\u5f62\u72b6\u53c2\u6570\u03b1\u3001\u5c3a\u5ea6\u53c2\u6570\u03b2\uff1b\u7b2c11\u300112\u5217\u662f\u8fc7\u6ee4\u7ed3\u679c\u6570\u7684\u6700\u5927\u503c\u3001\u6700\u5c0f\u503c\uff1b\u7b2c13\u5217\u662f\u8fc7\u6ee4\u76ee\u6807\u6570\u3002\u6700\u540e\u4e00\u884c\u662f\u6574\u4e2a\u67e5\u8be2\u7684\u8017\u65f6\u548c\u76f8\u5e94\u7b97\u529b\u3002<\/p>\n
Figure 3\uff1a\u5de6\u4fa7\u84dd\u8272\u5b9e\u7ebf\u662ftk=32\u7684\u6982\u7387\u5bc6\u5ea6\u66f2\u7ebf\uff0c\u84dd\u8272\u5706\u70b9\u662f\u76ee\u6807\u6570\u4e3a32\u7684\u8fc7\u6ee4\u7ed3\u679c\u6570\u7684\u7edf\u8ba1\u503c\uff1b\u6a59\u8272\u5b9e\u7ebf\u662ftk=64\u7684\u6982\u7387\u5bc6\u5ea6\u66f2\u7ebf\uff0c\u6a59\u8272\u5706\u70b9\u662f\u76ee\u6807\u6570\u4e3a64\u7684\u8fc7\u6ee4\u7ed3\u679c\u6570\u7684\u7edf\u8ba1\u503c\uff1b\u7eff\u8272\u5b9e\u7ebf\u662ftk=128\u7684\u6982\u7387\u5bc6\u5ea6\u66f2\u7ebf\uff0c\u7eff\u8272\u5706\u70b9\u662f\u76ee\u6807\u6570\u4e3a128\u7684\u8fc7\u6ee4\u7ed3\u679c\u6570\u7684\u7edf\u8ba1\u503c\u3002<\/p>\n
\u00a0 \u00a0 \u00a0 \u00a0 \u7531\u4e8e\u9996\u4e2a\u5206\u6bb5\u662f\u5b8c\u6574\u7684\u8f93\u51fa\u3001\u6392\u5e8f\uff0c\u5e76\u5b58\u50a8\u5230\u7ed3\u679c\u961f\u5217\uff0c\u56e0\u6b64\u7ed3\u679c\u961f\u5217\u6c38\u4e0d\u4e3a\u7a7a\uff0c\u6240\u4ee5\u521d\u59cb\u8fc7\u6ee4\u76ee\u6807\u6570\u8303\u56f4\u5185\u7684\u53ec\u56de\u7387\u4e3a1\u3002\u5f53\u4f7f\u7528\u88682\u7684\u65b9\u5f0f\u589e\u52a0\u8fc7\u6ee4\u76ee\u6807\u6570\u65f6\uff0c\u7531\u4e8e\u591a\u4e2a\u77eb\u6b63\u5206\u6bb5\u7684\u5b58\u5728\uff0c\u4f7fTopK\u7684\u53ec\u56de\u7387\u4e5f\u63a5\u8fd1\u4e8e1\u3002\u53ef\u4ee5\u901a\u8fc7\u7ed3\u679c\u961f\u5217\u7684\u586b\u5145\u60c5\u51b5\u63a8\u7b97\u53ec\u56de\u7387\uff0c\u5f53\u586b\u5145\u4f4d\u7f6e\u8fbe\u5230\u6216\u8d85\u8fc7TopK\u65f6\uff0c\u53ec\u56de\u7387\u4e3a1\u3002\u901a\u8fc7\u63a7\u5236\u8fc7\u6ee4\u7684\u7ed3\u679c\u6570\uff0c\u53ef\u4ee5\u51cf\u5c11\u8f93\u51fa\u5e26\u5bbd\uff0c\u540c\u65f6\u4e5f\u51cf\u5c11\u4e86\u6392\u5e8f\u7684\u6b21\u6570\u548c\u6bcf\u6b21\u6392\u5e8f\u7684\u8ba1\u7b97\u91cf\uff0c\u4f7f\u786c\u4ef6\u7684\u6027\u80fd\u5f97\u5230\u5145\u5206\u5229\u7528\u3002<\/p>\n
\u00a0 \u00a0 \u00a0 \u00a0 Felix Chern\u7b49\u4eba\u5728Google TPU\u4e0a\u5b9e\u73b0\u4e86\u5cf0\u503c\u6027\u80fd\u7684knn\u641c\u7d22\u65b9\u6cd5[7]\uff0c\u6838\u5fc3\u65b9\u6cd5\u4e5f\u662f\u5bf9\u7ed3\u679c\u8fdb\u884c\u8fc7\u6ee4\uff0c\u4e0d\u8fc7\u4ed6\u4eec\u6ca1\u6709\u4f7f\u7528\u8fed\u4ee3\u66f4\u65b0\u9608\u503c\u7684\u7b56\u7565\uff0c\u800c\u662f\u901a\u8fc7\u53ec\u56de\u7387\u6765\u63a7\u5236\u8fc7\u6ee4\u7ed3\u679c\u7684\u6570\u91cf\uff0c\u6d88\u9664\u4e86\u8fed\u4ee3\u3001\u6392\u5e8f\u7684\u5f00\u9500\u3002\u4ed6\u4eec\u5bf9\u5185\u5b58\u74f6\u9888\u3001\u6307\u4ee4\u74f6\u9888\u505a\u4e86\u5f88\u8be6\u7ec6\u7684\u5206\u6790\u3002<\/p>\n
4.\u7ed3\u8bba<\/h2>\n
\u00a0 \u00a0 \u00a0 \u00a0 \u867d\u7136GPU\u6709\u5f88\u9ad8\u7684\u7406\u8bba\u7b97\u529b\uff0c\u4f46\u5f88\u5c11\u6709\u5e94\u7528\u80fd\u5c06\u5176\u5145\u5206\u5229\u7528\u8d77\u6765\u3002\u5728\u672c\u6587\u4e2d\uff0c\u6211\u4eec\u63d0\u51fa\u4e86\u4e00\u5957\u4ee5\u8fc7\u6ee4\u4e3a\u57fa\u7840\u7684\u7b97\u6cd5\uff0c\u901a\u8fc7\u5bf9\u8f93\u51fa\u7ed3\u679c\u8fdb\u884c\u8fc7\u6ee4\uff0c\u51cf\u5c11\u8f93\u51fa\u5e26\u5bbd\uff0c\u63d0\u9ad8\u786c\u4ef6\u5229\u7528\u7387\uff1b\u901a\u8fc7\u5bf9\u8fc7\u6ee4\u7ed3\u679c\u6570\u91cf\u7684\u63a7\u5236\uff0c\u63d0\u9ad8\u6392\u5e8f\u901f\u5ea6\uff0c\u4f46\u51e0\u4e4e\u4e0d\u5f71\u54cd\u53ec\u56de\u7387\uff1b\u901a\u8fc7\u4e13\u95e8\u9488\u5bf9\u4f4e\u7ef4\u5ea6\u8ba1\u7b97\u7684\u4f18\u5316\uff0c\u4f7f\u65b0\u786c\u4ef6\u7684\u5229\u7528\u7387\u8fbe\u523096%\u3002<\/p>\n
\u672c\u6587\u7684\u90e8\u5206\u5185\u5bb9\u6765\u81ea\u6211\u7684blog[9]\u548c\u4e13\u5229\u3002
Ding Nan[4]\u3001\u82f1\u4f1f\u8fbe[10]\u7b49\u5bf9GPU\u7684\u5185\u5b58\u74f6\u9888\u3001\u6307\u4ee4\u74f6\u9888\u4e5f\u6709\u5f88\u8be6\u7ec6\u7684\u5206\u6790\u3002<\/p>\n
\u53c2\u8003\u6587\u732e<\/h2>\n
[1] Jia, Zhe, et al. “Dissecting the NVidia Turing T4 GPU via microbenchmarking.” arXiv preprint arXiv:1903.07486 (2019).<\/p>\n
[2] Ebrahimpour, Hossein, and Abbas Kouzani. “Face recognition using bagging KNN.” International conference on signal processing and communication systems (ICSPCS\u20192007) australia, gold coast. sn, 2007.<\/p>\n
[3] Gordo, Albert, et al. “Deep image retrieval: Learning global representations for image search.” European conference on computer vision. Springer, Cham, 2016.<\/p>\n
[4] Ding, Nan, and Samuel Williams. “An instruction roofline model for gpus.” 2019 IEEE\/ACM Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS) (2019): 7-18.<\/p>\n
[5] Babenko, Artem, and Victor Lempitsky. “Efficient indexing of billion-scale datasets of deep descriptors.” Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2016.<\/p>\n
[6] Johnson, Jeff, Matthijs Douze, and Herv\u00e9 J\u00e9gou. “Billion-scale similarity search with gpus.” IEEE Transactions on Big Data 7.3 (2019): 535-547.<\/p>\n
[7] Chern, Felix, et al. “Tpu-knn: K nearest neighbor search at peak flop\/s.” arXiv preprint arXiv:2206.14286 (2022).<\/p>\n
[8] Kerr, Andrew, et al. “Cutlass: Fast linear algebra in cuda c++.” NVIDIA Developer Blog (2017).<\/p>\n
[9] hws000. “The fastest knn search algorithm” https:\/\/blog.simbot.net\/2022\/03\/12\/the-fastest-knn-search-algorithm\/<\/p>\n
[10] “Matrix Multiplication Background User’s Guide” NVIDIA Documentation Center https:\/\/docs.nvidia.com\/deeplearning\/performance\/dl-performance-matrix-multiplication\/index.html<\/p>\n
[11] “Matrix multiply-accumulate operation using mma instruction” NVIDIA Documentation Center https:\/\/docs.nvidia.com\/cuda\/parallel-thread-execution\/index.html#warp-level-matrix-instructions-for-mma<\/p>\n","protected":false},"excerpt":{"rendered":"
\u672c\u6587\u521b\u9020\u6027\u5730\u63d0\u51fa\u4e00\u79cd\u65b0\u7684knn\uff08k nearest neighbor\uff09\u641c\u7d22\u52a0\u901f\u7b97\u6cd5\uff0c\u53ef\u4ee5\u8ba9\u786c\u4ef6\u7684\u5229\u7528\u7387\u8fbe\u523090%\u4ee5\u4e0a\u3002\u672c\u7b97\u6cd5\u5c06knn\u641c\u7d22\u4ece\u8ba1\u7b97\u5bc6\u96c6\u3001\u5185\u5b58IO\u5bc6\u96c6\u7b97\u6cd5\uff0c\u6539\u8fdb\u4e3a\u7eaf\u8ba1\u7b97\u5bc6\u96c6\u7b97\u6cd5\uff0c\u5145\u5206\u6316\u6398\u786c\u4ef6\u80fd\u529b\uff0c\u53ea\u8981\u5806\u66f4\u591a\u7684\u786c\u4ef6\uff0c\u5c31\u80fd\u5b9e\u73b0\u66f4\u5feb\u7684\u8ba1\u7b97\u3002\u65b0\u7b97\u6cd5\u4e0d\u4f46\u53ef\u4ee5\u8fd0\u884c\u5728GPU\u4e0a\uff0c\u8fd8\u53ef\u4ee5\u9ad8\u6548\u8fd0\u884c\u4e8e\u5404\u79cd\u4e13\u4e1a\u77e9\u9635\u52a0\u901f\u5361\u4e0a\u3002<\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[29],"tags":[36,39,31,37,35,34],"_links":{"self":[{"href":"https:\/\/blog.simbot.net\/wp-json\/wp\/v2\/posts\/400"}],"collection":[{"href":"https:\/\/blog.simbot.net\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/blog.simbot.net\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/blog.simbot.net\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/blog.simbot.net\/wp-json\/wp\/v2\/comments?post=400"}],"version-history":[{"count":7,"href":"https:\/\/blog.simbot.net\/wp-json\/wp\/v2\/posts\/400\/revisions"}],"predecessor-version":[{"id":415,"href":"https:\/\/blog.simbot.net\/wp-json\/wp\/v2\/posts\/400\/revisions\/415"}],"wp:attachment":[{"href":"https:\/\/blog.simbot.net\/wp-json\/wp\/v2\/media?parent=400"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/blog.simbot.net\/wp-json\/wp\/v2\/categories?post=400"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/blog.simbot.net\/wp-json\/wp\/v2\/tags?post=400"}],"curies":[{"name":"
wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}