{"id":494,"date":"2019-08-13T15:48:11","date_gmt":"2019-08-13T07:48:11","guid":{"rendered":"https:\/\/nj.transwarp.cn:8180\/?p=494"},"modified":"2025-06-13T14:12:28","modified_gmt":"2025-06-13T06:12:28","slug":"hyperdrive%e7%9a%84sql-bulkload","status":"publish","type":"post","link":"https:\/\/kbwp.transwarp.cn\/?p=494","title":{"rendered":"Hyperdrive\u7684SQL Bulkload"},"content":{"rendered":"<p>2019-08-08 17:40:17 \u661f\u671f\u56db<\/p>\n<h3>\u6982\u8981\u63cf\u8ff0<\/h3>\n<hr \/>\n<blockquote>\n<p>\u672c\u6848\u4f8b\u8be6\u7ec6\u4ecb\u7ecdhyperdrive\u7684SQL Bulkload\u64cd\u4f5c\u6d41\u7a0b\uff0c\u4ee5\u53ca\u6ce8\u610f\u4e8b\u9879\uff1b<br \/>\n\u540c\u65f6\u7b80\u5355\u4ecb\u7ecd\u4e86hyperdrive\u4e0ehyperbase\u7684SQL Bulkload\u64cd\u4f5c\u7684\u533a\u522b\uff1b<\/p>\n<\/blockquote>\n<h3>\u8be6\u7ec6\u8bf4\u660e<\/h3>\n<hr \/>\n<p>Bulkload\u662f\u4e00\u79cd\u5feb\u901f\u5411Hyperbase\u5bfc\u5165\u5927\u91cf\u6570\u636e\u7684\u65b9\u6cd5\uff1b<br \/>\nSQL BulkLoad\u7684\u9002\u7528\u573a\u666f\u4e3a\uff1a<\/p>\n<ul>\n<li>\u521d\u6b21\u539f\u59cb\u6570\u636e\u5bfc\u5165<\/li>\n<li>\u589e\u91cf\u6570\u636e\u5bfc\u5165<\/li>\n<\/ul>\n<h4>0\u3001\u64cd\u4f5c\u524d\u51c6\u5907\u5de5\u4f5c<\/h4>\n<p>\u539f\u59cb\u6570\u636e\u9700\u8981\u52a0\u8f7d\u5230 inceptor \u8868\u4e2d\uff0c\u6bd4\u5982 BL_FOR_HYPERDRIVE_EXT \u8868<\/p>\n<ul>\n<li>\u521b\u5efa2\u4e2a\u4e34\u65f6\u51fd\u6570\u7528\u4ee5\u8fdb\u884c split key \u7684\u7f16\u7801\u8f6c\u6362\n<pre><code class=\"language-sql\">CREATE TEMPORARY FUNCTION HYPERDRIVE_PT_ENCODE AS \"io.transwarp.hyperdrive.udf.UDFEncodePrimitiveType\";\nCREATE TEMPORARY FUNCTION HYPERDRIVE_ST_ENCODE AS \"io.transwarp.hyperdrive.udf.UDFEncodeStructType\";<\/code><\/pre>\n<p>\u6ce8\u610f\uff0c\u5982\u679chyperbase\u7248\u672c\u662f9.3.3\u7248\u672c\uff0c\u4e14quark.hyperdrive2.enable\u53c2\u6570\u503c\u4e3atrue\uff0c\u4e0a\u8ff0\u4e24\u4e2a\u4e34\u65f6\u51fd\u6570\u521b\u5efa\u8bed\u53e5\u5e94\u4fee\u6539\u4e3a\u5982\u4e0b\uff1a<\/p>\n<pre><code class=\"language-sql\">CREATE TEMPORARY FUNCTION HYPERDRIVE_PT_ENCODE AS \"io.transwarp.hyper2drive.udf.UDFEncodePrimitiveType\";\nCREATE TEMPORARY FUNCTION HYPERDRIVE_ST_ENCODE AS \"io.transwarp.hyper2drive.udf.UDFEncodeStructType\";<\/code><\/pre>\n<\/li>\n<\/ul>\n<h4>1\u3001\u91c7\u6837\u751f\u6210SplitKey<\/h4>\n<p>\u5728\u5bfc\u5165\u6570\u636e\u4e4b\u524d\uff0c\u7528\u6237\u9700\u8981\u5bf9\u6570\u636e\u8fdb\u884c\u91c7\u6837\uff0c\u6839\u636e\u91c7\u6837\u7ed3\u679c\u53ef\u4ee5\u751f\u6210splitkey\uff0c\u4f5c\u4e3a\u9884\u5206region\u7684\u4f9d\u636e\uff1b<br \/>\n\u91c7\u6837\u8868\u5373\u7528\u6765\u4fdd\u5b58\u91c7\u6837\u7684\u7ed3\u679c\uff0c\u91c7\u6837\u8868\u5217\u4e2d\u5fc5\u987b\u5305\u542b\u7528\u6765\u751f\u6210rowkey\u7684\u6e90\u8868\u5b57\u6bb5\uff1b<br \/>\n\u4f8b\u5982\uff0c\u4f7f\u7528\u6e90\u8868\u4e2d\u7684 SS_SOLD_DATE_SK \u5b57\u6bb5\u4f1a\u7528\u6765\u751f\u6210\u76ee\u6807\u8868\u7684rowkey\uff1a<\/p>\n<pre><code class=\"language-sql\">CREATE TABLE SAMPLETABLE\n(SS_SOLD_DATE_SK INT)\nSTORED AS ORC;<\/code><\/pre>\n<h5>1.1\u3001\u5b58\u653e\u91c7\u6837\u7ed3\u679c<\/h5>\n<ul>\n<li>\u6839\u636e\u539f\u59cb\u6570\u636e\u8868\u6536\u96c6\u91c7\u6837\u7ed3\u679c\n<pre><code class=\"language-sql\">INSERT INTO TABLE SAMPLETABLE \nSELECT SAMPLE(8484\n,SS_SOLD_DATE_SK)\nFROM BL_FOR_HYPERDRIVE_EXT;<\/code><\/pre>\n<\/li>\n<li>\u8bf4\u660e\uff1a\n<ul>\n<li>Inceptor\u63d0\u4f9b\u4e86sample\u51fd\u6570\u7528\u6765\u751f\u6210splitkey\uff1b<\/li>\n<li>sample\u51fd\u6570\u63a5\u53d7\u81f3\u5c112\u4e2a\u53c2\u6570\uff0c\u7b2c1\u4e2a\u53c2\u6570\u662f\u91c7\u6837\u7387\uff0c\u91c7\u6837\u7387=\u6570\u636e\u6761\u6570\/97\/\u9884\u5206region\u6570<\/li>\n<li>\u6837\u4f8b\u6570\u636e\u67095760749\u6761\uff0c\u9884\u52067\u4e2aregion\uff0c\u56e0\u6b64\u91c7\u6837\u7387=5760749\/97\/7\uff0c\u5373\u91c7\u6837\u7387\u4e3a8484\uff1b<\/li>\n<li>\u540e\u9762\u7684\u53c2\u6570\u4e3a\u91c7\u6837\u8868\u4e2d\u7684\u5217\u540d\uff1b<\/li>\n<li>\u7528\u6237\u9700\u8981\u6839\u636e\u81ea\u8eab\u4e1a\u52a1\u573a\u666f\u548c\u5b9e\u9645\u6570\u636e\u91cf\uff0c\u786e\u5b9a\u5bfc\u5165\u540e\u76ee\u6807\u8868\u7684rowkey\uff0cregion\u6570\u91cf\uff1b<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<p>\u6b64\u5904\u4e0e\u4f20\u7edf hyperbase \u8868\u7684 sql bulkload \u4e0d\u540c\uff0c\u9664\u4e86\u521b\u5efa\u4e00\u5f20\u5b58\u653e\u91c7\u6837\u7ed3\u679c\u7684\u8868\u4ee5\u5916\uff0c\u8fd8\u9700\u8981\u989d\u5916\u521b\u5efa\u4e00\u5f20\u5b58\u653e split key \u7684\u8868\uff0c\u5982\u4e0b\uff1a<\/p>\n<h5>2\u3001\u521b\u5efasplitkey\u8868\uff0c\u5e76\u901a\u8fc7\u91c7\u6837\u83b7\u53d6splitkey<\/h5>\n<ul>\n<li>\n<p>\u521b\u5efa\u4e00\u5f20\u5b58\u653e split key \u7684\u8868\uff0c\u6b64\u8868\u5fc5\u987b\u662f hbase \u8868\u7c7b\u578b<\/p>\n<pre><code class=\"language-sql\">DROP TABLE IF EXISTS tmp_splitkeys;\nCREATE TABLE TMP_SPLITKEYS\n(KEY BINARY\n, VALUE BIGINT)\nSTORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'\nTBLPROPERTIES ('hbase.table.name' = 'tmp_splitkeys');<\/code><\/pre>\n<p>\u751f\u6210splitkey\u5e76\u63d2\u5165\u5230splitkey\u8868\uff0c\u540e\u7eed\u5efa\u7acb\u6620\u5c04\u8868\u65f6\u9700\u8981\u6307\u5b9asplitkey\u8868\uff1b<br \/>\n\u7531\u4e8e hyperdrive \u8868\u4e2d\u5bf9 split key \u7684\u7f16\u7801\u8fdb\u884c\u4e86\u8c03\u6574\uff0c\u6211\u4eec\u5728\u751f\u6210 split key \u7684\u65f6\u9700\u8981\u7528\u521a\u624d\u521b\u5efa\u7684\u4e34\u65f6\u51fd\u6570\u66ff\u4ee3 concat() \u51fd\u6570\uff1b<br \/>\n\u4f7f\u7528\u8be5\u51fd\u6570\u65f6\uff0c\u9700\u8981\u5728\u6bcf\u4e2a\u8981\u7528\u5230\u7684\u5b57\u6bb5\u540e\u9762\u8bbe\u7f6e\u5b57\u6bb5\u957f\u5ea6\uff0cstring \u7c7b\u578b\u6309\u9700\u8bbe\u7f6e\uff0c\u5176\u4ed6\u7c7b\u578b\u8bbe\u7f6e\u4e3a-1\uff1b<br \/>\n\u53e6\u5916\uff0c\u62fc\u63a5\u7684\u5b57\u6bb5\u5fc5\u987b\u662f\u4f60\u5c06\u7528\u5728\u5efa\u8868\u65f6\u4f5c\u4e3arowkey\u7684\u5b57\u6bb5\u6216\u5176\u4e00\u90e8\u5206\uff0c\u540c\u65f6\u4e8c\u8005\u987a\u5e8f\u5fc5\u987b\u4e00\u81f4\u4e14\u4f4d\u4e8erowkey\u7684\u6700\u524d\u7aef\uff1b<\/p>\n<pre><code class=\"language-sql\">INSERT INTO TABLE TMP_SPLITKEYS\nSELECT\nMID,1\nFROM\n(SELECT\n     MAX(C) AS MID\n FROM\n     (SELECT\n          HYPERDRIVE_ST_ENCODE(SS_SOLD_DATE_SK,-1) C\n           , NTILE(7)\n              OVER\n                   ( ORDER BY\n                          HYPERDRIVE_ST_ENCODE(SS_SOLD_DATE_SK,-1)) NT\n      FROM\n          SAMPLETABLE)\n GROUP BY\n      NT)\nORDER BY\n MID LIMIT 6;<\/code><\/pre>\n<ul>\n<li>ntile\u51fd\u6570\u7684\u53c2\u6570\u5373\u4e3a\u9884\u5206region\u7684\u6570\u91cf\uff1b<\/li>\n<li>limit\u7684\u6570\u91cf\u662f\u9884\u5206region\u6570\u91cf\u51cf\u4e00\uff1b<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<h4>3\u3001\u4f7f\u7528 Inceptor \u5efa\u7acb hyperbase \u8868\u7684\u6620\u5c04\u8868<\/h4>\n<p>\u6b64\u65f6\u6211\u4eec\u5982\u679c\u67e5\u8be2 splitkey \u8868\uff0c\u4f1a\u53d1\u73b0 splitkey \u7684\u663e\u793a\u662f\u4e71\u7801\uff0c\u8fd9\u91cc\u5176\u5b9e\u5df2\u7ecf\u662f\u8f6c\u6362\u6210\u4e8c\u8fdb\u5236\u7f16\u7801\u540e\u7684\u7ed3\u679c\uff1b<br \/>\n\u5efa\u7acb hyperdrive \u6620\u5c04\u8868\uff1b\u5efa\u8868\u65f6\u8bed\u6cd5\u7565\u6709\u4e0d\u540c\uff0c\u9700\u8981\u6307\u5b9a split key \u8868\u548c region \u6570\u3002<\/p>\n<pre><code class=\"language-sql\">DROP TABLE BL_HYPERDRIVE_MAP_INCEPTOR;\nCREATE TABLE BL_HYPERDRIVE_MAP_INCEPTOR\n(KEY STRUCT \u5c16\u62ec\u53f7SS_SOLD_DATE_SK:INT, UUID:BIGINT\u5c16\u62ec\u53f7\n  , SS_SOLD_DATE_SK INT\n  , SS_SOLD_TIME_SK INT\n  , SS_ITEM_SK INT\n  , SS_CUSTOMER_SK INT\n  , SS_CDEMO_SK INT\n  , SS_HDEMO_SK INT\n  , SS_ADDR_SK INT\n  , SS_STORE_SK INT)\nSTORED AS HYPERDRIVE\nTBLPROPERTIES(\n'hbase.table.splitkey.decode'='false',\n'hbase.table.splitkey.tablename'='tmp_splitkeys',\n'hbase.table.region.count'='7',\n'COMPRESSION'='SNAPPY');\n\n-- \u53e6\u5916\uff0c\u5982\u679c\u5b57\u6bb5\u4e2d\u6709 struct \u7c7b\u578b\u4e14\u542b\u6709 string \u7c7b\u578b\uff0c\u5219\u5fc5\u987b\u6307\u5b9a\u5176\u957f\u5ea6\uff0c\u4e14\u9700\u8981\u5728SERDEPROPERTIES \u4e2d\u8981\u6307\u5b9a\u5b57\u6bb5\u957f\u5ea6\uff1a'hyperdrive.structstring.length.struct_name.struct_field'='string_length'\uff0c\u5efa\u8868\u8bed\u53e5\u5982\u4e0b\uff1a\nCREATE TABLE BL_HYPERDRIVE_MAP_INCEPTOR\n(KEY STRUCT \u5c16\u62ec\u53f7SS_SOLD_DATE_SK:string LENGTH 18, UUID:bigint\u5c16\u62ec\u53f7\n  , SS_SOLD_DATE_SK INT\n  , SS_SOLD_TIME_SK INT\n  , SS_ITEM_SK INT\n  , SS_CUSTOMER_SK INT\n  , SS_CDEMO_SK INT\n  , SS_HDEMO_SK INT\n  , SS_ADDR_SK INT\n  , SS_STORE_SK INT)\nROW FORMAT DELIMITED FIELDS TERMINATED BY '|' COLLECTION ITEMS TERMINATED BY '\\001' MAP KEYS TERMINATED BY '\\003' LINES TERMINATED BY '\\N'\nSTORED AS HYPERDRIVE\nTBLPROPERTIES(\n'hbase.table.splitkey.decode'='false',\n'hbase.table.splitkey.tablename'='tmp_splitkeys',\n'hbase.table.region.count'='7',\n'COMPRESSION'='SNAPPY',\n'hyperdrive.structstring.length.key.ss_sold_date_sk'='18');<\/code><\/pre>\n<h4>5\u3001\u4f7f\u7528SQL Bulkload\u5bfc\u5165\u6570\u636e<\/h4>\n<p>\u4f7f\u7528bulkload\u7684\u65b9\u5f0f\u4ece\u6e90\u8868\u4e2d\u5c06\u6570\u636e\u5bfc\u5165\u76ee\u6807\u8868<\/p>\n<pre><code class=\"language-sql\">-- \u4e2a\u522b\u7248\u672c\u6ca1\u6709 UUIQ \u8fd9\u4e2a\u51fd\u6570\uff0c\u6b64\u6b21\u53ea\u662f\u4e3e\u4f8b\uff0c\u53ef\u4ee5\u4f7f\u7528\u5176\u4ed6\u5b57\u6bb5\u62fc\u63a5\u4e3a struct \u7c7b\u578b\u6bd4\u5982\u8fd9\u6837\n-- NAMED_STRUCT('SS_SOLD_DATE_SK',SS_SOLD_DATE_SK,'UUID',SS_STORE_SK) NS_KEY\nINSERT INTO TABLE BL_HYPERDRIVE_MAP_INCEPTOR \nSELECT \/*+USE_BULKLOAD*\/\nNAMED_STRUCT('SS_SOLD_DATE_SK',SS_SOLD_DATE_SK,'UUID',UNIQ()) NS_KEY\n,SS_SOLD_DATE_SK INT\n,SS_SOLD_TIME_SK INT\n,SS_ITEM_SK INT\n,SS_CUSTOMER_SK INT\n,SS_CDEMO_SK INT\n,SS_HDEMO_SK INT\n,SS_ADDR_SK INT\n,SS_STORE_SK INT\nFROM BL_FOR_HYPERDRIVE_EXT ORDER BY NS_KEY;<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>2019-08-08 17:40:17 \u661f\u671f\u56db \u6982\u8981\u63cf\u8ff0 \u672c\u6848\u4f8b\u8be6\u7ec6\u4ecb\u7ecdhyperdrive\u7684SQL Bulk ..<\/p>\n<div class=\"clear-fix\"><\/div>\n<p><a href=\"https:\/\/kbwp.transwarp.cn\/?p=494\" title=\"read more...\">Read more<\/a><\/p>\n","protected":false},"author":11,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[38,1],"tags":[65,66],"class_list":["post-494","post","type-post","status-publish","format-standard","hentry","category-configuration","category-uncategorized","tag-bulkload","tag-hyperdrive"],"acf":[],"_links":{"self":[{"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=\/wp\/v2\/posts\/494","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=\/wp\/v2\/users\/11"}],"replies":[{"embeddable":true,"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=494"}],"version-history":[{"count":6,"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=\/wp\/v2\/posts\/494\/revisions"}],"predecessor-version":[{"id":16723,"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=\/wp\/v2\/posts\/494\/revisions\/16723"}],"wp:attachment":[{"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=494"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=494"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=494"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}