{"id":15007,"date":"2025-04-14T13:46:09","date_gmt":"2025-04-14T05:46:09","guid":{"rendered":"https:\/\/nj.transwarp.cn:8180\/?p=15007"},"modified":"2025-04-14T13:46:09","modified_gmt":"2025-04-14T05:46:09","slug":"hive-filesink-skip-empty-file-%e5%8f%82%e6%95%b0%e5%ba%94%e7%94%a8","status":"publish","type":"post","link":"https:\/\/kbwp.transwarp.cn\/?p=15007","title":{"rendered":"hive.filesink.skip.empty.file \u53c2\u6570\u5e94\u7528"},"content":{"rendered":"<h3>\u6982\u8981\u63cf\u8ff0<\/h3>\n<hr \/>\n<p><code>hive.filesink.skip.empty.file<\/code> \u53c2\u6570\uff0c\u7528\u4e8e\u63a7\u5236\u7a7a\u6587\u4ef6\u662f\u5426\u88ab inceptor \u7684 FileSink \u64cd\u4f5c\u8df3\u8fc7\u3002\u5f53\u8bbe\u7f6e\u4e3a true \u65f6\uff0c\u5982\u679c\u5199\u5165\u6587\u4ef6\u662f\u7a7a\u7684\uff0cFileSink \u4e0d\u4f1a\u521b\u5efa\u8fd9\u4e2a\u6587\u4ef6\u3002\u8fd9\u6709\u52a9\u4e8e\u907f\u514d\u5728 HDFS \u4e2d\u521b\u5efa\u4e0d\u5fc5\u8981\u7684\u5c0f\u6587\u4ef6\uff0c\u662f <strong>\u8ba1\u7b97\u7aef-reduce\u9636\u6bb5<\/strong> \u51cf\u5c11\u5c0f\u6587\u4ef6\u6570\u91cf\u7684\u6709\u6548\u65b9\u6848\u3002<\/p>\n<h3>\u8be6\u7ec6\u8bf4\u660e<\/h3>\n<hr \/>\n<pre><code class=\"language-sql\">--\u76ee\u6807\u8868\u4e3a5\u4e2a\u5206\u6876\u7684orc\u666e\u901a\u8868\nDROP TABLE IF EXISTS orc_dest;\nCREATE TABLE orc_dest(id INT ,name STRING ) CLUSTERED BY (id) INTO 5 BUCKETS STORED AS ORC;\n\n--\u6e90\u8868\nDROP TABLE IF EXISTS orc_src;\nCREATE TABLE orc_src(id INT ,name STRING ) STORED AS ORC;\nINSERT INTO orc_src SELECT 1,'a' FROM system.dual;\nINSERT INTO orc_src SELECT 2,'a' FROM system.dual;\nINSERT INTO orc_src SELECT 3,'a' FROM system.dual;<\/code><\/pre>\n<h4>\u53c2\u6570\u5f00\u542f\u72b6\u6001\u4e0b\uff1a<\/h4>\n<pre><code class=\"language-sql\">--\u5f00\u542f\u53c2\u6570\uff1a\nSET hive.filesink.skip.empty.file=true;\nINSERT OVERWRITE TABLE orc_dest SELECT * FROM 
orc_src;<\/code><\/pre>\n<p>\u53ef\u4ee5\u770b\u5230\uff0c\u76ee\u6807\u8868hdfs\u76ee\u5f55\u4e0b\u53ea\u67093\u4e2a\u6570\u636e\u6587\u4ef6\u751f\u6210\uff0c\u6bcf\u4e2a\u6570\u636e\u6587\u4ef6\u90fd\u67091\u6761\u6570\u636e\u3002<\/p>\n<pre><code class=\"language-shell\">[root@kv1~]# hdfs dfs -ls hdfs:\/\/nameservice1\/quark1\/user\/hive\/warehouse\/default.db\/hive\/orc_dest\nWARNING: log4j.properties is not found. HADOOP_CONF_DIR may be incomplete.\n2024-12-05 11:15:06,095 INFO  [main] util.KerberosUtil (KerberosUtil.java:getDefaultPrincipalPattern(67)) - Using principal pattern: HTTP\/_HOST\nFound 3 items\n-rwxr-xr-x   3 hive hbase        273 2024-12-05 11:14 hdfs:\/\/nameservice1\/quark1\/user\/hive\/warehouse\/default.db\/hive\/orc_dest\/000001_0\n-rwxr-xr-x   3 hive hbase        277 2024-12-05 11:14 hdfs:\/\/nameservice1\/quark1\/user\/hive\/warehouse\/default.db\/hive\/orc_dest\/000002_0\n-rwxr-xr-x   3 hive hbase        277 2024-12-05 11:14 hdfs:\/\/nameservice1\/quark1\/user\/hive\/warehouse\/default.db\/hive\/orc_dest\/000003_0<\/code><\/pre>\n<h4>\u53c2\u6570\u5173\u95ed\u72b6\u6001\u4e0b\uff1a<\/h4>\n<pre><code class=\"language-sql\">--\u5173\u95ed\u53c2\u6570\uff1a\nSET hive.filesink.skip.empty.file=false;\nINSERT OVERWRITE TABLE orc_dest SELECT * FROM orc_src;\n\nSHOW CREATE TABLE orc_dest;\ndfs -ls   hdfs:\/\/nameservice1\/quark1\/user\/hive\/warehouse\/default.db\/hive\/orc_dest<\/code><\/pre>\n<p>\u76ee\u6807\u8868hdfs\u76ee\u5f55\u4e0b\u67095\u4e2a\u6570\u636e\u6587\u4ef6\u751f\u6210\uff0c\u5176\u4e2d<code>000000_0<\/code>\u548c<code>000004_0<\/code>\u662f\u6ca1\u6709\u6570\u636e\u7684\u3002<\/p>\n<pre><code class=\"language-shell\">[root@kv1~]# hdfs dfs -ls hdfs:\/\/nameservice1\/quark1\/user\/hive\/warehouse\/default.db\/hive\/orc_dest\nWARNING: log4j.properties is not found. 
HADOOP_CONF_DIR may be incomplete.\n2024-12-05 11:15:37,654 INFO  [main] util.KerberosUtil (KerberosUtil.java:getDefaultPrincipalPattern(67)) - Using principal pattern: HTTP\/_HOST\nFound 5 items\n-rwxr-xr-x   3 hive hbase         71 2024-12-05 11:15 hdfs:\/\/nameservice1\/quark1\/user\/hive\/warehouse\/default.db\/hive\/orc_dest\/000000_0\n-rwxr-xr-x   3 hive hbase        273 2024-12-05 11:15 hdfs:\/\/nameservice1\/quark1\/user\/hive\/warehouse\/default.db\/hive\/orc_dest\/000001_0\n-rwxr-xr-x   3 hive hbase        277 2024-12-05 11:15 hdfs:\/\/nameservice1\/quark1\/user\/hive\/warehouse\/default.db\/hive\/orc_dest\/000002_0\n-rwxr-xr-x   3 hive hbase        277 2024-12-05 11:15 hdfs:\/\/nameservice1\/quark1\/user\/hive\/warehouse\/default.db\/hive\/orc_dest\/000003_0\n-rwxr-xr-x   3 hive hbase         71 2024-12-05 11:15 hdfs:\/\/nameservice1\/quark1\/user\/hive\/warehouse\/default.db\/hive\/orc_dest\/000004_0<\/code><\/pre>\n<p>\u4e00\u4e9b\u7b2c\u4e09\u65b9\u5de5\u5177\uff0c\u6bd4\u5982 datax \u65e0\u6cd5\u8bc6\u522b\u8fd9\u79cd\u7a7a\u6587\u4ef6\u3002<\/p>\n<p>\u901a\u8fc7 <code>orcfiledump<\/code> \u89e3\u6790\u4e5f\u4f1a\u629b\u51fa <code> java.lang.IndexOutOfBoundsException: Index: 1, Size: 1<\/code> \u7684\u9519\u8bef\u3002<\/p>\n<p><img decoding=\"async\" src=\"https:\/\/nj.transwarp.cn:8180\/wp-content\/uploads\/2024\/12\/image-1733274967467.png\" alt=\"file\" \/><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6982\u8981\u63cf\u8ff0 hive.filesink.skip.empty.file \u53c2\u6570\uff0c\u7528\u4e8e\u63a7\u5236\u7a7a\u6587\u4ef6\u662f\u5426\u88ab incept ..<\/p>\n<div class=\"clear-fix\"><\/div>\n<p><a href=\"https:\/\/kbwp.transwarp.cn\/?p=15007\" title=\"read more...\">Read 
more<\/a><\/p>\n","protected":false},"author":12,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":["post-15007","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"acf":[],"_links":{"self":[{"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=\/wp\/v2\/posts\/15007","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=\/wp\/v2\/users\/12"}],"replies":[{"embeddable":true,"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=15007"}],"version-history":[{"count":3,"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=\/wp\/v2\/posts\/15007\/revisions"}],"predecessor-version":[{"id":15080,"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=\/wp\/v2\/posts\/15007\/revisions\/15080"}],"wp:attachment":[{"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=15007"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=15007"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/kbwp.transwarp.cn\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=15007"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}