DataX迁移Hive到崖山分布式

首页    知识库    典型案例    DataX迁移Hive到崖山分布式

概述

本文主要介绍如何通过DataX将Hive中的数据迁移到崖山分布式数据库(YashanDB)。

 

环境

源Hive版本:3.1.3

目标YashanDB版本:23.2.3.100

 

建表脚本

-- hive

-- Hive source table used by the migration example.
-- Rows are stored as plain text with '\001' (Ctrl-A) as the field separator;
-- the DataX hdfsreader settings fieldDelimiter "\u0001" and fileType "text"
-- in the job configuration must stay in sync with these two clauses.
CREATE TABLE IF NOT EXISTS product (
    product_no   char(5),
    product_name varchar(30),
    cost         double,
    -- Hive's type keyword is "double"; the misspelling "duble" fails to parse.
    price        double
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
STORED AS textfile;

 

-- yashandb

-- YashanDB target table that receives the rows read from the Hive product table.
-- NOTE(review): the DataX writer targets SALES.PRODUCT, so run this as the
-- SALES user (or qualify the table name) — confirm against your environment.
-- Hive char(n)/varchar(n) limits count characters. The lengths below use
-- explicit CHAR semantics so that multi-byte values (e.g. UTF-8 Chinese names)
-- that fit in Hive are not rejected if the database defaults to byte lengths.
CREATE TABLE product
(
    product_no   CHAR(5 CHAR),
    product_name VARCHAR2(30 CHAR),
    cost         NUMBER,
    price        NUMBER
);

 

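Hive表和DataX数据类型映射

hdfsreader按如下规则将Hive数据类型转换为DataX内部类型:

DataX内部类型    Hive数据类型
Long            TINYINT、SMALLINT、INT、BIGINT
Double          FLOAT、DOUBLE
String          STRING、CHAR、VARCHAR、STRUCT、MAP、ARRAY、UNION、BINARY
Boolean         BOOLEAN
Date            DATE、TIMESTAMP

本例中product_no(char)、product_name(varchar)在reader中配置为string,cost、price(double)配置为double。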

 

hive同步到崖山job配置

{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "hdfsreader",
                    "parameter": {
                        "column": [
                            {
                                "index": 0,
                                "type": "string"
                            },
                            {
                                "index": 1,
                                "type": "string"
                            },
                            {
                                "index": 2,
                                "type": "double"
                            },
                            {
                                "index": 3,
                                "type": "double"
                            }
                        ],
                        "defaultFS": "hdfs://127.0.0.1:8020",
                        "encoding": "UTF-8",
                        "fieldDelimiter": "\u0001",
                        "fileType": "text",
                        "nullFormat": "\\N",
                        "path": "/usr/hive/warehouse/sales.db/product"
                    }
                },
                "writer": {
                    "name": "yashandbwriter",
                    "parallel": {
                        "binder": 6
                    },
                    "parameter": {
                        "batchError": true,
                        "column": [
                            "PRODUCT_NO",
                            "PRODUCT_NAME",
                            "COST",
                            "PRICE"
                        ],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:yasdb://127.0.0.1:1688/yashandb",
                                "table": [
                                    "SALES.PRODUCT"
                                ]
                            }
                        ],
                        "batchSize": 4096,
                        "batchesPerTxn": 1000,
                        "password": "sales",
                        "preSql": ["truncate table SALES.PRODUCT"],
                        "session": [],
                        "username": "sales",
                        "writeMode": "bulkinsert"
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}

 

执行同步

# Launch the DataX job described by job/hive2yashandb.json.
# Both paths are relative, so this presumably has to be run from the DataX
# installation directory — confirm against your layout.
python bin/datax.py job/hive2yashandb.json

 

浏览量:0