Here is an example to copy MySQL tables from different shards to one Hive table:
#!/bin/bash
set -x
set -e
game=gameexample
mysql_host_prefix=userdb
mysql_host_suffix=myhostname.com
mysql_tab=user
mysql_database=mydatabase
mysql_user=myusername
mysql_pwd=xxxxx
hive_tab=user
echo "Log: `date` dropping Hive table $game.${hive_tab}"
hive -S -e "drop table \`$game.${hive_tab}\`;"
for SHARD_ID in {1..200}; do
mysql_host=${mysql_host_prefix}-${SHARD_ID}-${mysql_host_suffix}
mysql_conn="mysql --user=$mysql_user --password=$mysql_pwd -D${mysql_database} --host=${mysql_host} -s --skip-column-names"
hive_shard_tab=${hive_tab}_shard${SHARD_ID}
hdfs_dir=/user/mapr/${mysql_tab}_shard${SHARD_ID}
if ping -c 1 -W 1 "$mysql_host"; then
echo "Log: `date` $mysql_host is alive"
else
echo "Log: `date` $mysql_host is not alive"
exit 0
fi
echo "Log: `date` dropping Hive table $game.${hive_shard_tab}"
hive -S -e "drop table $game.${hive_shard_tab}"
echo "Log: `date` removing ${hdfs_dir} on HDFS"
hadoop fs -rm -r -f ${hdfs_dir}
sql="select count(*) from \`${mysql_tab}\`"
mysql_row_cnt=`echo "$sql" | $mysql_conn`
echo "Log: `date` found ${mysql_row_cnt} rows in the MySQL table ${mysql_tab} with query: $sql"
sqoop import \
--connect jdbc:mysql://$mysql_host/${mysql_database} \
--table "${mysql_tab}" \
--username $mysql_user \
--password $mysql_pwd \
--num-mappers 1 \
--hive-overwrite \
--hive-table $game.${hive_shard_tab} \
--hive-import \
--target-dir ${hdfs_dir} \
--hive-delims-replacement ' '
hive_row_cnt=`hive -S -e "select count(*) from $game.${hive_shard_tab}"`
echo "Log: `date` ended up with ${hive_row_cnt} rows in the Hive table $game.${hive_shard_tab} which are copied from the MySQL table ${mysql_tab} (${mysql_row_cnt} rows)"
# merging
if [ $SHARD_ID = 1 ]; then
sql_str="create table $game.\`$hive_tab\` as select * from $game.${hive_shard_tab};"
echo "Log: `date` creating the Hive table $game.${hive_tab} with the data from the first Shard with sql: $sql_str"
hive -S -e "$sql_str"
else
sql_str="insert into table $game.\`$hive_tab\` select * from $game.${hive_shard_tab};"
echo "Log: `date` merging into the Hive table $game.${hive_tab} the data from Shard $SHARD_ID with sql: $sql_str"
hive -S -e "$sql_str"
fi
done
exit 0
No comments:
Post a Comment