diff --git a/README.md b/README.md
new file mode 100644
index 0000000..16e2be0
--- /dev/null
+++ b/README.md
@@ -0,0 +1,10 @@
+# ansible-hadoop
+
+## About
+Bootstraps an HDFS/HBase cluster with Ansible
+
+## Usage
+```
+$ ansible-playbook -i inventory.yml -e hadoop_path="/tmp" -e hdfs_cluster_id="test-cluster" hdfs-create.yml
+$ ansible-playbook -i inventory.yml -e "hbase_path=/tmp" hbase-create.yml
+```
diff --git a/hbase-create.yml b/hbase-create.yml
new file mode 100644
index 0000000..95af728
--- /dev/null
+++ b/hbase-create.yml
@@ -0,0 +1,6 @@
+---
+
+- hosts: namenodes
+  roles:
+    - hbase
+
diff --git a/hdfs-create.yml b/hdfs-create.yml
new file mode 100644
index 0000000..0869987
--- /dev/null
+++ b/hdfs-create.yml
@@ -0,0 +1,5 @@
+---
+
+- hosts: namenodes
+  roles:
+    - hdfs
diff --git a/inventory.yml b/inventory.yml
new file mode 100644
index 0000000..469de3b
--- /dev/null
+++ b/inventory.yml
@@ -0,0 +1,9 @@
+all:
+  vars: {}
+
+  children:
+    namenodes:
+      vars: {}
+      hosts:
+        namenode-1:
+        namenode-2:
diff --git a/roles/hbase/defaults/main.yml b/roles/hbase/defaults/main.yml
new file mode 100644
index 0000000..27cad28
--- /dev/null
+++ b/roles/hbase/defaults/main.yml
@@ -0,0 +1,7 @@
+hbase_version: "0.96.1.1"
+hbase_archive_file: "hbase-{{ hbase_version }}-hadoop2-bin.tar.gz"
+hbase_archive_file_checksum256: "7334e7da0b655ab02cfc64454c3d2e93a4c584efbde2dfd37915b9530d1643f8"
+hbase_archive_url: "https://archive.apache.org/dist/hbase/hbase-{{ hbase_version }}/{{ hbase_archive_file }}"
+hbase_config_path: "/tmp/hbase/conf"
+hbase_rootdir: "/tmp/hbase-root"
+hbase_datadir: "/tmp/hbase-data"
diff --git a/roles/hbase/files/hbase-config.sh b/roles/hbase/files/hbase-config.sh
new file mode 100755
index 0000000..5b0399a
--- /dev/null
+++ b/roles/hbase/files/hbase-config.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+
+set -ueo pipefail
+
+HBASE_VERSION="0.96.1.1"
+HBASE_FILE="hbase-${HBASE_VERSION}-hadoop2-bin.tar.gz"
+HBASE_DIR="hbase-${HBASE_VERSION}-hadoop2"
+#HBASE_URL="https://downloads.apache.org/hbase/${HBASE_VERSION}/${HBASE_FILE}"
+HBASE_URL="https://archive.apache.org/dist/hbase/hbase-${HBASE_VERSION}/${HBASE_FILE}"
+HBASE_FILE_CKSUM="1625453f839f7d8c86078a131af9731f6df28c59e58870db84913dcbc640d430253134a825de7cec247ea1f0cf232435765e00844ee2e4faf31aeb356955c478"
+HBASE_PATH="/tmp"
+HBASE_CONFIG_TEMPLATE="${HBASE_PATH}/hbase/conf/hbase-site.xml.j2"
+
+SCRIPT_PATH=$(dirname "$0")
+source "$SCRIPT_PATH/../../../setup.sh"
+
+create_hbase_config_template() {
+    # read -d '' exits nonzero at EOF; guard it so `set -e` does not abort.
+    read -r -d '' CONFIG <<EOF || true
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+  <property>
+    <name>hbase.rootdir</name>
+    <value>file:///{{ hbase_rootdir }}/hbase</value>
+  </property>
+  <property>
+    <name>hbase.zookeeper.property.dataDir</name>
+    <value>{{ hbase_datadir }}/zookeeper</value>
+  </property>
+  <property>
+    <name>hbase.unsafe.stream.capability.enforce</name>
+    <value>false</value>
+  </property>
+</configuration>
+EOF
+    echo "$CONFIG"
+}
+
+HBASE_CONFIG=$(create_hbase_config_template)
+write_file "${HBASE_CONFIG_TEMPLATE}" "${HBASE_CONFIG}"
diff --git a/roles/hbase/tasks/main.yml b/roles/hbase/tasks/main.yml
new file mode 100644
index 0000000..c5a2dde
--- /dev/null
+++ b/roles/hbase/tasks/main.yml
@@ -0,0 +1,31 @@
+---
+
+- name: Download HBase archive locally
+  get_url:
+    url: "{{ hbase_archive_url }}"
+    dest: "/tmp/hbase.tar.gz"
+    mode: "0600"
+    checksum: "sha256:{{ hbase_archive_file_checksum256 }}"
+  run_once: true
+  delegate_to: localhost
+
+- name: Copy and extract archive
+  ansible.builtin.unarchive:
+    src: "/tmp/hbase.tar.gz"
+    dest: "/tmp"
+
+- name: Recursively remove directory
+  ansible.builtin.file:
+    path: /tmp/hbase
+    state: absent
+
+- name: Rename dir
+  command: mv -f /tmp/hbase-0.96.1.1-hadoop2 /tmp/hbase
+
+- name: Write config template
+  ansible.builtin.command:
+    cmd: "roles/hbase/files/hbase-config.sh"
+  delegate_to: localhost
+
+- name: Create hbase-site.xml
+  template: src={{ hbase_config_path }}/hbase-site.xml.j2 dest={{ hbase_config_path }}/hbase-site.xml mode=0644
diff --git a/roles/hdfs/defaults/main.yml b/roles/hdfs/defaults/main.yml
new file mode 100644
index 0000000..2e5f4c6
--- /dev/null
+++ b/roles/hdfs/defaults/main.yml
@@ -0,0 +1,15 @@
+hadoop_version: "2.10.0"
+hadoop_dir: "hadoop"
+hadoop_archive_file: "hadoop-{{ hadoop_version }}.tar.gz"
+hadoop_archive_dir: "hadoop-{{ hadoop_version }}"
+hadoop_archive_file_checksum256: "131750c258368be4baff5d4a83b4de2cd119bda3774ed26d1d233b6fdf33f07f"
+hadoop_archive_url: "https://archive.apache.org/dist/hadoop/common/hadoop-{{ hadoop_version }}/{{ hadoop_archive_file }}"
+hadoop_config_path: "{{ hadoop_path }}/hadoop/etc/hadoop"
+hdfs_cmd_namenode: "{{ hadoop_path }}/{{ hadoop_dir }}/bin/hdfs namenode"
+hdfs_cmd_datanode: "{{ hadoop_path }}/{{ hadoop_dir }}/bin/hdfs datanode"
+hdfs_cmd_journalnode: "{{ hadoop_path }}/{{ hadoop_dir }}/bin/hdfs journalnode"
+hdfs_cmd_zkfc: "{{ hadoop_path }}/{{ hadoop_dir }}/bin/hdfs start zkfc"
+hdfs_cmd_format: "{{ hadoop_path }}/{{ hadoop_dir }}/bin/hdfs namenode -format"
+hdfs_cmd_format_ha: "{{ hadoop_path }}/{{ hadoop_dir }}/bin/hdfs namenode -initializeSharedEdits"
+namenode_1: "namenode-1"
+namenode_2: "namenode-2"
diff --git a/roles/hdfs/files/hadoop-config.sh b/roles/hdfs/files/hadoop-config.sh
new file mode 100755
index 0000000..76d0e42
--- /dev/null
+++ b/roles/hdfs/files/hadoop-config.sh
@@ -0,0 +1,114 @@
+#!/usr/bin/env bash
+
+set -ueo pipefail
+
+HDFS_CONFIG_TEMPLATE="/tmp/hadoop/etc/hadoop/hdfs-site.xml.j2"
+HDFS_CONFIG_TEMPLATE_CORE="/tmp/hadoop/etc/hadoop/core-site.xml.j2"
+HDFS_CONFIG_TEMPLATE_MAPRED="/tmp/hadoop/etc/hadoop/mapred-site.xml.j2"
+
+SCRIPT_PATH=$(dirname "$0")
+source "$SCRIPT_PATH/../../../setup.sh"
+
+create_hdfs_core_config_template() {
+    #printf "Writing HDFS core-site.xml config\n"
+    read -r -d '' CONFIG <<EOF || true
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+  <property>
+    <name>fs.defaultFS</name>
+    <value>hdfs://{{ hdfs_cluster_id }}</value>
+  </property>
+  <property>
+    <name>dfs.journalnode.edits.dir</name>
+    <value>/tmp/hadoop</value>
+  </property>
+</configuration>
+EOF
+    echo "$CONFIG"
+}
+
+create_hdfs_mapred_config_template() {
+    #printf "Writing HDFS mapred-site.xml config\n"
+    read -r -d '' CONFIG <<EOF || true
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+  <property>
+    <name>fs.defaultFS</name>
+    <value>hdfs://{{ hdfs_cluster_id }}</value>
+  </property>
+</configuration>
+EOF
+    echo "$CONFIG"
+}
+
+create_hdfs_config_template() {
+    #printf "Writing HDFS hdfs-site.xml config\n"
+    read -r -d '' CONFIG <<EOF || true
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+  <property>
+    <name>dfs.nameservices</name>
+    <value>{{ hdfs_cluster_id }}</value>
+  </property>
+  <property>
+    <name>dfs.ha.namenodes.{{ hdfs_cluster_id }}</name>
+    <value>nn1,nn2</value>
+  </property>
+  <property>
+    <name>fs.defaultFS</name>
+    <value>hdfs://{{ hdfs_cluster_id }}</value>
+  </property>
+  <property>
+    <name>dfs.namenode.rpc-address.{{ hdfs_cluster_id }}.nn1</name>
+    <value>{{ namenode_1 }}:8020</value>
+  </property>
+  <property>
+    <name>dfs.namenode.rpc-address.{{ hdfs_cluster_id }}.nn2</name>
+    <value>{{ namenode_2 }}:8020</value>
+  </property>
+  <property>
+    <name>dfs.namenode.http-address.{{ hdfs_cluster_id }}.nn1</name>
+    <value>{{ namenode_1 }}:50070</value>
+  </property>
+  <property>
+    <name>dfs.namenode.http-address.{{ hdfs_cluster_id }}.nn2</name>
+    <value>{{ namenode_2 }}:50070</value>
+  </property>
+  <property>
+    <name>dfs.namenode.https-address.{{ hdfs_cluster_id }}.nn1</name>
+    <value>{{ namenode_1 }}:50470</value>
+  </property>
+  <property>
+    <name>dfs.namenode.https-address.{{ hdfs_cluster_id }}.nn2</name>
+    <value>{{ namenode_2 }}:50470</value>
+  </property>
+  <property>
+    <name>dfs.namenode.name.dir</name>
+    <value>/tmp/hdfs/namenode</value>
+  </property>
+  <property>
+    <name>dfs.datanode.data.dir</name>
+    <value>/tmp/hdfs/datanode</value>
+  </property>
+  <property>
+    <name>dfs.namenode.shared.edits.dir</name>
+    <value>file:///tmp/hadoop</value>
+  </property>
+  <property>
+    <name>ha.zookeeper.quorum</name>
+    <value>127.0.0.1:2181</value>
+  </property>
+</configuration>
+EOF
+    echo "$CONFIG"
+}
+
+HDFS_CONFIG=$(create_hdfs_config_template)
+HDFS_CONFIG_CORE=$(create_hdfs_core_config_template)
+HDFS_CONFIG_MAPRED=$(create_hdfs_mapred_config_template)
+write_file "${HDFS_CONFIG_TEMPLATE}" "${HDFS_CONFIG}"
+write_file "${HDFS_CONFIG_TEMPLATE_CORE}" "${HDFS_CONFIG_CORE}"
+write_file "${HDFS_CONFIG_TEMPLATE_MAPRED}" "${HDFS_CONFIG_MAPRED}"
diff --git a/roles/hdfs/tasks/main.yml b/roles/hdfs/tasks/main.yml
new file mode 100644
index 0000000..743a43e
--- /dev/null
+++ b/roles/hdfs/tasks/main.yml
@@ -0,0 +1,37 @@
+---
+
+- name: Download Hadoop archive if not existent
+  get_url:
+    url: "{{ hadoop_archive_url }}"
+    dest: "{{ hadoop_path }}/{{ hadoop_archive_file }}"
+    mode: "0600"
+    checksum: "sha256:{{ hadoop_archive_file_checksum256 }}"
+  run_once: true
+  delegate_to: localhost
+
+- name: Extract archive
+  ansible.builtin.unarchive:
+    src: "{{ hadoop_path }}/{{ hadoop_archive_file }}"
+    dest: "{{ hadoop_path }}"
+
+- name: Recursively remove directory
+  ansible.builtin.file:
+    path: "{{ hadoop_path }}/{{ hadoop_dir }}"
+    state: absent
+
+- name: Rename dir
+  command: mv -f {{ hadoop_path }}/{{ hadoop_archive_dir }} {{ hadoop_path }}/{{ hadoop_dir }}
+
+- name: Write config templates
+  ansible.builtin.command:
+    cmd: "roles/hdfs/files/hadoop-config.sh"
+  delegate_to: localhost
+
+- name: Create core-site.xml
+  template: src={{ hadoop_config_path }}/core-site.xml.j2 dest={{ hadoop_config_path }}/core-site.xml mode=0644
+
+- name: Create hdfs-site.xml
+  template: src={{ hadoop_config_path }}/hdfs-site.xml.j2 dest={{ hadoop_config_path }}/hdfs-site.xml mode=0644
+
+- name: Create mapred-site.xml
+  template: src={{ hadoop_config_path }}/mapred-site.xml.j2 dest={{ hadoop_config_path }}/mapred-site.xml mode=0644
diff --git a/setup.sh b/setup.sh
new file mode 100644
index 0000000..e04dff0
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+check_dependencies() {
+    for i in "${DEPS[@]}"
+    do
+        if [[ -z $(which "${i}") ]]; then
+            printf "Could not find %s\n" "${i}" >&2
+            exit 1
+        fi
+    done
+}
+
+extract_archive() {
+    printf "Extracting %s archive\n" "$1"
+    if ! tar xfz "${1}"; then
+        printf "Failed to extract archive: %s\n" "$1"
+        exit 1
+    fi
+}
+
+compare_checksum() {
+    local r CKSUM
+    CKSUM=$(sha512 -q "${1}")
+    if ! [ "$CKSUM" = "$2" ]; then
+        r=1
+    else
+        r=0
+    fi
+    echo "$r"
+}
+
+write_file() {
+    printf "Writing %s\n" "$1"
+    printf "%s" "$2" > "$1"
+}
+
+run() {
+    local pid
+    printf "Starting %s\n" "$2"
+    $1 > /dev/null 2>&1 & pid=$!
+    if kill -0 "$pid" 2>/dev/null; then
+        printf "Started %s successfully\n" "$2"
+    else
+        printf "Failed to start %s\n" "$2"
+        pid="-1"
+    fi
+    echo "$pid"
+}