Add files

This commit is contained in:
Björn Busse 2021-03-09 20:18:15 +01:00
parent 2900359e53
commit d20e855344
11 changed files with 322 additions and 0 deletions

10
README.md Normal file
View File

@ -0,0 +1,10 @@
# ansible-hadoop
## About
Bootstraps an HDFS/HBase cluster with Ansible.
## Usage
```
$ ansible-playbook -i inventory.yml -e hadoop_path="/tmp" -e hdfs_cluster_id="test-cluster" hdfs-create.yml
$ ansible-playbook -i inventory.yml -e "hbase_path=/tmp" hbase-create.yml
```

6
hbase-create.yml Normal file
View File

@ -0,0 +1,6 @@
---
# Playbook: apply the hbase role to every host in the "namenodes" group.
- hosts: namenodes
  roles:
    - role: hbase

5
hdfs-create.yml Normal file
View File

@ -0,0 +1,5 @@
---
# Playbook: apply the hdfs role to every host in the "namenodes" group.
- hosts: namenodes
  roles:
    - role: hdfs

9
inventory.yml Normal file
View File

@ -0,0 +1,9 @@
# Static inventory: a single "namenodes" group holding both name node hosts.
# No inventory-level variables are set; the vars mappings are left empty.
all:
  vars: {}
  children:
    namenodes:
      vars: {}
      hosts:
        namenode-1:
        namenode-2:

View File

@ -0,0 +1,7 @@
# Variables for the HBase role.

# Release to install and the name of its binary tarball.
hbase_version: '0.96.1.1'
hbase_archive_file: 'hbase-{{ hbase_version }}-hadoop2-bin.tar.gz'

# SHA-256 digest the downloaded tarball is verified against.
hbase_archive_file_checksum256: '7334e7da0b655ab02cfc64454c3d2e93a4c584efbde2dfd37915b9530d1643f8'

# Download location on the Apache archive server.
hbase_archive_url: 'https://archive.apache.org/dist/hbase/hbase-{{ hbase_version }}/{{ hbase_archive_file }}'

# Directory holding hbase-site.xml and its .j2 template.
hbase_config_path: '/tmp/hbase/conf'

# Base directories substituted into hbase-site.xml.
hbase_rootdir: '/tmp/hbase-root'
hbase_datadir: '/tmp/hbase-data'

View File

@ -0,0 +1,40 @@
#!/usr/bin/env bash
# Writes the Jinja2 template for hbase-site.xml to
# ${HBASE_PATH}/hbase/conf/hbase-site.xml.j2.
# Shared helpers (e.g. write_file) are loaded from setup.sh, which is looked up
# three directory levels above this script.
set -ueo pipefail

HBASE_VERSION="0.96.1.1"
HBASE_FILE="hbase-${HBASE_VERSION}-hadoop2-bin.tar.gz"
HBASE_DIR="hbase-${HBASE_VERSION}-hadoop2"
#HBASE_URL="https://downloads.apache.org/hbase/${HBASE_VERSION}/${HBASE_FILE}"
HBASE_URL="https://archive.apache.org/dist/hbase/hbase-${HBASE_VERSION}/${HBASE_FILE}"
HBASE_FILE_CKSUM="1625453f839f7d8c86078a131af9731f6df28c59e58870db84913dcbc640d430253134a825de7cec247ea1f0cf232435765e00844ee2e4faf31aeb356955c478"
HBASE_PATH="/tmp"
HBASE_CONFIG_TEMPLATE="${HBASE_PATH}/hbase/conf/hbase-site.xml.j2"

SCRIPT_PATH=$(dirname "$0")
# Quoted so that a checkout path containing whitespace does not get word-split.
source "${SCRIPT_PATH}/../../../setup.sh"
# Print the Jinja2 template for hbase-site.xml on stdout.
#
# The {{ hbase_rootdir }} and {{ hbase_datadir }} placeholders are emitted
# verbatim; they are meant to be filled in later by the template engine.
# The here-document is quoted so the shell never expands anything inside it,
# and it is streamed with cat instead of `read -d ''`, which always returns a
# non-zero status at end of input and would abort a caller running under
# `set -e`.
#
# NOTE(review): with an absolute hbase_rootdir the rendered URI starts with
# four slashes (file:////...); presumably harmless, confirm against HBase.
create_hbase_config_template() {
    cat <<'EOF'
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>hbase.rootdir</name>
    <value>file:///{{ hbase_rootdir }}/hbase</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>{{ hbase_datadir }}/zookeeper</value>
  </property>
  <property>
    <name>hbase.unsafe.stream.capability.enforce</name>
    <value>false</value>
  </property>
</configuration>
EOF
}
# Render the template text, then store it at HBASE_CONFIG_TEMPLATE.
HBASE_CONFIG=$(create_hbase_config_template)
# The conf directory is not guaranteed to exist on the machine running this
# script, so create it before writing.
mkdir -p "$(dirname "${HBASE_CONFIG_TEMPLATE}")"
write_file "${HBASE_CONFIG_TEMPLATE}" "${HBASE_CONFIG}"

View File

@ -0,0 +1,31 @@
---
# Tasks of the hbase role: download the release tarball on the controller,
# unpack it on each host under /tmp, move it to /tmp/hbase, and render
# hbase-site.xml from a template generated by hbase-config.sh.

- name: Download HBase archive locally
  get_url:
    url: "{{ hbase_archive_url }}"
    dest: "/tmp/hbase.tar.gz"
    mode: "0600"
    checksum: "sha256:{{ hbase_archive_file_checksum256 }}"
  run_once: true
  delegate_to: localhost

- name: Copy and extract archive
  ansible.builtin.unarchive:
    src: "/tmp/hbase.tar.gz"
    dest: "/tmp"

# Remove a previous installation so the rename below cannot nest directories.
- name: Recursively remove directory
  ansible.builtin.file:
    path: /tmp/hbase
    state: absent

# The extracted directory name follows the version of the downloaded archive.
- name: Rename dir
  command: "mv -f /tmp/hbase-{{ hbase_version }}-hadoop2 /tmp/hbase"

# The generator script lives in this role's files/ directory; role_path makes
# the lookup independent of the directory ansible-playbook was started from.
# Its output is identical for every host, so it only needs to run once.
- name: Write config template
  ansible.builtin.command:
    argv:
      - "{{ role_path }}/files/hbase-config.sh"
  run_once: true
  delegate_to: localhost

- name: Create hbase-site.xml
  template:
    src: "{{ hbase_config_path }}/hbase-site.xml.j2"
    dest: "{{ hbase_config_path }}/hbase-site.xml"
    mode: "0700"

View File

@ -0,0 +1,15 @@
# Variables for the HDFS role. hadoop_path is not set here and has to be
# supplied by the caller (for example with -e hadoop_path=...).

# Release to install, its tarball, and the directory name the tarball unpacks to.
hadoop_version: "2.10.0"
hadoop_dir: "hadoop"
hadoop_archive_file: "hadoop-{{ hadoop_version }}.tar.gz"
hadoop_archive_dir: "hadoop-{{ hadoop_version }}"

# SHA-256 digest the downloaded tarball is verified against.
hadoop_archive_file_checksum256: "131750c258368be4baff5d4a83b4de2cd119bda3774ed26d1d233b6fdf33f07f"
hadoop_archive_url: "https://archive.apache.org/dist/hadoop/common/hadoop-{{ hadoop_version }}/{{ hadoop_archive_file }}"

# Built from hadoop_dir like every other path below, so renaming the
# installation directory only needs one change.
hadoop_config_path: "{{ hadoop_path }}/{{ hadoop_dir }}/etc/hadoop"

# Command lines for the individual HDFS daemons and format steps.
hdfs_cmd_namenode: "{{ hadoop_path }}/{{ hadoop_dir }}/bin/hdfs namenode"
hdfs_cmd_datanode: "{{ hadoop_path }}/{{ hadoop_dir }}/bin/hdfs datanode"
hdfs_cmd_journalnode: "{{ hadoop_path }}/{{ hadoop_dir }}/bin/hdfs journalnode"
# "zkfc" is itself the hdfs subcommand; there is no "hdfs start".
hdfs_cmd_zkfc: "{{ hadoop_path }}/{{ hadoop_dir }}/bin/hdfs zkfc"
hdfs_cmd_format: "{{ hadoop_path }}/{{ hadoop_dir }}/bin/hdfs namenode -format"
hdfs_cmd_format_ha: "{{ hadoop_path }}/{{ hadoop_dir }}/bin/hdfs namenode -initializeSharedEdits"

# Host names of the two name nodes; these must match the inventory host names.
namenode_1: "namenode-1"
namenode_2: "namenode-2"

114
roles/hdfs/files/hadoop-config.sh Executable file
View File

@ -0,0 +1,114 @@
#!/usr/bin/env bash
# Writes the Jinja2 templates for hdfs-site.xml, core-site.xml and
# mapred-site.xml into the Hadoop configuration directory.
# Shared helpers (e.g. write_file) are loaded from setup.sh, which is looked up
# three directory levels above this script.
set -ueo pipefail

# Installation prefix. Defaults to /tmp; export HADOOP_PATH to override it.
HADOOP_PATH="${HADOOP_PATH:-/tmp}"
HDFS_CONFIG_DIR="${HADOOP_PATH}/hadoop/etc/hadoop"
HDFS_CONFIG_TEMPLATE="${HDFS_CONFIG_DIR}/hdfs-site.xml.j2"
HDFS_CONFIG_TEMPLATE_CORE="${HDFS_CONFIG_DIR}/core-site.xml.j2"
HDFS_CONFIG_TEMPLATE_MAPRED="${HDFS_CONFIG_DIR}/mapred-site.xml.j2"

SCRIPT_PATH=$(dirname "$0")
# Resolve setup.sh relative to this script rather than the current directory,
# so the script works no matter where it is invoked from.
source "${SCRIPT_PATH}/../../../setup.sh"
# Print the Jinja2 template for core-site.xml on stdout.
#
# The {{ hdfs_cluster_id }} placeholder is emitted verbatim for the template
# engine. The here-document is quoted so the shell expands nothing inside it,
# and it is streamed with cat instead of `read -d ''`, which always returns a
# non-zero status at end of input and would abort a caller running under
# `set -e`.
create_hdfs_core_config_template() {
    cat <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://{{ hdfs_cluster_id }}</value>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/.tmp/hadoop</value>
  </property>
</configuration>
EOF
}
# Print the Jinja2 template for mapred-site.xml on stdout.
#
# Uses the hdfs_cluster_id placeholder, the same variable the core-site and
# hdfs-site templates use, so all three files render from one cluster id.
# The here-document is quoted so the shell expands nothing inside it, and it is
# streamed with cat instead of `read -d ''`, which always returns a non-zero
# status at end of input and would abort a caller running under `set -e`.
create_hdfs_mapred_config_template() {
    cat <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://{{ hdfs_cluster_id }}</value>
  </property>
</configuration>
EOF
}
# Print the Jinja2 template for hdfs-site.xml on stdout.
#
# Describes a two-name-node setup (nn1/nn2) for the nameservice named by
# {{ hdfs_cluster_id }}; {{ namenode_1 }} and {{ namenode_2 }} supply the host
# names. All placeholders are emitted verbatim for the template engine.
#
# Each dfs.namenode.http-address property is defined exactly once, on port
# 9870. An earlier revision also listed the same property names with port
# 50070 ahead of these entries.
# NOTE(review): assumed the later (9870) definitions were the effective ones;
# confirm the intended HTTP port for the Hadoop release in use.
#
# The here-document is quoted so the shell expands nothing inside it, and it is
# streamed with cat instead of `read -d ''`, which always returns a non-zero
# status at end of input and would abort a caller running under `set -e`.
create_hdfs_config_template() {
    cat <<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>dfs.nameservices</name>
    <value>{{ hdfs_cluster_id }}</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.{{ hdfs_cluster_id }}</name>
    <value>nn1,nn2</value>
  </property>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://{{ hdfs_cluster_id }}</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.{{ hdfs_cluster_id }}.nn1</name>
    <value>{{ namenode_1 }}:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.{{ hdfs_cluster_id }}.nn2</name>
    <value>{{ namenode_2 }}:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.{{ hdfs_cluster_id }}.nn1</name>
    <value>{{ namenode_1 }}:9870</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.{{ hdfs_cluster_id }}.nn2</name>
    <value>{{ namenode_2 }}:9870</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/tmp/hdfs/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/tmp/hdfs/datanode</value>
  </property>
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>file:///tmp/hadoop</value>
  </property>
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>127.0.0.1:2181</value>
  </property>
</configuration>
EOF
}
# Render the three template texts.
HDFS_CONFIG=$(create_hdfs_config_template)
HDFS_CONFIG_CORE=$(create_hdfs_core_config_template)
HDFS_CONFIG_MAPRED=$(create_hdfs_mapred_config_template)

# The configuration directory is not guaranteed to exist on the machine
# running this script, so create every target directory before writing.
for template_file in "${HDFS_CONFIG_TEMPLATE}" "${HDFS_CONFIG_TEMPLATE_CORE}" "${HDFS_CONFIG_TEMPLATE_MAPRED}"; do
    mkdir -p "$(dirname "${template_file}")"
done

write_file "${HDFS_CONFIG_TEMPLATE}" "${HDFS_CONFIG}"
write_file "${HDFS_CONFIG_TEMPLATE_CORE}" "${HDFS_CONFIG_CORE}"
write_file "${HDFS_CONFIG_TEMPLATE_MAPRED}" "${HDFS_CONFIG_MAPRED}"

37
roles/hdfs/tasks/main.yml Normal file
View File

@ -0,0 +1,37 @@
---
# Tasks of the hdfs role: download the release tarball on the controller,
# unpack it on each host under hadoop_path, move it to hadoop_dir, and render
# the three *-site.xml files from templates generated by hadoop-config.sh.

- name: Download Hadoop archive if not existent
  get_url:
    url: "{{ hadoop_archive_url }}"
    dest: "{{ hadoop_path }}/{{ hadoop_archive_file }}"
    mode: "0600"
    checksum: "sha256:{{ hadoop_archive_file_checksum256 }}"
  run_once: true
  delegate_to: localhost

- name: Extract archive
  ansible.builtin.unarchive:
    src: "{{ hadoop_path }}/{{ hadoop_archive_file }}"
    dest: "{{ hadoop_path }}"

# Remove a previous installation so the rename below cannot nest directories.
- name: Recursively remove directory
  ansible.builtin.file:
    path: "{{ hadoop_path }}/{{ hadoop_dir }}"
    state: absent

- name: Rename dir
  command: "mv -f {{ hadoop_path }}/{{ hadoop_archive_dir }} {{ hadoop_path }}/{{ hadoop_dir }}"

# The generator script lives in this role's files/ directory; role_path makes
# the lookup independent of the directory ansible-playbook was started from.
# HADOOP_PATH is exported so the script can place its output under hadoop_path.
# The output is identical for every host, so the script only needs to run once.
- name: Write config templates
  ansible.builtin.command:
    argv:
      - "{{ role_path }}/files/hadoop-config.sh"
  environment:
    HADOOP_PATH: "{{ hadoop_path }}"
  run_once: true
  delegate_to: localhost

- name: Create core-site.xml
  template:
    src: "{{ hadoop_config_path }}/core-site.xml.j2"
    dest: "{{ hadoop_config_path }}/core-site.xml"
    mode: "0700"

- name: Create hdfs-site.xml
  template:
    src: "{{ hadoop_config_path }}/hdfs-site.xml.j2"
    dest: "{{ hadoop_config_path }}/hdfs-site.xml"
    mode: "0700"

# Rendered from its own template rather than from core-site.xml.j2.
- name: Create mapred-site.xml
  template:
    src: "{{ hadoop_config_path }}/mapred-site.xml.j2"
    dest: "{{ hadoop_config_path }}/mapred-site.xml"
    mode: "0700"
  vars:
    # Alias for mapred templates that refer to the cluster id as cluster_ha_id.
    cluster_ha_id: "{{ hdfs_cluster_id }}"

48
setup.sh Normal file
View File

@ -0,0 +1,48 @@
#!/usr/bin/env bash
# Verify that every command named in the global DEPS array can be found.
#
# Globals:  DEPS - array of command names to look up.
# Outputs:  "Could not find <name>" on stderr for the first missing command.
# Exits:    terminates the shell with status 1 on the first missing command;
#           returns normally when all commands are available.
check_dependencies() {
    local dep
    for dep in "${DEPS[@]}"; do
        # command -v is a shell builtin, so the check itself needs no external
        # tool such as which.
        if ! command -v "${dep}" > /dev/null 2>&1; then
            printf "Could not find %s\n" "${dep}" >&2
            exit 1
        fi
    done
}
# Unpack a gzip-compressed tar archive into the current directory.
#
# Arguments: $1 - path of the archive.
# Outputs:   progress and failure messages on stdout.
# Exits:     terminates the shell with status 1 when tar fails.
extract_archive() {
    printf "Extracting %s archive\n" "${1}"
    # Success path: nothing more to do.
    tar xfz "$1" && return
    printf "Failed to extract archive: %s\n" "${1}"
    exit 1
}
# Compare the SHA-512 digest of a file with an expected value.
#
# Arguments: $1 - path of the file to hash.
#            $2 - expected digest as a hex string.
# Globals:   CKSUM - set to the computed digest (empty if none was computed).
# Outputs:   "0" on stdout when the digests match, "1" otherwise.
#
# The digest is computed with the first tool that is available: BSD
# `sha512 -q`, then `sha512sum`, then `shasum -a 512`.
compare_checksum() {
    local r
    CKSUM=""
    if command -v sha512 > /dev/null 2>&1; then
        CKSUM=$(sha512 -q "${1}")
    elif command -v sha512sum > /dev/null 2>&1; then
        # Output is "<digest>  <file>"; keep the digest only.
        CKSUM=$(sha512sum "${1}" | cut -d ' ' -f 1)
    elif command -v shasum > /dev/null 2>&1; then
        CKSUM=$(shasum -a 512 "${1}" | cut -d ' ' -f 1)
    else
        printf "No SHA-512 tool found (tried sha512, sha512sum, shasum)\n" >&2
    fi
    # An empty digest means hashing failed and must never count as a match.
    if [ -n "$CKSUM" ] && [ "$CKSUM" = "$2" ]; then
        r=0
    else
        r=1
    fi
    echo "$r"
}
# Store a string in a file, replacing any previous content.
#
# Arguments: $1 - destination path.
#            $2 - text to write; written as-is, no newline is appended.
# Outputs:   "Writing <path>" on stdout.
write_file() {
    local target_path="${1}"
    printf "Writing %s\n" "${target_path}"
    local file_body="${2}"
    printf "%s" "${file_body}" > "${target_path}"
}
# Start a command in the background and report its process id.
#
# Arguments: $1 - command line to run (command name followed by arguments).
#            $2 - human-readable name used in the status messages.
# Outputs:   status messages on stdout, followed by one final line holding the
#            PID of the background process, or "-1" when the command could not
#            be found.
#
# The command is launched directly from this function, not inside a command
# substitution, so that $! really refers to the started process.
run() {
    local pid cmd_name cmd_rest
    printf "Starting %s\n" "$2"
    # The first word of the command line is the program that must exist.
    read -r cmd_name cmd_rest <<< "$1"
    if command -v "${cmd_name}" > /dev/null 2>&1; then
        # $1 is deliberately unquoted so it is split into command and arguments.
        $1 > /dev/null 2>&1 &
        pid=$!
        printf "Started %s successfully\n" "$2"
    else
        printf "Failed to start %s\n" "$2"
        pid="-1"
    fi
    echo "$pid"
}