zookeeper: Add health check

This commit is contained in:
Björn Busse 2020-06-09 17:22:52 +02:00
parent 58b34f86c4
commit 9c1b309164
1 changed files with 38 additions and 2 deletions

View File

@ -66,7 +66,7 @@ prom_hbase_num_clusterrequests = Gauge('hbase_clusterrequests', 'HBase Clusterre
# Gauges for stale HBase regions in transition and for ZooKeeper cluster health
prom_hbase_regions_in_transition_stale = Gauge('regions_in_transition_stale', 'Number of stale regions in transition')
prom_zookeeper_num = Gauge('zookeeper_num', 'Known ZooKeeper Servers')
prom_zookeeper_num_live = Gauge('zookeeper_num_live', 'Live ZooKeeper Servers')
prom_zookeeper_num_dead = Gauge('zookeeper_num_dead', 'Dead ZooKeeper Servers')
# 1 when a server reports itself as leader, 0 otherwise
prom_zookeeper_has_leader = Gauge('zookeeper_has_leader', 'ZooKeeper cluster has a leader')
# HDFS/HBase
hdfs_config_file = "/etc/hadoop/conf/hdfs-site.xml"
@ -138,6 +138,42 @@ class zk():
logging.debug("ZooKeeper: Connection re-established")
# Handle being connected/reconnected to Zookeeper
def active_servers(address_list):
    """Probe ZooKeeper servers and export liveness and leader metrics.

    For every address the ``stat`` command is sent to port 2181 through
    ``nc`` and the reply is searched for a ``Mode: <mode>`` line. A server
    that reports a mode is counted as active; a mode of ``leader`` marks
    the cluster as having a leader.

    Side effects: sets ``prom_zookeeper_num_live`` and
    ``prom_zookeeper_has_leader`` and logs a summary. Returns nothing.

    Args:
        address_list: iterable of ZooKeeper host names or addresses
            (strings); each one is interpolated into a shell command.
    """
    # Must be initialised here: when no server reports "leader" the flag
    # is still read after the loop.
    has_leader = 0
    zk_leader_address = ""
    num_active_servers = 0
    re_mode = re.compile(r'^Mode:\s*(.+?)\s*$')

    for address in address_list:
        # NOTE(review): the address is concatenated into a shell command;
        # this is only safe as long as address_list comes from trusted
        # configuration.
        cmd = 'echo stat | nc ' + address + ' 2181'
        p = Popen(['/bin/sh', '-c', cmd], stdout=PIPE, stderr=PIPE, close_fds=False)
        output, error = p.communicate()

        for line in output.splitlines():
            match = re_mode.match(line.decode('utf-8', errors='replace'))
            if not match:
                continue
            mode = match.group(1)
            logging.info("zk: server %s: %s", address, mode)
            num_active_servers += 1
            if mode == "leader":
                has_leader = 1
                zk_leader_address = address

        for line in error.splitlines():
            # Decode so the log shows text instead of a bytes repr
            logging.info(line.decode('utf-8', errors='replace'))

    # Publish both gauges once per probe round so that the leader gauge
    # drops back to 0 when no server reports itself as leader.
    prom_zookeeper_num_live.set(num_active_servers)
    prom_zookeeper_has_leader.set(has_leader)

    logging.info("zk: %d active ZooKeeper servers", num_active_servers)
    if has_leader:
        logging.info("zk: Zookeeper has leader: True")
        logging.info("zk: leader: %s", zk_leader_address)
    else:
        logging.info("zk: Zookeeper has leader: False")
class jmx_query():
@ -609,6 +645,7 @@ if __name__ == '__main__':
while True:
m = zk.get_znode_data(znode_hbase + "/master")
zk.active_servers(zk_server)
if not m:
logging.info("ZooKeeper: Failed to get HBase master")
@ -619,7 +656,6 @@ if __name__ == '__main__':
jmx.main(hdfs_namenodes)
hbase_exporter().main(hbase_master)
#prom_zookeeper_num_live.set(nzookeeper_live)
#prom_zookeeper_num_dead.set(nzk_server - nzookeeper_live)
nruns += 1