zookeeper: Add health check
This commit is contained in:
parent
58b34f86c4
commit
9c1b309164
@ -66,7 +66,7 @@ prom_hbase_num_clusterrequests = Gauge('hbase_clusterrequests', 'HBase Clusterre
|
||||
prom_hbase_regions_in_transition_stale = Gauge('regions_in_transition_stale', 'Number of stale regions in transition')
|
||||
prom_zookeeper_num = Gauge('zookeeper_num', 'Known ZooKeeper Servers')
|
||||
prom_zookeeper_num_live = Gauge('zookeeper_num_live', 'Live ZooKeeper Servers')
|
||||
prom_zookeeper_num_dead = Gauge('zookeeper_num_dead', 'Dead ZooKeeper Servers')
|
||||
# Set by active_servers() from the `Mode:` line reported by the probed servers.
prom_zookeeper_has_leader = Gauge('zookeeper_has_leader', 'ZooKeeper cluster has a leader')
|
||||
|
||||
# HDFS/HBase
|
||||
hdfs_config_file = "/etc/hadoop/conf/hdfs-site.xml"
|
||||
@ -138,6 +138,42 @@ class zk():
|
||||
logging.debug("ZooKeeper: Connection re-established")
|
||||
# Handle being connected/reconnected to Zookeeper
|
||||
|
||||
def active_servers(address_list):
    """Probe each ZooKeeper server and publish liveness/leader metrics.

    For every address, ``stat`` is piped to ``nc <address> 2181`` and the
    reply is searched for a ``Mode: <mode>`` line. A server that reports a
    mode is counted as active; a server whose mode is ``leader`` marks the
    ensemble as having a leader.

    Args:
        address_list: Iterable of ZooKeeper server addresses (strings) to
            probe.

    Side effects:
        Sets ``prom_zookeeper_num_live`` to the number of servers that
        reported a mode and ``prom_zookeeper_has_leader`` to 1 or 0, and
        logs the findings (plus anything the probe wrote to stderr) at
        INFO level.
    """
    # Initialised up front so the gauge and the summary below always have a
    # value, even when no probed server reports itself as leader.
    has_leader = 0
    zk_leader_address = ""
    num_active_servers = 0
    re_mode = re.compile(r'^Mode:\s*(.+?)\s*$')

    for address in address_list:
        # NOTE(review): the address is interpolated into a shell command;
        # this assumes it comes from trusted configuration - confirm.
        cmd = 'echo stat | nc ' + address + ' 2181'
        p = Popen(['/bin/sh', '-c', cmd], stdout=PIPE, stderr=PIPE, close_fds=False)
        output, error = p.communicate()

        for line in output.splitlines():
            # errors='replace' keeps one garbled reply from aborting the scan.
            match = re_mode.match(line.decode('utf-8', errors='replace'))
            if not match:
                continue

            mode = match.group(1)
            logging.info("zk: server %s: %s", address, mode)
            num_active_servers += 1

            if mode == "leader":
                has_leader = 1
                zk_leader_address = address

        # Surface probe failures (e.g. connection refused) as readable text
        # rather than a bytes repr.
        for line in error.splitlines():
            logging.info(line.decode('utf-8', errors='replace'))

    # Publish once per scan so the gauge also drops back to 0 when the
    # leader disappears.
    prom_zookeeper_has_leader.set(has_leader)
    prom_zookeeper_num_live.set(num_active_servers)
    logging.info("zk: %d active ZooKeeper servers", num_active_servers)

    if has_leader:
        logging.info("zk: Zookeeper has leader: True")
        logging.info("zk: leader: %s", zk_leader_address)
    else:
        logging.info("zk: Zookeeper has leader: False")
|
||||
|
||||
|
||||
class jmx_query():
|
||||
|
||||
@ -609,6 +645,7 @@ if __name__ == '__main__':
|
||||
|
||||
while True:
|
||||
m = zk.get_znode_data(znode_hbase + "/master")
|
||||
zk.active_servers(zk_server)
|
||||
|
||||
if not m:
|
||||
logging.info("ZooKeeper: Failed to get HBase master")
|
||||
@ -619,7 +656,6 @@ if __name__ == '__main__':
|
||||
jmx.main(hdfs_namenodes)
|
||||
hbase_exporter().main(hbase_master)
|
||||
#prom_zookeeper_num_live.set(nzookeeper_live)
|
||||
#prom_zookeeper_num_dead.set(nzk_server - nzookeeper_live)
|
||||
|
||||
nruns += 1
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user