diff --git a/hbase-exporter b/hbase-exporter
index 530d594..e39355d 100755
--- a/hbase-exporter
+++ b/hbase-exporter
@@ -66,7 +66,7 @@ prom_hbase_num_clusterrequests = Gauge('hbase_clusterrequests', 'HBase Clusterre
 prom_hbase_regions_in_transition_stale = Gauge('regions_in_transition_stale', 'Number of stale regions in transition')
 prom_zookeeper_num = Gauge('zookeeper_num', 'Known ZooKeeper Servers')
 prom_zookeeper_num_live = Gauge('zookeeper_num_live', 'Live ZooKeeper Servers')
-prom_zookeeper_num_dead = Gauge('zookeeper_num_dead', 'Dead ZooKeeper Servers')
+prom_zookeeper_has_leader = Gauge('zookeeper_has_leader', 'ZooKeeper cluster has a leader')
 
 # HDFS/HBase
 hdfs_config_file = "/etc/hadoop/conf/hdfs-site.xml"
@@ -138,6 +138,61 @@ class zk():
             logging.debug("ZooKeeper: Connection re-established")
             # Handle being connected/reconnected to Zookeeper
 
+    @staticmethod
+    def active_servers(address_list):
+        """Probe every ZooKeeper server and publish the live/leader gauges.
+
+        Sends ``stat`` to port 2181 of each address through ``nc`` and scans
+        the reply for its ``Mode:`` line. Every server that reports a mode
+        counts as active; a mode of ``leader`` marks the cluster as having a
+        leader. The results are written to prom_zookeeper_num_live and
+        prom_zookeeper_has_leader and logged.
+
+        address_list: iterable of ZooKeeper host names or addresses.
+        """
+        has_leader = 0
+        zk_leader_address = ""
+        num_active_servers = 0
+        re_mode = re.compile(r'^Mode:\s*(.+?)\s*$')
+
+        for address in address_list:
+            # Run nc with an argument vector rather than a /bin/sh command
+            # line, so the address is never interpreted by a shell.
+            try:
+                p = Popen(['nc', address, '2181'],
+                          stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=False)
+                output, error = p.communicate(b'stat\n')
+            except OSError as e:
+                logging.info("zk: server %s: failed to run nc: %s", address, e)
+                continue
+
+            for line in output.splitlines():
+                match = re_mode.match(line.decode('utf-8', 'replace'))
+                if not match:
+                    continue
+
+                mode = match.group(1)
+                logging.info("zk: server %s: %s", address, mode)
+                num_active_servers += 1
+
+                if mode == "leader":
+                    has_leader = 1
+                    zk_leader_address = address
+
+            for line in error.splitlines():
+                logging.info("zk: server %s: %s", address, line.decode('utf-8', 'replace'))
+
+        # Publish both gauges on every call so that a lost leader resets
+        # zookeeper_has_leader to 0 instead of leaving the old value behind.
+        prom_zookeeper_num_live.set(num_active_servers)
+        prom_zookeeper_has_leader.set(has_leader)
+        logging.info("zk: %d active ZooKeeper servers", num_active_servers)
+        if has_leader:
+            logging.info("zk: Zookeeper has leader: True")
+            logging.info("zk: leader: %s", zk_leader_address)
+        else:
+            logging.info("zk: Zookeeper has leader: False")
+
 
 class jmx_query():
 
@@ -609,6 +664,7 @@ if __name__ == '__main__':
 
     while True:
         m = zk.get_znode_data(znode_hbase + "/master")
+        zk.active_servers(zk_server)
 
         if not m:
             logging.info("ZooKeeper: Failed to get HBase master")
@@ -619,7 +675,6 @@ if __name__ == '__main__':
         jmx.main(hdfs_namenodes)
         hbase_exporter().main(hbase_master)
         #prom_zookeeper_num_live.set(nzookeeper_live)
-        #prom_zookeeper_num_dead.set(nzk_server - nzookeeper_live)
 
         nruns += 1
 