Make exporter update rate and hbck frequency configurable, add args
This commit is contained in:
parent
a890dbc154
commit
1030701cb5
1 changed file with 65 additions and 31 deletions
|
@ -21,6 +21,7 @@ from __future__ import unicode_literals
|
||||||
import argparse
|
import argparse
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from flatten_json import flatten
|
from flatten_json import flatten
|
||||||
|
import datetime as dt
|
||||||
from google.protobuf import text_format
|
from google.protobuf import text_format
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
|
@ -48,8 +49,6 @@ logpath = tmp_path
|
||||||
# ZooKeeper
|
# ZooKeeper
|
||||||
zk_reconnect_interval_s = 30
|
zk_reconnect_interval_s = 30
|
||||||
|
|
||||||
prom_scrape_interval_s = 10
|
|
||||||
|
|
||||||
# Prom vars
|
# Prom vars
|
||||||
REQUEST_TIME = Summary('request_processing_seconds', 'Time spent processing request')
|
REQUEST_TIME = Summary('request_processing_seconds', 'Time spent processing request')
|
||||||
prom_hdfs_total = Gauge('hdfs_bytes_total', 'HDFS total bytes')
|
prom_hdfs_total = Gauge('hdfs_bytes_total', 'HDFS total bytes')
|
||||||
|
@ -84,7 +83,6 @@ hbase_master_ui_default_port = 16010
|
||||||
hdfs_namenode_default_port = 50070
|
hdfs_namenode_default_port = 50070
|
||||||
cluster_is_kerberized = False
|
cluster_is_kerberized = False
|
||||||
|
|
||||||
|
|
||||||
class zk():
|
class zk():
|
||||||
zk_client = ""
|
zk_client = ""
|
||||||
|
|
||||||
|
@ -358,26 +356,41 @@ class jmx_query():
|
||||||
|
|
||||||
class hbase_exporter():
|
class hbase_exporter():
|
||||||
|
|
||||||
def main(self, hbase_master_hosts):
|
def main(self, hbase_master_hosts, run_hbck):
|
||||||
hbase_active_master = self.zk_active_master()
|
hbase_active_master = self.zk_active_master()
|
||||||
|
|
||||||
if not hbase_active_master:
|
if not hbase_active_master:
|
||||||
logging.info("Failed to determine active HBase master")
|
logging.info("hbase: Failed to determine active HBase master")
|
||||||
prom_hbase_up.set(0)
|
prom_hbase_up.set(0)
|
||||||
prom_hbase_healthy.set(0)
|
prom_hbase_healthy.set(0)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self.stale_regions_in_transition(hbase_active_master)
|
self.stale_regions_in_transition(hbase_active_master)
|
||||||
|
msg = 'hbase: {0} stale regions in transition '\
|
||||||
|
.format(self.num_regions_in_transition_stale)
|
||||||
|
|
||||||
|
logging.info(msg)
|
||||||
|
prom_hbase_num_regions_in_transition_stale.set(self.num_regions_in_transition_stale)
|
||||||
|
|
||||||
|
if run_hbck:
|
||||||
self.hbck_inconsistencies()
|
self.hbck_inconsistencies()
|
||||||
|
logging.info("hbase-hbck: Number of inconsistencies: %d", self.num_inconsistencies)
|
||||||
|
prom_hbase_num_inconsistencies.set(self.num_inconsistencies)
|
||||||
|
|
||||||
self.hbase_write_test()
|
self.hbase_write_test()
|
||||||
self.check_health()
|
if self.hbase_write_success:
|
||||||
|
logging.info("hbase: Write test succeeded")
|
||||||
|
prom_hbase_writeable.set(1)
|
||||||
|
else:
|
||||||
|
logging.info("hbase: Write test failed! Is Thrift up and running?")
|
||||||
|
prom_hbase_writeable.set(0)
|
||||||
|
|
||||||
|
self.check_health(run_hbck)
|
||||||
|
|
||||||
|
|
||||||
def check_health(self):
|
def check_health(self, run_hbck):
|
||||||
if self.num_inconsistencies == -1:
|
# Only check for inconsistencies if we actually ran hbck
|
||||||
prom_hbase_healthy.set(0)
|
if run_hbck and self.num_inconsistencies > 0:
|
||||||
return False
|
|
||||||
elif self.num_inconsistencies > 0:
|
|
||||||
prom_hbase_healthy.set(0)
|
prom_hbase_healthy.set(0)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -411,7 +424,6 @@ class hbase_exporter():
|
||||||
msg = msg[meta_length + 9:]
|
msg = msg[meta_length + 9:]
|
||||||
master = pbMaster()
|
master = pbMaster()
|
||||||
master.ParseFromString(msg)
|
master.ParseFromString(msg)
|
||||||
logging.info("zk: %s", master.master.host_name)
|
|
||||||
|
|
||||||
return master.master.host_name
|
return master.master.host_name
|
||||||
|
|
||||||
|
@ -466,18 +478,12 @@ class hbase_exporter():
|
||||||
if not isinstance(num_regions_in_transition_stale, int):
|
if not isinstance(num_regions_in_transition_stale, int):
|
||||||
logging.debug('Parse error - got non-integer for stale regions in transition')
|
logging.debug('Parse error - got non-integer for stale regions in transition')
|
||||||
|
|
||||||
msg = '{0} stale regions in transition '\
|
|
||||||
.format(num_regions_in_transition_stale)
|
|
||||||
|
|
||||||
prom_hbase_num_regions_in_transition_stale.set(num_regions_in_transition_stale)
|
|
||||||
logging.info(msg)
|
|
||||||
|
|
||||||
self.num_regions_in_transition_stale = num_regions_in_transition_stale
|
self.num_regions_in_transition_stale = num_regions_in_transition_stale
|
||||||
|
|
||||||
|
|
||||||
def hbaseui_parse_output(self, content):
|
def hbaseui_parse_output(self, content):
|
||||||
soup = BeautifulSoup(content, 'html.parser')
|
soup = BeautifulSoup(content, 'html.parser')
|
||||||
num_regions_in_transition_stale = 0
|
num_regions_in_transition_stale = -1
|
||||||
try:
|
try:
|
||||||
headings = soup.findAll('h2')
|
headings = soup.findAll('h2')
|
||||||
for heading in headings:
|
for heading in headings:
|
||||||
|
@ -504,7 +510,7 @@ class hbase_exporter():
|
||||||
self.num_inconsistencies = None
|
self.num_inconsistencies = None
|
||||||
hbck_status = None
|
hbck_status = None
|
||||||
|
|
||||||
logging.info("HBase: Running hbck consistency check")
|
logging.info("hbase: Running hbck consistency check")
|
||||||
p = Popen(['hbase', 'hbck'], stdout=PIPE, stderr=PIPE, close_fds=False)
|
p = Popen(['hbase', 'hbck'], stdout=PIPE, stderr=PIPE, close_fds=False)
|
||||||
output, error = p.communicate()
|
output, error = p.communicate()
|
||||||
output = output.splitlines()
|
output = output.splitlines()
|
||||||
|
@ -515,7 +521,7 @@ class hbase_exporter():
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
self.num_inconsistencies = match.group(1)
|
self.num_inconsistencies = match.group(1)
|
||||||
logging.info('Number of inconsistencies: %s', self.num_inconsistencies)
|
logging.info('hbase-hbck: Number of inconsistencies: %s', self.num_inconsistencies)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
match = re_status.match(line.decode('utf-8'))
|
match = re_status.match(line.decode('utf-8'))
|
||||||
|
@ -580,7 +586,6 @@ class hbase_exporter():
|
||||||
|
|
||||||
if p.returncode != 0:
|
if p.returncode != 0:
|
||||||
self.hbase_write_success = 0
|
self.hbase_write_success = 0
|
||||||
prom_hbase_is_writeable.set(0)
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self.hbase_write_success = 1
|
self.hbase_write_success = 1
|
||||||
|
@ -616,7 +621,9 @@ if __name__ == '__main__':
|
||||||
parser.add_argument('--hdfs-namenode', dest='hdfs_namenode', action='append', help="HDFS namenode address, can be specified multiple times", type=str, default=hdfs_namenode_default_address)
|
parser.add_argument('--hdfs-namenode', dest='hdfs_namenode', action='append', help="HDFS namenode address, can be specified multiple times", type=str, default=hdfs_namenode_default_address)
|
||||||
parser.add_argument('--zookeeper-server-address', dest='zk_server', action='append', help="ZooKeeper server address, can be specified multiple times", type=str, required=True)
|
parser.add_argument('--zookeeper-server-address', dest='zk_server', action='append', help="ZooKeeper server address, can be specified multiple times", type=str, required=True)
|
||||||
parser.add_argument('--zookeeper-use-tls', dest='zk_use_tls', help="Use TLS when connecting to ZooKeeper", type=bool, default=False)
|
parser.add_argument('--zookeeper-use-tls', dest='zk_use_tls', help="Use TLS when connecting to ZooKeeper", type=bool, default=False)
|
||||||
parser.add_argument('--prometheus-exporter-port', dest='prom_http_port', help="Listen port for Prometheus export", type=int, default=9010)
|
parser.add_argument('--exporter-port', dest='prom_http_port', help="Listen port for Prometheus export", type=int, default=9010)
|
||||||
|
parser.add_argument('--export-refresh-rate', dest='prom_export_interval_s', help="Time between metrics are gathered in seconds", type=int, default=60)
|
||||||
|
parser.add_argument('--hbck-refresh-rate', dest='hbase_hbck_interval_s', help="Minimum time between two consecutive hbck runs in seconds", type=int, default=600)
|
||||||
parser.add_argument('--relay-jmx', dest='relay_jmx', help="Relay complete JMX data", type=bool, default=False)
|
parser.add_argument('--relay-jmx', dest='relay_jmx', help="Relay complete JMX data", type=bool, default=False)
|
||||||
parser.add_argument('--logfile', dest='logfile', help="Path to optional logfile", type=str)
|
parser.add_argument('--logfile', dest='logfile', help="Path to optional logfile", type=str)
|
||||||
parser.add_argument('--loglevel', dest='loglevel', help="Loglevel, default: INFO", type=str, default='INFO')
|
parser.add_argument('--loglevel', dest='loglevel', help="Loglevel, default: INFO", type=str, default='INFO')
|
||||||
|
@ -630,6 +637,8 @@ if __name__ == '__main__':
|
||||||
hbase_master = args.hbase_master
|
hbase_master = args.hbase_master
|
||||||
hdfs_namenodes = args.hdfs_namenode
|
hdfs_namenodes = args.hdfs_namenode
|
||||||
relay_complete_jmx = args.relay_jmx
|
relay_complete_jmx = args.relay_jmx
|
||||||
|
prom_export_interval_s = args.prom_export_interval_s
|
||||||
|
hbase_hbck_interval_s = args.hbase_hbck_interval_s
|
||||||
del locals()['args']
|
del locals()['args']
|
||||||
|
|
||||||
nzk_server = len(zk_server)
|
nzk_server = len(zk_server)
|
||||||
|
@ -701,20 +710,45 @@ if __name__ == '__main__':
|
||||||
jmx = jmx_query(relay_complete_jmx)
|
jmx = jmx_query(relay_complete_jmx)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
nruns += 1
|
||||||
|
run_hbck = False
|
||||||
|
|
||||||
|
# Set the initial hbck timer
|
||||||
|
if nruns == 1:
|
||||||
|
hbase_hbck_timer_s = dt.datetime.now()
|
||||||
|
run_hbck = True
|
||||||
|
|
||||||
hbase_active_master = hbase_exporter.zk_active_master()
|
hbase_active_master = hbase_exporter.zk_active_master()
|
||||||
logging.info("hbase: Active master: " + hbase_active_master)
|
logging.info("hbase: Active master: " + hbase_active_master)
|
||||||
|
|
||||||
zk.active_servers(zk_server)
|
zk.active_servers(zk_server)
|
||||||
|
|
||||||
|
|
||||||
#jmx_query().main(hdfs_namenodes)
|
|
||||||
jmx.main(hdfs_namenodes)
|
jmx.main(hdfs_namenodes)
|
||||||
hbase_exporter().main(hbase_master)
|
|
||||||
#prom_zookeeper_num_live.set(nzookeeper_live)
|
|
||||||
|
|
||||||
nruns += 1
|
hbase_hbck_time_s = int((dt.datetime.now() - hbase_hbck_timer_s).total_seconds())
|
||||||
|
logging.info("hbase-hbck: Timer: {0} seconds".format(hbase_hbck_time_s))
|
||||||
|
|
||||||
|
# Do an hbck on the first run and then whenever the interval
|
||||||
|
# between two consecutive runs in seconds is higher than the configured interval
|
||||||
|
if hbase_hbck_interval_s < hbase_hbck_time_s or run_hbck:
|
||||||
|
run_hbck = True
|
||||||
|
# Set a new hbck timer
|
||||||
|
hbase_hbck_timer_s = dt.datetime.now()
|
||||||
|
else:
|
||||||
|
hbck_t_next_s = hbase_hbck_interval_s - hbase_hbck_time_s
|
||||||
|
if hbck_t_next_s < prom_export_interval_s:
|
||||||
|
# Minimum wait time is our export refresh rate -
|
||||||
|
# i.e. the time we sleep between two runs
|
||||||
|
hbck_t_next_s = prom_export_interval_s
|
||||||
|
logging.info("hbase-hbck: Skipping. hbck is only run every {0} seconds. Next run in {1} seconds"
|
||||||
|
.format(hbase_hbck_interval_s, hbck_t_next_s))
|
||||||
|
|
||||||
|
hbase_exporter().main(hbase_master, run_hbck)
|
||||||
|
#prom_zookeeper_num_live.set(nzookeeper_live)
|
||||||
|
|
||||||
if nruns == 1:
|
if nruns == 1:
|
||||||
logging.info("Started HBase exporter")
|
logging.info("Started HBase exporter")
|
||||||
|
|
||||||
time.sleep(prom_scrape_interval_s)
|
logging.info("Sleeping for {0} seconds ".format(prom_export_interval_s))
|
||||||
|
|
||||||
|
time.sleep(prom_export_interval_s)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue