Improve log messages, optionally remove hbck lockfile
This commit is contained in:
parent
2e6e541d14
commit
241b41cbcb
|
@ -83,6 +83,8 @@ namenode_use_tls = False
|
|||
hbase_master_ui_default_port = 16010
|
||||
hdfs_namenode_default_port = 50070
|
||||
cluster_is_kerberized = False
|
||||
hbase_hbck_remove_lockfile = True
|
||||
|
||||
|
||||
class zk():
|
||||
zk_client = ""
|
||||
|
@ -386,26 +388,21 @@ class hbase_exporter():
|
|||
logging.info("hbase: Write test failed! Is Thrift up and running?")
|
||||
prom_hbase_writeable.set(0)
|
||||
|
||||
self.check_health(run_hbck)
|
||||
hbase_health = self.check_health(run_hbck)
|
||||
prom_hbase_healthy.set(hbase_health)
|
||||
|
||||
|
||||
def check_health(self, run_hbck):
|
||||
# Only check for inconsistencies if we actually ran hbck
|
||||
if run_hbck and self.num_inconsistencies > 0:
|
||||
prom_hbase_healthy.set(0)
|
||||
return False
|
||||
|
||||
if self.num_regions_in_transition_stale > 0:
|
||||
prom_hbase_healthy.set(0)
|
||||
return False
|
||||
|
||||
if self.hbase_write_success != 0:
|
||||
prom_hbase_healthy.set(0)
|
||||
return False
|
||||
|
||||
prom_hbase_up.set(1)
|
||||
prom_hbase_healthy.set(1)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
|
@ -446,7 +443,6 @@ class hbase_exporter():
|
|||
if 'Master not running' in r.stdout.decode('utf-8'):
|
||||
return False
|
||||
|
||||
prom_hbase_up.set(1)
|
||||
active_master = r.stdout.decode('utf-8').rstrip()
|
||||
|
||||
return active_master
|
||||
|
@ -474,33 +470,35 @@ class hbase_exporter():
|
|||
num_regions_in_transition_stale = self.hbaseui_parse_output(req.content)
|
||||
|
||||
if num_regions_in_transition_stale is None:
|
||||
logging.debug('Parse error - failed to find number of stale regions in transition')
|
||||
logging.debug('hbase-ui: Parse error - failed to find number of stale regions in transition')
|
||||
|
||||
if not isinstance(num_regions_in_transition_stale, int):
|
||||
logging.debug('Parse error - got non-integer for stale regions in transition')
|
||||
logging.debug('hbase-ui: Parse error - got non-integer for stale regions in transition')
|
||||
|
||||
self.num_regions_in_transition_stale = num_regions_in_transition_stale
|
||||
|
||||
|
||||
def hbaseui_parse_output(self, content):
|
||||
soup = BeautifulSoup(content, 'html.parser')
|
||||
num_regions_in_transition_stale = -1
|
||||
num_regions_in_transition_stale = 0
|
||||
try:
|
||||
headings = soup.findAll('h2')
|
||||
for heading in headings:
|
||||
# The section only exists if there are stale regions in transition
|
||||
if heading.get_text() == "Regions in Transition":
|
||||
logging.debug('Found Regions in Transition section header')
|
||||
logging.debug('Looking for table')
|
||||
logging.info('hbase-ui: Found Regions in Transition section header')
|
||||
logging.info('hbase-ui: Looking for table')
|
||||
table = heading.find_next('table')
|
||||
num_regions_in_transition_stale = self.hbaseui_parse_table(table)
|
||||
if not isinstance(num_regions_in_transition_stale, int):
|
||||
logging.debug('Got non-integer \'{0}\' for stale regions in transition when parsing HBase Master UI'\
|
||||
logging.info('hbase-ui: Got non-integer \'{0}\' for stale regions in transition when parsing HBase Master UI'\
|
||||
.format(num_regions_in_transition_stale))
|
||||
|
||||
return num_regions_in_transition_stale
|
||||
|
||||
except (AttributeError, TypeError):
|
||||
logging.info('Failed to parse HBase Master UI status page')
|
||||
logging.info('hbase-ui: Failed to parse HBase Master UI status page')
|
||||
return -1
|
||||
|
||||
return num_regions_in_transition_stale
|
||||
|
||||
|
||||
def hbck_inconsistencies(self):
|
||||
|
@ -538,6 +536,13 @@ class hbase_exporter():
|
|||
if match:
|
||||
hbck_status = match.group(0)
|
||||
logging.info('hbase-hbck: hbck status = %s', hbck_status)
|
||||
hdfs_lock_uri = re.findall('hdfs://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', hbck_status)
|
||||
for uri in hdfs_lock_uri:
|
||||
logging.info('hbase-hbck: Locked by lockfile: {0}'.format(hdfs_lock_uri[0]))
|
||||
if hbase_hbck_remove_lockfile:
|
||||
hdfs_remove_file(uri)
|
||||
else:
|
||||
logging.info('hbase-hbck: Please remove lockfile manually if no hbck is running')
|
||||
break
|
||||
|
||||
if hbck_status is None:
|
||||
|
@ -593,6 +598,20 @@ class hbase_exporter():
|
|||
return True
|
||||
|
||||
|
||||
def hdfs_remove_file(hdfs_uri):
    """Remove a file from HDFS by running ``hadoop fs -rm <hdfs_uri>``.

    Every line the command prints on stdout and stderr is logged with an
    ``hdfs-rm:`` prefix (stdout lines first, then stderr lines).

    Args:
        hdfs_uri: Path/URI handed unchanged to ``hadoop fs -rm``.

    Returns:
        The command's stderr split into lines, as a list of ``bytes``.
        If the ``hadoop`` executable cannot be started, a one-element list
        holding the encoded OS error message is returned instead of raising,
        so a missing binary does not abort the caller.
    """
    try:
        p = Popen(['hadoop', 'fs', '-rm', hdfs_uri], stdout=PIPE, stderr=PIPE, close_fds=False)
    except OSError as e:
        # Best-effort cleanup: report the failure instead of crashing the caller
        logging.info("hdfs-rm: Failed to run hadoop: %s", e)
        return [str(e).encode('utf-8')]

    output, error = p.communicate()
    output = output.splitlines()
    error = error.splitlines()

    # The pipes deliver bytes; decode before logging so messages do not show up as b'...'
    for line in output:
        logging.info("hdfs-rm: %s", line.decode('utf-8', errors='replace'))

    for line in error:
        logging.info("hdfs-rm: %s", line.decode('utf-8', errors='replace'))

    if p.returncode != 0:
        logging.info("hdfs-rm: hadoop exited with status %s", p.returncode)

    return error
|
||||
|
||||
|
||||
def which(program):
|
||||
|
||||
def is_executable(fn):
|
||||
|
@ -727,7 +746,7 @@ if __name__ == '__main__':
|
|||
jmx.main(hdfs_namenodes)
|
||||
|
||||
hbase_hbck_time_s = int((dt.datetime.now() - hbase_hbck_timer_s).total_seconds())
|
||||
logging.info("hbase-hbck: Timer: {0} seconds".format(hbase_hbck_time_s))
|
||||
logging.debug("hbase-hbck: Timer: {0} seconds".format(hbase_hbck_time_s))
|
||||
|
||||
# Do an hbck on the first run and then whenever the interval
|
||||
# between two consecutive runs in seconds is higher than the configured interval
|
||||
|
|
Loading…
Reference in New Issue