Node.js的健康检查(Health Check)
Node.js是许多微服务架构的选择,健康心跳检查对于生产环境中的Node.js非常重要。下面先看一个简单的健康检查,也就是HTTP请求总是返回200:
var http = require('http');
var dispatcher = require('httpdispatcher');
/**
 * Entry point for every incoming HTTP request: hands the request to the
 * httpdispatcher router.
 *
 * If dispatching throws, the error is logged and the response is completed
 * explicitly, so the caller is not left waiting on a response that never
 * ends.
 *
 * @param {http.IncomingMessage} req - the incoming request
 * @param {http.ServerResponse} res - the response to write to
 */
function handleRequest(req, res) {
  try {
    dispatcher.dispatch(req, res);
  } catch (err) {
    console.log(err);
    if (res.headersSent) {
      // A status line has already gone out; it cannot be changed any more,
      // so just finish the response.
      res.end();
    } else {
      res.writeHead(500, {'Content-Type': 'text/plain'});
      res.end('error');
    }
  }
}

// Simplest possible health check: GET /healthcheck always answers 200 "ok".
dispatcher.onGet("/healthcheck", function(request, res) {
  res.writeHead(200, {'Content-Type': 'text/plain'});
  res.end('ok');
});

http.createServer(handleRequest).listen(8080);
在8080端口启动HTTP服务器,对于/healthcheck总是返回200 ok,这是最简单的健康检查,下面我们希望加入更多环境细节。
集群检查
Node是单线程执行的,在多核机器上需要使用Node的集群(cluster)能力把请求分发到各个处理器。默认配置下有一个主进程监听端口,将进来的请求交给worker池,在worker中处理业务逻辑,因此池的大小非常重要。当worker因正常或异常原因退出时,Node会接管worker池的管理并重新启动worker,这时你需要监控worker是否重启得太频繁。下面的例子把阈值设置为每秒5次异常退出。
const cluster = require('cluster');
const http = require('http');
const cpus = require('os').cpus().length;
const movingaverage = require('moving-average');
// Health threshold: the health endpoint reports failure once the moving
// average of unexpected worker exits rises above this value.
const THRESHOLD = 5.0;
// Moving average fed with one sample per unexpected worker exit.
// NOTE(review): the argument is presumably the averaging window in
// milliseconds — confirm against the moving-average package docs.
const ma = movingaverage(1000);

if (cluster.isMaster) {
  // Master: start one worker per CPU.
  for (let i = 0; i < cpus; i++) {
    cluster.fork();
  }

  // Replace every worker that exits. Only exits that were NOT the result of
  // a deliberate disconnect are counted as errors.
  cluster.on('exit', function (worker, code, signal) {
    if (!worker.exitedAfterDisconnect) {
      ma.push(Date.now(), 1);
    }
    cluster.fork();
  });

  // Health-check endpoint, served by the master on its own port (8081) so it
  // stays separate from the customer traffic handled by the workers on 8080.
  http.createServer(function (req, res) {
    // Read the average once so the comparison and the message agree.
    const errorRate = ma.movingAverage();
    // Unhealthy only when the rate EXCEEDS the threshold, matching the
    // wording of the failure message.
    if (errorRate > THRESHOLD) {
      res.writeHead(500, {'Content-Type': 'text/plain'});
      res.end(`${errorRate} errs/sec exceeds threshold of ${THRESHOLD}\n`);
    } else {
      res.writeHead(200, {'Content-Type': 'text/plain'});
      res.end('ok');
    }
  }).listen(8081);
} else {
  // Worker: serves the actual application traffic.
  http.createServer(function (req, res) {
    //serve the real customer request
    res.writeHead(200, {'Content-Type': 'text/plain'});
    res.end('ok');
  }).listen(8080);
}
Kardia
很幸运,有一个NPM模块能够自动完成大部分健康检查工作,称为Kardia。它专门用于Node集群的健康检查,并能汇聚所有worker的状态,只要通过registerHealthcheck实现即可:
const http = require('http');
const cluster = require('cluster');
const cpus = require('os').cpus().length;
/**
 * Request handler used by every worker: answers each request with a
 * plain-text 200 "ok".
 */
const serveCustomerRequest = (req, res) => {
  //serve the real customer request
  res.writeHead(200, {'Content-Type': 'text/plain'});
  res.end('ok');
};

if (!cluster.isMaster) {
  // Worker process: load kardia, then serve application traffic on 8080.
  // NOTE(review): the module is only loaded here, never started — confirm
  // against Kardia's docs whether workers need to call start() as well.
  var kardia = require('kardia');
  http.createServer(serveCustomerRequest).listen(8080);
} else {
  // Master process: fork one worker per CPU ...
  let remaining = cpus;
  while (remaining > 0) {
    cluster.fork();
    remaining -= 1;
  }
  // ... then start Kardia with the service name and the host/port options.
  var Kardia = require('kardia');
  var kardia = Kardia.start({name: "example-service", host: '0.0.0.0', port: 8081});
}
Kardia可以实现事件堆栈、计数器和吞吐量统计,也能和Consul集成,以流的方式传输并汇聚多个主机的统计信息。