背景说明

服务部署在阿里云的 K8s 上,并配置了基于 Prometheus 的 Grafana 监控。原本使用自定义的 metrics 接口做统计、上报部分字段,后来发现 Prometheus 自带的监控非常全面、好用,适合直接抓取统计,因此做了一些改动。

python prometheus-client 安装

pip install prometheus-client

python封装

# encoding: utf-8
from prometheus_client import Counter, Gauge, Summary
from prometheus_client.core import CollectorRegistry
from prometheus_client.exposition import choose_encoder


class monitor:
    """Prometheus metrics for HTTP request handlers.

    All metrics are registered in a dedicated ``CollectorRegistry`` and share
    the label set ``(method, code, uri)``. The ``*_customize`` methods take
    the label values explicitly; their handler-based counterparts read them
    from the handler (``request.method``, ``get_status()``, ``request.path``).
    """

    # Label names shared by every metric registered in __init__.
    _LABEL_NAMES = ("method", "code", "uri")

    def __init__(self):
        # Registry the metrics below are registered in; the /metrics output
        # is rendered from it.
        self.collector_registry = CollectorRegistry(auto_describe=False)
        # Largest request time recorded per path since the last reset.
        self.request_time_max_map = {}

        # Summary of request times.
        self.http_request_summary = Summary(
            name="http_server_requests_seconds",
            documentation="num of request time summary",
            labelnames=self._LABEL_NAMES,
            registry=self.collector_registry,
        )
        # Largest request time.
        self.http_request_max_cost = Gauge(
            name="http_server_requests_seconds_max",
            documentation="number of request max cost",
            labelnames=self._LABEL_NAMES,
            registry=self.collector_registry,
        )
        # Number of failed requests.
        self.http_request_fail_count = Counter(
            name="http_server_requests_error",
            documentation="times of request fail in total",
            labelnames=self._LABEL_NAMES,
            registry=self.collector_registry,
        )
        # Accumulated model prediction time.
        self.http_request_predict_cost = Counter(
            name="http_server_requests_seconds_predict",
            documentation="seconds of prediction cost in total",
            labelnames=self._LABEL_NAMES,
            registry=self.collector_registry,
        )
        # Accumulated image download time.
        self.http_request_download_cost = Counter(
            name="http_server_requests_seconds_download",
            documentation="seconds of download cost in total",
            labelnames=self._LABEL_NAMES,
            registry=self.collector_registry,
        )

    @staticmethod
    def _labels_of(handler):
        """Return the ``(method, status, path)`` label values of *handler*."""
        return handler.request.method, handler.get_status(), handler.request.path

    def get_prometheus_metrics_info(self, handler):
        """Write the registry contents to *handler* as the /metrics response.

        The encoder and content type are chosen from the request's ``accept``
        header. Once the output has been written, the per-path maxima in
        ``request_time_max_map`` are reset to zero.
        """
        encoder, content_type = choose_encoder(handler.request.headers.get('accept'))
        handler.set_header("content-type", content_type)
        handler.write(encoder(self.collector_registry))
        self.reset_request_time_max_map()

    def set_prometheus_request_summary(self, handler):
        """Observe the request time of *handler* and update the max-cost gauge."""
        method, status, path = self._labels_of(handler)
        self.set_prometheus_request_summary_customize(
            method, status, path, handler.request.request_time())

    def set_prometheus_request_summary_customize(self, method, status, path, cost_time):
        """Observe *cost_time* for the given labels and update the max-cost gauge."""
        self.http_request_summary.labels(method, status, path).observe(cost_time)
        self.set_prometheus_request_max_cost_customize(method, status, path, cost_time)

    def set_prometheus_request_fail_count(self, handler, amount=1.0):
        """Add *amount* to the failure counter for the labels of *handler*."""
        method, status, path = self._labels_of(handler)
        self.set_prometheus_request_fail_count_customize(method, status, path, amount)

    def set_prometheus_request_fail_count_customize(self, method, status, path, amount=1.0):
        """Add *amount* to the failure counter for the given labels."""
        self.http_request_fail_count.labels(method, status, path).inc(amount)

    def set_prometheus_request_max_cost(self, handler):
        """Update the max-cost gauge with the request time of *handler*."""
        method, status, path = self._labels_of(handler)
        self.set_prometheus_request_max_cost_customize(
            method, status, path, handler.request.request_time())

    def set_prometheus_request_max_cost_customize(self, method, status, path, cost_time):
        """Set the max-cost gauge when *cost_time* exceeds the stored maximum.

        NOTE(review): the stored maximum is keyed by *path* only, while the
        gauge is labelled by method, status and path, so only the label
        combination that produced the current per-path maximum is written.
        """
        if self.check_request_time_max_map(path, cost_time):
            self.http_request_max_cost.labels(method, status, path).set(cost_time)
            self.request_time_max_map[path] = cost_time

    def set_prometheus_request_predict_cost(self, handler, amount=1.0):
        """Add *amount* to the prediction-time counter for the labels of *handler*."""
        method, status, path = self._labels_of(handler)
        self.set_prometheus_request_predict_cost_customize(method, status, path, amount)

    def set_prometheus_request_predict_cost_customize(self, method, status, path, cost_time):
        """Add *cost_time* to the prediction-time counter for the given labels."""
        self.http_request_predict_cost.labels(method, status, path).inc(cost_time)

    def set_prometheus_request_download_cost(self, handler, amount=1.0):
        """Add *amount* to the download-time counter for the labels of *handler*."""
        method, status, path = self._labels_of(handler)
        self.set_prometheus_request_download_cost_customize(method, status, path, amount)

    def set_prometheus_request_download_cost_customize(self, method, status, path, cost_time):
        """Add *cost_time* to the download-time counter for the given labels."""
        self.http_request_download_cost.labels(method, status, path).inc(cost_time)

    def check_request_time_max_map(self, uri, cost):
        """Return True when *cost* should become the stored maximum for *uri*.

        That is the case when *uri* has no entry yet or its stored value is
        strictly smaller than *cost*.
        """
        stored = self.request_time_max_map
        return uri not in stored or stored[uri] < cost

    def reset_request_time_max_map(self):
        """Set every stored maximum back to 0.0, keeping the known paths."""
        # Only values are reassigned, so iterating the dict itself is safe.
        for key in self.request_time_max_map:
            self.request_time_max_map[key] = 0.0

调用

import tornado
import tornado.ioloop
import tornado.web
import tornado.gen
from datetime import datetime
from tools.monitor import monitor

# Shared monitor instance used by the handlers below. A module-level `global`
# statement binds nothing, so the name is given an explicit placeholder here;
# the real instance is assigned in the `__main__` block.
g_monitor = None

class classifierhandler(tornado.web.RequestHandler):
    """Handle POST requests and record them in the request summary metric."""

    def post(self):
        # todo something you need
        # work....
        # Record the summary, which covers the request count and the time
        # each request took.
        g_monitor.set_prometheus_request_summary(self)
        self.write("ok")


class pinghandler(tornado.web.RequestHandler):
    """Answer HEAD and GET requests with "ok" and record them in the summary metric."""

    def head(self):
        print('info', datetime.now(), "/ping head.")
        g_monitor.set_prometheus_request_summary(self)
        self.write("ok")

    def get(self):
        print('info', datetime.now(), "/ping get.")
        g_monitor.set_prometheus_request_summary(self)
        self.write("ok")


class metricshandler(tornado.web.RequestHandler):
    """Serve the collected metrics on GET."""

    def get(self):
        print('info', datetime.now(), "/metrics get.")
        # The metrics request itself is recorded before the output is rendered.
        g_monitor.set_prometheus_request_summary(self)
        # Return the collected metrics through the metrics endpoint.
        g_monitor.get_prometheus_metrics_info(self)
    

def make_app():
    """Build the Tornado application with the ping, metrics and work routes."""
    # NOTE(review): route patterns are regular expressions, so the trailing
    # `?` makes the last character optional (e.g. "/ping?" also matches
    # "/pin"). Confirm whether plain "/ping" etc. was intended.
    return tornado.web.Application([
        (r"/ping?", pinghandler),
        (r"/metrics?", metricshandler),
        (r"/work?", classifierhandler)
    ])

if __name__ == "__main__":
    # Create the shared monitor before the application starts serving.
    g_monitor = monitor()

    # The original snippet never defined `port`; 8888 is a placeholder to be
    # replaced with the port the deployment actually uses.
    port = 8888

    app = make_app()
    app.listen(port)
    tornado.ioloop.IOLoop.current().start()

metrics返回结果示例

 到此这篇关于详解python prometheus_client使用方式的文章就介绍到这了,更多相关python prometheus_client内容请搜索www.887551.com以前的文章或继续浏览下面的相关文章希望大家以后多多支持www.887551.com!