Parsible – Transforme seu log em informação qualitativa

Eu precisava, a partir do log do XEN – LIBXL, obter informações sobre as mudanças de estado das máquinas virtuais.

Parsible é uma ferramenta que faz o parsing de logs e ajuda a transformá-los em informação qualitativa. O ciclo de leitura do arquivo e o stream linha por linha ficam a cargo do Parsible; sendo assim, o desenvolvedor só precisa escrever uma função de parser.

O Parsible abre uma thread que fica lendo o arquivo a partir do final, acompanhando as novas linhas (como um tail -f). Ele já está preparado para a rotação de logs, e fica a cargo do desenvolvedor rodar apenas um parser ou carregar todos os parsers.

Fluxo básico (mais detalhes no diagrama de sequência abaixo):

Arquivo atualizado -> parse -> processor -> output.

Output pode chamar um serviço REST, mandar os dados para um Riemann ou Graphite.

Diagramas:

Abstract_Parsible_Class_Diagram

Abstract_Flow_Parsible_Sequence_Diagram

Exemplos:

import re
import logging


# Names of the whitespace-separated tokens shared by both line layouts below.
_XEND42_BASE_FIELDS = (
    "date", "time", "timeid", "loglevel", "libname",
    "domain", "has", "action", "vmName", "id",
)

# Short layout: ten tokens, no trailing detail token (used for crashed events).
_XEND42_CRASHED_RE = re.compile(
    r"\s".join(r"(?P<%s>\S+)" % name for name in _XEND42_BASE_FIELDS)
)

# Detailed layout: the ten tokens above plus an eleventh "reason=<detail>." token.
_XEND42_SHUTDOWN_RE = re.compile(
    _XEND42_CRASHED_RE.pattern + r"\s(?P<actionDetail>\S+)"
)


def _xend42_extract(match, has_detail):
    """Build the result dict from a regex match of one of the xend42 patterns.

    :param match: match object produced by one of the ``_XEND42_*_RE`` patterns.
    :param has_detail: True when *match* carries the ``actionDetail`` group.
    :return: ``(fields, complete)``. ``complete`` is False when a ``key=value``
        token had no ``=``; extraction stops there and *fields* is partial.
    """
    fields = {}

    def keep(key, value):
        fields[key] = value
        logging.debug("Get %s=%s, OK!", key, value)

    try:
        keep("libname", match.group("libname"))
        keep("loglevel", match.group("loglevel"))
        keep("date", match.group("date")[1:])  # drop the leading "["
        keep("time", match.group("time"))
        keep("action", match.group("action")[:-1])  # drop the trailing ":"
        if has_detail:
            # "reason=poweroff." -> "poweroff" (value after "=", minus the final ".")
            keep("actionDetail", match.group("actionDetail").split("=")[1][:-1])
        else:
            # No detail token on the line: the action doubles as its own detail.
            keep("actionDetail", fields["action"])
        keep("vmName", match.group("vmName").split("=")[1])
        keep("id", match.group("id").split("=")[1])
    except IndexError:
        logging.error("Unable parser information in libxl, some regex group missing")
        return fields, False
    return fields, True


def parse_xend42(line):
    """
    Parse one xend log line describing a domain shutdown or crash.

        Example lines:
            [2014-03-25 13:17:02 2140] INFO (XendDomainInfo:2079) Domain has shutdown: name=CD976431-linux-centos-6-64b-base-sx-1819 id=530 reason=poweroff.

            [2014-03-26 12:00:33 2140] INFO (XendDomainInfo:2079) Domain has shutdown: name=tempoexpress id=549 reason=reboot.

        Extracted keys:
            libname       (XendDomainInfo:2079)
            loglevel      INFO
            actionDetail  reason=poweroff.  -> poweroff
            date          [2014-03-25       -> 2014-03-25
            time          13:17:02
            action        shutdown:         -> shutdown
            vmName        name=<vm>         -> <vm>
            id            id=530            -> 530

        Matched but ignored for now: domain, has, timeid.

    The detailed layout (with a trailing ``reason=...`` token) is tried first.
    When it does not match, or its tokens cannot be split, the short layout is
    used instead and ``actionDetail`` is set to the action itself.

    :param line: raw log line; surrounding whitespace is stripped.
    :return: dict with the keys above; ``{}`` when the line matches neither
        layout; a partial dict when a ``key=value`` token lacks ``=``.
    """
    line = line.strip()

    shutdown = _XEND42_SHUTDOWN_RE.search(line)
    if shutdown:
        fields, complete = _xend42_extract(shutdown, has_detail=True)
        if complete:
            # Return here so the short pattern (a prefix of this one, hence
            # always matching too) cannot overwrite the parsed reason.
            return fields

    crashed = _XEND42_CRASHED_RE.search(line)
    if crashed:
        fields, _ = _xend42_extract(crashed, has_detail=False)
        return fields

    logging.debug("Not value found on regex")
    return {}
import logging
from audit.plugins.outputs import output_libvirt_events


def process_action_shutdown(result):
    """
    Forward a parsed shutdown/crash event to the libvirt events output.

    :param result: mapping of parsed fields. It must contain the keys
        ``libname``, ``actionDetail``, ``action`` and ``vmName``; otherwise an
        error is logged and nothing is forwarded.
    :return: None.

    Only events whose ``libname`` is ``(XendDomainInfo:2079)`` and whose
    ``action`` is ``shutdown`` or ``crashed`` are forwarded. Anything else is
    logged as an error naming the unsupported value.
    """
    required = ("libname", "actionDetail", "action", "vmName")
    if not all(key in result for key in required):
        logging.error("Detected shutdown in processor but parse can't get details")
        return

    if result["libname"] != "(XendDomainInfo:2079)":
        logging.error("Detected libname not implemented %s" % result["libname"])
        return

    # Both actions take the same path; the output distinguishes them through
    # the detail value it receives.
    if result["action"] not in ("shutdown", "crashed"):
        logging.error("Detected action not implemented %s" % result["action"])
        return

    logging.debug("Shutdown event to vmName=%s and detail=%s " % (result["vmName"], result["actionDetail"]))
    output_libvirt_events.broadcast_shutdown(result["vmName"], result["actionDetail"])
import logging
from audit import VMProp
# import deleted from code
# import deleted from code

def broadcast_shutdown(vmName, actionDetail):
    """
    Report a VM shutdown-type event to the API according to its detail.

    :param vmName: name of the virtual machine the event refers to.
    :param actionDetail: detail string of the event; it is compared against
        the lower-cased ``VMProp`` detail constants.
    :return: None.

    Dispatch:
        - ``VMProp.vmState_SHUTTING_DOWN_DETAIL`` -> ``API.set_vm_shutdown``
          with ``VMProp.vmState_POWEROFF``
        - ``VMProp.vmState_POWEROFF_CRASHED``     -> ``API.set_vm_crashed``
        - ``VMProp.vmState_POWEROFF_REBOOT``      -> ``API.set_vm_shutdown``
          with ``VMProp.vmState_POWEROFF_REBOOT``
        - anything else is logged as an error and ignored.

    NOTE(review): ``Utils`` and ``API`` are not imported in the visible code
    (their imports were removed); they must be in scope for this to run.
    """
    if actionDetail == VMProp.vmState_SHUTTING_DOWN_DETAIL.lower():
        # Pass the value as a logging argument so "%s" is actually substituted.
        logging.debug("output called with action %s", Utils.get_callee_name())
        API.set_vm_shutdown(vmName, VMProp.vmState_POWEROFF)

    elif actionDetail == VMProp.vmState_POWEROFF_CRASHED.lower():
        logging.debug("output called with action %s", Utils.get_callee_name())
        API.set_vm_crashed(vmName)

    elif actionDetail == VMProp.vmState_POWEROFF_REBOOT.lower():
        logging.debug("output called with action %s", Utils.get_callee_name())
        API.set_vm_shutdown(vmName, VMProp.vmState_POWEROFF_REBOOT)

    else:
        logging.error("Trying broadcast a new shutdown details but don't recognized actionDetail=%s " % actionDetail)

Referências e recursos:

http://tech.yipit.com/2012/08/03/parsible-straightforward-log-parsing/

http://github.com/Yipit/parsible

https://github.com/paulorcf/parsible

parsible_conceptual_diagram_jude_v1.tar