Parsible – Transforme seu log em informação qualitativa
24 Apr 2014 — Precisava, a partir do log do XEN – LIBXL, obter informações sobre mudanças de estado das máquinas virtuais.
Parsible é uma ferramenta que faz o parse de logs e ajuda a transformá-los em informação qualitativa. O ciclo de leitura do arquivo e o stream linha por linha ficam a cargo do Parsible; sendo assim, o desenvolvedor só precisa escrever uma função de parser.
O Parsible abre uma thread que fica acompanhando o final do arquivo (igual a um tail -f) e já está pronto para rotação de log (log rotate); fica a cargo do desenvolvedor rodar apenas um parser ou carregar todos os parsers.
Fluxo básico (mais detalhes no diagrama de sequência abaixo):
Arquivo atualizado -> parse -> processor -> output.
Output pode chamar um serviço REST, mandar os dados para um Riemann ou Graphite.
Diagramas:
Exemplos:
import re
import logging
# Whitespace-delimited tokens common to both xend log layouts, in line order.
_XEND42_TOKENS = (
    "date", "time", "timeid", "loglevel", "libname",
    "domain", "has", "action", "vmName", "id",
)


def _xend42_regex(names):
    """Compile a pattern with one named, whitespace-free group per token."""
    return re.compile(r"\s".join(r"(?P<%s>[\w\S]+)" % name for name in names))


# The shutdown layout has an eleventh "reason=..." token; the crashed layout
# is matched on the first ten tokens only.
_XEND42_SHUTDOWN_RE = _xend42_regex(_XEND42_TOKENS + ("actionDetail",))
_XEND42_CRASHED_RE = _xend42_regex(_XEND42_TOKENS)


def _xend42_value(token):
    """Return the text after the first "=" of a "key=value" token.

    Raises IndexError when the token contains no "=".
    """
    return token.split("=")[1]


def _xend42_store(fields, key, value):
    """Record one extracted field and trace it at debug level."""
    fields[key] = value
    logging.debug("Get %s=%s, OK!", key, value)


def _xend42_fields(match, has_reason):
    """Build the result dict from a regex match.

    When a "key=value" token has no "=", the error is logged and the fields
    extracted so far are returned, so the dict may be incomplete.
    """
    fields = {}
    try:
        _xend42_store(fields, "libname", match.group("libname"))
        _xend42_store(fields, "loglevel", match.group("loglevel"))
        if has_reason:
            # "reason=poweroff." -> "poweroff" (drop the trailing ".")
            reason = _xend42_value(match.group("actionDetail"))[:-1]
            _xend42_store(fields, "actionDetail", reason)
        # "[2014-03-25" -> "2014-03-25" (drop the leading "[")
        _xend42_store(fields, "date", match.group("date")[1:])
        _xend42_store(fields, "time", match.group("time"))
        # "shutdown:" -> "shutdown" (drop the trailing ":")
        _xend42_store(fields, "action", match.group("action")[:-1])
        if not has_reason:
            # Without a reason token the action name doubles as the detail.
            _xend42_store(fields, "actionDetail", fields["action"])
        _xend42_store(fields, "vmName", _xend42_value(match.group("vmName")))
        _xend42_store(fields, "id", _xend42_value(match.group("id")))
    except IndexError:
        logging.error("Unable parser information in libxl, some regex group missing")
    return fields


def parse_xend42(line):
    """Parse one xend log line describing a domain shutdown or crash.

    Example input lines:

        [2014-03-25 13:17:02 2140] INFO (XendDomainInfo:2079) Domain has shutdown: name=CD976431-linux-centos-6-64b-base-sx-1819 id=530 reason=poweroff.
        [2014-03-26 12:00:33 2140] INFO (XendDomainInfo:2079) Domain has shutdown: name=tempoexpress id=549 reason=reboot.
        [2014-03-26 12:05:16 2140] INFO (XendDomainInfo:2079) Domain has shutdown: name=tempoexpress id=550 reason=poweroff.

    Extracted keys (all values are strings):

        libname       "(XendDomainInfo:2079)"
        loglevel      "INFO"
        actionDetail  "reason=poweroff." -> "poweroff"; for a line without a
                      reason token it is the same text as ``action``
        date          "[2014-03-25" -> "2014-03-25"
        time          "13:17:02"
        action        "shutdown:" -> "shutdown"
        vmName        "name=tempoexpress" -> "tempoexpress"
        id            "id=530" -> "530"

    The "Domain", "has" and "2140]" tokens are matched but ignored for now.

    The 11-token shutdown layout is tried first. The 10-token layout is only a
    fallback: it is a prefix of the shutdown layout, so matching it
    unconditionally would overwrite the parsed reason with the action name.

    :param line: raw log line; surrounding whitespace is stripped.
    :return: dict with the keys above, a partial dict if a "key=value" token
        lacks "=", or an empty dict if the line matches neither layout.
    """
    line = line.strip()

    match = _XEND42_SHUTDOWN_RE.search(line)
    if match:
        return _xend42_fields(match, has_reason=True)

    match = _XEND42_CRASHED_RE.search(line)
    if match:
        return _xend42_fields(match, has_reason=False)

    logging.debug("Not value found on regex")
    return {}
import logging
from audit.plugins.outputs import output_libvirt_events
def process_action_shutdown(result):
    """Forward a parsed shutdown/crash event to the libvirt events output.

    ``result`` is the dict produced by the parser. It must contain the keys
    ``libname``, ``actionDetail``, ``action`` and ``vmName``; otherwise an
    error is logged and nothing is forwarded. Events whose ``libname`` is not
    "(XendDomainInfo:2079)" are ignored without logging. Both "shutdown" and
    "crashed" actions are handed to
    ``output_libvirt_events.broadcast_shutdown`` in the same way; any other
    action is logged as not implemented.

    :param result: mapping of parsed log fields.
    :return: None.
    """
    needed_keys = ("libname", 'actionDetail', "action", "vmName")
    if any(key not in result for key in needed_keys):
        logging.error("Detected shutdown in processor but parse can't get details")
        return

    if result['libname'] != "(XendDomainInfo:2079)":
        return

    action = result['action']
    if action not in ("shutdown", "crashed"):
        logging.error("Detected action not implemented %s" % action)
        return

    # Shutdown and crashed events take exactly the same path.
    vm_name = result['vmName']
    detail = result['actionDetail']
    logging.debug("Shutdown event to vmName=%s and detail=%s " % (vm_name, detail))
    output_libvirt_events.broadcast_shutdown(vm_name, detail)
import logging
from audit import VMProp
# import deleted from code
# import deleted from code
def broadcast_shutdown(vmName, actionDetail):
    """Report a VM shutdown, crash or reboot through ``API``.

    ``actionDetail`` is compared against the lower-cased ``VMProp`` detail
    constants and dispatched as follows:

    * ``VMProp.vmState_SHUTTING_DOWN_DETAIL`` ->
      ``API.set_vm_shutdown(vmName, VMProp.vmState_POWEROFF)``
    * ``VMProp.vmState_POWEROFF_CRASHED`` -> ``API.set_vm_crashed(vmName)``
    * ``VMProp.vmState_POWEROFF_REBOOT`` ->
      ``API.set_vm_shutdown(vmName, VMProp.vmState_POWEROFF_REBOOT)``

    Any other value is logged as an error and nothing is reported.

    NOTE(review): ``Utils`` and ``API`` are not imported in the visible
    source (their imports were removed from the published snippet); confirm
    they are in scope in the real module.

    :param vmName: name of the virtual machine the event refers to.
    :param actionDetail: detail string extracted by the parser.
    :return: None.
    """
    if actionDetail == VMProp.vmState_SHUTTING_DOWN_DETAIL.lower():
        # Pass the callee name as a logging argument so "%s" is substituted;
        # concatenating it left a literal "%s" in the emitted message.
        logging.debug("output called with action %s", Utils.get_callee_name())
        API.set_vm_shutdown(vmName, VMProp.vmState_POWEROFF)
    elif actionDetail == VMProp.vmState_POWEROFF_CRASHED.lower():
        logging.debug("output called with action %s", Utils.get_callee_name())
        API.set_vm_crashed(vmName)
    elif actionDetail == VMProp.vmState_POWEROFF_REBOOT.lower():
        logging.debug("output called with action %s", Utils.get_callee_name())
        API.set_vm_shutdown(vmName, VMProp.vmState_POWEROFF_REBOOT)
    else:
        logging.error("Trying broadcast a new shutdown details but don't recognized actionDetail=%s ", actionDetail)
Referências e recursos:
http://tech.yipit.com/2012/08/03/parsible-straightforward-log-parsing/
http://github.com/Yipit/parsible

