Rößner-Network-Solutions

Commit 66660e57 authored by Patrick Ben Koetter's avatar Patrick Ben Koetter
Browse files

Initial commit

        Added base files to repo
parent 96a7453d
= Authors
Christian Roessner
cr@sys4.de
Franziskanerstrasse 15
81669 Muenchen
Deutschland
Patrick Ben Koetter
p@sys4.de
Franziskanerstrasse 15
81669 Muenchen
Deutschland
= Loganon
loganon is a generic log anonymizer. It expects an input, an output and a rules
file. A rules file contains search and replace patterns (regular expressions).
This repository contains 'loganon', the command to anonymize log files. It also
contains `loganon.sh`, a script to be called from a logrotate configuration, and
an example rules file `mail.rules` (for Postfix and Dovecot).
== Contributions welcome!
Each program has its own way of logging. If you use 'loganon' and create a rule
file for an application 'loganon' doesn't cover yet, please consider
contributing it. Each contribution makes 'loganon' more useful.
== License
loganon is free software: you can redistribute it and/or modify it under the
terms of the GNU Lesser General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option) any
later version.
loganon is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with loganon. If not, see <http://www.gnu.org/licenses/>.
== Copyright
Copyright sys4 AG <https://sys4.de> 2015
// vim: set ft=asciidoc:
#!/usr/bin/env python
# copyright sys4 AG 2015
# This file is part of loganon.
#
# loganon is free software: you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# loganon is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with loganon. If not, see <http://www.gnu.org/licenses/>.
import os
import sys
import re
import yaml
from getopt import getopt
from netaddr import IPAddress, IPNetwork
from netaddr.core import AddrFormatError
try:
from collections import OrderedDict
except:
from ordereddict import OrderedDict
def ordered_load(stream, Loader=yaml.Loader, object_pairs_hook=OrderedDict):
    """Load YAML from ``stream``, building every mapping with
    ``object_pairs_hook`` (an OrderedDict by default) instead of dict().

    The mapping constructor is registered on a local subclass of ``Loader``
    rather than on ``Loader`` itself.
    """
    class _HookedLoader(Loader):
        pass

    def _construct_mapping(loader, node):
        # Hand the node's key/value pairs to the caller-supplied factory.
        return object_pairs_hook(loader.construct_pairs(node))

    mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    _HookedLoader.add_constructor(mapping_tag, _construct_mapping)
    return yaml.load(stream, _HookedLoader)
def usage():
    """Print a short usage summary to stdout.

    The program name shown on the first line is the basename of this source
    file. The text is written with ``sys.stdout.write`` so the function
    works the same on Python 2 and Python 3.
    """
    text = """%s [options]

  -h, --help                   prints out this help
  -i, --input=file             log file to read
  -o, --output=file            output result to this file
  -r, --rules=file1,file2,...  comma separated list of rule files

Optional:
  -4, --mask4=number           number of bits to mask an IPv4 address
  -6, --mask6=number           number of bits to mask an IPv6 address
  -t, --test                   test pattern and print output to stdout
""" % os.path.basename(__file__)
    # The print statement used previously appended a newline of its own.
    sys.stdout.write(text + "\n")
def _die(message, code):
    """Write ``message`` to stderr and exit the process with status ``code``."""
    sys.stderr.write("%s\n" % message)
    sys.exit(code)


def main():
    """Anonymize a log file as requested on the command line.

    Steps:

    1. Parse the options described by ``usage()``.
    2. Load every rule file (YAML) and compile its search patterns.
    3. Copy the input file line by line to the output file, or to stdout in
       test mode. Each line first runs through every search/replace rule;
       afterwards everything that looks like an IPv4 or IPv6 address is
       masked with the configured netmask.

    The function leaves through ``sys.exit()`` with ``os.EX_USAGE`` on bad
    options or bad rules and with ``os.EX_IOERR`` when a file cannot be
    opened. Only constructs valid on both Python 2.6+ and Python 3 are used.
    """
    # input file argument
    fdinarg = None
    # output file argument
    fdoutarg = None
    # A list of file names containing rule YAML definitions
    rules = None
    # Test mode: print to stdout instead of writing an output file
    test = False
    # A list of YAML parsed structures, one per rule file
    rules_collection = []
    # rule name -> (compiled search pattern, replacement), in file order
    rule_data = OrderedDict()
    # Default IPv4 bit mask
    bitmask4 = IPAddress("255.255.0.0")
    # Default IPv6 bit mask
    bitmask6 = IPAddress("ffff:ffff:ff00::")

    # Pre-compile IPv4/IPv6 candidate patterns. They are deliberately loose;
    # reduce_ip() below decides whether a match really is an address.
    ipv4 = re.compile(r"[1-9][0-9]{0,2}\.[0-9.]{3,7}\.[0-9]{1,3}")
    ipv6 = re.compile(r"([1-9a-fA-F][0-9a-fA-F]{3}):"
                      r"[0-9a-fA-F:]{2,29}[0-9a-fA-F]{1,4}")

    # Read command line options
    try:
        opts = getopt(sys.argv[1:],
                      "hi:o:r:4:6:t",
                      ["help",
                       "input=",
                       "output=",
                       "rules=",
                       "mask4=",
                       "mask6=",
                       "test"])[0]

        for opt, optarg in opts:
            if opt in ("-h", "--help"):
                usage()
                sys.exit()
            elif opt in ("-i", "--input"):
                fdinarg = str(optarg)
            elif opt in ("-o", "--output"):
                fdoutarg = str(optarg)
            elif opt in ("-r", "--rules"):
                rules = str(optarg).split(",")
            elif opt in ("-4", "--mask4"):
                bitmask4 = IPNetwork("0.0.0.0/%i" % int(optarg)).netmask
            elif opt in ("-6", "--mask6"):
                bitmask6 = IPNetwork("::/%i" % int(optarg)).netmask
            elif opt in ("-t", "--test"):
                test = True
            else:
                usage()
                sys.exit(os.EX_USAGE)

        # Required: input file and pattern
        if fdinarg is None or rules is None:
            usage()
            sys.exit(os.EX_USAGE)

        # If we are not in test mode, an output file is required
        if not test and fdoutarg is None:
            usage()
            sys.exit(os.EX_USAGE)
    except Exception as e:
        # Bad option, non-numeric mask, invalid prefix length, ...
        # (sys.exit() raises SystemExit, which is not caught here.)
        _die("Syntax error: %s" % e, os.EX_USAGE)

    # Read all rules
    try:
        for rule in rules:
            with open(rule, "r") as fd_rule:
                rules_collection.append(ordered_load(fd_rule,
                                                     yaml.SafeLoader,
                                                     OrderedDict))
    except IOError as e:
        _die("IOError: %s" % e, os.EX_IOERR)
    except Exception as e:
        _die("Unknown error: %s" % e, os.EX_USAGE)

    # Build the search/replace table. Layout of a rule file:
    #   service -> rule name -> list of one-key mappings (search / replace)
    for rule_entity in rules_collection:
        if not rule_entity:
            # An empty rule file loads as None; there is nothing to add.
            continue
        for ruledef in rule_entity.values():
            for rulename, rulepattern in ruledef.items():
                search = None
                replace = None
                for patterndef in rulepattern:
                    search = patterndef.get("search", search)
                    replace = patterndef.get("replace", replace)

                if search is None:
                    _die("Missing <search> tag", os.EX_USAGE)
                if replace is None:
                    _die("Missing <replace> tag", os.EX_USAGE)

                try:
                    # NOTE: rules are keyed by name only, so a rule defined
                    # later under the same name replaces the earlier one.
                    rule_data[rulename] = (re.compile(search), replace)
                except Exception as e:
                    _die("Syntax error in <search> or "
                         "<replace> pattern: %s" % e, os.EX_USAGE)

    def reduce_ip(matchobj):
        """Return the matched text with the configured netmask applied.

        Replacement callback for the ``ipv4`` and ``ipv6`` patterns. Text
        that is not a valid IP address, and anything that starts with
        ``127.0.0.``, is returned unchanged.
        """
        candidate = matchobj.group(0)

        # simple tests
        if candidate.startswith("127.0.0."):
            return candidate

        maybe_ip = False
        parts = candidate.split(".")
        if len(parts) == 4:
            # Dotted quad: every part must be a number in 0..255.
            for octet in parts:
                try:
                    if ":" in octet or int(octet) < 0 or int(octet) > 255:
                        return candidate
                except ValueError:
                    return candidate
                maybe_ip = True
        elif len(parts) == 1:
            # No dots at all: IPv6 candidate if it contains a colon.
            if len(candidate.split(":")) >= 2:
                maybe_ip = True

        if not maybe_ip:
            return candidate

        try:
            ip = IPAddress(candidate)
        except AddrFormatError:
            # might be something else than an IPv6 address
            return candidate

        if ip.version == 4:
            return str(bitmask4 & ip)
        return str(bitmask6 & ip)

    fd_in = None
    fd_out = None
    try:
        # Open input and output files
        try:
            fd_in = open(fdinarg, "r")
            if not test:
                fd_out = open(fdoutarg, "w")
        except IOError as e:
            _die("IOError: %s" % e, os.EX_IOERR)
        except Exception as e:
            _die("Unknown error: %s" % e, os.EX_USAGE)

        for line in fd_in:
            # Phase 1 - search and replace pattern
            for pattern, replacement in rule_data.values():
                try:
                    line = pattern.sub(replacement, line)
                except Exception as e:
                    # Best effort: report the failing rule, keep the line
                    # as it is and carry on with the next rule.
                    sys.stderr.write("%s\n" % e)

            # Phase 2 - find IPv4/IPv6 address
            line = ipv4.sub(reduce_ip, line)
            line = ipv6.sub(reduce_ip, line)

            if test:
                sys.stdout.write(line.strip() + "\n")
            else:
                fd_out.write(line)
    finally:
        # Close input and output files, also when leaving through an error
        if fd_in is not None:
            fd_in.close()
        if fd_out is not None:
            fd_out.close()
if __name__ == "__main__":
    # Script entry point: run the anonymizer and, once main() has returned,
    # report success to the calling shell.
    main()
    raise SystemExit(os.EX_OK)
# vim: ts=4 sw=4 expandtab
#!/bin/bash
# copyright sys4 AG 2015
# This file is part of loganon.
#
# loganon is free software: you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# loganon is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with loganon. If not, see <http://www.gnu.org/licenses/>.
export PATH="$PATH:/usr/local/sbin"

log_dir=/var/log/mail
rules_file=/usr/local/etc/mail.rules

# Date suffix of the rotated logs to anonymize (needs GNU date for -d).
day=$(date +%Y%m%d -d "5 day ago")

# Anonymize one rotated, bzip2-compressed log in place.
#   $1 - base name of the log inside $log_dir, e.g. "mail.log"
# The original log is only replaced when loganon finished successfully, so
# a failed run can no longer overwrite it with a missing or partial file.
anonymize_log() {
    local log="$log_dir/$1-$day"

    if [ -f "$log.bz2" ]; then
        bunzip2 "$log.bz2" || return 1
    fi

    # Nothing was rotated for that day (or it is already gone).
    [ -f "$log" ] || return 0

    if loganon -i "$log" -o "$log-conv" -r "$rules_file"; then
        mv "$log-conv" "$log"
    else
        echo "loganon failed for $log, keeping the original file" >&2
        rm -f "$log-conv"
    fi

    # Compress again in either case so the log ends up as it was found.
    bzip2 "$log"
}

# mail.log:
anonymize_log mail.log

# dovecot.log:
anonymize_log dovecot.log

# The exit status is always 0, as before.
# NOTE(review): presumably so that a failed anonymization never blocks the
# log rotation this script is called from - confirm.
exit 0
This directory holds example logrotate configurations.
# Example logrotate stanza for the two mail logs handled by loganon.sh.
# Both logs share the settings below; loganon.sh is run from the prerotate
# hook, i.e. before the logs are rotated.
/var/log/mail/mail.log
/var/log/mail/dovecot.log
{
# Keep 14 rotated logs, rotating once a day.
rotate 14
daily
# A missing log is not an error; empty logs are not rotated.
missingok
notifempty
# Compress rotated logs with "bzip2 -9"; compressed files end in .bz2.
compress
compresscmd /usr/bin/bzip2
compressoptions -9
compressext .bz2
# Rotated logs get a date suffix instead of a number. loganon.sh looks for
# files named <log>-YYYYMMDD.bz2.
dateext
# Compression of a rotated log is postponed to the following rotation.
delaycompress
# Run the prerotate script once for the whole stanza, not once per log.
sharedscripts
prerotate
/usr/local/sbin/loganon.sh
endscript
}
# Makefile to create documentation
# Patrick Ben Koetter, p@sys4.de
#
# Copyright (C) 2015, sys4 AG
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
SHELL = /bin/sh

# AsciiDoc sources. The file name carries the man section: <name>.<N>.txt
SOURCES = loganon.1.txt

# Substitutions
# HTMLS= $(patsubst %.txt,%.html,$(SOURCES))
# a2x writes the man page as <name>.<N> (e.g. loganon.1), which is also
# what "clean" removes, so that is the file make has to track.
MANS= $(patsubst %.txt,%,$(SOURCES))

# Build Targets
# None of these names is a file; never let a file or directory called
# "all", "man" or "clean" make them look up to date.
.PHONY: all man clean

all: man

#html: $(HTMLS)

man: $(MANS)

# Build commands
%.html: %.txt
	asciidoc -b html5 $<

# Static pattern rule: only the files listed in MANS are built this way.
$(MANS): %: %.txt
	a2x --doctype manpage --format manpage $<

# Kept so that "make loganon.1.man" still works. No *.man file is ever
# created, so this rule always re-runs a2x.
%.man: %.txt
	a2x --doctype manpage --format manpage $<

# Defaults
clean:
	rm -f *.html *.1 *.5 *.8
'\" t
.\" Title: loganon
.\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.78.1 <http://docbook.sf.net/>
.\" Date: 04/30/2015
.\" Manual: loganon Manual
.\" Source: loganon 0.1
.\" Language: English
.\"
.TH "LOGANON" "1" "04/30/2015" "loganon 0\&.1" "loganon Manual"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.\" http://bugs.debian.org/507673
.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.ie \n(.g .ds Aq \(aq
.el .ds Aq '
.\" -----------------------------------------------------------------
.\" * set default formatting
.\" -----------------------------------------------------------------
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
.ad l
.\" -----------------------------------------------------------------
.\" * MAIN CONTENT STARTS HERE *
.\" -----------------------------------------------------------------
.SH "NAME"
loganon \- anonymize log files
.SH "SYNOPSIS"
.sp
\fBloganon\fR \fI\-i inputfile\fR \fI\-r rulefile\fR \fI\-o outputfile\fR [\fI\-4 xx\fR] [\fI\-6 xx\fR] [\fI\-t\fR]
.SH "DESCRIPTION"
.sp
loganon is a log anonymizer\&. It takes log lines from an input file, processes them with search and replace patterns from a rules file and sends the result to an output file\&.
.SH "OPTIONS"
.PP
\-i, \-\-input=file (mandatory)
.RS 4
Name of the file to read input from\&.
.RE
.PP
\-o, \-\-output=file (mandatory unless \-t is given)
.RS 4
Name of the file to write anonymized output to\&.
.RE
.PP
\-r, \-\-rules=file1,file2,\&.\&.\&. (mandatory)
.RS 4
Name of one or more rule files containing search and replace patterns for log anonymization\&.
.RE
.PP
\-4, \-\-mask4=number (optional)
.RS 4
Number of bits to mask an IPv4 address\&.
.RE
.PP
\-6, \-\-mask6=number (optional)
.RS 4
Number of bits to mask an IPv6 address\&.
.RE
.PP
\-t, \-\-test (optional)
.RS 4
Test mode: apply the patterns and print the result to stdout instead of writing an output file\&.
.RE
.SH "BUGS"
.sp
Please submit BUGS to https://github\&.com/sys4/loganon/issues\&.
.SH "AUTHOR"
.sp
Christian Roessner <cr@sys4\&.de> wrote the program\&. Patrick Ben Koetter <p@sys4\&.de> wrote this man page\&.
.SH "RESOURCES"
.sp
The home of loganon is at https://github\&.com/sys4/loganon\&.
.SH "COPYING"
.sp
Copyright (C) 2015 sys4 AG\&. Free use of this software is granted under the terms of the GNU Lesser General Public License (LGPL)\&.
loganon(1)
==========
:doctype: manpage
:man source: loganon
:man version: 0.1
:man manual: loganon Manual
NAME
----
loganon - anonymize log files
SYNOPSIS
--------
*loganon* '-i inputfile' '-r rulefile' '-o outputfile' ['-4 xx'] ['-6 xx'] ['-t']
DESCRIPTION
-----------
loganon is a log anonymizer. It takes log lines from an input file, processes
them with search and replace patterns from a rules file and sends the result to
an output file.
OPTIONS
-------
`-i, --input=file` (mandatory)::
Name of the file to read input from.
`-o, --output=file` (mandatory unless `-t` is given)::
Name of the file to write anonymized output to.
`-r, --rules=file1,file2,...` (mandatory)::
Name of one or more rule files containing search and replace patterns for log anonymization.
`-4, --mask4=number` (optional)::
Number of bits to mask an IPv4 address.
`-6, --mask6=number` (optional)::
Number of bits to mask an IPv6 address.
`-t, --test` (optional)::
Test mode: apply the patterns and print the result to stdout instead of writing an output file.
BUGS
----
Please submit BUGS to <https://github.com/sys4/loganon/issues>.
AUTHOR
------
Christian Roessner <cr@sys4.de> wrote the program. Patrick Ben Koetter <p@sys4.de> wrote this man page.
RESOURCES
---------
The home of loganon is at <https://github.com/sys4/loganon>.
COPYING
-------
Copyright \(C) 2015 sys4 AG. Free use of this software is granted under the terms of the GNU Lesser General Public License (LGPL).
// vim: set ft=asciidoc:
## {{{ http://code.activestate.com/recipes/576693/ (r9)
# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
# Passes Python2.7's test suite and incorporates all the latest updates.
# Note from Christian Roessner:
# As far as we know, this file is licensed under the MIT license.
# We have not changed any line of code here, except for adding this comment. For
# a discussion look at the provided URL above.
try:
from thread import get_ident as _get_ident
except ImportError:
from dummy_thread import get_ident as _get_ident
try:
from _abcoll import KeysView, ValuesView, ItemsView
except ImportError:
pass
class OrderedDict(dict):
'Dictionary that remembers insertion order'
# An inherited dict maps keys to values.
# The inherited dict provides __getitem__, __len__, __contains__, and get.
# The remaining methods are order-aware.
# Big-O running times for all methods are the same as for regular dictionaries.
# The internal self.__map dictionary maps keys to links in a doubly linked list.
# The circular doubly linked list starts and ends with a sentinel element.
# The sentinel element never gets deleted (this simplifies the algorithm).
# Each link is stored as a list of length three: [PREV, NEXT, KEY].
def __init__(self, *args, **kwds):
'''Initialize an ordered dictionary. Signature is the same as for
regular dictionaries, but keyword arguments are not recommended
because their insertion order is arbitrary.
'''
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
try:
self.__root
except AttributeError:
self.__root = root = [] # sentinel node
root[:] = [root, root, None]
self.__map = {}
self.__update(*args, **kwds)
def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
'od.__setitem__(i, y) <==> od[i]=y'
# Setting a new item creates a new link which goes at the end of the linked
# list, and the inherited dictionary is updated with the new key/value pair.
if key not in self:
root = self.__root
last = root[0]
last[1] = root[0] = self.__map[key] = [last, root, key]
dict_setitem(self, key, value)