# Source code for pysys.mappers

#!/usr/bin/env python
# PySys System Test Framework, Copyright (C) 2006-2020 M.B. Grieve

# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.

# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.

# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""
Mappers that filter or transform lines of input, for use with methods such as `pysys.basetest.BaseTest.copy`. 

.. autosummary::
	RegexReplace
	IncludeLinesBetween
	IncludeLinesMatching
	ExcludeLinesMatching

.. versionadded:: 1.6.0
"""

import logging
import re
from pysys.utils.pycompat import isstring

log = logging.getLogger('pysys.mappers')


class RegexReplace(object):
	"""
	Mapper that transforms lines by replacing all character sequences matching the specified regular expression.
	
	For example::
	
		self.copy('myfile.txt', 'myfile-processed.txt', mappers=[RegexReplace(RegexReplace.DATETIME_REGEX, '<timestamp>')])
	
	:param str|compiled_regex regex: The regular expression to search for.
	:param str replacement: The string to replace it with. This can contain backslash references to groups in the
		regex; see ``re.sub()`` in the Python documentation for more information.


	>>> RegexReplace(RegexReplace.DATETIME_REGEX, '<timestamp>')('Test string x=2020-07-15T19:22:34+00:00.')
	'Test string x=<timestamp>.'

	>>> RegexReplace(RegexReplace.DATETIME_REGEX, '<timestamp>')('Test string x=2020-07-15T19:22:34.123+00:00.')
	'Test string x=<timestamp>.'

	>>> RegexReplace(RegexReplace.DATETIME_REGEX, '<timestamp>')('Test string x=5/7/2020 19:22:34.1234.')
	'Test string x=<timestamp>.'

	>>> RegexReplace(RegexReplace.DATETIME_REGEX, '<timestamp>')('Test string x=20200715T192234Z.')
	'Test string x=<timestamp>.'

	>>> RegexReplace(RegexReplace.DATETIME_REGEX, '<timestamp>')('Test string x=20200715T192234+0100.')
	'Test string x=<timestamp>.'

	>>> RegexReplace(RegexReplace.NUMBER_REGEX, '<number>')('Test string x=123.')
	'Test string x=<number>.'

	>>> RegexReplace(RegexReplace.NUMBER_REGEX, '<number>')('Test string x=-12.45e+10.')
	'Test string x=<number>.'
	"""
	
	# Both alternatives end with an optional fractional-seconds part followed by an optional zone designator
	# ("Z", "+hh", "+hhmm" or "+hh:mm"), so that a timestamp carrying both (e.g. "...34.123+00:00") is matched as
	# a whole rather than leaving the offset behind.
	DATETIME_REGEX = '(%s)'%'|'.join([
		# extended format: optional date, then hh:mm:ss
		'([0-9]{1,4}[/-][0-9]{1,2}[/-][0-9]{2,4}[ T]?)?[0-9]{1,2}:[0-9]{2}:[0-9]{2}([.][0-9]+)?(Z|[+-][0-9][0-9](:?[0-9][0-9])?)?',
		# basic (separator-free) format: yyyymmddThhmmss
		'[0-9]{8}T[0-9]{6}([.][0-9]+)?(Z|[+-][0-9][0-9](:?[0-9][0-9])?)?',
		])
	"""A regular expression that can be used to match timestamps in ISO 8601 format and other common alternatives such as:
	"2020-07-15T19:22:34+00:00", 
	"2020-07-15T19:22:34.123+00:00", 
	"5/7/2020 19:22:34.1234", 
	"20200715T192234Z", 
	"20200715T192234+0100"
	
	"""

	NUMBER_REGEX = '[+-]?[0-9]+([.][0-9]+)?([eE][-+]?[0-9]+)?'
	"""A regular expression that matches integer or floating point numbers (with an optional sign, fractional part 
	and exponent), for example to replace them all with "<number>". 
	
	This is useful for removing ids that would make diff-ing files more difficult, if you only care about validating 
	the non-numeric text.

	"""

	def __init__(self, regex, replacement):
		# Capture the display form up-front, while ``regex`` is still exactly what the caller passed in.
		self.__str = 'RegexReplace(%s, %s)'%(regex, replacement)
		# Strings are compiled once here; anything else is used as-is and must provide a ``sub`` method.
		self.regex = re.compile(regex) if isstring(regex) else regex
		self.repl = replacement

	def __call__(self, line):
		"""Return ``line`` with every match of the regular expression replaced by the replacement."""
		return self.regex.sub(self.repl, line)

	def __repr__(self): return self.__str


class IncludeLinesBetween(object):
	"""
	Mapper that filters out all lines except those within a range of expressions. 
	
	This is useful when a log file contains lots of data you don't care about, in addition to some multi-line sequences 
	that you want to extract (with `pysys.basetest.BaseTest.copy`) ready for `pysys.basetest.BaseTest.assertDiff`.
	
	As this mapper is stateful, do not use a single instance of it in multiple tests (or multiple threads). 
	
	The following parameters can be either a callable/lambda that accepts an input line and returns a boolean, or a 
	regular expression string to search for in the specified line. Regular expressions are compiled when the mapper 
	is constructed, so an invalid expression is reported immediately. 
	
	:param str|callable[str]->bool startAt: If it matches then the current line and subsequent lines are included 
		(not filtered out). If not specified, lines from the start of the file onwards are matched. 
		
	:param str|callable[str]->bool stopAfter: If it matches then lines after the current one are filtered out 
		(unless/until a line matching startAt is found). Includes the stop line. 
		
	:param str|callable[str]->bool stopBefore: If it matches then this line and lines after it are filtered out 
		(unless/until a line matching startAt is found). Excludes the stop line. 
		
	>>> def test_IncludeLinesBetween(mapper, input): return ','.join(x for x in (mapper(line) for line in input.split(',')) if x is not None)
	>>> test_IncludeLinesBetween( IncludeLinesBetween('start.*', 'stopafter.*'), 'a,start line,b,c,stopafter line,d,start line2,e')
	'start line,b,c,stopafter line,start line2,e'

	>>> test_IncludeLinesBetween( IncludeLinesBetween(startAt='start.*'), 'a,start line,b,c')
	'start line,b,c'

	>>> test_IncludeLinesBetween( IncludeLinesBetween(stopAfter='stopafter.*'), 'a,stopafter,b,c')
	'a,stopafter'

	>>> test_IncludeLinesBetween( IncludeLinesBetween(stopBefore='stopbefore.*'), 'a,b,stopbefore,c')
	'a,b'

	"""
	def __init__(self, startAt=None, stopAfter=None, stopBefore=None):
		# A list of pairs (rather than a dict) keeps the argument order in the repr stable on every Python version.
		self.__str = 'IncludeLinesBetween(%s)'%', '.join('%s=%r'%(k, v) for (k, v) in [
			('startAt', startAt),
			('stopAfter', stopAfter),
			('stopBefore', stopBefore),
		] if v is not None)
	
		# startAt stays None when unspecified, since that is what signals "include from the first line onwards".
		self.startAt = self._toPredicate(startAt)
		# The stop conditions default to predicates that never match, so __call__ can invoke them unconditionally.
		self.stopAfter = self._toPredicate(stopAfter) or (lambda line: False)
		self.stopBefore = self._toPredicate(stopBefore) or (lambda line: False)
		
		self.__including = self.startAt is None

	@staticmethod
	def _toPredicate(expr):
		"""Return ``expr`` unchanged if it is None or callable, otherwise a predicate that searches lines for that regex."""
		if expr is None or callable(expr): return expr
		# Compile once here rather than handing the expression to re.search() for every line.
		search = re.compile(expr).search
		return lambda line: search(line) is not None

	def __repr__(self): return self.__str

	def fileStarted(self, srcPath, destPath, srcFile, destFile):
		"""Reset the including/excluding state; none of the arguments are used."""
		# reset every time we start a new file
		self.__including = self.startAt is None

	def __call__(self, line):
		"""Return ``line`` if it falls inside an included range, or None if it should be filtered out."""
		if self.__including:
			# stopAfter ends the range but still lets the current line through...
			if self.stopAfter(line):
				self.__including = False
			# ...whereas stopBefore ends the range and drops the current line too.
			if self.stopBefore(line):
				self.__including = False
				return None
			return line

		# Not currently including: only a startAt match can open a new range, and the matching line is kept.
		if self.startAt is not None and self.startAt(line):
			self.__including = True
			return line
		return None


class IncludeLinesMatching(object):
	"""
	Mapper that keeps only the lines matching a regular expression, filtering out every other line. 
	
	:param str|compiled_regex regex: The regular expression each line must match. Matching is anchored at the 
		start of the line, so begin the expression with ``.*`` if extra leading characters should be permitted. 
		Use ``(expr1|expr2)`` syntax to combine several expressions. 

	>>> IncludeLinesMatching('Foo.*')('Foo bar')
	'Foo bar'

	>>> IncludeLinesMatching('bar.*')('Foo bar') is None
	True

	"""
	
	def __init__(self, regex):
		# Record the display form before the expression is (possibly) swapped for its compiled equivalent.
		self.__str = 'IncludeLinesMatching(%s)' % regex
		if isstring(regex):
			regex = re.compile(regex)
		self.regex = regex

	def __call__(self, line):
		"""Return ``line`` if it matches the expression, otherwise None."""
		if self.regex.match(line) is None:
			return None
		return line

	def __repr__(self):
		return self.__str


class ExcludeLinesMatching(object):
	"""
	Mapper that drops every line matching a regular expression, passing all other lines through unchanged. 
	
	:param str|compiled_regex regex: The regular expression identifying lines to discard. Matching is anchored at 
		the start of the line, so begin the expression with ``.*`` if extra leading characters should be permitted. 
		Use ``(expr1|expr2)`` syntax to combine several expressions. 

	>>> ExcludeLinesMatching('Foo.*')('Foo bar') is None
	True

	>>> ExcludeLinesMatching('bar.*')('Foo bar')
	'Foo bar'

	"""
	
	def __init__(self, regex):
		# Record the display form before the expression is (possibly) swapped for its compiled equivalent.
		self.__str = 'ExcludeLinesMatching(%s)' % regex
		if isstring(regex):
			regex = re.compile(regex)
		self.regex = regex

	def __call__(self, line):
		"""Return None if ``line`` matches the expression, otherwise ``line`` itself."""
		if self.regex.match(line) is not None:
			return None
		return line

	def __repr__(self):
		return self.__str