Source code for pysys.writer.testoutput

#!/usr/bin/env python
# PySys System Test Framework, Copyright (C) 2006-2022 M.B. Grieve

# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.

# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.

# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA



"""
Writers that process the contents of test output directories by archiving or collecting output files.

"""

__all__ = [
	"TestOutputArchiveWriter", 
	"CollectTestOutputWriter",
	"PythonCoverageWriter",]

import time, stat, logging, sys, io
import zipfile
import locale
import shutil
import shlex

from pysys.constants import *
from pysys.writer.api import *
from pysys.utils.logutils import ColorLogFormatter, stripANSIEscapeCodes, stdoutPrint
from pysys.utils.fileutils import mkdir, deletedir, toLongPathSafe, fromLongPathSafe, pathexists
from pysys.utils.pycompat import openfile
from pysys.exceptions import UserError

log = logging.getLogger('pysys.writer')

[docs]class TestOutputArchiveWriter(BaseRecordResultsWriter):
	"""Writer that creates zip archives of each failed test's output directory, 
	producing artifacts that could be uploaded to a CI system or file share to allow the failures to be analysed. 
	
	This writer is enabled when running with ``--record``. If using this writer in conjunction with a CI writer that 
	publishes the generated archives, be sure to include this writer first in the list of writers in your project 
	configuration. 

	Publishes artifacts with category name "TestOutputArchive" and the directory (unless there are no archives) 
	as "TestOutputArchiveDir" for any enabled `pysys.writer.api.ArtifactPublisher` writers. 

	.. versionadded:: 1.6.0

	The following properties can be set in the project configuration for this writer:		
	"""

	destDir = '__pysys_output_archives.${outDirName}/'
	"""
	The directory to write the archives to, as an absolute path, or relative to the testRootDir (or --outdir if specified). 

	This directory will be deleted at the start of the run if it already exists. 
	
	Project ``${...}`` properties can be used in the path. 
	"""
	
	maxTotalSizeMB = 1024.0
	"""
	The (approximate) limit on the total size of all archives.
	"""
	
	maxArchiveSizeMB = 200.0
	"""
	The (approximate) limit on the size of each individual test archive.
	"""
	
	maxArchives = 50
	"""
	The maximum number of archives to create. 
	"""
	
	archiveAtEndOfRun = True # if at end of run can give deterministic order, also reduces I/O while tests are executing
	"""
	By default all archives are created at the end of the run once all tests have finished executing. This avoids 
	I/O contention with execution of tests, and also allows the selection of which tests to generate archives for 
	to be done in a deterministic (but pseudo-random) fashion rather than just taking the first N failures. 
	
	Alternatively you can set this property to false if you wish to create archives during the test run as each 
	failure occurs. 
	"""


	includeNonFailureOutcomes = 'REQUIRES INSPECTION'
	"""
	In addition to failure outcomes, any outcomes listed here (as comma-separated display names) will be archived. 
	"""

	fileExcludesRegex = u''
	"""
	A regular expression indicating test output paths that will be excluded from archiving, for example large 
	temporary files that are not useful for diagnosing problems. 
	
	For example ``".*/MyTest_001/.*/mybigfile.*[.]tmp"``.
	
	The expression is matched against the path of each output file relative to the test root dir, 
	using forward slashes as the path separator. Multiple paths can be specified using "(path1|path2)" syntax. 
	"""
	
	fileIncludesRegex = u'' # executed against the path relative to the test root dir e.g. (pattern1|pattern2)
	"""
	A regular expression indicating test output paths that will be included in the archive. This can be used to 
	archive just some particular files. Note that for use cases such as collecting graphs and code coverage files 
	generated by a test run, the collect-test-output feature is usually a better fit than using this writer. 
	
	The expression is matched against the path of each output file relative to the test root dir, 
	using forward slashes as the path separator. Multiple paths can be specified using "(path1|path2)" syntax. 
	"""
	
	def setup(self, numTests=0, cycles=1, xargs=None, threads=0, testoutdir=u'', runner=None, **kwargs):
		"""
		Validates the configured properties and prepares this writer's state for the run. 
		
		Resolves ``destDir`` to an absolute long-path-safe location next to the runner's output directory, 
		removes a pre-existing directory that contains only ``.txt``/``.zip`` entries, compiles the include/exclude 
		regular expressions, and parses ``includeNonFailureOutcomes`` into a list of outcome display names. 
		
		:param runner: The runner for this test run; its ``project`` and ``output`` are used to resolve ``destDir``. 
		:raises UserError: If an unknown plugin property or unknown outcome display name was configured. 
		:raises Exception: If ``destDir`` is empty. 
		"""
		writerClass = type(self)
		for propName in self.pluginProperties:
			if hasattr(writerClass, propName): continue
			raise UserError('Unknown property "%s" for %s'%(propName, self))

		self.runner = runner
		if not self.destDir:
			raise Exception('Cannot set destDir to ""')

		# Only the built-in default still needs its ${...} properties expanding; expanding any other value 
		# a second time could corrupt ${$} escapes
		if self.destDir == TestOutputArchiveWriter.destDir:
			self.destDir = runner.project.expandProperties(self.destDir)
		resolvedDir = os.path.join(runner.output+'/..', self.destDir)
		self.destDir = toLongPathSafe(os.path.normpath(resolvedDir))

		# Clear out archives from a previous run, but leave the directory alone if it holds anything 
		# other than .txt/.zip entries
		if os.path.exists(self.destDir):
			existingEntries = os.listdir(self.destDir)
			if all(entry.endswith(('.txt', '.zip')) for entry in existingEntries):
				deletedir(self.destDir)

		excludesExpr = self.fileExcludesRegex
		self.fileExcludesRegex = re.compile(excludesExpr) if excludesExpr else None
		includesExpr = self.fileIncludesRegex
		self.fileIncludesRegex = re.compile(includesExpr) if includesExpr else None

		self.__totalBytesRemaining = int(float(self.maxTotalSizeMB)*1024*1024)

		if self.archiveAtEndOfRun:
			self.queuedInstructions = []

		self.skippedTests = []
		self.archivesCreated = 0

		# "*" selects every known outcome; otherwise expect a comma-separated list of display names
		knownOutcomes = [str(outcome) for outcome in OUTCOMES]
		if self.includeNonFailureOutcomes == '*':
			self.includeNonFailureOutcomes = knownOutcomes
		else:
			requested = (item.strip() for item in self.includeNonFailureOutcomes.split(','))
			self.includeNonFailureOutcomes = [item.upper() for item in requested if item]
		for displayName in self.includeNonFailureOutcomes:
			if displayName not in knownOutcomes:
				raise UserError('Unknown outcome display name "%s" in includeNonFailureOutcomes'%displayName)

	def cleanup(self, **kwargs):
		"""
		Finishes archiving at the end of the run. 
		
		Creates any archives that were queued up by `processResult`, writes the list of tests that could not be 
		archived to ``skipped_artifacts.txt`` in the ``destDir``, logs a summary, and publishes the ``destDir`` 
		as a "TestOutputArchiveDir" artifact if at least one archive was created. 
		"""
		if self.archiveAtEndOfRun:
			# entries are [sortKey, id, outputDir]; process them in ascending order of the leading sort key
			for queued in sorted(self.queuedInstructions):
				self._archiveTestOutputDir(queued[1], queued[2])

		if self.skippedTests:
			# a limit was hit, so at least leave a record of which tests have no archive
			mkdir(self.destDir)
			with openfile(self.destDir+os.sep+'skipped_artifacts.txt', 'w', encoding='utf-8') as skippedFile:
				skippedFile.write('\n'.join(os.path.normpath(t) for t in self.skippedTests))

		# only worth an INFO line if something was actually produced
		logSummary = log.info if self.archivesCreated else log.debug
		logSummary('%s created %d test output archive artifacts in: %s', 
			self.__class__.__name__, self.archivesCreated, self.destDir)

		if self.archivesCreated:
			self.runner.publishArtifact(self.destDir, 'TestOutputArchiveDir')

[docs]	def shouldArchive(self, testObj, **kwargs):
		"""
		Decides whether this test is eligible for archiving of its output. 
		
		The default implementation archives only tests that have a failure outcome, or are listed in 
		``includeNonFailureOutcomes``, but this can be customized if needed by subclasses. 
		
		:param pysys.basetest.BaseTest testObj: The test object under consideration.
		:return bool: True if this test's output can be archived. 
		"""
		return testObj.getOutcome().isFailure() or str(testObj.getOutcome()) in self.includeNonFailureOutcomes


	def processResult(self, testObj, cycle=0, testTime=0, testStart=0, runLogOutput=u'', **kwargs):
		"""
		Archives (or queues for archiving) the output directory of a test that has just completed. 
		
		Tests for which `shouldArchive` returns False are ignored. If ``archiveAtEndOfRun`` is set, an entry of 
		the form ``[sortKey, id, outputDir]`` is appended to ``queuedInstructions``; otherwise the archive is 
		created immediately. 
		
		:param pysys.basetest.BaseTest testObj: The test object whose result is being processed. 
		"""
		import zlib # local import so this method does not depend on the module-level import list

		if not self.shouldArchive(testObj): return

		# the archive id is the test id, plus a cycle suffix when running multiple cycles
		archiveId = testObj.descriptor.id
		if testObj.testCycle:
			archiveId = '%s.cycle%03d'%(archiveId, testObj.testCycle)

		if not self.archiveAtEndOfRun:
			self._archiveTestOutputDir(archiveId, testObj.output)
			return

		# The sort key gives a pseudo-random ordering of the queued tests. The builtin hash() is salted 
		# differently in every Python process for str values, so it cannot provide an ordering that is 
		# repeatable from one run to the next; CRC32 of the id is stable across processes and platforms. 
		sortKey = zlib.crc32(archiveId.encode('utf-8'))
		self.queuedInstructions.append([sortKey, archiveId, testObj.output])
	
[docs]	def _newArchive(self, id, **kwargs):
		"""
		Creates and opens a new archive file for the specified id.
		
		:return: (str path, filehandle) The path will include an appropriate extension for this archive type. 
		  The filehandle must have the same API as Python's ZipFile class. 
		"""
		path = self.destDir+os.sep+('%s.%s.zip'%(id, self.runner.project.properties['outDirName']))
		return path, zipfile.ZipFile(path, 'w', zipfile.ZIP_DEFLATED, allowZip64=True)

[docs]	def _archiveTestOutputDir(self, id, outputDir, **kwargs):
		"""
		Creates an archive for the specified test, unless doing so would violate the configured limits 
		(e.g. maxArchives). 
		
		If a limit has already been reached the output dir is added to ``skippedTests`` and nothing is created. 
		Otherwise the non-empty files under the output dir are added to a new archive (``run.log`` first within 
		each directory), subject to the include/exclude regular expressions and the remaining size budget. 
		Unless an include regular expression is configured, the paths of any skipped files are listed in a 
		``__pysys_skipped_archive_files.txt`` member of the archive. An archive that ends up with no files is 
		deleted again; otherwise it is published as a "TestOutputArchive" artifact. 
		
		If an exception occurs the output dir is added to ``skippedTests`` and the exception is re-raised. 
		
		:param str id: The testId (plus a cycle suffix if it's a multi-cycle run). 
		:param str outputDir: The path of the test output dir. 
		"""
		# create the destination directory lazily, i.e. only once there is something to archive
		if self.archivesCreated == 0: mkdir(self.destDir)

		if self.archivesCreated == self.maxArchives:
			self.skippedTests.append(outputDir)
			log.debug('Skipping archiving for %s as maxArchives limit is reached', id)
			return
		# fewer than 500 bytes of remaining budget is treated as having no budget left
		if self.__totalBytesRemaining < 500:
			self.skippedTests.append(outputDir)
			log.debug('Skipping archiving for %s as maxTotalMB limit is reached', id)
			return
		# counted up-front; this is undone below if the archive turns out to be empty
		self.archivesCreated += 1

		try:
			outputDir = toLongPathSafe(outputDir)
			skippedFiles = []
			
			# this is performance-critical so worth caching these
			fileExcludesRegex = self.fileExcludesRegex
			fileIncludesRegex = self.fileIncludesRegex
			isPurgableFile = self.runner.isPurgableFile
			
			# the budget for this archive is the per-archive limit, capped by whatever is left of the total limit
			bytesRemaining = min(int(self.maxArchiveSizeMB*1024*1024), self.__totalBytesRemaining)
			# set once a trial compression has been attempted, so that it happens at most once per archive
			triedTmpZipFile = False
			
			
			zippath, myzip = self._newArchive(id)
			filesInZip = 0
			with myzip:
				# number of leading characters (output dir plus one separator) to strip to get the member name
				rootlen = len(outputDir) + 1

				for base, dirs, files in os.walk(outputDir):
					# Just the files, don't bother with the directories for now
					
					files.sort(key=lambda fn: [fn!='run.log', fn] ) # be deterministic, and put run.log first
					
					for f in files:
						fn = os.path.join(base, f)
						# both regular expressions are applied to the full path, using forward slashes
						if fileExcludesRegex is not None and fileExcludesRegex.search(fn.replace('\\','/')):
							skippedFiles.append(fn)
							continue
						if fileIncludesRegex is not None and not fileIncludesRegex.search(fn.replace('\\','/')):
							skippedFiles.append(fn)
							continue
						
						fileSize = os.path.getsize(fn)
						if fileSize == 0:
							# Since (if not waiting until end) this gets called before testComplete has had a chance to clean things up, skip the 
							# files that it would have deleted. Don't bother listing these in skippedFiles since user 
							# won't be expecting them anyway
							continue
						
						# budget for this archive is (as good as) used up, so skip everything that remains
						if bytesRemaining < 500:
							skippedFiles.append(fn)
							continue
						
						try:
							# the uncompressed size exceeds the budget, but the file may still fit once compressed
							if fileSize > bytesRemaining:
								if triedTmpZipFile: # to save effort, don't keep trying once we're close - from now on only attempt small files
									skippedFiles.append(fn)
									continue
								triedTmpZipFile = True
								
								# Only way to know if it'll fit is to try compressing it
								log.debug('File size of %s might push the archive above the limit; creating a temp zip to check', fn)
								tmpname, tmpzip = self._newArchive(id+'.tmp')
								try:
									with tmpzip:
										tmpzip.write(fn, 'tmp')
										compressedSize = tmpzip.getinfo('tmp').compress_size
										if compressedSize > bytesRemaining:
											log.debug('Skipping file as compressed size of %s bytes exceeds remaining limit of %s bytes: %s', 
												compressedSize, bytesRemaining, fn)
											skippedFiles.append(fn)
											continue
								finally:
									# the trial archive is always removed, whether or not the file fitted
									os.remove(tmpname)
									
							# Here's where we actually add it to the real archive
							memberName = fn[rootlen:].replace('\\','/')
							myzip.write(fn, memberName)
						except Exception as ex: # might happen due to file locking or similar
							log.warning('Failed to add output file "%s" to archive: %s', fn, ex)
							skippedFiles.append(fn)
							continue
						filesInZip += 1
						# charge the compressed (not the original) size of the new member against the budget
						bytesRemaining -= myzip.getinfo(memberName).compress_size
				
				if skippedFiles and fileIncludesRegex is None: # keep the archive clean if there's an explicit include
					skippedFilesStr = os.linesep.join([fromLongPathSafe(f) for f in skippedFiles])
					skippedFilesStr = skippedFilesStr.encode('utf-8')
					myzip.writestr('__pysys_skipped_archive_files.txt', skippedFilesStr)
	
			# the archive has been closed by this point, so it is safe to delete or measure the file
			if filesInZip == 0:
				# don't leave empty zips around
				log.debug('No files added to zip so deleting: %s', zippath)
				self.archivesCreated -= 1
				os.remove(zippath)
				return
	
			# charge the actual size of the finished archive against the limit for the whole run
			self.__totalBytesRemaining -= os.path.getsize(zippath)
			self.runner.publishArtifact(zippath, 'TestOutputArchive')
	
		except Exception:
			# record that this test has no (complete) archive, then let the caller see the error
			self.skippedTests.append(outputDir)
			raise
		
[docs]class CollectTestOutputWriter(BaseRecordResultsWriter, TestOutputVisitor):
	"""Writer that collects files matching a specified pattern from the output directory after each test, and puts 
	them in a single directory or archive - for example code coverage files or performance graphs. 
	
	This writer can be used as-is or as a base class for writers that need to collect files during test execution 
	then do something with them during cleanup, for example generate a code coverage report. 
	
	Empty files are ignored. 
	
	This writer is always enabled. 

	.. versionadded:: 1.6.0

	The following properties can be set in the project configuration for this writer:		
	"""

	destDir = ''
	"""
	The directory in which the files will be collected, as an absolute path, or relative to the testRootDir (or --outdir if specified). 

	This directory will be deleted at the start of the run if it already exists. 
	
	Project ``${...}`` properties can be used in the path. 
	"""

	destArchive = ''
	"""
	Optional filename of a .zip archive to generate with the contents of the destDir. 
	
	If a non-absolute path is specified it is evaluated relative to the destDir. 
	
	Project ``${...}`` properties can be used in the path. 
	"""
	
	fileIncludesRegex = u'' # executed against the path relative to the test root dir e.g. (pattern1|pattern2)
	"""
	A regular expression indicating the test output paths that will be collected. This can be used to 
	archive just some particular files. This is required. 
	
	The expression is matched against the final characters of each output file's path (with the test root dir stripped 
	off), using forward slashes as the path separator. Multiple paths can be specified using "(path1|path2)" syntax. 
	"""

	fileExcludesRegex = u''
	"""
	A regular expression indicating test output paths that will be excluded from collection. 
	
	For example ``".*/MyTest_001/.*/mybigfile.*[.]tmp"``.
	
	The expression is matched against the path as described for fileIncludesRegex.
	"""

	outputPattern = u'@TESTID@.@FILENAME@.@UNIQUE@.@FILENAME_EXT@' 
	"""
	A string indicating the file (and optionally subdirectory name) to use when writing each collected file to 
	the destDir. 
	
	In addition to any standard ``${...}`` property variables from the project 
	configuration, the output pattern can contain these ``@...@`` 
	substitutions:

		- ``@FILENAME@`` is the original base filename with directory and extension removed, to which you 
		  can add prefixes or suffixes as desired. 

		- ``.@FILENAME_EXT@`` is the filename extension, such that the original filename 
		  is ``@FILENAME@.@FILENAME_EXT@`` (note the dot prefix is mandatory here, and will be replaced with 
		  an empty string if there is no extension). 

		- ``@TESTID@`` is replaced by the identifier of the test that generated the 
		  output file (including mode suffix if present), which may be useful for tracking where each one came from. 

		- ``@UNIQUE@`` is replaced by a number that ensures the file does not clash 
		  with any other collected output file from another test. The ``@UNIQUE@`` 
		  substitution variable is mandatory. 
	"""
	
	publishArtifactDirCategory = u'' 
	"""
	If specified, the output directory will be published as an artifact using the specified category name, 
	e.g. ``MyCodeCoverageDir``. 
	"""

	publishArtifactArchiveCategory = u'' 
	"""
	If specified the ``destArchive`` file (if any) will be published as an artifact using the specified category name.
	"""

	def isEnabled(self, record=False, **kwargs): 
		"""
		Indicates that this writer is enabled, whatever the value of ``record``. 
		
		:param bool record: Ignored by this implementation. 
		:return bool: Always True. 
		"""
		return True

	def setup(self, numTests=0, cycles=1, xargs=None, threads=0, testoutdir=u'', runner=None, **kwargs):
		"""
		Validates the configured properties and prepares this writer's state for the run. 
		
		Resolves ``destDir`` (and ``destArchive``, if set) to absolute locations, deletes any existing ``destDir``, 
		normalizes the path separators in ``outputPattern``, compiles the include/exclude regular expressions 
		and resets the count of collected files. 
		
		:param runner: The runner for this test run; its ``output`` is used to resolve ``destDir``. 
		:raises UserError: If an unknown plugin property was configured. 
		:raises Exception: If ``destDir`` or ``fileIncludesRegex`` is empty, or if ``destDir`` contains a 
			``pysysproject.xml`` file. 
		"""
		writerClass = type(self)
		for propName in self.pluginProperties:
			if hasattr(writerClass, propName): continue
			raise UserError('Unknown property "%s" for %s'%(propName, self))

		self.runner = runner
		if not self.destDir:
			raise Exception('Cannot set destDir to ""')
		if not self.fileIncludesRegex:
			raise Exception('fileIncludesRegex must be specified for %s'%type(self).__name__)

		self.destDir = os.path.normpath(os.path.join(runner.output+'/..', self.destDir))
		# guard against deleting a directory that holds a PySys project file
		if pathexists(self.destDir+os.sep+'pysysproject.xml'):
			raise Exception('Cannot set destDir to testRootDir')

		# the long path safe logic used when collecting assumes the pattern contains only native separators
		self.outputPattern = self.outputPattern.replace('/',os.sep).replace('\\', os.sep)

		if self.destArchive:
			self.destArchive = os.path.join(self.destDir, self.destArchive)

		# always start from an empty collection directory; any existing one is deleted unconditionally
		if os.path.exists(self.destDir):
			deletedir(self.destDir)

		def compileAnchored(expr):
			# An empty expression means "no filter". Otherwise require the match to extend to the end 
			# of the path (unless the expression already ends with "$"), to avoid common mistakes
			if not expr: return None
			return re.compile(expr if expr.endswith('$') else expr+'$')

		self.fileExcludesRegex = compileAnchored(self.fileExcludesRegex)
		self.fileIncludesRegex = compileAnchored(self.fileIncludesRegex)

		self.collectedFileCount = 0

	def visitTestOutputFile(self, testObj, path, **kwargs):
		"""
		Collects the specified test output file if it matches the configured regular expressions. 
		
		The expressions are matched against the path with any leading test root directory removed, and with 
		forward slashes as the separator. 
		
		:param testObj: The test object that produced the file. 
		:param str path: The path of the output file. 
		:return: False if the file was filtered out; nothing is returned if the file was collected. 
		"""
		rootDir = self.runner.project.testRootDir
		relPath = fromLongPathSafe(path)
		if relPath.startswith(rootDir):
			# also drops the separator that follows the root directory
			relPath = relPath[len(rootDir)+1:]
		relPath = relPath.replace('\\','/')

		if not self.fileIncludesRegex.search(relPath):
			return False

		excludes = self.fileExcludesRegex
		if excludes is not None and excludes.search(relPath):
			return False

		self.collectPath(testObj, path, **kwargs)

	def collectPath(self, testObj, path, **kwargs):
		"""
		Copies the specified file into the ``destDir``, using a name generated from the ``outputPattern``. 
		
		The ``@TESTID@``, ``@FILENAME@`` and ``.@FILENAME_EXT@`` substitutions are applied first, then 
		``@UNIQUE@`` is replaced by the lowest number (starting from 1) for which the resulting path does 
		not already exist. 
		
		:param testObj: The test object that produced the file; its string form is used for ``@TESTID@``. 
		:param str path: The path of the file to be collected. 
		:raises UserError: If the output pattern has no ``@UNIQUE@`` and the destination file already exists. 
		"""
		name, ext = os.path.splitext(os.path.basename(path))
		destTemplate = toLongPathSafe(os.path.join(self.destDir, (self.outputPattern
			.replace('@TESTID@', str(testObj))
			.replace('@FILENAME@', name)
			.replace('.@FILENAME_EXT@', ext)
			)))

		if '@UNIQUE@' in destTemplate:
			i = 1
			while pathexists(destTemplate.replace('@UNIQUE@', '%d'%(i))):
				i += 1
			collectdest = destTemplate.replace('@UNIQUE@', '%d'%(i))
		else:
			# Without the placeholder every candidate name is identical, so searching for an unused name 
			# could never terminate once the destination exists; report the configuration problem instead
			if pathexists(destTemplate):
				raise UserError('Cannot collect "%s" because outputPattern "%s" does not contain @UNIQUE@ and the destination already exists: %s'%(
					path, self.outputPattern, fromLongPathSafe(destTemplate)))
			collectdest = destTemplate

		mkdir(os.path.dirname(collectdest))
		shutil.copyfile(toLongPathSafe(path.replace('/',os.sep)), collectdest)
		self.collectedFileCount += 1
	
[docs]	def archiveAndPublish(self):
		"""
		Generate an archive of the destDir (if configured) and publish artifacts (if configured). 
		
		Called by default as part of `cleanup()`.
		"""
		if self.destArchive:
			mkdir(os.path.dirname(toLongPathSafe(self.destArchive)))
			with zipfile.ZipFile(toLongPathSafe(self.destArchive), 'w', zipfile.ZIP_DEFLATED, allowZip64=True) as archive:
				rootlen = len(self.destDir)
				for base, dirs, files in os.walk(self.destDir):
					for f in files:
						if os.path.normpath(os.path.join(base, f))==os.path.normpath(self.destArchive): continue
						fn = os.path.join(base, f)
						
						destname = fn[rootlen:].replace('\\','/').lstrip('/')
						
						try:
							try:
								archive.write(fn, destname)
							except PermissionError: # pragma: no cover - can happen on windows due to file system locking issues
								time.sleep(5.0)
								archive.write(fn, destname)
						except Exception as ex: # pragma: no cover
							# Deal with failures (even after retry) - don't abort the whole archive 
							# (e.g. a locked .err file in coverage output dir doesn't matter)
							log.warning('Could not write file to archive %s: "%s" - %s: %s', os.path.basename(self.destArchive), fn, 
								ex.__class__.__name__, ex)
							archive.writestr(destname+'.pysyserror.txt', '!!! PySys could not write this file to the archive - %s: %s'%(
								ex.__class__.__name__, ex))

		if self.publishArtifactDirCategory:
			self.runner.publishArtifact(self.destDir, self.publishArtifactDirCategory)
		if self.publishArtifactArchiveCategory and self.destArchive:
			self.runner.publishArtifact(self.destArchive, self.publishArtifactArchiveCategory)

	def cleanup(self, **kwargs):
		"""
		Logs how many files were collected, then archives and publishes them using `archiveAndPublish`. 
		
		Nothing is archived or published if the ``destDir`` does not exist. 
		"""
		if pathexists(self.destDir):
			collectedCount = '{:}'.format(self.collectedFileCount)
			log.info('Collected %s test output files to directory: %s', collectedCount, os.path.normpath(fromLongPathSafe(self.destDir)))
			self.archiveAndPublish()
		else:
			log.debug('No matching output files were found for collection directory: %s', os.path.normpath(self.destDir))
		

# for compatibility with 1.6.0/1.6.1
from pysys.writer.coverage import PythonCoverageWriter