1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 import os.path, sys, re, string, copy
21
22 from pysys import log
23 from pysys.constants import *
24 from pysys.exceptions import *
25 from pysys.utils.filediff import trimContents
26
27
def getmatches(file, regexpr, ignores=None):
    """Look for matches on a regular expression in an input file, return a sequence of the matches.

    A line contributes at most one match object (the first match found by
    C{search}). Lines that also match any of the C{ignores} expressions are
    discarded.

    @param file: The full path to the input file
    @param regexpr: The regular expression used to search for matches
    @param ignores: A list of regexes which will cause matches to be discarded
    @return: A list of the match objects
    @rtype: list
    @raises FileNotFoundException: Raised if the input file does not exist

    """
    # Compiled before the existence check so an invalid expression is reported
    # regardless of whether the file is present.
    rexp = re.compile(regexpr)

    log.debug("Looking for expression \"%s\" in input file %s" % (regexpr, file))

    if not os.path.exists(file):
        raise FileNotFoundException("unable to find file %s" % (os.path.basename(file)))

    # Compile the ignore patterns once rather than re-searching the raw
    # strings for every matching line.
    ignorePatterns = [re.compile(i) for i in (ignores or [])]

    matches = []
    with open(file, 'r') as f:
        for line in f:
            match = rexp.search(line)
            if match is None:
                continue
            if any(pattern.search(line) for pattern in ignorePatterns):
                continue

            log.debug(("Found match for line: %s" % line).rstrip())
            matches.append(match)
    return matches
62
63
def filegrep(file, expr, ignores=None, returnMatch=False):
    """Search for matches to a regular expression in an input file, returning true if a match occurs.

    The search stops at the first line that matches C{expr} and does not match
    any of the C{ignores} expressions.

    @param file: The full path to the input file
    @param expr: The regular expression (uncompiled) to search for in the input file
    @param ignores: Optional list of regular expression strings to ignore when searching file.
    @param returnMatch: return the regex match object instead of a simple boolean
    @returns: success (True / False), unless returnMatch=True in which case it returns the regex match
        object (or None if not matched)
    @rtype: bool, or a regex match object / None when returnMatch=True
    @raises FileNotFoundException: Raised if the input file does not exist

    """
    if not os.path.exists(file):
        raise FileNotFoundException("unable to find file %s" % (os.path.basename(file)))

    with open(file, 'r') as f:
        if log.isEnabledFor(logging.DEBUG):
            # Only read the whole file into memory when it is going to be logged.
            contents = f.readlines()
            logContents("Contents of %s;" % os.path.basename(file), contents)
        else:
            # Otherwise iterate the file object lazily, line by line.
            contents = f

        ignorePatterns = [re.compile(i) for i in (ignores or [])]
        regexpr = re.compile(expr)

        for line in contents:
            m = regexpr.search(line)
            if m is None:
                continue
            if any(pattern.search(line) for pattern in ignorePatterns):
                continue
            return m if returnMatch else True

    return None if returnMatch else False
101
102
def lastgrep(file, expr, ignore=None, include=None):
    """Search for matches to a regular expression in the last line of an input file, returning true if a match occurs.

    The file contents are first filtered through C{trimContents} using the
    C{ignore} and then the C{include} expressions; the "last line" is the last
    line remaining after that filtering.

    @param file: The full path to the input file
    @param expr: The regular expression (uncompiled) to search for in the last line of the input file
    @param ignore: A list of regular expressions which remove entries in the input file contents before making the grep
    @param include: A list of regular expressions used to select lines from the input file contents to use in the grep
    @returns: success (True / False); False if no lines remain after filtering
    @rtype: bool
    @raises FileNotFoundException: Raised if the input file does not exist

    """
    if not os.path.exists(file):
        raise FileNotFoundException("unable to find file %s" % (os.path.basename(file)))

    # None stands in for "no expressions" so no list object is shared between calls.
    if ignore is None:
        ignore = []
    if include is None:
        include = []

    with open(file, 'r') as f:
        contents = f.readlines()
    contents = trimContents(contents, ignore, exclude=True)
    contents = trimContents(contents, include, exclude=False)

    logContents("Contents of %s after pre-processing;" % os.path.basename(file), contents)
    if len(contents) == 0:
        return False
    return re.compile(expr).search(contents[-1]) is not None
128
129
def orderedgrep(file, exprList):
    """Search for ordered matches to a set of regular expressions in an input file.

    The ordered grep succeeds only if matches to the regular expressions in the
    expression list occur in the input file in the order they appear in the
    expression list. Matches do not have to be across sequential lines in the
    input file, only in the correct order. Each line of the file can satisfy at
    most one expression; the next expression is searched for starting from the
    following line. For example, for a file with contents ::

        A is for apple
        B is for book
        C is for cat
        D is for dog

    an expression list of ["^A.*$", "^C.*$", "^D.*$"] succeeds, whilst an
    expression list of ["^A.*$", "^C.*$", "^B.*$"] fails on "^B.*$".

    @param file: The full path to the input file
    @param exprList: A sequence of regular expressions (uncompiled) to search for in the input file
    @returns: None if every expression was matched in order (including when
        exprList is empty), otherwise the first expression that could not be matched
    @raises FileNotFoundException: Raised if the input file does not exist

    """
    if not os.path.exists(file):
        raise FileNotFoundException("unable to find file %s" % (os.path.basename(file)))

    expressions = list(exprList)
    if not expressions:
        # Nothing to look for: trivially satisfied.
        return None

    position = 0    # index of the expression currently being searched for
    regexpr = None  # compiled lazily, only when another line is available to test
    with open(file, 'r') as f:
        for line in f:
            if regexpr is None:
                regexpr = re.compile(expressions[position])
            if regexpr.search(line) is not None:
                position += 1
                if position == len(expressions):
                    return None
                regexpr = None
    return expressions[position]
169
170
def logContents(message, list):
    """Log a list of strings at debug level, prepending the line number to each line in the log output.

    @param message: A heading logged once before the numbered lines
    @param list: The list of strings to log
    """
    log.debug(message)
    # Line numbers are 1-based; trailing whitespace (including the newline) is stripped.
    for number, text in enumerate(list, 1):
        log.debug((" Line %-5d: %s" % (number, text)).rstrip())
181
182
183
184
# Command line entry point: filegrep.py <file> <regexpr>
if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("Usage: filegrep.py <file> <regexpr>")
        sys.exit()

    try:
        status = filegrep(sys.argv[1], sys.argv[2])
    except FileNotFoundException as value:
        print("caught %s: %s" % (sys.exc_info()[0], value))
        print("unable to perform grep... exiting")
    else:
        if status:
            print("Matches found")
        else:
            print("No matches found")
200