# -*- coding: utf-8 -*- """ Created on Thu Jan 19 16:29:03 2012 @author: Fesh0r @version: v0.1 """ import sys import os import fnmatch import shutil import re from optparse import OptionParser _MODIFIERS = r'public|protected|private|static|abstract|final|native|synchronized|transient|volatile|strictfp' _REGEXP = { # convert OSX double 1.23456789E-12D to Windows double 1.23456789E-012D 'fltexp': re.compile(r'(?<=[Ee][+-])([0-9]{2})(?=[DdFf])'), # Remove trailing whitespace 'trailing': re.compile(r'[ \t]+$', re.MULTILINE), # close up extends, and implements that JAD puts on a seperate line 'extends': re.compile(r'$\n {4}(?=extends|implements)', re.MULTILINE), # close up throws that JAD puts on a seperate line 'throws': re.compile(r'$\n {8}(?=throws)', re.MULTILINE), # close up method parameters that have wrapped 'params': re.compile(r'^ {4}(?P
\S.*?)(?P(?:\n {12}\S.*?)+)(?P(?:\n {4}\{$)|;)', re.MULTILINE), 'params_sub': re.compile(r'\n {12}', re.MULTILINE), # Remove repeated blank lines 'newlines': re.compile(r'^\n{2,}', re.MULTILINE), # Rename variables called enum 'rename_enum': re.compile(r'(?<=\W)(enum)(?=\W)'), 'modifiers': re.compile(r'(' + _MODIFIERS + ') '), 'list': re.compile(r', '), 'deindent': re.compile(r'^ {4}(.*$)', re.MULTILINE), # Class 'class': re.compile(r'^(?P(?:(?:' + _MODIFIERS + r') )*)(?Pclass|interface|enum) (?P[\w$]+)(?: extends (?P[\w$.]+(?:, [\w$.]+)*))?(?: implements (?P[\w$.]+(?:, [\w$.]+)*))?\n\{\n(?P(?:.*?\n)*?)(?P\}\n+)', re.MULTILINE), # method regex 'method': re.compile(r'^ {4}(?P(?:(?:' + _MODIFIERS + r') )*)(?P(?!' + _MODIFIERS + r')[\w$.[\]]+) (?P[\w$]+)\((?P.*?)\)(?: throws (?P[\w$.]+(?:, [\w$.]+)*))?\n {4}\{\n(?P(?:.*?\n)*?)(?P {4}\}\n+)', re.MULTILINE), # abstract method regex 'method_abstract': re.compile(r'^ {4}(?P(?:(?:' + _MODIFIERS + r') )*)(?P(?!' + _MODIFIERS + r')[\w$.[\]]+) (?P[\w$]+)\((?P.*?)\)(?: throws (?P[\w$.]+(?:, [\w$.]+)*))?(?P;\n+)', re.MULTILINE), # move super call to start of method 'fix_super': re.compile(r'(?P(?:.*\n)*)(?P {8}super\((?P.*?)\);\n)(?P(?:.*\n)*)', re.MULTILINE), # remove super call in enum 'enum_super': re.compile(r' {8}super\(\w+, \w+\);\n', re.MULTILINE), # static block 'static': re.compile(r'^ {4}static\n {4}\{\n(?P(?:.*?\n)*?)(?P {4}\}\n+)', re.MULTILINE), # field_1234.field_5678 += abc + def; 'str1': re.compile(r'(?P^ +)new StringBuilder\(\);\n +(?P.*?);\n +JVM INSTR dup_x1 ;\n +(?P.*?);\n +append\(\);\n +(?P.*?);\n(:? +append\(\);\n +(?P.*?);\n)? +append\(\);\n +toString\(\);\n +(?P=dest2);$', re.MULTILINE), # field_1234[field_5678] += abc + def; 'str2': re.compile(r'(?P^ +)new StringBuilder\(\);\n +(?P.*?);\n +(?P.*?);\n +JVM INSTR dup2_x1 ;\n +JVM INSTR aaload ;\n +append\(\);\n +(?P.*?);\n +append\(\);\n(:? +(?P.*?);\n +append\(\);\n)? +toString\(\);\n +JVM INSTR aastore ;$', re.MULTILINE), # if(test) goto _L1; else goto _L2 'if_goto': re.compile(r'(?P^ +)if(?P\(.*\)) goto (?P_L[0-9]+); else goto (?P_L[0-9]+)\n(?P_L[0-9]+):$', re.MULTILINE), } _REGEXP_STR = { 'constructor': r'^ {4}(?P(?:(?:' + _MODIFIERS + r') )*)%s\((?P.*?)\)(?: throws (?P[\w$.]+(?:, [\w$.]+)*))?\n {4}\{\n(?P(?:.*?\n)*?)(?P {4}\}\n+)', 'enum_methods': r'^ {4}public static %s(?:\[\])? value(?:s|Of)\(.*?\)\n {4}\{\n(?:.*?\n)*? {4}\}\n+', 'enum_fields': r'^ {4}(?:public|private) static final %s [\w$.[\]]+;.*\n', 'enum_entries': r'^ {8}(?P[\w$]+) = new %s\("(?P=name)", [0-9]+(?:, (?P.*?))?\);\n+', 'enum_values': r'^ {8}(?P[\w$]+)(?P = \(new %s\[\] \{\n(?:.*\n)*? {8}\}\));\n+', } class Error(Exception): pass class ParseError(Error): pass def jadfix(srcdir): for path, _, filelist in os.walk(srcdir, followlinks=True): for cur_file in fnmatch.filter(filelist, '*.java'): src_file = os.path.normpath(os.path.join(path, cur_file)) _process_file(src_file) def _process_class(class_name, class_type, modifiers, extends, implements, body, end): if class_type == 'class': # if we have an enum class then fix the class declaration if 'final' in modifiers and 'Enum' in extends: modifiers.remove('final') extends.remove('Enum') class_type = 'enum' if class_type == 'interface': # is this an annotation type? still missing too much info for it to actually work if 'Annotation' in extends: extends.remove('Annotation') class_type = '@interface' if class_type == 'enum': body = _process_enum(class_name, body) # process normal methods def method_match(match): modifiers = _REGEXP['modifiers'].findall(match.group('modifiers')) if match.group('modifiers') and not modifiers: raise ParseError('no modifiers match in %s \'%s\'' % (match.group('name'), match.group('modifiers'))) parameters = [] if match.group('parameters'): parameters = _REGEXP['list'].split(match.group('parameters')) throws = [] if match.group('throws'): throws = _REGEXP['list'].split(match.group('throws')) return _process_method(class_name, match.group('name'), modifiers, match.group('type'), parameters, throws, match.group('body'), match.group('end')) body = _REGEXP['method'].sub(method_match, body) # process abstract methods def method_abstract_match(match): modifiers = _REGEXP['modifiers'].findall(match.group('modifiers')) if match.group('modifiers') and not modifiers: raise ParseError('no modifiers match in %s \'%s\'' % (match.group('name'), match.group('modifiers'))) parameters = [] if match.group('parameters'): parameters = _REGEXP['list'].split(match.group('parameters')) throws = [] if match.group('throws'): throws = _REGEXP['list'].split(match.group('throws')) return _process_method_abstract(class_name, match.group('name'), modifiers, match.group('type'), parameters, throws, match.group('end')) body = _REGEXP['method_abstract'].sub(method_abstract_match, body) # process constructors def constructor_match(match): modifiers = _REGEXP['modifiers'].findall(match.group('modifiers')) if match.group('modifiers') and not modifiers: raise ParseError('no modifiers match in %s \'%s\'' % (match.group('name'), match.group('modifiers'))) parameters = [] if match.group('parameters'): parameters = _REGEXP['list'].split(match.group('parameters')) throws = [] if match.group('throws'): throws = _REGEXP['list'].split(match.group('throws')) return _process_constructor(class_type, class_name, modifiers, parameters, throws, match.group('body'), match.group('end')) constructor_regex = re.compile(_REGEXP_STR['constructor'] % re.escape(class_name), re.MULTILINE) body = constructor_regex.sub(constructor_match, body) # rebuild class out = '' if modifiers: out += ' '.join(modifiers) + ' ' out += class_type + ' ' + class_name if extends: out += ' extends ' + ', '.join(extends) if implements: out += ' implements ' + ', '.join(implements) out += '\n{\n' + body + end return out def _process_enum(class_name, body): # remove super call in constructor body = _REGEXP['enum_super'].sub(r'', body) # remove values and valueOf methods methods_regex = re.compile(_REGEXP_STR['enum_methods'] % re.escape(class_name), re.MULTILINE) body = methods_regex.sub(r'', body) # remove enum fields and $VALUES fields_regex = re.compile(_REGEXP_STR['enum_fields'] % re.escape(class_name), re.MULTILINE) body = fields_regex.sub(r'', body) # rebuild enum entries from static block body = _process_enum_static(class_name, body) return body def _process_enum_static(class_name, enum_body): # do we have a static block? static_match = _REGEXP['static'].search(enum_body) if not static_match: return enum_body body = static_match.group('body') # for each enum field in the static build up a enum entry enum_entries = '' entries = [] def _enum_entries_match(match): entry_body = '' if match.group('body'): entry_body = '(' + match.group('body') + ')' new_entry = ' ' + match.group('name') + entry_body new_entry = _REGEXP['deindent'].sub(r'\1', new_entry) entries.append(new_entry) return '' entries_regex = re.compile(_REGEXP_STR['enum_entries'] % re.escape(class_name), re.MULTILINE | re.DOTALL) body = entries_regex.sub(_enum_entries_match, body) if entries: enum_entries = '\n' + ',\n'.join(entries) + ';\n\n' # remove the $VALUES array from the static block values_regex = re.compile(_REGEXP_STR['enum_values'] % re.escape(class_name), re.MULTILINE) body = values_regex.sub('', body) # add the entries and $VALUES to start of body enum_body = enum_entries + enum_body # remove the entries and values from the static block # and remove the block entirely if now empty full_static = '' if body: full_static = ' static\n {\n' + body + static_match.group('end') enum_body = _REGEXP['static'].sub(full_static, enum_body) return enum_body def _process_method(_class_name, method_name, modifiers, method_type, parameters, throws, body, end): # kill off the wierd _mthclass$ methods that JAD sticks in for some reason if method_name == '_mthclass$' and 'static' in modifiers: return '' body = _process_string(body) body = _process_if_goto(body) # rebuild method out = ' ' if modifiers: out += ' '.join(modifiers) + ' ' out += method_type + ' ' + method_name + '(' + ', '.join(parameters) + ')' if throws: out += ' throws ' + ', '.join(throws) out += '\n {\n' + body + end return out def _process_method_abstract(_class_name, method_name, modifiers, method_type, parameters, throws, end): # rebuild method out = ' ' if modifiers: out += ' '.join(modifiers) + ' ' out += method_type + ' ' + method_name + '(' + ', '.join(parameters) + ')' if throws: out += ' throws ' + ', '.join(throws) out += end return out def _process_constructor(class_type, class_name, modifiers, parameters, throws, body, end): if class_type == 'enum': if len(parameters) >= 2: if parameters[0].startswith('String ') and parameters[1].startswith('int '): parameters = parameters[2:] # empty constructor if body == '' and len(parameters) == 0: return '' else: raise ParseError('invalid initial parameters in enum %s: %s' % (class_name, str(parameters))) else: raise ParseError('not enough parameters in enum %s: %s' % (class_name, str(parameters))) body = _process_string(body) body = _process_if_goto(body) # move super calls to start of constructor and remove empty super calls def super_match(match): if match.group('parameters'): return match.group('super') + match.group('before') + match.group('after') else: return match.group('before') + match.group('after') body = _REGEXP['fix_super'].sub(super_match, body) # rebuild constructor out = ' ' if modifiers: out += ' '.join(modifiers) + ' ' out += class_name + '(' + ', '.join(parameters) + ')' if throws: out += ' throws ' + ', '.join(throws) out += '\n {\n' + body + end return out def _process_string(body): # fix up plain string appends def string1_match(match): indent = match.group('indent') src = match.group('src1') if match.group('src2'): src = '%s + %s' % (src, match.group('src2')) dest = match.group('dest2') if match.group('dest1') != 'this': dest = '%s.%s' % (match.group('dest1'), dest) return '%s%s += %s;' % (indent, dest, src) body = _REGEXP['str1'].sub(string1_match, body) # fix up string appends to an array def string2_match(match): indent = match.group('indent') src = match.group('src1') if match.group('src2'): src = '%s + %s' % (src, match.group('src2')) dest = '%s[%s]' % (match.group('dest1'), match.group('dest2')) return '%s%s += %s;' % (indent, dest, src) body = _REGEXP['str2'].sub(string2_match, body) return body def _process_if_goto(body): def if_goto_match(match): indent = match.group('indent') # depending on the following label negate the if test if match.group('label3') == match.group('label2'): test = 'if(!%s)' % match.group('test') comment = '## JADFIX %s %s' % (match.group('label2'), match.group('label1')) else: test = 'if%s' % match.group('test') comment = '## JADFIX %s %s' % (match.group('label1'), match.group('label2')) label = '%s:' % match.group('label3') return '%s%s\n%s\n%s' % (indent, test, comment, label) body = _REGEXP['if_goto'].sub(if_goto_match, body) return body def _process_file(src_file): class_name = os.path.splitext(os.path.basename(src_file))[0] tmp_file = src_file + '.tmp' with open(src_file, 'r') as fh: buf = fh.read() buf = _REGEXP['fltexp'].sub(r'0\1', buf) buf = _REGEXP['trailing'].sub(r'', buf) buf = _REGEXP['extends'].sub(r' ', buf) buf = _REGEXP['throws'].sub(r' ', buf) def params_match(match): body = re.sub(_REGEXP['params_sub'], r' ', match.group('wrapped')) return ' %s%s%s' % (match.group('main'), body, match.group('suffix')) buf = _REGEXP['params'].sub(params_match, buf) buf = _REGEXP['rename_enum'].sub(r'\1_', buf) def class_match(match): if class_name != match.group('name'): raise ParseError("file name and class name differ: '%s' '%s" % (class_name, match.group('name'))) modifiers = _REGEXP['modifiers'].findall(match.group('modifiers')) if match.group('modifiers') and not modifiers: raise ParseError("no modifiers match in %s '%s'" % (match.group('name'), match.group('modifiers'))) extends = [] if match.group('extends'): extends = _REGEXP['list'].split(match.group('extends')) implements = [] if match.group('implements'): implements = _REGEXP['list'].split(match.group('implements')) return _process_class(match.group('name'), match.group('type'), modifiers, extends, implements, match.group('body'), match.group('end')) (buf, match_count) = _REGEXP['class'].subn(class_match, buf) if not match_count: raise ParseError('no class in %s' % class_name) buf = _REGEXP['newlines'].sub(r'\n', buf) with open(tmp_file, 'w') as fh: fh.write(buf) shutil.move(tmp_file, src_file) def main(): usage = 'usage: %prog [options] src_dir' version = '%prog 6.0' parser = OptionParser(version=version, usage=usage) options, args = parser.parse_args() if len(args) != 1: print >> sys.stderr, 'src_dir required' sys.exit(1) jadfix(args[0]) if __name__ == '__main__': main()