Hello community, here is the log from the commit of package python-pefile for openSUSE:Factory checked in at 2019-04-30 13:03:45 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-pefile (Old) and /work/SRC/openSUSE:Factory/.python-pefile.new.5536 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Package is "python-pefile" Tue Apr 30 13:03:45 2019 rev:5 rq:697757 version:2019.4.18 Changes: -------- --- /work/SRC/openSUSE:Factory/python-pefile/python-pefile.changes 2018-12-24 11:41:05.425468178 +0100 +++ /work/SRC/openSUSE:Factory/.python-pefile.new.5536/python-pefile.changes 2019-04-30 13:03:48.766073591 +0200 @@ -1,0 +2,7 @@ +Wed Apr 24 11:33:25 UTC 2019 - pgajdos@suse.com + +- version update to 2019.4.18 + * speed up parsing of files with many ordinals or exports + * other merged PRs and issues fixed since the last release + +------------------------------------------------------------------- Old: ---- pefile-2018.8.8.tar.gz New: ---- pefile-2019.4.18.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-pefile.spec ++++++ --- /var/tmp/diff_new_pack.ZntLwq/_old 2019-04-30 13:03:49.434073024 +0200 +++ /var/tmp/diff_new_pack.ZntLwq/_new 2019-04-30 13:03:49.438073021 +0200 @@ -1,7 +1,7 @@ # # spec file for package python-pefile # -# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -18,7 +18,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-pefile -Version: 2018.8.8 +Version: 2019.4.18 Release: 0 Summary: A python module to work with PE (pertable executable) files License: BSD-3-Clause @@ -61,7 +61,9 @@ %python_install %python_expand %fdupes %{buildroot}%{$python_sitelib} -# Tests not in sdist +# Tests not in sdist and have good reason to at time of writing: +# https://github.com/erocarrera/pefile/issues/82#issuecomment-192018385 +# https://github.com/erocarrera/pefile/issues/171 # %%check # %%python_exec setup.py test ++++++ pefile-2018.8.8.tar.gz -> pefile-2019.4.18.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pefile-2018.8.8/.travis.yml new/pefile-2019.4.18/.travis.yml --- old/pefile-2018.8.8/.travis.yml 2017-09-03 14:48:50.000000000 +0200 +++ new/pefile-2019.4.18/.travis.yml 2019-04-14 19:30:42.000000000 +0200 @@ -1,14 +1,24 @@ language: python -python: -- '2.7' -- '3.5' -- '3.5-dev' -- '3.6' -- '3.6-dev' +cache: pip +sudo: false +matrix: + include: + - python: 2.7 + - python: 3.5 + - python: 3.6 + - python: 3.7 + dist: xenial + sudo: yes + - python: nightly + dist: xenial + sudo: yes + - python: pypy + - python: pypy3 before_install: -- openssl aes-256-cbc -K $encrypted_a288ee1b388d_key -iv $encrypted_a288ee1b388d_iv - -in tests/test_data.tar.bz2.enc -out tests/test_data.tar.bz2 -d -- tar jxf tests/test_data.tar.bz2 -C tests +- if test -n "$encrypted_a288ee1b388d_key" && test -n "$encrypted_a288ee1b388d_iv"; then + openssl aes-256-cbc -K $encrypted_a288ee1b388d_key -iv $encrypted_a288ee1b388d_iv -in tests/test_data.tar.bz2.enc -out tests/test_data.tar.bz2 -d; + tar jxf tests/test_data.tar.bz2 -C tests; + fi install: - pip install -U pip pytest-cov codecov - pip install -I -e . diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pefile-2018.8.8/LICENSE new/pefile-2019.4.18/LICENSE --- old/pefile-2018.8.8/LICENSE 2018-08-08 09:28:09.000000000 +0200 +++ new/pefile-2019.4.18/LICENSE 2019-01-21 22:44:23.000000000 +0100 @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2004-2018 Ero Carrera +Copyright (c) 2004-2019 Ero Carrera Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pefile-2018.8.8/PKG-INFO new/pefile-2019.4.18/PKG-INFO --- old/pefile-2018.8.8/PKG-INFO 2018-08-08 10:00:27.000000000 +0200 +++ new/pefile-2019.4.18/PKG-INFO 2019-04-18 18:19:38.000000000 +0200 @@ -1,12 +1,12 @@ Metadata-Version: 1.1 Name: pefile -Version: 2018.8.8 +Version: 2019.4.18 Summary: Python PE parsing module Home-page: https://github.com/erocarrera/pefile Author: Ero Carrera Author-email: ero.carrera@gmail.com License: UNKNOWN -Download-URL: https://github.com/erocarrera/pefile/releases/download/v2018.8.8/pefile-2018... +Download-URL: https://github.com/erocarrera/pefile/releases/download/v2019.4.18/pefile-201... Description: pefile, Portable Executable reader module All the PE file basic structures are available with their default names as @@ -20,9 +20,7 @@ standard use. To the best of my knowledge most of the abuse is handled gracefully. - Copyright (c) 2005-2018 Ero Carrera <ero.carrera@gmail.com> - - All rights reserved. + Copyright (c) 2005-2019 Ero Carrera <ero.carrera@gmail.com> Keywords: pe,exe,dll,pefile,pecoff Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pefile-2018.8.8/README new/pefile-2019.4.18/README --- old/pefile-2018.8.8/README 2018-08-08 09:34:31.000000000 +0200 +++ new/pefile-2019.4.18/README 2019-04-14 19:26:16.000000000 +0200 @@ -31,13 +31,13 @@ ## Recent changes -Prompted by the move to GitHub, the need to support Python 3 in addition to resoling a slew of pending issues (some having to do with the old versioning scheme), _pefile_ has changed its version number scheme and from now on it will be using the release date as its version. +Prompted by the move to GitHub, the need to support Python 3 in addition to resolving a slew of pending issues (some having to do with the old versioning scheme), _pefile_ has changed its version number scheme and from now on it will be using the release date as its version. ## Projects and products using _pefile_ - * Didier Stevens' [pecheck](https://blog.didierstevens.com/2018/06/12/update-pecheck-py-version-0-7-3/), a tool for displaying PE file info, handles for PEiD files better then _pefile_ does - * [MAEC](http://maec.mitre.org) MAEC is a standardized language for encoding and communicating high-fidelity information about malware based upon attributes such as behaviors, artifacts, and attack patterns. MAEC [converts](https://github.com/MAECProject/pefile-to-maec) _pefile_'s output into their XML format. - * [Qiew](https://github.com/mtivadar/qiew) Qiew - Hex/File format viewer. + * Didier Stevens' [pecheck](https://blog.didierstevens.com/2018/06/12/update-pecheck-py-version-0-7-3/), a tool for displaying PE file info, handles PEiD files better then _pefile_ does + * [MAEC](http://maec.mitre.org) is a standardized language for encoding and communicating high-fidelity information about malware based upon attributes such as behaviors, artifacts, and attack patterns. MAEC [converts](https://github.com/MAECProject/pefile-to-maec) _pefile_'s output into their XML format. + * [Qiew](https://github.com/mtivadar/qiew) - Hex/File format viewer. * [VirusTotal](http://www.virustotal.com/) * [bbfreeze](http://pypi.python.org/pypi/bbfreeze) * **pyemu**: [download](https://www.openrce.org/repositories/browse/codypierce), [whitepaper](https://www.blackhat.com/presentations/bh-usa-07/Pierce/Whitepaper/bh-usa-07...) @@ -45,7 +45,7 @@ * [Immunity Debugger 1.1](https://www.openrce.org/blog/view/882/Immunity_Debugger_v1.1_Release) * [PyInstaller](http://www.pyinstaller.org/) * [Cuckoo](http://docs.cuckoosandbox.org/en/latest/) - * [MultiScanner](https://github.com/mitre/multiscanner) + * [MultiScanner](https://github.com/MITRECND/multiscanner) ## Additional resources diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pefile-2018.8.8/pefile.egg-info/PKG-INFO new/pefile-2019.4.18/pefile.egg-info/PKG-INFO --- old/pefile-2018.8.8/pefile.egg-info/PKG-INFO 2018-08-08 10:00:27.000000000 +0200 +++ new/pefile-2019.4.18/pefile.egg-info/PKG-INFO 2019-04-18 18:19:37.000000000 +0200 @@ -1,12 +1,12 @@ Metadata-Version: 1.1 Name: pefile -Version: 2018.8.8 +Version: 2019.4.18 Summary: Python PE parsing module Home-page: https://github.com/erocarrera/pefile Author: Ero Carrera Author-email: ero.carrera@gmail.com License: UNKNOWN -Download-URL: https://github.com/erocarrera/pefile/releases/download/v2018.8.8/pefile-2018... +Download-URL: https://github.com/erocarrera/pefile/releases/download/v2019.4.18/pefile-201... Description: pefile, Portable Executable reader module All the PE file basic structures are available with their default names as @@ -20,9 +20,7 @@ standard use. To the best of my knowledge most of the abuse is handled gracefully. - Copyright (c) 2005-2018 Ero Carrera <ero.carrera@gmail.com> - - All rights reserved. + Copyright (c) 2005-2019 Ero Carrera <ero.carrera@gmail.com> Keywords: pe,exe,dll,pefile,pecoff Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pefile-2018.8.8/pefile.py new/pefile-2019.4.18/pefile.py --- old/pefile-2018.8.8/pefile.py 2018-08-08 09:37:29.000000000 +0200 +++ new/pefile-2019.4.18/pefile.py 2019-04-18 18:17:42.000000000 +0200 @@ -13,9 +13,7 @@ standard use. To the best of my knowledge most of the abuse is handled gracefully. -Copyright (c) 2005-2018 Ero Carrera <ero.carrera@gmail.com> - -All rights reserved. +Copyright (c) 2005-2019 Ero Carrera <ero.carrera@gmail.com> """ from __future__ import division @@ -25,21 +23,19 @@ from builtins import object from builtins import range from builtins import str -from builtins import zip __author__ = 'Ero Carrera' -__version__ = '2018.8.8' +__version__ = '2019.4.18' __contact__ = 'ero.carrera@gmail.com' +import collections import os import struct import sys import codecs import time import math -import re import string -import array import mmap import ordlookup @@ -75,6 +71,12 @@ MAX_DLL_LENGTH = 0x200 MAX_SYMBOL_NAME_LENGTH = 0x200 +# Lmit maximum number of sections before processing of sections will stop +MAX_SECTIONS = 0x800 + +# Limit number of exported symbols +MAX_SYMBOL_EXPORT_COUNT = 0x2000 + IMAGE_DOS_SIGNATURE = 0x5A4D IMAGE_DOSZM_SIGNATURE = 0x4D5A IMAGE_NE_SIGNATURE = 0x454E @@ -307,6 +309,7 @@ DLL_CHARACTERISTICS = dict( [(e[1], e[0]) for e in dll_characteristics]+dll_characteristics) +FILE_ALIGNMENT_HARDCODED_VALUE = 0x200 # Resource types resource_type = [ @@ -597,8 +600,8 @@ matching the filter "flag_filter". """ - return [(f[0], f[1]) for f in list(flag_dict.items()) if - isinstance(f[0], (str, bytes)) and f[0].startswith(flag_filter)] + return [(flag, value) for flag, value in list(flag_dict.items()) if + isinstance(flag, (str, bytes)) and flag.startswith(flag_filter)] def set_flags(obj, flag_field, flags): @@ -609,13 +612,11 @@ flag value from "flags" to flag_field. """ - for flag in flags: - if flag[1] & flag_field: - #setattr(obj, flag[0], True) - obj.__dict__[flag[0]] = True + for flag, value in flags: + if value & flag_field: + obj.__dict__[flag] = True else: - #setattr(obj, flag[0], False) - obj.__dict__[flag[0]] = False + obj.__dict__[flag] = False def power_of_two(val): @@ -648,12 +649,6 @@ return codecs.encode(x, 'cp1252') -FILE_ALIGNMENT_HARDCODED_VALUE = 0x200 -FileAlignment_Warning = False # We only want to print the warning once -SectionAlignment_Warning = False # We only want to print the warning once - - - class UnicodeStringWrapperPostProcessor(object): """This class attempts to help the process of identifying strings that might be plain Unicode or Pascal. A list of strings will be @@ -909,7 +904,7 @@ new_val = getattr(self, key) old_val = self.__unpacked_data_elms__[i] - # In the case of Unions, when the first changed value + # In the case of unions, when the first changed value # is picked the loop is exited if new_val != old_val: break @@ -942,7 +937,10 @@ val = getattr(self, key) if isinstance(val, (int, long)): - val_str = '0x%-8X' % (val) + if key.startswith('Signature_'): + val_str = '%-8X' % (val) + else: + val_str = '0x%-8X' % (val) if key == 'TimeDateStamp' or key == 'dwTimeStamp': try: val_str += ' [%s UTC]' % time.asctime(time.gmtime(val)) @@ -950,7 +948,11 @@ val_str += ' [INVALID TIME]' else: val_str = bytearray(val) - val_str = ''.join( + if key.startswith('Signature'): + val_str = ''.join( + ['{:02X}'.format(i) for i in val_str.rstrip(b'\x00')]) + else: + val_str = ''.join( [chr(i) if (i in printable_bytes) else '\\x{0:02x}'.format(i) for i in val_str.rstrip(b'\x00')]) @@ -1007,10 +1009,10 @@ Allows to query data from the section by passing the addresses where the PE file would be loaded by default. - It is then possible to retrieve code and data by its real - addresses as it would be if loaded. + It is then possible to retrieve code and data by their real + addresses as they would be if loaded. - Returns bytes() under Python 3.x and set() under 2.7 + Returns bytes() under Python 3.x and set() under Python 2.7 """ PointerToRawData_adj = self.pe.adjust_FileAlignment( self.PointerToRawData, @@ -1089,7 +1091,7 @@ # Check if the SizeOfRawData is realistic. If it's bigger than the size of # the whole PE file minus the start address of the section it could be - # either truncated or the SizeOfRawData contain a misleading value. + # either truncated or the SizeOfRawData contains a misleading value. # In either of those cases we take the VirtualSize # if len(self.pe.__data__) - self.pe.adjust_FileAlignment( self.PointerToRawData, @@ -1108,7 +1110,7 @@ self.pe.OPTIONAL_HEADER.SectionAlignment, self.pe.OPTIONAL_HEADER.FileAlignment ) # Check whether there's any section after the current one that starts before the - # calculated end for the current one, if so, cut the current section's size + # calculated end for the current one. If so, cut the current section's size # to fit in the range up to where the next section starts. if (self.next_section_virtual_address is not None and self.next_section_virtual_address > self.VirtualAddress and @@ -1160,7 +1162,7 @@ def entropy_H(self, data): """Calculate the entropy of a chunk of data.""" - if len(data) == 0: + if not data: return 0.0 occurences = Counter(bytearray(data)) @@ -1349,7 +1351,7 @@ """Holds relocation information. type: Type of relocation - The type string is can be obtained by + The type string can be obtained by RELOCATION_TYPE[type] rva: RVA of the relocation """ @@ -1367,9 +1369,7 @@ if name == 'type': word = (val << 12) | (word & 0xfff) elif name == 'rva': - offset = val-self.base_rva - if offset < 0: - offset = 0 + offset = max(val-self.base_rva, 0) word = ( word & 0xf000) | ( offset & 0xfff) # Store the modified data @@ -1387,8 +1387,8 @@ class BoundImportDescData(DataContainer): """Holds bound import descriptor data. - This directory entry will provide with information on the - DLLs this PE files has been bound to (if bound at all). + This directory entry will provide information on the + DLLs this PE file has been bound to (if bound at all). The structure will contain the name and timestamp of the DLL at the time of binding so that the loader can know whether it differs from the one currently present in the @@ -1442,13 +1442,10 @@ return False # Allow path separators as import names can contain directories. allowed = allowed_filename + b'\\/' - for c in set(s): - if c not in allowed: - return False - return True + return all(c in allowed for c in set(s)) -# Check if a imported name uses the valid accepted characters expected in mangled +# Check if an imported name uses the valid accepted characters expected in mangled # function names. If the symbol's characters don't fall within this charset # we will assume the name is invalid # @@ -1462,13 +1459,9 @@ string.digits + b'_?@$()<>') def is_valid_function_name(s): - if s is None or not isinstance(s, (str, bytes, bytearray)): - return False - for c in set(s): - if c not in allowed_function_name: - return False - return True - + return (s is not None and + isinstance(s, (str, bytes, bytearray)) and + all(c in allowed_function_name for c in set(s))) class PE(object): @@ -1484,8 +1477,8 @@ pe = pefile.PE('module.dll') pe = pefile.PE(name='module.dll') - would load 'module.dll' and process it. If the data would be already - available in a buffer the same could be achieved with: + would load 'module.dll' and process it. If the data is already + available in a buffer the same can be achieved with: pe = pefile.PE(data=module_dll_data) @@ -1752,8 +1745,11 @@ self.__structures__ = [] self.__from_file = None - if not fast_load: - fast_load = globals()['fast_load'] + # We only want to print these warnings once + self.FileAlignment_Warning = False + self.SectionAlignment_Warning = False + + fast_load = fast_load or globals()['fast_load'] try: self.__parse__(name, data, fast_load) except: @@ -1772,7 +1768,7 @@ def __unpack_data__(self, format, data, file_offset): """Apply structure format to raw data. - Returns and unpacked structure object if successful, None otherwise. + Returns an unpacked structure object if successful, None otherwise. """ structure = Structure(format, file_offset=file_offset) @@ -1814,8 +1810,7 @@ self.__from_file = True except IOError as excp: exception_msg = '{0}'.format(excp) - if exception_msg: - exception_msg = ': %s' % exception_msg + exception_msg = exception_msg and (': %s' % exception_msg) raise Exception('Unable to access file \'{0}\'{1}'.format(fname, exception_msg)) finally: if fd is not None: @@ -2312,6 +2307,10 @@ self.sections = [] MAX_SIMULTANEOUS_ERRORS = 3 for i in range(self.FILE_HEADER.NumberOfSections): + if i >= MAX_SECTIONS: + self.__warnings.append("Too many sections {0} (>={1})".format( + self.FILE_HEADER.NumberOfSections, MAX_SECTIONS)) + break simultaneous_errors = 0 section = SectionStructure( self.__IMAGE_SECTION_HEADER_format__, pe=self ) if not section: @@ -2324,7 +2323,7 @@ self.__warnings.append( 'Invalid section {0}. Contents are null-bytes.'.format(i)) break - if len(section_data) == 0: + if not section_data: self.__warnings.append( 'Invalid section {0}. No data in the file (is this corkami\'s virtsectblXP?).'.format(i)) break @@ -2375,7 +2374,7 @@ if ( section.__dict__.get('IMAGE_SCN_MEM_WRITE', False) and section.__dict__.get('IMAGE_SCN_MEM_EXECUTE', False) ): - if section.Name == 'PAGE' and self.is_driver(): + if section.Name.rstrip(b'\x00') == b'PAGE' and self.is_driver(): # Drivers can have a PAGE section with those flags set without # implying that it is malicious pass @@ -2556,7 +2555,7 @@ # OffsetModuleName points to a DLL name. These shouldn't be too long. # Anything longer than a safety length of 128 will be taken to indicate # a corrupt entry and abort the processing of these entries. - # Names shorted than 4 characters will be taken as invalid as well. + # Names shorter than 4 characters will be taken as invalid as well. if name_str: invalid_chars = [ @@ -2735,7 +2734,7 @@ if (reloc_offset, reloc_type) in offsets_and_type: self.__warnings.append( 'Overlapping offsets in relocation data ' - 'data at RVA: 0x%x' % (reloc_offset+rva)) + 'at RVA: 0x%x' % (reloc_offset+rva)) break if len(offsets_and_type) >= 1000: offsets_and_type.pop() @@ -2796,9 +2795,9 @@ 'I,Signature_Data1', # Signature is of GUID type 'H,Signature_Data2', 'H,Signature_Data3', - 'H,Signature_Data4', - 'H,Signature_Data5', - 'I,Signature_Data6', + '8s,Signature_Data4', + # 'H,Signature_Data5', + # 'I,Signature_Data6', 'I,Age']] pdbFileName_size = ( dbg_type_size - @@ -2933,7 +2932,7 @@ self.__IMAGE_RESOURCE_DIRECTORY_format__, data, file_offset = self.get_offset_from_rva(rva) ) if resource_dir is None: - # If can't parse resources directory then silently return. + # If we can't parse resources directory then silently return. # This directory does not necessarily have to be valid to # still have a valid PE file self.__warnings.append( @@ -3105,14 +3104,14 @@ # Check if this entry contains version information # if level == 0 and res.Id == RESOURCE_TYPE['RT_VERSION']: - if len(dir_entries)>0: + if dir_entries: last_entry = dir_entries[-1] try: version_entries = last_entry.directory.entries[0].directory.entries except: # Maybe a malformed directory structure...? - # Lets ignore it + # Let's ignore it pass else: for version_entry in version_entries: @@ -3121,7 +3120,7 @@ rt_version_struct = version_entry.data.struct except: # Maybe a malformed directory structure...? - # Lets ignore it + # Let's ignore it pass if rt_version_struct is not None: @@ -3292,7 +3291,7 @@ # Set the PE object's VS_VERSIONINFO to this one vinfo = versioninfo_struct - # The the Key attribute to point to the unicode string identifying the structure + # Set the Key attribute to point to the unicode string identifying the structure vinfo.Key = versioninfo_string self.VS_VERSIONINFO.append(vinfo) @@ -3607,7 +3606,7 @@ section.VirtualAddress + len(section.get_data()) - export_dir.AddressOfNames) - symbol_counter = Counter() + symbol_counts = collections.defaultdict(int) export_parsing_loop_completed_normally = True for i in range(min(export_dir.NumberOfNames, int(safety_boundary / 4))): symbol_ordinal = self.get_word_from_data( @@ -3669,12 +3668,19 @@ # File 0b1d3d3664915577ab9a32188d29bbf3542b86c7b9ce333e245496c3018819f1 # was being parsed as potentially containing millions of exports. # Checking for duplicates addresses the issue. - most_common = symbol_counter.most_common(1) - if most_common and most_common[0][1] > 10: + symbol_counts[(symbol_name, symbol_address)] += 1 + if symbol_counts[(symbol_name, symbol_address)] > 10: self.__warnings.append( - 'Export directory contains more than 10 repeated entries. Assuming corrupt.') + 'Export directory contains more than 10 repeated entries ' + '({:s}, 0x{:x}). Assuming corrupt.'.format( + symbol_name, symbol_address)) + break + elif len(symbol_counts) > MAX_SYMBOL_EXPORT_COUNT: + self.__warnings.append( + 'Export directory contains more than {} symbol entries. ' + 'Assuming corrupt.'.format( + MAX_SYMBOL_EXPORT_COUNT)) break - symbol_counter[(symbol_name, symbol_address)] += 1 exports.append( ExportData( @@ -3693,7 +3699,7 @@ 'RVA AddressOfNames in the export directory points to an invalid address: %x' % export_dir.AddressOfNames) - ordinals = [exp.ordinal for exp in exports] + ordinals = {exp.ordinal for exp in exports} max_failed_entries_before_giving_up = 10 @@ -3705,7 +3711,7 @@ section.VirtualAddress + len(section.get_data()) - export_dir.AddressOfFunctions) - symbol_counter = Counter() + symbol_counts = collections.defaultdict(int) export_parsing_loop_completed_normally = True for idx in range(min( export_dir.NumberOfFunctions, @@ -3736,12 +3742,19 @@ # File 0b1d3d3664915577ab9a32188d29bbf3542b86c7b9ce333e245496c3018819f1 # was being parsed as potentially containing millions of exports. # Checking for duplicates addresses the issue. - most_common = symbol_counter.most_common(1) - if most_common and most_common[0][1] > 10: + symbol_counts[symbol_address] += 1 + if symbol_counts[symbol_address] > 10: + # if most_common and most_common[0][1] > 10: self.__warnings.append( - 'Export directory contains more than 10 repeated ordinal entries. Assuming corrupt.') + 'Export directory contains more than 10 repeated ' + 'ordinal entries (0x{:x}). Assuming corrupt.'.format( + symbol_address)) + break + elif len(symbol_counts) > MAX_SYMBOL_EXPORT_COUNT: + self.__warnings.append( + 'Export directory contains more than {} ordinal entries. Assuming corrupt.'.format( + MAX_SYMBOL_EXPORT_COUNT)) break - symbol_counter[symbol_address] += 1 exports.append( ExportData( @@ -3793,7 +3806,7 @@ rva += import_desc.sizeof() - # If the array of thunk's is somewhere earlier than the import + # If the array of thunks is somewhere earlier than the import # descriptor we can set a maximum length for the array. Otherwise # just set a maximum length of the size of the file max_len = len(self.__data__) - file_offset @@ -3903,7 +3916,7 @@ rva += import_desc.sizeof() - # If the array of thunk's is somewhere earlier than the import + # If the array of thunks is somewhere earlier than the import # descriptor we can set a maximum length for the array. Otherwise # just set a maximum length of the size of the file max_len = len(self.__data__) - file_offset @@ -3982,7 +3995,7 @@ It will fill a list, which will be available as the dictionary attribute "imports". Its keys will be the DLL names and the values - all the symbols imported from that object. + of all the symbols imported from that object. """ imported_symbols = [] @@ -3990,7 +4003,7 @@ # Import Lookup Table. Contains ordinals or pointers to strings. ilt = self.get_import_table(original_first_thunk, max_length) # Import Address Table. May have identical content to ILT if - # PE file is not bounded, Will contain the address of the + # PE file is not bound. It will contain the address of the # imported symbols once the binary is loaded or if it is already # bound. iat = self.get_import_table(first_thunk, max_length) @@ -4263,18 +4276,18 @@ if section.Misc_VirtualSize == 0 and section.SizeOfRawData == 0: continue - if section.SizeOfRawData > len(self.__data__): - continue - - if self.adjust_FileAlignment( section.PointerToRawData, - self.OPTIONAL_HEADER.FileAlignment ) > len(self.__data__): - - continue - - VirtualAddress_adj = self.adjust_SectionAlignment( section.VirtualAddress, - self.OPTIONAL_HEADER.SectionAlignment, self.OPTIONAL_HEADER.FileAlignment ) + srd = section.SizeOfRawData + prd = self.adjust_FileAlignment( + section.PointerToRawData, self.OPTIONAL_HEADER.FileAlignment) + VirtualAddress_adj = self.adjust_SectionAlignment( + section.VirtualAddress, + self.OPTIONAL_HEADER.SectionAlignment, + self.OPTIONAL_HEADER.FileAlignment ) - if VirtualAddress_adj >= max_virtual_address: + if (srd > len(self.__data__) or + prd > len(self.__data__) or + srd + prd > len(self.__data__) or + VirtualAddress_adj >= max_virtual_address): continue padding_length = VirtualAddress_adj - len(mapped_data) @@ -4482,10 +4495,9 @@ def get_section_by_rva(self, rva): """Get the section containing the given address.""" - sections = [s for s in self.sections if s.contains_rva(rva)] - - if sections: - return sections[0] + for section in self.sections: + if section.contains_rva(rva): + return section return None @@ -4602,7 +4614,7 @@ dump.add_lines(self.VS_FIXEDFILEINFO[idx].dump()) dump.add_newline() - if hasattr(self, 'FileInfo'): + if hasattr(self, 'FileInfo') and len(self.FileInfo) > idx: for entry in self.FileInfo[idx]: dump.add_lines(entry.dump()) dump.add_newline() @@ -4643,7 +4655,7 @@ name = b('None') if export.name: name = export.name - dump.add(u'%-10d 0x%08Xh %s' % ( + dump.add(u'%-10d 0x%08X %s' % ( export.ordinal, export.address, name.decode(encoding))) if export.forwarder: dump.add_line(u' forwarder: {0}'.format( @@ -4776,7 +4788,7 @@ dump.add_lines(resource_lang.data.struct.dump(), 12) if hasattr(resource_id.directory, 'strings') and resource_id.directory.strings: dump.add_line(u'[STRINGS]' , 10 ) - for idx, res_string in list(resource_id.directory.strings.items()): + for idx, res_string in list(sorted(resource_id.directory.strings.items())): dump.add_line( '{0:6d}: {1}'.format(idx, res_string.encode( 'unicode-escape', @@ -5307,7 +5319,7 @@ if not isinstance(data, bytes): raise TypeError('data should be of type: bytes') - if offset >= 0 and offset < len(self.__data__): + if 0 <= offset < len(self.__data__): self.__data__ = ( self.__data__[:offset] + data + self.__data__[offset+len(data):] ) else: return False @@ -5574,7 +5586,7 @@ driver_like_section_names = set( ('page', 'paged')) if driver_like_section_names.intersection( - [section.Name.lower() for section in self.sections]) and ( + [section.Name.lower().rstrip(b'\x00') for section in self.sections]) and ( self.OPTIONAL_HEADER.Subsystem in ( SUBSYSTEM_TYPE['IMAGE_SUBSYSTEM_NATIVE'], SUBSYSTEM_TYPE['IMAGE_SUBSYSTEM_NATIVE_WINDOWS'])): @@ -5655,15 +5667,14 @@ # size, then FileAlignment must match SectionAlignment." # # The following is a hard-coded constant if the Windows loader - def adjust_FileAlignment( self, val, file_alignment ): - global FileAlignment_Warning + def adjust_FileAlignment(self, val, file_alignment ): if file_alignment > FILE_ALIGNMENT_HARDCODED_VALUE: # If it's not a power of two, report it: - if not power_of_two(file_alignment) and FileAlignment_Warning is False: + if not power_of_two(file_alignment) and self.FileAlignment_Warning is False: self.__warnings.append( 'If FileAlignment > 0x200 it should be a power of 2. Value: %x' % ( file_alignment) ) - FileAlignment_Warning = True + self.FileAlignment_Warning = True if file_alignment < FILE_ALIGNMENT_HARDCODED_VALUE: return val @@ -5677,13 +5688,12 @@ # architecture." # def adjust_SectionAlignment( self, val, section_alignment, file_alignment ): - global SectionAlignment_Warning if file_alignment < FILE_ALIGNMENT_HARDCODED_VALUE: - if file_alignment != section_alignment and SectionAlignment_Warning is False: + if file_alignment != section_alignment and self.SectionAlignment_Warning is False: self.__warnings.append( 'If FileAlignment(%x) < 0x200 it should equal SectionAlignment(%x)' % ( file_alignment, section_alignment) ) - SectionAlignment_Warning = True + self.SectionAlignment_Warning = True if section_alignment < 0x1000: # page size section_alignment = file_alignment diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pefile-2018.8.8/peutils.py new/pefile-2019.4.18/peutils.py --- old/pefile-2018.8.8/peutils.py 2018-01-20 21:56:54.000000000 +0100 +++ new/pefile-2019.4.18/peutils.py 2018-10-20 20:01:03.000000000 +0200 @@ -148,7 +148,7 @@ If ep_only is True the result will be a string with the packer name. Otherwise it will be a list of the - form (file_ofsset, packer_name). Specifying where + form (file_offset, packer_name) specifying where in the file the signature was found. """ @@ -551,8 +551,8 @@ """Returns True is there is a high likelihood that a file is packed or contains compressed data. The sections of the PE file will be analyzed, if enough sections - look like containing containing compressed data and the data makes - up for more than 20% of the total file size. The function will + look like containing compressed data and the data makes + up for more than 20% of the total file size, the function will return True. """ @@ -572,7 +572,7 @@ for section in pe.sections: s_entropy = section.get_entropy() s_length = len( section.get_data() ) - # The value of 7.4 is empircal, based of looking at a few files packed + # The value of 7.4 is empircal, based on looking at a few files packed # by different packers if s_entropy > 7.4: total_compressed_data += s_length diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pefile-2018.8.8/setup.cfg new/pefile-2019.4.18/setup.cfg --- old/pefile-2018.8.8/setup.cfg 2018-08-08 10:00:27.000000000 +0200 +++ new/pefile-2019.4.18/setup.cfg 2019-04-18 18:19:38.000000000 +0200 @@ -1,5 +1,4 @@ [egg_info] tag_build = tag_date = 0 -tag_svn_revision = 0 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/pefile-2018.8.8/setup.py new/pefile-2019.4.18/setup.py --- old/pefile-2018.8.8/setup.py 2018-08-08 09:38:03.000000000 +0200 +++ new/pefile-2019.4.18/setup.py 2019-04-18 18:18:19.000000000 +0200 @@ -53,28 +53,18 @@ class TestCommand(Command): - """Run tests.""" - user_options = [] + """Run tests.""" + user_options = [] - def initialize_options(self): - pass + def initialize_options(self): + pass - def finalize_options(self): - pass + def finalize_options(self): + pass -class TestCommand(Command): - """Run tests.""" - user_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - test_suite = TestLoader().discover('./tests', pattern='*_test.py') - test_results = TextTestRunner(verbosity=2).run(test_suite) + def run(self): + test_suite = TestLoader().discover('./tests', pattern='*_test.py') + test_results = TextTestRunner(verbosity=2).run(test_suite) setup(name = 'pefile', @@ -83,7 +73,7 @@ author = _read_attr('__author__'), author_email = _read_attr('__contact__'), url = 'https://github.com/erocarrera/pefile', - download_url='https://github.com/erocarrera/pefile/releases/download/v2018.8.8/pefile-2018...', + download_url='https://github.com/erocarrera/pefile/releases/download/v2019.4.18/pefile-201...', keywords = ['pe', 'exe', 'dll', 'pefile', 'pecoff'], classifiers = [ 'Development Status :: 5 - Production/Stable',