Source code for penguin.arch

import dataclasses
from typing import Optional
from elftools.elf.constants import E_FLAGS, E_FLAGS_MASKS
from penguin import getColoredLogger

logger = getColoredLogger("penguin.arch")


@dataclasses.dataclass
class ArchInfo:
    """
    Result of identifying the architecture of an ELF binary.

    Every field except ``abi`` defaults to ``None``, which ``__str__``
    renders as ``unknown``.
    """

    arch: Optional[str] = None
    abi: str = "default"
    bits: Optional[int] = None

    # The remaining fields are only used on MIPS
    endianness: Optional[str] = None
    description: Optional[str] = None

    def __str__(self):
        """Render all fields as ``name=value`` pairs joined by ``", "``."""
        rendered = []
        for spec in dataclasses.fields(self):
            current = getattr(self, spec.name)
            shown = "unknown" if current is None else str(current)
            rendered.append(f"{spec.name}={shown}")
        return ", ".join(rendered)
def arch_end(value):
    """
    Split an architecture name into an ``(arch, endianness)`` tuple.

    The name is lower-cased and matched against a list of known prefixes.
    Some architectures carry a default endianness; a trailing ``el`` or
    ``eb`` on the name always overrides that default.

    If either element cannot be determined an error is logged and the tuple
    is still returned, with ``None`` in the unresolved position(s).
    """
    lowered = value.lower()

    # (prefix, canonical arch, default endianness or None).
    # The first matching prefix wins, so "mips64" must precede "mips".
    known_prefixes = (
        ("mips64", "mips64", None),
        ("mips", "mips", None),
        ("aarch64", "aarch64", "el"),
        ("arm", "arm", None),
        ("intel", "intel64", "el"),
        ("riscv32", "riscv64", "el"),  # just use riscv64
        ("riscv64", "riscv64", "el"),
        ("ppc", "powerpc64", "eb"),  # every ppc* name maps to powerpc64, eb by default
        ("loongarch64", "loongarch64", "el"),
    )

    arch, end = None, None
    for prefix, canonical, default_end in known_prefixes:
        if lowered.startswith(prefix):
            arch, end = canonical, default_end
            break

    # An explicit suffix takes priority over the per-architecture default.
    for suffix in ("el", "eb"):
        if lowered.endswith(suffix):
            end = suffix
            break

    if arch is None or end is None:
        logger.error(f"Unhandled arch_end for {value}. Have arch={arch}, end={end}")

    return (arch, end)
def _elf_bits(header):
    """Return 64 if the ELF header's ``EI_CLASS`` is ``ELFCLASS64``, else 32."""
    return 64 if header.e_ident["EI_CLASS"] == "ELFCLASS64" else 32


def _identify_arm_arch(elf):
    """
    Check for hard/soft float

    Looks for a ``TAG_ABI_VFP_ARGS`` attribute with value 1 anywhere in the
    ``.ARM.attributes`` section; if the section is missing or no such
    attribute exists the binary is reported as soft-float. The returned
    ``ArchInfo`` always uses ``arch="armel"``.
    """
    attributes = elf.get_section_by_name(".ARM.attributes")
    # Distinct names per nesting level: the section, its subsections, and
    # their sub-subsections are different objects.
    hard_float = attributes is not None and any(
        (attr.tag, attr.value) == ("TAG_ABI_VFP_ARGS", 1)
        for subsection in attributes.iter_subsections()
        for subsubsection in subsection.iter_subsubsections()
        for attr in subsubsection.iter_attributes()
    )

    return ArchInfo(
        arch="armel",
        abi="hard_float" if hard_float else "soft_float",
        bits=_elf_bits(elf.header),
    )


def _identify_mips_arch(header):
    """
    Mips is more complicated. We could have 32 bit binaries that only run on a
    64-bit system (i.e., mips64 with the n32 ABI). Other permutations will
    likely cause issues later so trying to future-proof this a bit.

    Masks/comparisons based off readelf.py from PyElfTools.

    Returns an ``ArchInfo`` whose ``arch`` is one of ``mipsel``, ``mipseb``,
    ``mips64el``, ``mips64eb``, or ``None`` (with an error logged) when the
    bits/endianness combination is not recognised.
    """
    endianness = header.e_ident["EI_DATA"]
    flags = header["e_flags"]

    # ISA level is only used for the debug log below. Unrecognised values
    # fall back to "unknown" instead of leaving the name unbound.
    isa_names = {
        E_FLAGS.EF_MIPS_ARCH_1: "mips1",
        E_FLAGS.EF_MIPS_ARCH_2: "mips2",
        E_FLAGS.EF_MIPS_ARCH_3: "mips3",
        E_FLAGS.EF_MIPS_ARCH_4: "mips4",
        E_FLAGS.EF_MIPS_ARCH_5: "mips5",
        E_FLAGS.EF_MIPS_ARCH_32R2: "mips32r2",
        E_FLAGS.EF_MIPS_ARCH_64R2: "mips64r2",
        E_FLAGS.EF_MIPS_ARCH_32: "mips32",
        E_FLAGS.EF_MIPS_ARCH_64: "mips64",
    }
    mips_arch = isa_names.get(flags & E_FLAGS.EF_MIPS_ARCH, "unknown")

    # Some extra flags that only affect what gets printed
    extras = ["mips"]
    for flag, label in (
        (E_FLAGS.EF_MIPS_NOREORDER, "noreorder"),
        (E_FLAGS.EF_MIPS_PIC, "pic"),
        (E_FLAGS.EF_MIPS_CPIC, "cpic"),
        (E_FLAGS.EF_MIPS_ABI2, "abi2"),
        (E_FLAGS.EF_MIPS_32BITMODE, "32bitmode"),
    ):
        if flags & flag:
            extras.append(label)
    description = ", ".join(extras)

    bits = _elf_bits(header)

    # GDB's source code is a good resource for MIPS ABI identification:
    # https://github.com/bminor/binutils-gdb/blob/master/gdb/mips-tdep.c
    # NOTE(review): these are plain bitwise tests, so an ABI field value that
    # shares bits with the O32/O64 masks would also match here - confirm
    # against the mask definitions before relying on this for other ABIs.
    if flags & E_FLAGS_MASKS.EFM_MIPS_ABI_O32:
        abi = "o32"
    elif flags & E_FLAGS_MASKS.EFM_MIPS_ABI_O64:
        abi = "o64"  # never seen this before - unsupported for now?
    elif flags & 0x20:  # presumably the EF_MIPS_ABI2 bit - TODO confirm
        abi = "n32"
        bits = 64  # Even though n32 is 32-bit, it only runs on 64-bit CPUs
    elif bits == 32:
        abi = "o32"  # Default 32-bit ABI
    else:
        # _elf_bits only ever returns 32 or 64, so this is the 64-bit case.
        abi = "n64"  # Default 64-bit ABI

    logger.debug(
        f"Identified MIPS firmware: arch={mips_arch}, bits={bits}, abi={abi}, endian={endianness}, extras={description}"
    )

    arch = {
        (32, "ELFDATA2LSB"): "mipsel",
        (32, "ELFDATA2MSB"): "mipseb",
        (64, "ELFDATA2LSB"): "mips64el",
        (64, "ELFDATA2MSB"): "mips64eb",
    }.get((bits, endianness))
    if arch is None:
        logger.error(
            "Unexpected MIPS architecture: bits %d, endianness %s", bits, endianness
        )

    return ArchInfo(
        arch=arch, abi=abi, bits=bits, endianness=endianness, description=description
    )
def arch_filter(elf):
    """
    Identify the architecture of an ELF file and return it as an ``ArchInfo``.

    An empty ``ArchInfo()`` is returned when ``e_machine`` is not a string or
    names an architecture that is not in the lookup table below. ARM and MIPS
    are delegated to their dedicated helpers; the remaining architectures are
    resolved here from the header's class and data-encoding fields.
    """
    header = elf.header

    if not isinstance(header.e_machine, str):
        # It's an int sometimes? That's no good
        logger.warning(
            f"Unexpected e_machine type: {type(header.e_machine)}: {header.e_machine}. Cannot identify architecture."
        )
        return ArchInfo()

    friendly_arch = header.e_machine.replace("EM_", "")
    machine_table = {
        # Normal architectures:
        "X86_64": "intel64",
        "386": "intel",
        "AARCH64": "aarch64",
        "PPC": "ppc",
        "PPC64": "ppc64",
        "RISCV": "riscv",
        "LOONGARCH": "loongarch64",
        # Additional processing required for these:
        "ARM": "arm",
        "MIPS": "mips",
    }
    family = machine_table.get(friendly_arch)

    if family is None:
        logger.debug(f"Unsupported architecture: {friendly_arch}")
        logger.debug(f"ELF Header: {header}")
        return ArchInfo()

    # Special processing for ARM and MIPS
    if family == "arm":
        return _identify_arm_arch(elf)
    if family == "mips":
        return _identify_mips_arch(header)

    word_size = _elf_bits(header)

    if family == "riscv":
        # The word size becomes part of the name: riscv32 / riscv64
        return ArchInfo(arch=f"{family}{word_size}", bits=word_size)
    if family == "ppc":
        # same for big and little
        return ArchInfo(arch="ppc", bits=word_size)
    if family == "intel":
        return ArchInfo(arch=family, bits=word_size, abi="i386")

    # Other architectures get eb suffix if big-endian. mips/arm are handled in their helpers
    big_endian = header.e_ident.get("EI_DATA", None) == "ELFDATA2MSB"
    return ArchInfo(arch=(family + "eb") if big_endian else family, bits=word_size)