#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
# Copyright (C) 2022 Andrew I MacIntyre
# Draytek firmware component extractor and decompressor
#
# This script implements the decompression routine described in
#   https://github.com/yath/vigor165/blob/main/decompress/decompress.S
# as part of the process of extracting the executable firmware image
# (and the web interface file system) from Draytek modem/router firmware
# files.
#
# The Draytek firmware file structure was understood with information
# from the Draytools project
# (https://github.com/ammonium/draytools/ - now removed; I used the
# fork at https://github.com/krolinventions/draytools/).
#
# Rather than try and translate MIPS disassembly to Python, this
# script implements a rudimentary MIPS assembly interpreter and
# executes a slightly simplified version of the above assembly code
# sequence for each chunk presented for decompression.
#
# The MIPS assembly interpreter implementation includes support for only
# those instructions required for the decompressor code to execute
# correctly and doesn't strictly observe a number of aspects of standard
# MIPS assembly code, including:
# - the code and data address spaces are separate
# - alignment isn't checked
# - some complex pseudo-opcodes are implemented as single instructions
#
# This script has been tested with both Python 2.7 and Python 3.8.  The
# script is self contained and has no dependencies on external Python
# libraries.  Operation has been tested only on Linux but I believe it
# should run on Windows too as binary mode file I/O has been explicitly
# stated.  On an I5-3470 system approximately 4MB of compressed data
# can be decompressed per minute.
import os
import sys
import struct


### MIPS simulated CPU - constants

# register naming - source seems to use O32 convention
REG_ZERO = 0
REG_AT = 1
REG_V0 = 2
REG_V1 = 3
REG_A0 = 4
REG_A1 = 5
REG_A2 = 6
REG_A3 = 7
REG_T0 = 8
REG_T1 = 9
REG_T2 = 10
REG_T3 = 11
REG_T4 = 12
REG_T5 = 13
REG_T6 = 14
REG_T7 = 15
REG_S0 = 16
REG_S1 = 17
REG_S2 = 18
REG_S3 = 19
REG_S4 = 20
REG_S5 = 21
REG_S6 = 22
REG_S7 = 23
REG_T8 = 24
REG_T9 = 25
REG_K0 = 26
REG_K1 = 27
REG_GP = 28
REG_SP = 29
REG_FP = 30
REG_RA = 31
MIPS_REG_NAMES = (REG_ZERO, REG_AT, REG_V0, REG_V1,
                  REG_A0, REG_A1, REG_A2, REG_A3,
                  REG_T0, REG_T1, REG_T2, REG_T3,
                  REG_T4, REG_T5, REG_T6, REG_T7,
                  REG_S0, REG_S1, REG_S2, REG_S3,
                  REG_S4, REG_S5, REG_S6, REG_S7,
                  REG_T8, REG_T9, REG_K0, REG_K1,
                  REG_GP, REG_SP, REG_FP, REG_RA)
assert len(MIPS_REG_NAMES) == 32

# various power of 2 limits
MAX_U8 = 255
MAX_U16 = 0xffff
MAX_I16 = 2**15 - 1
MIN_I16 = -2**15
MAX_U32 = 0xffffffff
MAX_I32 = int(2**31 - 1)    # keep as int if Python is 32bit
MIN_I32 = int(-2**31)

# struct data type codes for register data (big-endian target)
REG_DATA_UINT8 = 'B'
REG_DATA_INT16 = '>h'
REG_DATA_UINT16 = '>H'
REG_DATA_INT32 = '>l'
REG_DATA_UINT32 = '>L'

# opcode mnemonics implemented
MIPS_ADDI = 'addi'
MIPS_ADDIU = 'addiu'
MIPS_ADDU = 'addu'
MIPS_ANDI = 'andi'
MIPS_B = 'b'
MIPS_BEQ = 'beq'
MIPS_BNE = 'bne'
MIPS_JR = 'jr'
MIPS_LBU = 'lbu'
MIPS_LI = 'li'
MIPS_LW = 'lw'
MIPS_MOVE = 'move'
MIPS_OR = 'or'
MIPS_SB = 'sb'
MIPS_SLL = 'sll'
MIPS_SLTIU = 'sltiu'
MIPS_SLTU = 'sltu'
MIPS_SRL = 'srl'
MIPS_SUBU = 'subu'
MIPS_SW = 'sw'
MIPS_WSBH = 'wsbh'
MIPS_NOP = 'nop'
MIPS_MEMCPY = 'memcpy'


### Python 2/3 portability helpers

if sys.version_info > (3,):
    xrange = range


### helper routines

# write a message to stdout
LOGLN_FMT_NL = '%s\n'
LOGLN_FMT_RO = '%s\r'


def logln(msg, NL=True):
    """Write msg to stdout and flush immediately.

    msg -- the message to write (formatted with '%s')
    NL  -- when true (the default) the message is terminated with a
           newline, otherwise with a carriage return only
    """
    fmt = LOGLN_FMT_NL if NL else LOGLN_FMT_RO
    sys.stdout.write(fmt % msg)
    sys.stdout.flush()


def limit_signed(v, v_min, v_max, wrap):
    """Limit a value to the signed range v_min..v_max (inclusive).

    v     -- the value to limit
    v_min -- lowest value in the range
    v_max -- highest value in the range
    wrap  -- when true, an out of range value is wrapped around into
             the range (two's complement style overflow); when false
             an out of range value fails the range assertion

    Returns the (possibly wrapped) value.
    """
    if wrap:
        # modular arithmetic handles any number of wrap-arounds and
        # leaves in-range values untouched
        v = (v - v_min) % (v_max - v_min + 1) + v_min
    assert v_min <= v <= v_max
    return v


def limit_unsigned(v, v_max, clip):
    """Limit a value to the unsigned range 0..v_max (inclusive).

    v     -- the value to limit
    v_max -- highest value in the range; used as a bit mask when
             clipping, so it is expected to be of the form 2**n - 1
             (as MAX_U8, MAX_U16 and MAX_U32 are)
    clip  -- when true, excess bits of an out of range value are
             masked off (a negative value becomes its two's
             complement unsigned equivalent); when false an out of
             range value fails the range assertion

    Returns the (possibly clipped) value.
    """
    if clip and not 0 <= v <= v_max:
        v &= v_max
    assert 0 <= v <= v_max
    return v


# limit a value to 8bit range (unsigned)
# (don't clip excess bits by default - errors in byte handling
# should be investigated)
def limit_u8(value, clip_excess=False):
    return limit_unsigned(value, MAX_U8, clip_excess)


# limit a value to 16bit range (signed)
def limit_i16(value, wrap_around=True):
    return limit_signed(value, MIN_I16, MAX_I16, wrap_around)


# limit a value to 16bit range (unsigned)
def limit_u16(value, clip_excess=True):
    return limit_unsigned(value, MAX_U16, clip_excess)


# limit a value to 32bit range (signed)
def limit_i32(value, wrap_around=True):
    return limit_signed(value, MIN_I32, MAX_I32, wrap_around)


# limit a value to 32bit range (unsigned)
def limit_u32(value, clip_excess=True):
    return limit_unsigned(value, MAX_U32, clip_excess)


def sign_extend_i16_u32(v):
    """Sign extend a 16bit value to its unsigned 32bit representation.

    v is first wrapped into the signed 16bit range, stored in a
    scratch register as a signed 32bit value and read back unsigned.
    """
    r = register()
    r.i32 = limit_i16(v)
    return r.u32


### MIPS simulated CPU classes

# a register with manipulation methods
# - keep the register value unsigned, converting to signed as required
class register(object):
    """A 32bit register holding its value as an unsigned integer.

    The value can be read and written as an unsigned word (u32), a
    signed word (i32) or a tuple of 4 bytes, most significant byte
    first (u8).  The least significant byte can be read via u8_lsb.

    When created with read_write=False all writes are silently
    ignored, so the register always reads as 0.
    """

    def __init__(self, read_write=True):
        self._data = 0
        self._read_write = read_write

    @property
    def u8_lsb(self):
        """The least significant byte of the register."""
        return self._data & MAX_U8

    @property
    def u8(self):
        """The register as a tuple of 4 bytes: (b3, b2, b1, b0)."""
        v = self._data
        return ((v >> 24) & MAX_U8,
                (v >> 16) & MAX_U8,
                (v >> 8) & MAX_U8,
                v & MAX_U8)

    @u8.setter
    def u8(self, byte_seq):
        # byte_seq must hold exactly 4 byte values, most significant first
        assert len(byte_seq) == 4
        if self._read_write:
            v = 0
            for b in byte_seq:
                assert 0 <= b <= MAX_U8
                v = (v << 8) + b
            self._data = v

    @property
    def i32(self):
        """The register as a signed 32bit value."""
        return limit_signed(self._data, MIN_I32, MAX_I32, True)

    @i32.setter
    def i32(self, value):
        if self._read_write:
            v = limit_signed(value, MIN_I32, MAX_I32, True)
            if v < 0:
                # store as the two's complement unsigned equivalent
                v += MAX_U32 + 1
            self._data = v

    @property
    def u32(self):
        """The register as an unsigned 32bit value."""
        return self._data

    @u32.setter
    def u32(self, value):
        if self._read_write:
            self._data = limit_unsigned(value, MAX_U32, True)


# encapsulate the simulation
class MIPS_Core:

    # basic system config
    _RAM_ADDRESS = 0
    _RAM_SIZE = 0x200000

    # internal representation of opcode mnemonic
    _OPCODE = 'instr_%s'
    _OPCODE_SUPPORTED = (MIPS_ADDI, MIPS_ADDIU, MIPS_ADDU, MIPS_ANDI,
                         MIPS_B, MIPS_BEQ, MIPS_BNE, MIPS_JR,
                         MIPS_LBU, MIPS_LI, MIPS_LW, MIPS_MOVE,
                         MIPS_OR, MIPS_SB, MIPS_SLL, MIPS_SLTIU,
                         MIPS_SLTU, MIPS_SRL, MIPS_SUBU, MIPS_SW,
                         MIPS_WSBH, MIPS_NOP, MIPS_MEMCPY)

    # opcode result logging
    _LOG_OPC_RESULTS = False
    _LOG_OPC_FORMAT = '[%04x] %s\n'
    _LOG_OPC_REGWORD = '%s = 0x%08x'
    _LOG_OPC_MEMBYTE = '@0x%x = 0x%x'
    _LOG_OPC_MEMWORD = '@0x%x = 0x%08x'
    _LOG_OPC_NOBRANCH = '---'
    _LOG_OPC_BRANCHTO = '=> %s'
    _LOG_OPC_SUB_RET = 'RETURN'

    def __init__(self, instruction_seq=None):
        """Set up the registers, RAM, opcode cache and branch targets.

        instruction_seq -- the sequence of instructions to execute;
                           a 3 element instruction carries a label as
                           its last element.  Defaults to an empty
                           list (a fresh one for each instance).
        """
        if instruction_seq is None:
            instruction_seq = []

        # setup the register file
        # - the first register ($zero) ignores writes
        regs = [register(False)]
        regs.extend(register() for rn in MIPS_REG_NAMES[1:])
        self.registers = tuple(regs)

        # set the stack pointer to the top of RAM
        self.registers[REG_SP].u32 = self._RAM_ADDRESS + self._RAM_SIZE

        # configure the RAM (zero filled)
        self.ram = bytearray(self._RAM_SIZE)

        # cache the opcode implementations
        self._opc_cache = {o: getattr(self, self._OPCODE % o)
                           for o in self._OPCODE_SUPPORTED}

        # scan the instruction sequence for labels to build a branch
        # target index (label -> instruction index; if a label is
        # repeated the last occurrence wins)
        self.instructions = instruction_seq
        self.branch_tgts = {inst[2]: i
                            for i, inst in enumerate(instruction_seq)
                            if len(inst) == 3}

    # RAM access
    # - no bounds or alignment checking is performed
    def read_ram(self, address, byte_count):
        """Return byte_count bytes of RAM starting at address."""
        ram_addr = address - self._RAM_ADDRESS
        return self.ram[ram_addr: ram_addr + byte_count]

    def write_ram(self, address, byte_seq):
        """Store the bytes in byte_seq into RAM starting at address."""
        ram_addr = address - self._RAM_ADDRESS
        byte_count = len(byte_seq)
        self.ram[ram_addr: ram_addr + byte_count] = byte_seq

    # execute instruction sequence
    # - each instruction in sequence has the format
    #     (instr_mnemonic, (arg_1,...),