Clean up the analyzer (somewhat!)
This commit is contained in:
3
compiler/.vscode/settings.json
vendored
Normal file
3
compiler/.vscode/settings.json
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
{
|
||||
"python.analysis.typeCheckingMode": "standard"
|
||||
}
|
BIN
compiler/binaries/main.dat
Normal file
BIN
compiler/binaries/main.dat
Normal file
Binary file not shown.
7
compiler/format.txt
Normal file
7
compiler/format.txt
Normal file
@ -0,0 +1,7 @@
|
||||
[entry point]
|
||||
[number of libraries]
|
||||
<Library> <Import1> <Import1Addr> <Import2> <Import2Addr> 00: Import the following things. (Terminate on 00)
|
||||
|
||||
[number of bytes in starting state as a uint32_t]
|
||||
00 [00-ff] <bytes>: Use the following 00-ff bytes literally
|
||||
[01-ff] <byte>: Repeat the next byte 01 to ff times
|
162
compiler/main.py
Normal file
162
compiler/main.py
Normal file
@ -0,0 +1,162 @@
|
||||
import base64
|
||||
from dataclasses import dataclass
|
||||
from io import BytesIO
|
||||
import json
|
||||
from typing import Generator
|
||||
import pefile
|
||||
|
||||
@dataclass
class Import:
    """One imported library and the procedures pulled in from it."""

    # Library name as raw bytes (``_create_binary`` fills this from the
    # import entry's ``dll`` attribute).
    library: bytes
    # ``(procedure name, address)`` pairs.  ``_create_binary`` stores the
    # address relative to the start of the flattened image buffer.
    procedures: list[tuple[bytes, int]]
|
||||
|
||||
|
||||
@dataclass
class Binary:
    """Flattened view of an executable, as produced by ``_create_binary``."""

    # Section contents laid out in one contiguous buffer.
    starting_state: bytes
    # Entry point expressed as an offset into ``starting_state``.
    entry_point: int
    # One entry per imported library.
    imports: list[Import]
|
||||
|
||||
|
||||
def _single_or_none[T](ts: Generator[T]) -> T | None:
|
||||
items = [t for t in ts]
|
||||
if len(items) == 0:
|
||||
return None
|
||||
|
||||
if len(items) == 1:
|
||||
return items[0]
|
||||
|
||||
raise ValueError(f"expected 1 or 0, got {len(items)}")
|
||||
|
||||
def _single[T](ts: Generator[T]) -> T:
|
||||
items = [t for t in ts]
|
||||
if len(items) == 1:
|
||||
return items[0]
|
||||
|
||||
raise ValueError(f"expected 1, got {len(items)}")
|
||||
|
||||
|
||||
def _create_binary(subject: pefile.PE) -> Binary:
    """Flatten a parsed PE file into a ``Binary``.

    The ``.text`` section (required, exactly one) and the ``.data`` and
    ``.rdata`` sections (each optional, at most one) are copied into a single
    contiguous buffer whose offset 0 corresponds to the lowest section RVA.
    The entry point and every import address are rebased onto that buffer.

    Args:
        subject: the PE file to convert.

    Returns:
        A ``Binary`` holding the flattened image, the rebased entry point and
        the import table.

    Raises:
        ValueError: if a section name matches an unexpected number of
            sections (raised by ``_single`` / ``_single_or_none``).
    """
    optional_header = subject.OPTIONAL_HEADER
    assert isinstance(optional_header, pefile.Structure)

    # Section names are 8 bytes, NUL-padded.
    text_section = _single(s for s in subject.sections if s.Name == b".text\0\0\0")
    data_section = _single_or_none(s for s in subject.sections if s.Name == b".data\0\0\0")
    rdata_section = _single_or_none(s for s in subject.sections if s.Name == b".rdata\0\0")

    # ``.text`` is mandatory (``_single`` raises otherwise), so this list
    # always has at least one element.
    relevant_sections = [
        section
        for section in (text_section, data_section, rdata_section)
        if section is not None
    ]
    min_address = min(s.VirtualAddress for s in relevant_sections)
    max_address = max(s.VirtualAddress + s.SizeOfRawData for s in relevant_sections)

    # Zero-filled image; gaps between sections stay zero.
    buffer = bytearray(max_address - min_address)
    for section in relevant_sections:
        data = section.get_data()
        start = section.VirtualAddress - min_address
        buffer[start:start + len(data)] = data
    starting_state = bytes(buffer)

    entry_point = getattr(optional_header, "AddressOfEntryPoint") - min_address

    image_base = getattr(optional_header, "ImageBase")
    imports: list[Import] = []
    # pefile only sets DIRECTORY_ENTRY_IMPORT when the file actually has an
    # import directory; a file without one simply has no imports.
    for entry in getattr(subject, "DIRECTORY_ENTRY_IMPORT", []):
        # ``imp.address`` includes the image base: strip it to get an RVA,
        # then rebase onto the flattened buffer.
        # NOTE(review): presumably ``imp.name`` is None for imports by
        # ordinal, which ``_encode_binary`` cannot write -- confirm whether
        # such imports need handling.
        procedures: list[tuple[bytes, int]] = [
            (imp.name, imp.address - image_base - min_address)
            for imp in entry.imports
        ]
        imports.append(Import(entry.dll, procedures))

    return Binary(
        starting_state=starting_state,
        entry_point=entry_point,
        imports=imports,
    )
|
||||
|
||||
|
||||
def _encode_binary(binary: Binary) -> bytes:
    """Serialize *binary* into the output byte format.

    Layout (all integers little-endian):

    * ``u32`` entry point
    * for every import: the zero-terminated library name, then for every
      procedure its zero-terminated name followed by its ``u32`` address,
      then a ``00`` byte closing that library's procedure list
    * a ``00`` byte closing the import table
    * ``u32`` length of the (unencoded) starting state
    * the starting state, run-length encoded as a sequence of chunks:
      ``00 <count> <count bytes>`` copies the bytes literally, while
      ``<count> <byte>`` with a count of 02-ff repeats ``<byte>`` that many
      times

    Args:
        binary: the binary to encode.

    Returns:
        The encoded contents as an immutable ``bytes`` object.

    Raises:
        OverflowError: if the entry point, an import address or the state
            length does not fit in an unsigned 32-bit integer.
    """
    out = BytesIO()

    def _write_u32(n: int) -> None:
        out.write(n.to_bytes(4, "little", signed=False))

    def _write_u8(n: int) -> None:
        out.write(n.to_bytes(1, "little", signed=False))

    def _write_zt(s: bytes) -> None:
        out.write(s)
        _write_u8(0)

    _write_u32(binary.entry_point)
    for imported in binary.imports:
        _write_zt(imported.library)
        for procedure, address in imported.procedures:
            _write_zt(procedure)
            _write_u32(address)
        _write_u8(0)
    _write_u8(0)

    state = binary.starting_state
    _write_u32(len(state))

    # == encode RLE ==
    max_chunk = 0xFF  # chunk lengths are stored in a single byte
    pos = 0
    while pos < len(state):
        limit = min(pos + max_chunk, len(state))

        # Measure the run of bytes identical to the one at ``pos``.
        end = pos + 1
        while end < limit and state[end] == state[pos]:
            end += 1
        if end - pos >= 2:
            _write_u8(end - pos)
            _write_u8(state[pos])
            pos = end
            continue

        # No run here: emit a literal chunk that stops just before a byte
        # that would repeat its predecessor.
        end = pos + 1
        while end < limit and state[end] != state[end - 1]:
            end += 1
        _write_u8(0)
        _write_u8(end - pos)
        out.write(state[pos:end])
        pos = end

    # getvalue() returns real bytes; getbuffer() would hand back a memoryview
    # that contradicts the annotation and pins the BytesIO object.
    return out.getvalue()
|
||||
|
||||
def main():
    """Convert ``subjects\\main.exe`` into ``binaries\\main.dat``.

    Both paths are relative to the current working directory and are
    written with backslash separators.
    """
    encoded = _encode_binary(_create_binary(pefile.PE("subjects\\main.exe")))
    with open("binaries\\main.dat", "wb") as output_file:
        output_file.write(encoded)


if __name__ == "__main__":
    main()
|
17
compiler/poetry.lock
generated
Normal file
17
compiler/poetry.lock
generated
Normal file
@ -0,0 +1,17 @@
|
||||
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "pefile"
|
||||
version = "2024.8.26"
|
||||
description = "Python PE parsing module"
|
||||
optional = false
|
||||
python-versions = ">=3.6.0"
|
||||
files = [
|
||||
{file = "pefile-2024.8.26-py3-none-any.whl", hash = "sha256:76f8b485dcd3b1bb8166f1128d395fa3d87af26360c2358fb75b80019b957c6f"},
|
||||
{file = "pefile-2024.8.26.tar.gz", hash = "sha256:3ff6c5d8b43e8c37bb6e6dd5085658d658a7a0bdcd20b6a07b1fcfc1c4e9d632"},
|
||||
]
|
||||
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.13"
|
||||
content-hash = "8e680dad2071f9d7a37ca34d4fd6da67ba3922e0de45f0442c7b38d07f8fa9f0"
|
15
compiler/pyproject.toml
Normal file
15
compiler/pyproject.toml
Normal file
@ -0,0 +1,15 @@
|
||||
[tool.poetry]
|
||||
name = "analyzer"
|
||||
version = "0.1.0"
|
||||
description = ""
|
||||
authors = ["Nyeogmi <economicsbat@gmail.com>"]
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.13"
|
||||
pefile = "^2024.8.26"
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
BIN
compiler/subjects/main.exe
Normal file
BIN
compiler/subjects/main.exe
Normal file
Binary file not shown.
Reference in New Issue
Block a user