Clean up the analyzer (somewhat!)

This commit is contained in:
2025-04-25 22:18:53 -07:00
parent f777d9ecf8
commit 9a07551a22
8 changed files with 3 additions and 28 deletions

3
compiler/.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,3 @@
{
"python.analysis.typeCheckingMode": "standard"
}

BIN
compiler/binaries/main.dat Normal file

Binary file not shown.

7
compiler/format.txt Normal file
View File

@ -0,0 +1,7 @@
[entry point]
[number of libraries]
<Library> <Import1> <Import1Addr> <Import2> <Import2Addr> 00: Import the following things. (Terminate on 00)
[number of bytes in starting state as a uint32_t]
00 [00-ff] <bytes>: Use the following 00-ff bytes literally
[01-ff] <byte>: Repeat the next byte 01 to ff times

162
compiler/main.py Normal file
View File

@ -0,0 +1,162 @@
import base64
from dataclasses import dataclass
from io import BytesIO
import json
from typing import Generator
import pefile
@dataclass
class Import:
    """One imported library together with the procedures taken from it."""

    # Library name as raw bytes (the encoder writes it followed by a 00 byte).
    library: bytes
    # (procedure name, address) pairs; _create_binary stores each address as
    # an offset relative to the start of the starting-state buffer.
    procedures: list[tuple[bytes, int]]
@dataclass
class Binary:
    """Flattened program image: initial memory, entry point and imports."""

    # Section bytes laid out contiguously from the lowest section address.
    starting_state: bytes
    # Entry point expressed relative to the start of ``starting_state``.
    entry_point: int
    # Libraries, and the procedures from each, that the image refers to.
    imports: list[Import]
def _single_or_none[T](ts: Generator[T]) -> T | None:
items = [t for t in ts]
if len(items) == 0:
return None
if len(items) == 1:
return items[0]
raise ValueError(f"expected 1 or 0, got {len(items)}")
def _single[T](ts: Generator[T]) -> T:
items = [t for t in ts]
if len(items) == 1:
return items[0]
raise ValueError(f"expected 1, got {len(items)}")
def _create_binary(subject: pefile.PE) -> Binary:
    """Flatten a parsed PE file into a ``Binary``.

    The ``.text`` section (required) and the ``.data`` / ``.rdata`` sections
    (optional) are copied into one contiguous buffer that begins at the
    lowest section RVA among them.  The entry point and every import address
    are rebased so that they are relative to the start of that buffer.

    Args:
        subject: The parsed PE file.

    Returns:
        The flattened image, its rebased entry point and its imports.

    Raises:
        ValueError: if ``.text`` is missing, one of the three section names
            occurs more than once, or a procedure is imported by ordinal
            (it has no name to record).
    """
    optional_header = subject.OPTIONAL_HEADER
    # Narrows the type for the static analyzer; header fields are then read
    # through getattr because they are not declared as attributes.
    assert isinstance(optional_header, pefile.Structure)

    # Section names are compared in their NUL-padded 8-byte form.
    text_section = _single(i for i in subject.sections if i.Name == b".text\0\0\0")
    data_section = _single_or_none(i for i in subject.sections if i.Name == b".data\0\0\0")
    rdata_section = _single_or_none(i for i in subject.sections if i.Name == b".rdata\0\0")

    relevant_sections = [
        section
        for section in (text_section, data_section, rdata_section)
        if section is not None
    ]
    if not relevant_sections:
        # Defensive only: _single() above already insists on a .text section.
        raise ValueError("no sections to plot")

    min_address = min(i.VirtualAddress for i in relevant_sections)
    max_address = max(i.VirtualAddress + i.SizeOfRawData for i in relevant_sections)

    # Gaps between sections stay zero-filled.
    buffer = bytearray(max_address - min_address)
    for section in relevant_sections:
        data = section.get_data()
        start = section.VirtualAddress - min_address
        buffer[start:start + len(data)] = data
    starting_state = bytes(buffer)

    entry_point_rva = getattr(optional_header, "AddressOfEntryPoint")
    entry_point = entry_point_rva - min_address

    # Loop-invariant, so read it once.
    image_base = getattr(optional_header, "ImageBase")

    imports: list[Import] = []
    # pefile only sets DIRECTORY_ENTRY_IMPORT when the file has an import
    # directory; a PE without imports simply yields an empty list here.
    for entry in getattr(subject, "DIRECTORY_ENTRY_IMPORT", ()):
        library: bytes = entry.dll
        procedures: list[tuple[bytes, int]] = []
        for imp in entry.imports:
            if imp.name is None:
                # Fail here with context instead of letting the encoder
                # crash later while trying to write a None name.
                ordinal = getattr(imp, "ordinal", None)
                raise ValueError(
                    f"import by ordinal is not supported: {library!r} ordinal {ordinal}"
                )
            # imp.address is an absolute VA: strip the image base to get an
            # RVA, then rebase onto the start of the buffer.
            import_address_rva = imp.address - image_base
            procedures.append((imp.name, import_address_rva - min_address))
        imports.append(Import(library, procedures))

    return Binary(
        starting_state=starting_state,
        entry_point=entry_point,
        imports=imports,
    )
def _encode_binary(binary: Binary) -> bytes:
    """Serialise ``binary`` into its byte representation.

    Layout, in order (all integers unsigned, little-endian):

    * the entry point as a u32;
    * for every import: the library name, then each procedure as its name
      followed by a u32 address, then one 00 byte (every name is written
      followed by a 00 byte);
    * one further 00 byte that closes the import list;
    * the length of the starting state as a u32;
    * the starting state, run-length encoded as a sequence of packets:
      ``00 <count> <count literal bytes>`` (count 1..255) or
      ``<count> <byte>`` meaning ``byte`` repeated ``count`` times
      (count 2..255).

    Returns:
        The encoded data as an independent ``bytes`` object.

    Raises:
        OverflowError: if the entry point, an import address or the state
            length is negative or does not fit in 32 bits.
    """
    out = BytesIO()

    def _write_u32(n: int) -> None:
        out.write(n.to_bytes(4, "little", signed=False))

    def _write_u8(n: int) -> None:
        out.write(n.to_bytes(1, "little", signed=False))

    def _write_zt(s: bytes) -> None:
        out.write(s)
        _write_u8(0)

    _write_u32(binary.entry_point)
    for imp in binary.imports:
        _write_zt(imp.library)
        for procedure, address in imp.procedures:
            _write_zt(procedure)
            _write_u32(address)
        _write_u8(0)  # end of this library's procedures
    _write_u8(0)  # end of the import list

    state = binary.starting_state
    _write_u32(len(state))

    # == encode RLE ==
    def _repeat_length(start: int) -> int:
        """Count equal bytes starting at ``start`` (at most 255)."""
        limit = min(start + 255, len(state))
        first_byte = state[start]
        pos = start
        while pos < limit and state[pos] == first_byte:
            pos += 1
        return pos - start

    def _literal_length(start: int) -> int:
        """Count bytes from ``start`` up to the first byte that equals its
        predecessor (at most 255, at least 1)."""
        limit = min(start + 255, len(state))
        pos = start + 1
        while pos < limit and state[pos] != state[pos - 1]:
            pos += 1
        return pos - start

    pos = 0
    while pos < len(state):
        length = _repeat_length(pos)
        if length >= 2:
            # Repeat packet: <count> <byte>.
            _write_u8(length)
            _write_u8(state[pos])
        else:
            # Literal packet: 00 <count> <bytes>.
            length = _literal_length(pos)
            _write_u8(0)
            _write_u8(length)
            out.write(state[pos:pos + length])
        pos += length

    # getvalue() returns real bytes, as annotated; getbuffer() would return a
    # memoryview that keeps the BytesIO locked against resizing while alive.
    return out.getvalue()
def main():
    """Compile ``subjects/main.exe`` into ``binaries/main.dat``.

    Both paths are resolved relative to the current working directory.
    """
    from pathlib import Path

    # Build the paths from components so they are not tied to the Windows
    # backslash separator.
    source = Path("subjects") / "main.exe"
    target = Path("binaries") / "main.dat"

    subject = pefile.PE(str(source))
    try:
        # _create_binary copies everything it needs out of the PE, so the
        # handle can be released before encoding.
        binary = _create_binary(subject)
    finally:
        subject.close()

    code = _encode_binary(binary)
    target.write_bytes(code)
# Run the compiler only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

17
compiler/poetry.lock generated Normal file
View File

@ -0,0 +1,17 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]]
name = "pefile"
version = "2024.8.26"
description = "Python PE parsing module"
optional = false
python-versions = ">=3.6.0"
files = [
{file = "pefile-2024.8.26-py3-none-any.whl", hash = "sha256:76f8b485dcd3b1bb8166f1128d395fa3d87af26360c2358fb75b80019b957c6f"},
{file = "pefile-2024.8.26.tar.gz", hash = "sha256:3ff6c5d8b43e8c37bb6e6dd5085658d658a7a0bdcd20b6a07b1fcfc1c4e9d632"},
]
[metadata]
lock-version = "2.0"
python-versions = "^3.13"
content-hash = "8e680dad2071f9d7a37ca34d4fd6da67ba3922e0de45f0442c7b38d07f8fa9f0"

15
compiler/pyproject.toml Normal file
View File

@ -0,0 +1,15 @@
[tool.poetry]
name = "analyzer"
version = "0.1.0"
description = ""
authors = ["Nyeogmi <economicsbat@gmail.com>"]
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.13"
pefile = "^2024.8.26"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

BIN
compiler/subjects/main.exe Normal file

Binary file not shown.