Better file format 1

This commit is contained in:
2025-04-25 21:30:11 -07:00
parent f5cb818123
commit b20846c797
14 changed files with 213 additions and 688 deletions

3
analyzer/.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,3 @@
{
"python.analysis.typeCheckingMode": "standard"
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

7
analyzer/format.txt Normal file
View File

@ -0,0 +1,7 @@
[entry point]
[number of libraries]
<Library> <Import1> <Import1Addr> <Import2> <Import2Addr> 00: Import the following things. (Terminate on 00)
[number of bytes in starting state as a uint32_t]
00 [00-ff] <bytes>: Use the following 00-ff bytes literally
[01-ff] <byte>: Repeat the next byte 02 to ff times

View File

@ -1,80 +1,124 @@
import base64
from dataclasses import dataclass
from io import BytesIO
import json
from typing import Generator
import pefile
def main():
subject = pefile.PE("subjects\\main.exe")
@dataclass
class Import:
    """One imported library and the procedures pulled in from it."""
    # Raw DLL name bytes as reported by pefile (e.g. b"KERNEL32.dll").
    library: bytes
    # (procedure name, import address) pairs; the address is rebased against
    # the minimum section address in _create_binary.
    procedures: list[tuple[bytes, int]]
def _single_or_none(x):
items = [i for i in x]
if len(items) == 0:
return None
assert len(items) == 1
@dataclass
class Binary:
    """Flattened view of a PE executable, ready for _encode_binary.

    Note: the redundant `object` base class was dropped — all Python 3
    classes are new-style, and this project targets Python ^3.13.
    """
    # Raw memory image covering the relevant sections, rebased so that
    # offset 0 corresponds to the lowest section VirtualAddress.
    starting_state: bytes
    # Entry-point offset into starting_state.
    entry_point: int
    # Imported libraries with their procedures.
    imports: list[Import]
def _single_or_none[T](ts: Generator[T]) -> T | None:
items = [t for t in ts]
if len(items) == 0:
return None
if len(items) == 1:
return items[0]
raise ValueError(f"expected 1 or 0, got {len(items)}")
def _single[T](ts: Generator[T]) -> T:
items = [t for t in ts]
if len(items) == 1:
return items[0]
def _dump(fname, section):
with open(fname, "wb") as f:
if section is not None:
if isinstance(section, bytes):
f.write(section)
else:
f.write(section.get_data())
raise ValueError(f"expected 1, got {len(items)}")
def _create_binary(subject: pefile.PE) -> Binary:
    """Flatten a parsed PE into a Binary: a rebased section image, the
    entry-point offset, and the import table.

    The diffed source interleaved this function with removed lines from the
    previous JSON-emitting version (old `_dump` calls, the `binary` dict,
    the old import loop); this is the de-interleaved new version.

    Raises:
        ValueError: when none of .text/.data/.rdata is present.
    """
    optional_header = subject.OPTIONAL_HEADER
    assert isinstance(optional_header, pefile.Structure)
    # .text must exist; .data/.rdata are optional. Section names are padded
    # to 8 bytes with NULs in the PE header.
    text_section = _single(i for i in subject.sections if i.Name == b".text\0\0\0")
    data_section = _single_or_none(i for i in subject.sections if i.Name == b".data\0\0\0")
    rdata_section = _single_or_none(i for i in subject.sections if i.Name == b".rdata\0\0")
    relevant_sections = [section for section in (text_section, data_section, rdata_section) if section is not None]
    if len(relevant_sections) == 0:
        raise ValueError("no sections to plot")
    print([(i.VirtualAddress, i) for i in relevant_sections])
    # Rebase everything against the lowest section address; round the top
    # of the window up to a page boundary.
    min_address = min(i.VirtualAddress for i in relevant_sections)
    max_address = max(_round_up_to_page(i.VirtualAddress + i.SizeOfRawData) for i in relevant_sections)
    print(min_address, max_address)
    buffer = bytearray(max_address - min_address)
    for section in relevant_sections:
        data = section.get_data()  # TODO: De-pad the text section from 0xccs
        start = section.VirtualAddress - min_address
        buffer[start:start + len(data)] = data
    starting_state = bytes(buffer)
    entry_point_rva = getattr(optional_header, "AddressOfEntryPoint")
    print(entry_point_rva)
    entry_point = entry_point_rva - min_address
    # Collect imports; import addresses are rebased the same way as the image.
    imports: list[Import] = []
    for entry in getattr(subject, "DIRECTORY_ENTRY_IMPORT"):
        library: bytes = entry.dll
        procedures: list[tuple[bytes, int]] = []
        for imp in entry.imports:
            import_address_rva = imp.address - getattr(optional_header, "ImageBase")
            import_address = import_address_rva - min_address
            procedures.append((imp.name, import_address))
        imports.append(Import(library, procedures))
    return Binary(
        starting_state=starting_state,
        entry_point=entry_point,
        imports=imports,
    )
def _encode_binary(binary: Binary) -> bytes:
out = BytesIO()
def _write_u32(n: int):
out.write(n.to_bytes(4, "little", signed=False))
def _write_u8(n: int):
out.write(n.to_bytes(1, "little", signed=False))
def _write_zt(s: bytes):
out.write(s)
_write_u8(0)
_write_u32(binary.entry_point)
for i in binary.imports:
print(i.library)
_write_zt(i.library)
print(i.procedures)
for (procedure, address) in i.procedures:
_write_zt(procedure)
_write_u32(address)
_write_u8(0)
_write_u8(0)
_write_u32(len(binary.starting_state))
# TODO: No RLE for now
for b in binary.starting_state:
_write_u8(b)
return out.getbuffer()
def main():
    """Parse the subject executable and write its encoded image to disk."""
    pe = pefile.PE("subjects\\main.exe")
    encoded = _encode_binary(_create_binary(pe))
    with open("binaries\\main.dat", "wb") as out_file:
        out_file.write(encoded)
def _round_up_to_page(x: int):
# TODO: Is this the page size on x64? I think it is

4
analyzer/poetry.lock generated
View File

@ -13,5 +13,5 @@ files = [
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "6f28e4dc3bf3b09c57354693b75dc7975b70a7aac2ee7c83fc81b0058520d7f9"
python-versions = "^3.13"
content-hash = "8e680dad2071f9d7a37ca34d4fd6da67ba3922e0de45f0442c7b38d07f8fa9f0"

View File

@ -6,7 +6,7 @@ authors = ["Nyeogmi <economicsbat@gmail.com>"]
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.10"
python = "^3.13"
pefile = "^2024.8.26"