Better file format 1
This commit is contained in:
3
analyzer/.vscode/settings.json
vendored
Normal file
3
analyzer/.vscode/settings.json
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
{
|
||||
"python.analysis.typeCheckingMode": "standard"
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
7
analyzer/format.txt
Normal file
7
analyzer/format.txt
Normal file
@ -0,0 +1,7 @@
|
||||
[entry point]
|
||||
[number of libraries]
|
||||
<Library> <Import1> <Import1Addr> <Import2> <Import2Addr> 00: Import the following things. (Terminate on 00)
|
||||
|
||||
[number of bytes in starting state as a uint32_t]
|
||||
00 [00-ff] <bytes>: Use the following 00-ff bytes literally
|
||||
[01-ff] <byte>: Repeat the next byte 02 to ff times
|
134
analyzer/main.py
134
analyzer/main.py
@ -1,80 +1,124 @@
|
||||
import base64
|
||||
from dataclasses import dataclass
|
||||
from io import BytesIO
|
||||
import json
|
||||
from typing import Generator
|
||||
import pefile
|
||||
|
||||
def main():
|
||||
subject = pefile.PE("subjects\\main.exe")
|
||||
@dataclass
class Import:
    """All procedures imported from one library of the analysed executable."""

    # Library (DLL) name exactly as reported by the PE import directory.
    library: bytes
    # (procedure name, address) pairs; the address is stored relative to the
    # start of the flattened starting-state buffer.
    procedures: list[tuple[bytes, int]]
|
||||
|
||||
def _single_or_none(x):
|
||||
items = [i for i in x]
|
||||
if len(items) == 0:
|
||||
return None
|
||||
|
||||
assert len(items) == 1
|
||||
@dataclass
class Binary:
    """In-memory description of an executable, ready to be encoded."""

    # Flattened contents of the relevant sections, one contiguous buffer.
    starting_state: bytes
    # Entry point expressed as an offset into ``starting_state``.
    entry_point: int
    # One record per imported library.
    imports: list[Import]
|
||||
|
||||
|
||||
def _single_or_none[T](ts: Generator[T]) -> T | None:
|
||||
items = [t for t in ts]
|
||||
if len(items) == 0:
|
||||
return None
|
||||
|
||||
if len(items) == 1:
|
||||
return items[0]
|
||||
|
||||
raise ValueError(f"expected 1 or 0, got {len(items)}")
|
||||
|
||||
def _single[T](ts: Generator[T]) -> T:
|
||||
items = [t for t in ts]
|
||||
if len(items) == 1:
|
||||
return items[0]
|
||||
|
||||
def _dump(fname, section):
|
||||
with open(fname, "wb") as f:
|
||||
if section is not None:
|
||||
if isinstance(section, bytes):
|
||||
f.write(section)
|
||||
else:
|
||||
f.write(section.get_data())
|
||||
raise ValueError(f"expected 1, got {len(items)}")
|
||||
|
||||
for i in subject.sections:
|
||||
print(i)
|
||||
|
||||
text_section = _single_or_none(i for i in subject.sections if i.Name == b".text\0\0\0")
|
||||
def _create_binary(subject: pefile.PE) -> Binary:
|
||||
optional_header = subject.OPTIONAL_HEADER
|
||||
assert isinstance(optional_header, pefile.Structure)
|
||||
text_section = _single(i for i in subject.sections if i.Name == b".text\0\0\0")
|
||||
data_section = _single_or_none(i for i in subject.sections if i.Name == b".data\0\0\0")
|
||||
rdata_section = _single_or_none(i for i in subject.sections if i.Name == b".rdata\0\0")
|
||||
|
||||
_dump("dumps\\text.dat", text_section)
|
||||
_dump("dumps\\data.dat", data_section)
|
||||
_dump("dumps\\rdata.dat", rdata_section)
|
||||
|
||||
relevant_sections = [section for section in (text_section, data_section, rdata_section) if section is not None]
|
||||
if len(relevant_sections) == 0:
|
||||
raise ValueError("no sections to plot")
|
||||
print([(i.VirtualAddress, i) for i in relevant_sections])
|
||||
min_address = min(i.VirtualAddress for i in relevant_sections)
|
||||
max_address = max(_round_up_to_page(i.VirtualAddress + i.SizeOfRawData) for i in relevant_sections)
|
||||
|
||||
print(min_address, max_address)
|
||||
buffer = bytearray(max_address - min_address)
|
||||
for section in relevant_sections:
|
||||
data = section.get_data() # TODO: De-pad the text section from 0xccs
|
||||
start = section.VirtualAddress - min_address
|
||||
buffer[start:start+len(data)] = data
|
||||
buffer = bytes(buffer)
|
||||
|
||||
_dump("dumps\\starting_state.dat", buffer)
|
||||
|
||||
binary = {
|
||||
"startingState": base64.b64encode(buffer).decode("utf8"),
|
||||
"imports": [],
|
||||
}
|
||||
starting_state = bytes(buffer)
|
||||
|
||||
# find imports
|
||||
# print(subject)
|
||||
# print(dir(subject))
|
||||
for entry in subject.DIRECTORY_ENTRY_IMPORT:
|
||||
entry_point_rva = getattr(optional_header, "AddressOfEntryPoint")
|
||||
print(entry_point_rva)
|
||||
entry_point = (entry_point_rva - min_address)
|
||||
|
||||
# print(entry.dll)
|
||||
imports: list[Import] = []
|
||||
for entry in getattr(subject, "DIRECTORY_ENTRY_IMPORT"):
|
||||
library: bytes = entry.dll
|
||||
procedures: list[tuple[bytes, int]] = []
|
||||
for imp in entry.imports:
|
||||
# print(dir(imp))
|
||||
import_address = imp.address - subject.OPTIONAL_HEADER.ImageBase - min_address
|
||||
print(hex(import_address), imp.name)
|
||||
binary["imports"].append({
|
||||
"dll": entry.dll.decode("utf8"),
|
||||
"symbol": imp.name.decode("utf8"),
|
||||
"address": import_address,
|
||||
})
|
||||
import_address_rva = imp.address - getattr(optional_header, "ImageBase")
|
||||
import_address = import_address_rva - min_address
|
||||
procedures.append((imp.name, import_address))
|
||||
|
||||
entry_point_rva = subject.OPTIONAL_HEADER.AddressOfEntryPoint
|
||||
binary["entryPoint"] = entry_point_rva - min_address
|
||||
with open("binaries/main.json", "wt") as f:
|
||||
f.write(json.dumps(binary, indent=4))
|
||||
imports.append(Import(library, procedures))
|
||||
|
||||
return Binary(
|
||||
starting_state=starting_state,
|
||||
entry_point=entry_point,
|
||||
imports=imports,
|
||||
)
|
||||
|
||||
|
||||
def _encode_binary(binary: Binary) -> bytes:
|
||||
out = BytesIO()
|
||||
|
||||
def _write_u32(n: int):
|
||||
out.write(n.to_bytes(4, "little", signed=False))
|
||||
|
||||
def _write_u8(n: int):
|
||||
out.write(n.to_bytes(1, "little", signed=False))
|
||||
|
||||
def _write_zt(s: bytes):
|
||||
out.write(s)
|
||||
_write_u8(0)
|
||||
|
||||
_write_u32(binary.entry_point)
|
||||
for i in binary.imports:
|
||||
print(i.library)
|
||||
_write_zt(i.library)
|
||||
print(i.procedures)
|
||||
for (procedure, address) in i.procedures:
|
||||
_write_zt(procedure)
|
||||
_write_u32(address)
|
||||
_write_u8(0)
|
||||
_write_u8(0)
|
||||
|
||||
_write_u32(len(binary.starting_state))
|
||||
# TODO: No RLE for now
|
||||
for b in binary.starting_state:
|
||||
_write_u8(b)
|
||||
|
||||
return out.getbuffer()
|
||||
|
||||
def main():
    """Load the subject executable, encode it, and write the result to disk.

    Reads ``subjects\\main.exe`` and writes the encoded image to
    ``binaries\\main.dat``.
    """
    encoded = _encode_binary(_create_binary(pefile.PE("subjects\\main.exe")))

    with open("binaries\\main.dat", "wb") as out_file:
        out_file.write(encoded)
|
||||
|
||||
def _round_up_to_page(x: int):
|
||||
# TODO: Is this the page size on x64? I think it is
|
||||
|
4
analyzer/poetry.lock
generated
4
analyzer/poetry.lock
generated
@ -13,5 +13,5 @@ files = [
|
||||
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "6f28e4dc3bf3b09c57354693b75dc7975b70a7aac2ee7c83fc81b0058520d7f9"
|
||||
python-versions = "^3.13"
|
||||
content-hash = "8e680dad2071f9d7a37ca34d4fd6da67ba3922e0de45f0442c7b38d07f8fa9f0"
|
||||
|
@ -6,7 +6,7 @@ authors = ["Nyeogmi <economicsbat@gmail.com>"]
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.10"
|
||||
python = "^3.13"
|
||||
pefile = "^2024.8.26"
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user