diff --git a/proj/server/src/intercept/__init__.py b/proj/server/src/intercept/__init__.py index eadb6d2..14e5ff0 100644 --- a/proj/server/src/intercept/__init__.py +++ b/proj/server/src/intercept/__init__.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -from typing import Optional, TypedDict, NotRequired +from typing import Optional, TypedDict, NotRequired, BinaryIO from socketserver import UnixStreamServer, StreamRequestHandler, ThreadingMixIn import os import re @@ -30,7 +30,9 @@ class ThreadedUnixStreamServer(ThreadingMixIn, UnixStreamServer): pass -class Handler(StreamRequestHandler): +class Parser: + rfile: BinaryIO + wfile: Optional[BinaryIO] pid: Optional[int] tid: Optional[int] path: Optional[str] @@ -40,18 +42,19 @@ class Handler(StreamRequestHandler): rel_ret_addr: int dli_sym_name: Optional[str] + def __init__(self, rfile: BinaryIO, wfile: BinaryIO = None): + self.rfile = rfile + self.wfile = wfile + self.stack = [] + self.pid = None + self.path = None + def before(self) -> None: pass def after(self) -> None: pass def before_fallback(self, func_name: str, *args) -> str: pass def after_fallback(self, func_name: str, *args, **kwargs) -> None: pass - def handle(self): - first = self.rfile.readline() - meta = {a[0]: a[1] for a in [tuple(p.decode('utf-8').split(':', 1)) for p in first.split(b' ', 3)[3].strip().split(b';')]} - self.pid = int(meta['PID']) if 'PID' in meta else None - self.path = meta['PATH'] if 'PATH' in meta else None - self.stack = [] - print(f'Process with PID {self.pid} connected ({self.path})') + def parse(self): self.before() try: while True: @@ -115,25 +118,25 @@ class Handler(StreamRequestHandler): m = re.match(r'\s*"', argument) if m: idx = len(m.group(0)) - 1 - s, i = Handler.parse_str(argument[idx:]) + s, i = Parser.parse_str(argument[idx:]) idx = i - if idx < len(argument) and argument[idx] == ',': + if idx < len(argument) and argument[idx] in ',;': idx += 1 return s, idx m = re.match(r'\s*\[', argument) if m: idx = len(m.group(0)) - s, i = Handler.parse_args(argument[idx:]) + s, i = Parser.parse_args(argument[idx:]) idx += i - if idx < len(argument) and argument[idx] == ',': + if idx < len(argument) and argument[idx] in ',;': idx += 1 return s, idx m = re.match(r'\s*\{', argument) if m: idx = len(m.group(0)) - s, i = Handler.parse_args(argument[idx:], named=True) + s, i = Parser.parse_args(argument[idx:], named=True) idx += i - if idx < len(argument) and argument[idx] == ',': + if idx < len(argument) and argument[idx] in ',;': idx += 1 return s, idx m = re.match(r'^\s*(.*?)([,:\]}]|$)', argument) @@ -153,7 +156,7 @@ class Handler(StreamRequestHandler): return val, idx - 1 if argument[idx] == '[': idx += 1 - l, i = Handler.parse_args(argument[idx:]) + l, i = Parser.parse_args(argument[idx:]) idx += i if idx < len(argument) and argument[idx] == ',': idx += 1 @@ -169,14 +172,14 @@ class Handler(StreamRequestHandler): flags = [f.strip() for f in flags[1:-1].split('|') if len(f.strip()) > 0] return (val, flags), idx elif argument[idx] == '"': - s, i = Handler.parse_str(argument[idx:]) + s, i = Parser.parse_str(argument[idx:]) idx += i if idx < len(argument) and argument[idx] == ',': idx += 1 return (val, s), idx elif argument[idx] == '{': idx += 1 - l, i = Handler.parse_args(argument[idx:], named=True) + l, i = Parser.parse_args(argument[idx:], named=True) idx += i if idx < len(argument) and argument[idx] == ',': idx += 1 @@ -192,9 +195,7 @@ class Handler(StreamRequestHandler): return (val, value), idx @staticmethod - def parse_args(arguments: str, named: bool = False) -> tuple[tuple or dict, int]: - # FIXME parse dicts in lists, dicts in dicts, ... - print(named, arguments) + def parse_args(arguments: str, named: bool = False, ret: bool = False) -> tuple[tuple or dict, int]: args = [] if not named else {} idx = 0 name = None @@ -202,11 +203,15 @@ class Handler(StreamRequestHandler): if arguments[idx] == ']' and not named or arguments[idx] == '}' and named: idx += 1 break + elif ret and named: + m = re.match(r'^\s*([^=]+)=', arguments[idx:]) + idx += len(m.group(0)) + name = m.group(1) elif named: m = re.match(r'^\s*([^:]+):', arguments[idx:]) idx += len(m.group(0)) name = m.group(1) - val, i = Handler.parse_arg(arguments[idx:]) + val, i = Parser.parse_arg(arguments[idx:]) if named: args[name] = val else: @@ -219,7 +224,7 @@ class Handler(StreamRequestHandler): self.pid, self.tid = int(pid), int(tid) if not data.startswith(b'return ') and not data == b'return': call = data.decode('utf-8') - print(f'[{self.pid}][{self.tid}] {call}') + #print(f'[{self.pid}][{self.tid}] {call}') func_name = call[:call.find('(')] ret = call[call.rfind(':'):] m = RET_ADDR_RE.match(ret) @@ -228,7 +233,7 @@ class Handler(StreamRequestHandler): self.dli_file_name = g_fname self.rel_ret_addr = int(g_ret, 0) self.dli_sym_name = g_sym - args, _ = Handler.parse_args(call[call.find('(') + 1:call.rfind(':') - 1]) + args, _ = Parser.parse_args(call[call.find('(') + 1:call.rfind(':') - 1]) self.stack.append((self.ret_addr, self.dli_file_name, self.rel_ret_addr, self.dli_sym_name, func_name, args)) try: func = getattr(self, f'before_{func_name}') @@ -242,12 +247,21 @@ class Handler(StreamRequestHandler): command = func(*args) or self.before_fallback(func_name, *args) or 'ok' except NotImplementedError: command = self.before_fallback(func_name, *args) or 'ok' - print(f'[{self.pid}][{self.tid}] -> {command}') - self.wfile.write(command.encode('utf-8') + b'\n') + if self.wfile: + #print(f'[{self.pid}][{self.tid}] -> {command}') + self.wfile.write(command.encode('utf-8') + b'\n') else: ret = data.decode('utf-8') - ret_value, _ = Handler.parse_arg(ret[7:].split(';', 1)[0]) - # FIXME parse return values (errno, ...) + ret = ret.split(';', 1) + other_vals = ret[1].strip() if len(ret) > 1 else '' + ret_value, _ = Parser.parse_arg(ret[0][7:]) + kwargs = {} + if other_vals.startswith('errno '): + ret = other_vals[6:].split(';', 1) + kwargs['errno'] = ret[0].strip() + other_vals = ret[1].strip() if len(ret) > 1 else '' + if len(other_vals) > 0: + kwargs, _ = Parser.parse_args(other_vals, named=True, ret=True) (self.ret_addr, self.dli_file_name, self.rel_ret_addr, self.dli_sym_name, func_name, args) = self.stack.pop() try: func = getattr(self, f'after_{func_name}') @@ -259,15 +273,15 @@ class Handler(StreamRequestHandler): if func is None: raise NotImplementedError() if ret_value is None: - func(*args) + func(*args, **kwargs) else: - func(*args, ret_value) + func(*args, ret_value, **kwargs) except NotImplementedError: if ret_value is None: - self.after_fallback(func_name, *args) + self.after_fallback(func_name, *args, **kwargs) else: - self.after_fallback(func_name, *args, ret_value) - print(f'[{self.pid}][{self.tid}] -> {ret}') + self.after_fallback(func_name, *args, ret_value, **kwargs) + #print(f'[{self.pid}][{self.tid}] -> {ret}') def before_malloc(self, size: int) -> str: raise NotImplementedError() @@ -548,6 +562,17 @@ class Handler(StreamRequestHandler): raise NotImplementedError() +class Handler(StreamRequestHandler, Parser): + def handle(self): + first = self.rfile.readline() + meta = {a[0]: a[1] for a in [tuple(p.decode('utf-8').split(':', 1)) for p in first.split(b' ', 3)[3].strip().split(b';')]} + self.pid = int(meta['PID']) if 'PID' in meta else None + self.path = meta['PATH'] if 'PATH' in meta else None + print(f'Process with PID {self.pid} connected ({self.path})') + self.stack = [] + self.parse() + + def intercept(socket: str, handler: type[Handler]) -> None: try: with ThreadedUnixStreamServer(socket, handler) as server: diff --git a/proj/server/src/intercept/standard.py b/proj/server/src/intercept/standard.py index 769f1ad..d0a67a1 100644 --- a/proj/server/src/intercept/standard.py +++ b/proj/server/src/intercept/standard.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- from intercept import * +import sys FUNCTION_ERRORS: dict[str, list[str]] = { @@ -9,30 +10,30 @@ FUNCTION_ERRORS: dict[str, list[str]] = { 'calloc': ['ENOMEM'], 'realloc': ['ENOMEM'], 'reallocarray': ['ENOMEM'], - 'read': [], - 'pread': [], - 'write': [], - 'pwrite': [], - 'close': ['EBADF'], # EINTR, EIO - 'sigaction': ['EINVAL'], - 'sem_init': ['EINVAL', 'ENOSYS'], - 'sem_open': ['EACCES', 'EEXIST', 'EINVAL', 'EMFILE', 'ENAMETOOLONG', 'ENFILE', 'ENOENT', 'ENOMEM'], - 'sem_post': ['EINVAL', 'EOVERFLOW'], - 'sem_wait': ['EINTR', 'EINVAL'], - 'sem_trywait': ['EAGAIN', 'EINTR', 'EINVAL'], - 'sem_timedwait': ['EINTR', 'EINVAL', 'ETIMEDOUT'], - 'sem_getvalule': ['EINVAL'], - 'sem_close': ['EINVAL'], - 'sem_unlink': ['EACCES', 'ENAMETOOLONG', 'ENOENT'], - 'sem_destroy': ['EINVAL'], - 'shm_open': ['EACCES', 'EEXIST', 'EINVAL', 'EMFILE', 'ENAMETOOLONG', 'ENFILE', 'ENOENT'], - 'shm_unlink': ['EACCES', 'ENAMETOOLONG', 'ENOENT'], - 'mmap': ['EACCES', 'EBADF', 'EINVAL', 'EMFILE', 'ENODEV', 'ENOMEM', 'ENOTSUP', 'ENXIO', 'EOVERFLOW'], # EAGAIN - 'munmap': ['EINVAL'], - 'ftruncate': ['EINTR', 'EINVAL', 'EFBIG', 'EIO', 'EBADF'], - 'fork': ['EAGAIN', 'ENOMEM', 'ENOSYS'], + 'read': ['EINTR', 'EIO', 'ECONNRESET', 'ENOTCONN', 'ETIMEDOUT'], + 'pread': ['EINTR', 'EIO'], + 'write': ['EINTR', 'EIO', 'EDQUOT', 'EFBIG', 'ENOSPC', 'EPIPE', 'ECONNRESET'], + 'pwrite': ['EINTR', 'EIO', 'EDQUOT', 'EFBIG', 'ENOSPC', 'EPIPE'], + 'close': [], + 'sigaction': [], + 'sem_init': ['ENOSYS'], + 'sem_open': ['EACCES', 'EEXIST', 'EMFILE', 'ENFILE', 'ENOENT', 'ENOMEM'], + 'sem_post': [], + 'sem_wait': ['EINTR'], + 'sem_trywait': ['EINTR'], + 'sem_timedwait': ['EINTR', 'ETIMEDOUT'], + 'sem_getvalule': [], + 'sem_close': [], + 'sem_unlink': ['EACCES', 'ENOENT'], + 'sem_destroy': [], + 'shm_open': ['EACCES', 'EEXIST', 'EMFILE', 'ENFILE', 'ENOENT'], + 'shm_unlink': ['EACCES', 'ENOENT'], + 'mmap': ['EACCES', 'EBADF', 'EMFILE', 'ENODEV', 'ENOMEM', 'ENOTSUP', 'ENXIO'], + 'munmap': [], + 'ftruncate': ['EINTR', 'EFBIG', 'EIO', 'EBADF'], + 'fork': ['ENOMEM', 'ENOSYS'], 'wait': ['ECHILD', 'EINTR'], - 'waitpid': ['ECHILD', 'EINTR', 'EINVAL'], + 'waitpid': ['ECHILD', 'EINTR'], 'execl': ['ECHILD', 'EINTR'], 'execlp': ['ECHILD', 'EINTR'], 'execle': ['ECHILD', 'EINTR'], @@ -42,19 +43,20 @@ FUNCTION_ERRORS: dict[str, list[str]] = { 'execve': ['ECHILD', 'EINTR'], 'fexecve': ['ECHILD', 'EINTR'], 'pipe': ['EMFILE', 'ENFILE'], - 'dup': ['EBADF', 'EMFILE'], # ENOMEM - 'dup2': ['EBADF', 'EBUSY', 'EINTR', 'EMFILE'], # ENOMEM - 'dup3': ['EBADF', 'EBUSY', 'EINTR', 'EINVAL', 'EMFILE'], # ENOMEM + 'dup': ['EBADF', 'EMFILE'], + 'dup2': ['EBADF', 'EBUSY', 'EINTR', 'EMFILE'], + 'dup3': ['EBADF', 'EBUSY', 'EINTR', 'EMFILE'], + 'send': ['EINTR'], + 'recv': ['EINTR'], } SKIP_ERRORS: list[str] = ['EINTR'] -IGNORE_ERRORS: list[str] = ['EINVAL', 'EBADF', 'EOVERFLOW', 'ENAMETOOLONG'] -class MemoryAllocationTester(Handler): - allocated: dict[int, tuple[str, int, int]] +class MemoryAllocationParser(Parser): + allocated: dict[int, tuple[str, str, str, int, int]] max_allocated: int - num_malloc: int + num_alloc: int num_realloc: int num_free: int num_invalid_free: int @@ -62,44 +64,44 @@ class MemoryAllocationTester(Handler): def before(self): self.allocated = {} self.max_allocated = 0 - self.num_malloc = 0 + self.num_alloc = 0 self.num_realloc = 0 self.num_free = 0 self.num_invalid_free = 0 def after(self): if len(self.allocated) > 0: - print("Not free'd:") - for ptr, (func, ret, size) in self.allocated.items(): - print(f' 0x{ptr:x}: {size} bytes ({func}, return address 0x{ret:x})') + print("\x1B[31;1mNot free'd:\x1B[0m", file=sys.stderr) + for ptr, (func, fname, sname, ret, size) in self.allocated.items(): + print(f'\x1B[31;1m 0x{ptr:x}: {size} bytes ({func}, return address {fname}+0x{ret:x} {sname})\x1B[0m', file=sys.stderr) else: - print("All blocks free'd!") - print(f'Max allocated: {self.max_allocated} bytes') + print("\x1B[32;1mAll blocks free'd!\x1B[0m", file=sys.stderr) + print(f'Max allocated: {self.max_allocated} bytes', file=sys.stderr) def update_max_allocated(self): - total = sum(a[2] for a in self.allocated.values()) + total = sum(a[-1] for a in self.allocated.values()) if total > self.max_allocated: self.max_allocated = total def after_malloc(self, size, ret_value, errno=None) -> None: - self.num_malloc += 1 + self.num_alloc += 1 if ret_value != 0: - self.allocated[ret_value] = ('malloc', self.ret_addr, size) + self.allocated[ret_value] = ('malloc', self.dli_file_name, self.dli_sym_name, self.rel_ret_addr, size) self.update_max_allocated() def after_calloc(self, nmemb, size, ret_value, errno=None) -> None: - self.num_malloc += 1 + self.num_alloc += 1 if ret_value != 0: - self.allocated[ret_value] = ('calloc', self.ret_addr, nmemb * size) + self.allocated[ret_value] = ('calloc', self.dli_file_name, self.dli_sym_name, self.rel_ret_addr, nmemb * size) self.update_max_allocated() def after_realloc(self, ptr, size, ret_value, errno=None) -> None: self.num_realloc += 1 if ptr != 0: - if ret_value != 0: + if ret_value != 0 and ptr in self.allocated: v = self.allocated[ptr] del self.allocated[ptr] - self.allocated[ret_value] = (v[0], v[1], size) + self.allocated[ret_value] = (v[0], v[1], v[2], v[3], size) self.update_max_allocated() def after_reallocarray(self, ptr, nmemb, size, ret_value, errno=None) -> None: @@ -108,9 +110,16 @@ class MemoryAllocationTester(Handler): if ret_value != 0: v = self.allocated[ptr] del self.allocated[ptr] - self.allocated[ret_value] = (v[0], v[1], nmemb * size) + self.allocated[ret_value] = (v[0], v[1], v[2], v[3], nmemb * size) self.update_max_allocated() + def after_getaddrinfo(self, node, service, hints, res_ptr, ret_value, errno=None, res=None) -> None: + self.num_alloc += 1 + if ret_value[0] == 0 and res is not None: + size = sum(48 + r['ai_addrlen'] for r in res[1]) + self.allocated[res[0]] = ('getaddrinfo', self.dli_file_name, self.dli_sym_name, self.rel_ret_addr, size) + self.update_max_allocated() + def after_free(self, ptr) -> None: self.num_free += 1 if ptr != 0: @@ -120,8 +129,21 @@ class MemoryAllocationTester(Handler): self.num_free -= 1 self.num_invalid_free += 1 + def after_freeaddrinfo(self, res: Pointer) -> None: + self.num_free += 1 + if res != 0: + if res in self.allocated: + del self.allocated[res] + else: + self.num_free -= 1 + self.num_invalid_free += 1 -class InterruptedCheckTester(Handler): + +class MemoryAllocationTester(MemoryAllocationParser, Handler): + pass + + +class InterruptedCheckParser(Parser): cycles: int = 50 functions: dict[str, tuple[str or None, str]] = { fn: ('fail EINTR' if fn not in ('sem_post',) else None, @@ -146,10 +168,13 @@ class InterruptedCheckTester(Handler): if self.while_testing: self.error() for (name, ret_addr), status in self.tested_functions.items(): - print(f'{name} (0x{ret_addr:x}) -> {status}') + if status == 'passed': + print(f'\x1B[32;1m{name} (0x{ret_addr:x}) -> {status}\x1B[0m', file=sys.stderr) + else: + print(f'\x1B[31;1m{name} (0x{ret_addr:x}) -> {status}\x1B[0m', file=sys.stderr) def error(self): - print(f'Error: Return value and errno EINTR not handled correctly in {self.last_func_name} (return address 0x{self.last_ret_addr:x})') + print(f'Error: Return value and errno EINTR not handled correctly in {self.last_func_name} (return address 0x{self.last_ret_addr:x})', file=sys.stderr) self.tested_functions[(self.last_func_name, self.last_ret_addr)] = 'failed' self.counter = 0 self.last_func_name = None @@ -176,7 +201,11 @@ class InterruptedCheckTester(Handler): return self.functions[func_name][1] -def get_return_value_check_tester() -> tuple[dict, type[Handler]]: +class InterruptedCheckTester(InterruptedCheckParser, Handler): + pass + + +def get_return_value_check_tester() -> tuple[dict, type[Parser]]: ctx = { 'state': 'init', 'call_sequence': [], @@ -185,11 +214,11 @@ def get_return_value_check_tester() -> tuple[dict, type[Handler]]: 'results': {}, } - class ReturnValueCheckTester(Handler): + class ReturnValueCheckTester(Parser): functions: dict[str, list[str]] = { - fn: [e for e in errors if e not in SKIP_ERRORS and e not in IGNORE_ERRORS] + fn: [e for e in errors if e not in SKIP_ERRORS] for fn, errors in FUNCTION_ERRORS.items() - if len(set(errors) - set(SKIP_ERRORS) - set(IGNORE_ERRORS)) > 0 + if len(set(errors) - set(SKIP_ERRORS)) > 0 } context: dict num: int = 0 diff --git a/proj/server/src/test-interrupts b/proj/server/src/test-interrupts index 72cea09..a4342e0 100755 --- a/proj/server/src/test-interrupts +++ b/proj/server/src/test-interrupts @@ -17,6 +17,11 @@ def socket_thread(socket: str) -> None: def main() -> None: parser = argparse.ArgumentParser() args, extra = parser.parse_known_args() + if len(extra) > 0 and extra[0] == '--': + extra.pop(0) + if len(extra) == 0: + parser.error('command expected after arguments or \'--\'') + socket_name = f'/tmp/intercept.interrupts.{os.getpid()}.sock' t1 = threading.Thread(target=socket_thread, args=(socket_name,)) t1.daemon = True diff --git a/proj/server/src/test-memory b/proj/server/src/test-memory index 5f8a3a4..b41f6d4 100755 --- a/proj/server/src/test-memory +++ b/proj/server/src/test-memory @@ -1,33 +1,38 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import os import argparse -import threading import subprocess +import os +import sys import intercept import intercept.standard -def socket_thread(socket: str) -> None: - intercept.intercept(socket, intercept.standard.MemoryAllocationTester) - - def main() -> None: parser = argparse.ArgumentParser() args, extra = parser.parse_known_args() - socket_name = f'/tmp/intercept.memory.{os.getpid()}.sock' - t1 = threading.Thread(target=socket_thread, args=(socket_name,)) - t1.daemon = True - t1.start() - subprocess.run(extra, env={ - 'LD_PRELOAD': os.getcwd() + '/../../intercept/intercept.so', - 'INTERCEPT': 'unix:' + socket_name, - 'INTERCEPT_VERBOSE': '1', - 'INTERCEPT_FUNCTIONS': ','.join(['malloc', 'calloc', 'realloc', 'reallocarray', 'free', 'getaddrinfo', 'freeaddrinfo']), - 'INTERCEPT_LIBRARIES': ','.join(['*', '-/lib*', '-/usr/lib*']), - }) + if len(extra) > 0 and extra[0] == '--': + extra.pop(0) + if len(extra) == 0: + parser.error('command expected after arguments or \'--\'') + + log_file = f'/tmp/intercept.memory.{os.getpid()}.log' + + try: + subprocess.run(extra, stdin=sys.stdin, env={ + 'LD_PRELOAD': os.getcwd() + '/../../intercept/intercept.so', + 'INTERCEPT': 'file:' + log_file, + 'INTERCEPT_VERBOSE': '1', + 'INTERCEPT_FUNCTIONS': ','.join(['malloc', 'calloc', 'realloc', 'reallocarray', 'free', 'getaddrinfo', 'freeaddrinfo']), + 'INTERCEPT_LIBRARIES': ','.join(['*', '-/lib*', '-/usr/lib*']), + }) + finally: + with open(log_file, 'rb') as file: + parser = intercept.standard.MemoryAllocationParser(file) + parser.parse() + os.remove(log_file) if __name__ == '__main__': diff --git a/proj/server/src/test-return-values b/proj/server/src/test-return-values index a051ce2..b05e038 100755 --- a/proj/server/src/test-return-values +++ b/proj/server/src/test-return-values @@ -21,6 +21,8 @@ def main() -> None: args, extra = parser.parse_known_args() if len(extra) > 0 and extra[0] == '--': extra.pop(0) + if len(extra) == 0: + parser.error('command expected after arguments or \'--\'') stdin = open(args.stdin) if args.stdin else None