1
0

Compare commits

..

1 Commits

Author SHA1 Message Date
a4c9879472 thesis: Add chapter 5: comparison 2025-07-28 10:44:11 +02:00
18 changed files with 60 additions and 363 deletions

1
.gitignore vendored
View File

@@ -4,4 +4,3 @@ bin/
*.o
*.log
*.pdf
related-work/

View File

@@ -1226,7 +1226,7 @@ int sym(getopt)(const int argc, char *const argv[], const char *shortopts) {
else if_invalid(getopt)
}
const int ret = __real_getopt(argc, argv, shortopts);
msg("return %i; optind %i", ret, optind);
msg("return %i", ret);
return ret;
}
@@ -1419,7 +1419,7 @@ int sym(sigaction)(int sig, const struct sigaction *restrict act, struct sigacti
if (sigismember(&act->sa_mask, i) != 1)
continue;
if (maskstr[0] != 0) strcat(maskstr, ",");
sprintf(maskstr + strlen(maskstr), "%i:%s", i, getsigstr(i));
strcat(maskstr, getsigstr(i));
}
if (!verbosity) {
msg("sigaction(%i:%s, %p:{}, %p)" ret_str, sig, sigstr, act, oact, ret_data);
@@ -1462,7 +1462,7 @@ int sym(sigaction)(int sig, const struct sigaction *restrict act, struct sigacti
if (sigismember(&oact->sa_mask, i) != 1)
continue;
if (maskstr[0] != 0) strcat(maskstr, ",");
sprintf(maskstr + strlen(maskstr), "%i:%s", i, getsigstr(i));
strcat(maskstr, getsigstr(i));
}
msg("return %i; errno %s; oact={sa_flags: 0x%x:%s, %s: %p, sa_mask: [%s]}", ret, strerrorname_np(errno), oact->sa_flags, flgstr, name, ptr, maskstr);
} else {

View File

@@ -23,7 +23,7 @@ class Flags(NamedTuple):
flags: list[str]
StructTimeSpec = TypedDict('StructTimeSpec', {'tv_sec': int, 'tv_nsec': int})
StructSigAction = TypedDict('StructSigAction', {'sa_flags': Flags, 'sa_handler': NotRequired[Pointer], 'sa_sigaction': NotRequired[Pointer], 'sa_mask': list[Constant]})
StructSigAction = TypedDict('StructSigAction', {'sa_flags': Flags, 'sa_handler': NotRequired[Pointer], 'sa_sigaction': NotRequired[Pointer], 'sa_mask': list[str]})
StructSockAddr = TypedDict('StructSockAddr', {'sa_family': Constant, 'sa_data': NotRequired[bytes],
'sun_path': NotRequired[bytes],
'sin_addr': NotRequired[bytes], 'sin_port': NotRequired[int],
@@ -263,7 +263,7 @@ class Parser:
idx += 1
value = int(value, 0) if value != '(nil)' else 0
return PointerTo(val, value), idx
m = re.match(r'[A-Z0-9_]+|\?', argument[idx:])
m = re.match(r'[A-Z0-9_]+', argument[idx:])
if m is not None:
value = m.group(0)
idx += len(value)
@@ -302,8 +302,6 @@ class Parser:
self.pid, self.tid = int(pid), int(tid)
if len(self.stack) == 0:
self.stack[(self.pid, self.tid)] = []
elif (self.pid, self.tid) not in self.stack:
self.stack[(self.pid, self.tid)] = []
if not data.startswith(b'return ') and not data == b'return':
call = data.decode('utf-8')
#print(f'[{self.pid}][{self.tid}] {call}')
@@ -346,10 +344,6 @@ class Parser:
other_vals = ret[1].strip() if len(ret) > 1 else ''
ret_value, _ = Parser.parse_arg(ret[0][7:])
kwargs = {}
if other_vals.startswith('optind '):
ret = other_vals[7:].split(';', 1)
kwargs['optind'] = int(ret[0].strip())
other_vals = ret[1].strip() if len(ret) > 1 else ''
if other_vals.startswith('errno '):
ret = other_vals[6:].split(';', 1)
kwargs['errno'] = ret[0].strip()
@@ -410,7 +404,7 @@ class Parser:
def before_getopt(self, argc: int, argv: PointerTo[list[PointerTo[bytes]]], optstring: PointerTo[bytes]) -> str:
raise NotImplementedError()
def after_getopt(self, argc: int, argv: PointerTo[list[PointerTo[bytes]]], optstring: PointerTo[bytes],
ret_value: int, optind: int = None) -> None:
ret_value: int) -> None:
raise NotImplementedError()
def before_exit(self, status: int) -> str:
raise NotImplementedError()

View File

@@ -102,7 +102,7 @@ def main() -> None:
entry[ret] = set()
entry[ret].add(tuple(c for c in iter_tree_ok(child)))
allowed_cleanup_functions = ['malloc', 'free', 'freeaddrinfo', 'close', 'exit', 'sigaction']
allowed_cleanup_functions = ['malloc', 'free', 'freeaddrinfo', 'close', 'exit']
for call, errors in calls.items():
if len(errors) <= 1:
continue

1
thesis/.gitignore vendored
View File

@@ -2,7 +2,6 @@
/intro.*
/example.*
/*.pdf
!/*v*.pdf
/*.txt
/_minted/
*.acn

Binary file not shown.

Binary file not shown.

View File

@@ -1,57 +1,20 @@
\chapter{Introduction}\label{ch:introduction}
Intercepting (also known as Hooking, or Tracing) system or function calls allows one to trace what a given program does.
This information is useful for security analysis or when testing or verifying a program.
This chapter gives a general overview of the motivation and goal of this work (Section~\ref{sec:motivation-and-goal}) and of the difference between system calls and function calls (Section~\ref{sec:definitions}).
Lorem Ipsum.
\section{TODO: Why intercept?}
\section{Motivation and Goal}\label{sec:motivation-and-goal}
Lorem Ipsum.
When teaching students about Operating Systems, their interfaces, and standard libraries, C is still a widely used language.
Especially when using Linux.
Therefore, it is obvious why many university courses still require students to write their assignments and exams in C\@.
The problem when trying to verify whether students implemented their assignment correctly is that low-level OS constructs (like semaphores, pipes, sockets, memory management) make it hard to run automated tests, because the testing system needs to keep track of, set up, and verify the usage of these resources.
\section{TODO: Why are current solutions not enough?}
The goal of this work was to find a way to easily intercept system or function calls and to verify if students called the right functions with the right arguments at the right time.
This restriction in scope allows one to focus on simple binary programs without having to consider complex or I/O-heavy programs.
Furthermore, in this setting the source code of the students' programs is obviously available because this is what they need to deliver.
The availability of source code is a key concern when trying to intercept function or system calls as will be clear in the next chapters.
Lorem Ipsum.
\section{TODO: Linux/C/ELF call structure}
\section{Definitions}\label{sec:definitions}
Lorem Ipsum.
First, function calls, system calls and their differences need to be defined.
The following subsections concern these definitions.
\section{TODO: System Calls vs. Function Calls}\label{sec:system-calls-vs-function-calls}
\subsection{Function Calls}\label{subsec:function-calls}
Generally, a function in C (and also most other programming languages) is a piece of code which may be called and therefore executed from elsewhere.
Functions have zero or more arguments and return a single value.
When calling a function, the caller places the return address onto the stack.
This address indicates where execution should continue in the caller once the function has finished.
Functions are used to structure programs, reuse functionality, or expose functionality in libraries.
Languages other than C differentiate between functions, methods, procedures, and so on.
A function written in the source code is almost always compiled to a function in the resulting binary.
Intercepting function calls would allow one to see the name of the function, its arguments, return value, and return address.
\subsection{System Calls}\label{subsec:system-calls}
In contrast to functions, system calls are calls to the kernel itself.
Many operations on a modern operating system require special privileges, which a simple user-space process does not have.
By invoking a system call, the (user-space) process hands control over to the (privileged) kernel and requests an operation to be performed.
\cite[Chapter~10]{linuxkernel}
How exactly these system calls work is architecture and system specific.
But generally, the process places the system call number, and its arguments in defined registers and then executes a special system call opcode.
Then the kernel executes the requested operation and places the return value inside another register, and lastly hands the execution back to the process.
\cite[Chapter~10]{linuxkernel}
Intercepting system calls would allow one to see the system call number, arguments, and return value.
One has to keep in mind that many system-related functionalities are not in fact translated to system calls one-to-one.
For example, \texttt{malloc}~\cite{malloc.3} has no dedicated system call; it is managed internally by the C standard library.
Many system calls have corresponding wrapper functions in the C standard library (like \texttt{open}, \texttt{close}, \texttt{sem\_wait}).
Lorem Ipsum.

View File

@@ -11,7 +11,7 @@ For that see Chapter~\ref{ch:manipulating-function-calls}.
\section{Identified Methods for Intercepting Function and System Calls}\label{sec:methods-for-intercepting}
First, one has to answer the question on \textit{how exactly} to intercept function or system calls.
At the beginning of this work it was not yet determined if the interception of function calls, system calls, or both should be used to achieve the overarching goal (see Section~\ref{sec:motivation-and-goal}).
At the beginning of this work it was not yet determined if the interception of function calls, system calls, or both should be used to achieve the overarching goal (see \todo{Goals}).
This first section tries to list all possible methods on how to intercept function or system calls but does not claim completeness.
The order of the following subsections is roughly based on the thought process on finding the most appropriate method suitable for this work.
@@ -217,7 +217,7 @@ Although, one has to be aware that not only function calls inside the targeted b
\subsection{Conclusion}\label{subsec:methods-for-intercepting-conclusion}
During the research on different approaches to intercepting system and function calls,
it has been found that the most reliable way to achieve the goals of this work (see Section~\ref{sec:motivation-and-goal}) is to intercept function calls instead of system calls.
it has been found that the most reliable way to achieve the goals of this work (see \todo{Goals}) is to intercept function calls instead of system calls.
This is because (as long as the programs to test are dynamically linked), intercepting function calls allows one to intercept many more calls and in a more flexible way.
Therefore, from now on this work only considers function calls and no system calls directly.
@@ -566,14 +566,3 @@ Other checks may also include guards to calls to ``forbidden'' functions, or tha
Another important post-condition of most library functions is the return value, which in most cases indicates success or failure of an operation.
However, intercepting of calls alone may not be able to verify if a program really checks the return value of a function and acts accordingly.
Chapter~\ref{ch:manipulating-function-calls} shows how this problem may be solved.
\subsection{Validating Memory Management}\label{subsec:testing-memory-management}
Lorem Ipsum.
(malloc, calloc, realloc, free, getaddrinfo, freeaddrinfo).
\subsection{Validating Resource Management}\label{subsec:validating-resource-management}
Lorem Ipsum.
(open, close, socket, \dots).

View File

@@ -35,21 +35,21 @@ Figure~\ref{fig:control-flow} illustrates the control flow for manipulating func
\end{call}
\end{sdblock}
\begin{sdblock}{Modified Call}{}
\begin{call}{p}{malloc(x)}{i}{return b}
\begin{sdblock}{Manipulated Call}{}
\begin{call}{p}{malloc(x)}{i}{return a}
\mess{i}{{``malloc(x)''}}{s}
\mess{s}{``modify y''}{i}
\begin{call}{i}{malloc(y)}{l}{return b}
\begin{call}{i}{malloc(y)}{l}{return a}
\end{call}
\mess{i}{``return b''}{s}
\mess{i}{``return a''}{s}
\end{call}
\end{sdblock}
\begin{sdblock}{Mocked Call}{}
\begin{call}{p}{malloc(x)}{i}{return c}
\begin{call}{p}{malloc(x)}{i}{return z}
\mess{i}{{``malloc(x)''}}{s}
\mess{s}{``fail'' / ``return c''}{i}
\mess{i}{``return c''}{s}
\mess{s}{``fail'' / ``return z''}{i}
\mess{i}{``return z''}{s}
\end{call}
\end{sdblock}
\end{sequencediagram}
@@ -91,92 +91,14 @@ The server responds in one of four possible ways:
This message informs the server about the resulting return value.
The server does not acknowledge this message.
The contents of this message type correspond to the second line of an intercepted function call (see Section~\ref{sec:automated-testing-on-intercepted-function-calls}).
The contents of this message type correspond to the second line of an intercepted function call (see Section~\ref{sec:automated-testing-on-intercepted-function-calls}).
\section{Creating a Socket Server in Python}\label{sec:creating-a-socket-server-in-python}
Lorem Ipsum.
\section{Automated Testing using Function Call Manipulation}\label{sec:automated-testing-using-function-call-manipulation}
As seen in Figure~\ref{fig:control-flow} function call manipulation allows for mocking individual calls.
Mocking may be used to see how the program behaves when individual function calls fail or return an unusual, but valid, value.
The simplest way to automatically test programs is to run them multiple times and on each run let a single function call fail.
The resulting sequences of function calls may then be combined into a call sequence graph (or tree).
By analyzing this call graph, it is possible to decide if a program correctly terminated when faced with a failed function call.
This may be the case when the following function calls differ from those which were recorded on a default run (without any mocked function calls).
\subsection{Testing Return Value Checks}\label{subsec:testing-return-value-checks}
Figure~\ref{fig:call-sequence} shows the simplified and collapsed call sequence graph of the prior example in Section~\ref{sec:intercepting-example}.
Each edge between two nodes without any label indicates the next function call on a normal run of the program.
Edges labeled with ``fail'' indicate the next function call after a mocked failed call.
In reality, there are multiple failing paths, one for each possible error return value, but in this example they all yield the same resulting path and have therefore been collapsed.
To test whether a programmer always checked the return value of a function and acted accordingly, the resulting call sequence graph may now be analyzed.
This test seems trivial at first.
The simplest approach is to verify that after a failing function call only ``cleanup'' function calls (\texttt{free}, \texttt{close}, \texttt{exit}, \dots) follow.
For simple programs, this assumption may hold, but there are many exceptions.
For example, what if the program recognizes the failed call correctly as failed but recovers and continues to operate normally?
Or what if the ``cleanup'' path is very complex and includes function calls not previously marked as valid cleanup functions?
However, for simple programs (like those mentioned in Section~\ref{sec:motivation-and-goal}), the simplest approach from above suffices.
\begin{figure}
\begin{tikzpicture}[node distance=15mm, thick, main/.style = {draw, circle}, text centered]
\newcommand{\fncall}[2]{\tiny{\begin{tabular}{c}\normalsize{\texttt{#1}}\\\texttt{#2}\end{tabular}}}
\node[main] (1)[elips] {\fncall{getopt}{client+0x1ac5, client.c:186}};
\node[main] (2)[elips] [below of=1] {\fncall{getaddrinfo}{client+0x147b, client.c:74}};
\node[main] (3)[elips] [below of=2] {\fncall{socket}{client+0x14f2, client.c:81}};
\node[main] (4)[elips] [below of=3] {\fncall{connect}{client+0x15f3, client.c:104}};
\node[main] (5)[elips] [below of=4] {\fncall{freeaddrinfo}{client+0x1638, client.c:114}};
\node[main] (6)[elips] [below of=5] {\fncall{send}{client+0x1f5c, client.c:277}};
\node[main] (7)[elips] [below of=6] {\fncall{recv}{client+0x1fa1, client.c:284}};
\node[main] (8)[elips] [below of=7] {\fncall{recv}{client+0x2062, client.c:300}};
\node[main] (9)[elips] [below of=8] {\fncall{recv}{client+0x2442, client.c:360}};
\node[main] (10)[elips] [below of=9] {\fncall{recv}{client+0x2442, client.c:360}};
\node[main] (11)[elips] [below of=10] {\fncall{close}{client+0x2489, client.c:375}};
\node[main] (12)[elips] [below of=11] {\fncall{exit}{sys+0x0}};
\node[main] (2f1)[elips] [right=10mm of 2] {\fncall{exit}{sys+0x0}};
\node[main] (3f1)[elips] [right=10mm of 3] {\fncall{freeaddrinfo}{client+0x1638, client.c:114}};
\node[main] (4f1)[elips] [right=10mm of 4] {\fncall{close}{client+0x1611, client.c:106}};
\draw[->] (1) -- (2);
\draw[->] (2) -- (3);
\draw[->] (2) -- node[midway, above, sloped, pos=0.5] {fail} (2f1);
\draw[->] (3) -- (4);
\draw[->] (3) -- node[midway, above, sloped, pos=0.5] {fail} (3f1);
\draw[->] (3f1) -- (2f1);
\draw[->] (4) -- (5);
\draw[->] (4) -- node[midway, above, sloped, pos=0.5] {fail} (4f1);
\draw[->] (4f1) -- (3f1);
\draw[->] (5) -- (6);
\draw[->] (6) -- (7);
\draw[->] (7) -- (8);
\draw[->] (8) -- (9);
\draw[->] (9) -- (10);
\draw[->] (10) -- (11);
\draw[->] (11) -- (12);
\draw[->] (6) to [out=0,in=-4,looseness=2] node[midway, above, pos=0.05] {fail} (11);
\draw[->] (7) to [out=0,in=-2,looseness=1.75] node[midway, above, pos=0.075] {fail} (11);
\draw[->] (8) to [out=0,in=0,looseness=1.5] node[midway, above, pos=0.1] {fail} (11);
\draw[->] (9) to [out=0,in=2,looseness=1.25] node[midway, above, pos=0.1] {fail} (11);
\draw[->] (10) to [out=0,in=3,looseness=1] node[midway, above, pos=0.1] {fail} (11);
\end{tikzpicture}
\centering
\caption{Simplified Call Sequence Graph of \texttt{./client}.}
\label{fig:call-sequence}
\end{figure}
\subsection{Testing Correct Handling of Interrupts}\label{subsec:testing-interrupts}
Many functions (like \texttt{read}, \texttt{write}, or \texttt{sem\_wait}) are interruptible by signals.
When this happens, they return a value indicating an error and set \texttt{errno} to \texttt{EINTR}.
Usually, the program is expected to repeat the call until it gets a real return value or error other than \texttt{EINTR}.
Therefore, testing correct handling of interrupts is a different type of test in contrast to general tests on return value checks as seen in Subsection~\ref{subsec:testing-return-value-checks}.
It is relatively simple to test if a program correctly handles interrupts.
On any function call that may yield \texttt{EINTR}, mock the call and return exactly that error.
Afterward, check if the same function is called again.
To increase confidence in the result, one may repeat this process multiple times.
As in the test in Subsection~\ref{subsec:testing-return-value-checks}, the handling of the interrupt may involve calls to other functions, so this method is not always the right choice.
But for simple programs, it is entirely sufficient.
Lorem Ipsum.

View File

@@ -0,0 +1,4 @@
\chapter{Comparison to Similar Solutions}\label{ch:comparison}
Lorem Ipsum.

View File

@@ -1,63 +0,0 @@
\chapter{Related Work}\label{ch:related-work}
This chapter gives a rough overview of techniques and methods to intercept or hook system calls and function calls.
See also Section~\ref{sec:methods-for-intercepting}.
Many methods have already been discussed there.
\section{Function Call Interception}\label{sec:function-call-interception}
All related work regarding function call interception has already been mentioned in the aforementioned Section.
See \texttt{ltrace} (Subsection~\ref{subsec:ltrace}), wrapper functions (Subsection~\ref{subsec:wrapper-functions}), and \texttt{LD\_PRELOAD} (Subsection~\ref{subsec:preloading}).
\section{System Call Interception}\label{sec:system-call-interception}
This section discusses further related work regarding system call interception.
This excludes techniques already discussed in Section~\ref{sec:methods-for-intercepting},
like \texttt{ptrace} (Subsection~\ref{subsec:ptrace}), and \texttt{strace} (Subsection~\ref{subsec:strace}).
Almost all following methods use binary rewriting to replace system calls with other instructions (except SUD, Subsection~\ref{subsec:syscall-user-dispatch}).
This is one of the reasons why they were not mentioned in Section~\ref{sec:methods-for-intercepting}.
Another one is that the focus of this work is function call interception, and not system call interception.
\subsection{\texttt{int3} Signaling}\label{subsec:int3-signaling}
\texttt{int3} is a one-byte instruction (\texttt{0xcc}) that invokes a software interrupt.
On Linux, the kernel handles it and raises \texttt{SIGTRAP} to the user-space process that executed \texttt{int3}.
The \texttt{int3} signaling technique exploits this behavior to hook system calls; it replaces \texttt{syscall}/\texttt{sysenter} with \texttt{int3} and employs the signal handler for \texttt{SIGTRAP} as the hook function.
Since \texttt{int3} is one byte, it can replace an arbitrary instruction without breaking the neighbor instructions.
This technique is traditionally used in debuggers to implement breakpoints.
However, signal handling incurs a large overhead because it involves context manipulation by the kernel.
\cite{zpoline}
\subsection{Syscall User Dispatch (SUD)}\label{subsec:syscall-user-dispatch}
Syscall User Dispatch (SUD)~\cite{sud} was added in Linux 5.11, and it offers a way to redirect system calls to arbitrary user-space code.
For the SUD feature, the kernel implements a hook point at the entry point of system calls.
A user-space process can activate SUD via the \texttt{prctl} interface.
When SUD is activated, the hook point raises \texttt{SIGSYS} to the user-space process.
This mechanism allows a user-space program to leverage the \texttt{SIGSYS} signal handler as the system call hook.
However, similarly to the \texttt{int3} signaling technique, SUD imposes a significant performance penalty on the user-space program due to the overhead of the signal handling.
\cite{zpoline}
\subsection{zpoline}\label{subsec:zpoline}
zpoline is a system call hook mechanism for x86-64 CPUs.
Binary rewriting is used to replace (two-byte) \texttt{syscall}/\texttt{sysenter} instructions with a (two-byte) \texttt{callq *\%rax} instruction.
Because this instruction jumps to \texttt{rax}, where also the syscall number is stored, the trampoline code has to be initialized beginning at virtual address 0.
zpoline is exhaustive and achieves very low performance reduction (28--761 times less overhead compared to other exhaustive system call hooking techniques).
\cite{zpoline}
\subsection{DataHook}\label{subsec:datahook}
DataHook is a system call hooking technique for 32-bit programs based on glibc running on x86 or x86-64 machines.
It relies on glibc's way of performing system calls, namely a \texttt{call *\%gs:0x10} instruction to call the \texttt{\_\_kernel\_vsyscall} function.
The content of \texttt{gs:0x10} is backed up and modified to jump to a given hook function.
DataHook is only exhaustive when used on glibc-based programs.
It achieves a very low performance reduction (5--1429 times less overhead compared to existing hooking techniques).
\cite{datahook}

View File

@@ -1,80 +0,0 @@
\chapter{Evaluation}\label{ch:evaluation}
Lorem Ipsum.
\section{Usefulness for the Operating Systems Course}\label{sec:usefulness}
Up until recently the Operating Systems Course (mentioned in Section~\ref{sec:motivation-and-goal}) was split into three exercise blocks:
Files, Shared Memory, Semaphores; Related Processes and Inter-Process Communication via Unnamed Pipes; and Sockets.
Table~\ref{tab:functions} lists all functions presented in the course and their implementation status in \texttt{intercept.so}.
As one may see, simple file stream functions are not currently implemented in \texttt{intercept.so}.
This is because of time restrictions on this work and the fact that simple file operations may easily be tested in the conventional way by checking the resulting output.
All other functions have at least interception and mocking (returning, failing) implemented.
For some functions the modification of function arguments has been implemented.
\begin{table}[h!]
\centering
\begin{threeparttable}
\begin{tabular}{ c|l|c }
Ex. Block & Functions & Implementation \\
\hline
- & malloc, calloc, realloc, reallocarray, free & \texttt{icmrf} \\
- & getopt & \texttt{ic-rf} \\
- & sigaction & \texttt{ic-{}-f} \\
- & mmap & \texttt{ic-rf} \\
- & munmap & \texttt{icmrf} \\
- & close & \texttt{icmrf} \\
\hline
1 & open, fopen, fdopen & \texttt{-{}-{}-{}-{}-} \\
1 & read, pread, write, pwrite & \texttt{ic-rf} \\
1 & fread, fgets, fgetc, fwrite, fputs, fputc, fprintf, fseek & \texttt{-{}-{}-{}-{}-} \\
1 & ferror, feof, clearerr, fileno, fflush, fclose & \texttt{-{}-{}-{}-{}-} \\
1 & getline, getdelim & \texttt{ic-rf} \\
1 & shm\_open, ftruncate, shm\_unlink & \texttt{icmrf} \\
1 & sem\_open, sem\_close, sem\_unlink & \texttt{icmrf} \\
1 & sem\_wait, sem\_post & \texttt{icmrf} \\
1 & sem\_trywait, sem\_timedwait & \texttt{icmrf} \\
1 & sem\_getvalue, sem\_destroy & \texttt{icmrf} \\
\hline
2 & fork, wait, waitpid & \texttt{icmrf} \\
2 & exec*, fexecve & \texttt{ic-rf} \\
2 & exit & \texttt{icm-{}-} \\
2 & pipe & \texttt{ic-rf} \\
2 & dup, dup2, dup3 & \texttt{icmrf} \\
\hline
3 & socket, bind, accept, connect & \texttt{ic-rf} \\
3 & listen & \texttt{icmrf} \\
3 & getaddrinfo & \texttt{ic-rf} \\
3 & freeaddrinfo & \texttt{icmrf} \\
3 & send, recv & \texttt{ic-rf} \\
3 & sendto, sendmsg, recvfrom, recvmsg & \texttt{ic-rf} \\
3 & setsockopt & \texttt{-{}-{}-{}-{}-} \\
\end{tabular}
\begin{tablenotes}
\item[\texttt{i}] Function may be intercepted.
\item[\texttt{c}] Complex function arguments or return value(s) are recorded fully.
\item[\texttt{m}] Function arguments may be modified.
\item[\texttt{r}] Function may be mocked using a specified return value.
\item[\texttt{f}] Function may be mocked using a specified error value.
\end{tablenotes}
\caption{List of relevant functions and their implementation status.}
\label{tab:functions}
\end{threeparttable}
\end{table}
\section{Performance}\label{sec:performance}
Lorem Ipsum.
\subsection{Performance when Intercepting}\label{subsec:performance-intercepting}
Lorem Ipsum.
\subsection{Performance when Manipulating}\label{subsec:performance-manipulating}
Lorem Ipsum.

View File

@@ -0,0 +1,12 @@
\chapter{Related Work}\label{ch:related-work}
Lorem Ipsum.
What other solutions are available?
What are the differences?
What are the characteristics?
https://sholtrop.dev/blog/on-intercepting-linux-syscalls/
https://github.com/yasukata/zpoline

View File

@@ -2,3 +2,8 @@
\chapter{Conclusion}\label{ch:conclusion}
Lorem Ipsum.
Perhaps do some study/``research'' on performance (CPU/memory/\dots).
%\attachfile[appearance=false,print=false,mimetype=text/plain,description=intercept.c]{../proj/intercept/src/intercept.c}
%\attachfile[appearance=false,print=false,mimetype=text/plain,description=Makefile]{../proj/intercept/Makefile}

View File

@@ -46,49 +46,7 @@
publisher = {O'Reilly},
url = {https://litux.nl/mirror/networksecuritytools/0596007949/toc.html},
}
@book{linuxkernel,
author = {Daniel P. Bovet and Marco Cesati},
title = {Understanding the Linux Kernel},
subtitle = {From I/O Ports to Process Management},
edition = {3rd},
date = {2005-11},
isbn = {978-0-596-00565-8},
publisher = {O'Reilly},
}
@manual{gcc,
title = {Using the GNU Compiler Collection (GCC)},
url = {https://gcc.gnu.org/onlinedocs/gcc/index.html},
}
@manual{sud,
title = {Syscall User Dispatch -- The Linux Kernel documentation},
url = {https://docs.kernel.org/admin-guide/syscall-user-dispatch.html},
}
@inproceedings{zpoline,
author = {Kenichi Yasukata and Hajime Tazaki and Pierre-Louis Aublin and Kenta Ishiguro},
title = {zpoline: a system call hook mechanism based on binary rewriting},
booktitle = {2023 USENIX Annual Technical Conference (USENIX ATC '23)},
year = {2023},
isbn = {978-1-939133-35-9},
address = {Boston, MA},
pages = {293--300},
url = {https://www.usenix.org/conference/atc23/presentation/yasukata},
publisher = {USENIX Association},
month = jul,
}
@article{datahook,
author = {Hong, Quan and Li, Jiaqi and Zhang, Wen and Zhai, Lidong},
title = {DataHook: An Efficient and Lightweight System Call Hooking Technique without Instruction Modification},
year = {2025},
issue_date = {July 2025},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = {2},
number = {ISSTA},
url = {https://doi.org/10.1145/3728874},
doi = {10.1145/3728874},
journal = {Proc. ACM Softw. Eng.},
month = jun,
articleno = {ISSTA005},
numpages = {21},
keywords = {DataHook, Hooking technique, Software analysis, Software debugging, System call}
}

Binary file not shown.

View File

@@ -41,10 +41,7 @@
\usepackage{chngcntr}
\counterwithin{listing}{chapter}
\usepackage{pgf-umlsd}
\usepackage{textcomp}
\usepackage{tikz}
\usetikzlibrary{shapes}
\usepackage[flushleft]{threeparttable}
%\usepackage{attachfile}
\newcommand{\newthreadShift}[4][gray!30]{
\newinst[#4]{#2}{#3}
@@ -54,8 +51,6 @@
\tikzstyle{instcolor#2}=[fill=#1]
}
\tikzset{elips/.style={ellipse,draw,minimum width=2em,minimum height=1.8em,inner ysep=0pt},}
% Set PDF document properties
\hypersetup{
pdfpagelayout = TwoPageRight, % How the document is shown in PDF viewers (optional).
@@ -92,7 +87,7 @@
% Required data.
\setregnumber{12119052}
\setdate{01}{08}{2025} % Set date with 3 arguments: {day}{month}{year}.
\setdate{01}{06}{2025} % Set date with 3 arguments: {day}{month}{year}.
\settitle{\thesistitle}{Abfangen und Manipulieren von\\System-/Funktionsaufrufen in\\Linux-Systemen} % Sets English and German version of the title (both can be English or German). If your title contains commas, enclose it with additional curvy brackets (i.e., {{your title}}) or define it as a macro as done with \thesistitle.
%\setsubtitle{Optional Subtitle of the Thesis}{Optionaler Untertitel der Arbeit} % Sets English and German version of the subtitle (both can be English or German).
@@ -153,15 +148,15 @@
\input{src/01.introduction}
\input{src/02.intercept}
\input{src/03.manipulate}
\input{src/04.related-work}
\input{src/05.evaluation}
\input{src/04.comparison}
\input{src/05.related-work}
\input{src/06.conclusion}
\backmatter
% Declare the use of AI tools as mentioned in the statement of originality.
% Use either the English aitools or the German kitools.
\begin{aitools}
No generative AI tools were used in and for this work whatsoever.
\todo{Enter your text here.}
\end{aitools}
%\begin{kitools}
@@ -176,8 +171,8 @@ No generative AI tools were used in and for this work whatsoever.
\listoftables % Starred version, i.e., \listoftables*, removes the toc entry.
% Use an optional list of algorithms.
%\listofalgorithms
%\addcontentsline{toc}{chapter}{List of Algorithms}
\listofalgorithms
\addcontentsline{toc}{chapter}{List of Algorithms}
% Use an optional list of listings.
\cleardoublepage