Compare commits
1 Commits
4c91cf7a6e
...
comparison
| Author | SHA1 | Date | |
|---|---|---|---|
| a4c9879472 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -4,5 +4,3 @@ bin/
|
|||||||
*.o
|
*.o
|
||||||
*.log
|
*.log
|
||||||
*.pdf
|
*.pdf
|
||||||
related-work/
|
|
||||||
main
|
|
||||||
|
|||||||
@@ -1226,7 +1226,7 @@ int sym(getopt)(const int argc, char *const argv[], const char *shortopts) {
|
|||||||
else if_invalid(getopt)
|
else if_invalid(getopt)
|
||||||
}
|
}
|
||||||
const int ret = __real_getopt(argc, argv, shortopts);
|
const int ret = __real_getopt(argc, argv, shortopts);
|
||||||
msg("return %i; optind %i", ret, optind);
|
msg("return %i", ret);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1240,7 +1240,7 @@ void sym(exit)(int status) {
|
|||||||
char msg_buf[BUFFER_SIZE];
|
char msg_buf[BUFFER_SIZE];
|
||||||
rcv(msg_buf, sizeof(msg_buf));
|
rcv(msg_buf, sizeof(msg_buf));
|
||||||
if_modify_int("exit", int, status)
|
if_modify_int("exit", int, status)
|
||||||
else if_invalid(exit)
|
else if_invalid(getopt)
|
||||||
}
|
}
|
||||||
__real_exit(status);
|
__real_exit(status);
|
||||||
}
|
}
|
||||||
@@ -1419,7 +1419,7 @@ int sym(sigaction)(int sig, const struct sigaction *restrict act, struct sigacti
|
|||||||
if (sigismember(&act->sa_mask, i) != 1)
|
if (sigismember(&act->sa_mask, i) != 1)
|
||||||
continue;
|
continue;
|
||||||
if (maskstr[0] != 0) strcat(maskstr, ",");
|
if (maskstr[0] != 0) strcat(maskstr, ",");
|
||||||
sprintf(maskstr + strlen(maskstr), "%i:%s", i, getsigstr(i));
|
strcat(maskstr, getsigstr(i));
|
||||||
}
|
}
|
||||||
if (!verbosity) {
|
if (!verbosity) {
|
||||||
msg("sigaction(%i:%s, %p:{}, %p)" ret_str, sig, sigstr, act, oact, ret_data);
|
msg("sigaction(%i:%s, %p:{}, %p)" ret_str, sig, sigstr, act, oact, ret_data);
|
||||||
@@ -1462,7 +1462,7 @@ int sym(sigaction)(int sig, const struct sigaction *restrict act, struct sigacti
|
|||||||
if (sigismember(&oact->sa_mask, i) != 1)
|
if (sigismember(&oact->sa_mask, i) != 1)
|
||||||
continue;
|
continue;
|
||||||
if (maskstr[0] != 0) strcat(maskstr, ",");
|
if (maskstr[0] != 0) strcat(maskstr, ",");
|
||||||
sprintf(maskstr + strlen(maskstr), "%i:%s", i, getsigstr(i));
|
strcat(maskstr, getsigstr(i));
|
||||||
}
|
}
|
||||||
msg("return %i; errno %s; oact={sa_flags: 0x%x:%s, %s: %p, sa_mask: [%s]}", ret, strerrorname_np(errno), oact->sa_flags, flgstr, name, ptr, maskstr);
|
msg("return %i; errno %s; oact={sa_flags: 0x%x:%s, %s: %p, sa_mask: [%s]}", ret, strerrorname_np(errno), oact->sa_flags, flgstr, name, ptr, maskstr);
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -1,16 +0,0 @@
|
|||||||
|
|
||||||
CC=gcc
|
|
||||||
CFLAGS=-std=c99 -pedantic -Wall -D_DEFAULT_SOURCE -D_BSD_SOURCE -D_SVID_SOURCE -D_POSIX_C_SOURCE=200809L -g
|
|
||||||
|
|
||||||
.PHONY: all clean
|
|
||||||
all: default
|
|
||||||
default: main
|
|
||||||
|
|
||||||
main.o: main.c
|
|
||||||
$(CC) -c -o $@ $^ $(CFLAGS)
|
|
||||||
|
|
||||||
main: main.o
|
|
||||||
$(CC) -o $@ $^ $(CFLAGS) -lc
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -rf main *.o
|
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
|
|
||||||
#include <errno.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
|
|
||||||
int main(const int argc, const char *argv[]) {
|
|
||||||
if (argc != 2) {
|
|
||||||
fprintf(stderr, "usage: main <cycles>\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const long cycles = strtol(argv[1], NULL, 10);
|
|
||||||
|
|
||||||
struct timespec start, end;
|
|
||||||
clock_gettime(CLOCK_MONOTONIC, &start);
|
|
||||||
|
|
||||||
int pipes[2];
|
|
||||||
for (int i = 0; i < cycles; i++) {
|
|
||||||
if (pipe(pipes) != 0) {
|
|
||||||
fprintf(stderr, "unable to create pipes: %s\n", strerror(errno));
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
close(pipes[0]);
|
|
||||||
close(pipes[1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
clock_gettime(CLOCK_MONOTONIC, &end);
|
|
||||||
const long duration = end.tv_sec * 1000000000 + end.tv_nsec - start.tv_sec * 1000000000 - start.tv_nsec;
|
|
||||||
printf("start: %li.%09li\nend: %li.%09li\nduration: %li,%03li,%03li ns\n", start.tv_sec, start.tv_nsec, end.tv_sec, end.tv_nsec, duration / 1000000, duration / 1000 % 1000, duration % 1000);
|
|
||||||
}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
REPEAT=30
|
|
||||||
CYCLES=50
|
|
||||||
|
|
||||||
function test() {
|
|
||||||
echo $@
|
|
||||||
for c in $(seq 1 $CYCLES); do
|
|
||||||
for i in $(seq 1 $REPEAT); do
|
|
||||||
echo "cycles: $((c * 100)) ($i/$REPEAT)"
|
|
||||||
$@ $((c * 100))
|
|
||||||
sleep 1
|
|
||||||
done
|
|
||||||
done
|
|
||||||
}
|
|
||||||
|
|
||||||
test ./main
|
|
||||||
cd ../intercept
|
|
||||||
test ./intercept -o -i - -- ../perf/main
|
|
||||||
test ./intercept -o -i stderr -- ../perf/main
|
|
||||||
test ./intercept -o -i file:out.log -- ../perf/main
|
|
||||||
#test ./intercept -o -i unix:/ -- ../perf/main
|
|
||||||
@@ -23,7 +23,7 @@ class Flags(NamedTuple):
|
|||||||
flags: list[str]
|
flags: list[str]
|
||||||
|
|
||||||
StructTimeSpec = TypedDict('StructTimeSpec', {'tv_sec': int, 'tv_nsec': int})
|
StructTimeSpec = TypedDict('StructTimeSpec', {'tv_sec': int, 'tv_nsec': int})
|
||||||
StructSigAction = TypedDict('StructSigAction', {'sa_flags': Flags, 'sa_handler': NotRequired[Pointer], 'sa_sigaction': NotRequired[Pointer], 'sa_mask': list[Constant]})
|
StructSigAction = TypedDict('StructSigAction', {'sa_flags': Flags, 'sa_handler': NotRequired[Pointer], 'sa_sigaction': NotRequired[Pointer], 'sa_mask': list[str]})
|
||||||
StructSockAddr = TypedDict('StructSockAddr', {'sa_family': Constant, 'sa_data': NotRequired[bytes],
|
StructSockAddr = TypedDict('StructSockAddr', {'sa_family': Constant, 'sa_data': NotRequired[bytes],
|
||||||
'sun_path': NotRequired[bytes],
|
'sun_path': NotRequired[bytes],
|
||||||
'sin_addr': NotRequired[bytes], 'sin_port': NotRequired[int],
|
'sin_addr': NotRequired[bytes], 'sin_port': NotRequired[int],
|
||||||
@@ -263,7 +263,7 @@ class Parser:
|
|||||||
idx += 1
|
idx += 1
|
||||||
value = int(value, 0) if value != '(nil)' else 0
|
value = int(value, 0) if value != '(nil)' else 0
|
||||||
return PointerTo(val, value), idx
|
return PointerTo(val, value), idx
|
||||||
m = re.match(r'[A-Z0-9_]+|\?', argument[idx:])
|
m = re.match(r'[A-Z0-9_]+', argument[idx:])
|
||||||
if m is not None:
|
if m is not None:
|
||||||
value = m.group(0)
|
value = m.group(0)
|
||||||
idx += len(value)
|
idx += len(value)
|
||||||
@@ -302,8 +302,6 @@ class Parser:
|
|||||||
self.pid, self.tid = int(pid), int(tid)
|
self.pid, self.tid = int(pid), int(tid)
|
||||||
if len(self.stack) == 0:
|
if len(self.stack) == 0:
|
||||||
self.stack[(self.pid, self.tid)] = []
|
self.stack[(self.pid, self.tid)] = []
|
||||||
elif (self.pid, self.tid) not in self.stack:
|
|
||||||
self.stack[(self.pid, self.tid)] = []
|
|
||||||
if not data.startswith(b'return ') and not data == b'return':
|
if not data.startswith(b'return ') and not data == b'return':
|
||||||
call = data.decode('utf-8')
|
call = data.decode('utf-8')
|
||||||
#print(f'[{self.pid}][{self.tid}] {call}')
|
#print(f'[{self.pid}][{self.tid}] {call}')
|
||||||
@@ -346,10 +344,6 @@ class Parser:
|
|||||||
other_vals = ret[1].strip() if len(ret) > 1 else ''
|
other_vals = ret[1].strip() if len(ret) > 1 else ''
|
||||||
ret_value, _ = Parser.parse_arg(ret[0][7:])
|
ret_value, _ = Parser.parse_arg(ret[0][7:])
|
||||||
kwargs = {}
|
kwargs = {}
|
||||||
if other_vals.startswith('optind '):
|
|
||||||
ret = other_vals[7:].split(';', 1)
|
|
||||||
kwargs['optind'] = int(ret[0].strip())
|
|
||||||
other_vals = ret[1].strip() if len(ret) > 1 else ''
|
|
||||||
if other_vals.startswith('errno '):
|
if other_vals.startswith('errno '):
|
||||||
ret = other_vals[6:].split(';', 1)
|
ret = other_vals[6:].split(';', 1)
|
||||||
kwargs['errno'] = ret[0].strip()
|
kwargs['errno'] = ret[0].strip()
|
||||||
@@ -410,7 +404,7 @@ class Parser:
|
|||||||
def before_getopt(self, argc: int, argv: PointerTo[list[PointerTo[bytes]]], optstring: PointerTo[bytes]) -> str:
|
def before_getopt(self, argc: int, argv: PointerTo[list[PointerTo[bytes]]], optstring: PointerTo[bytes]) -> str:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
def after_getopt(self, argc: int, argv: PointerTo[list[PointerTo[bytes]]], optstring: PointerTo[bytes],
|
def after_getopt(self, argc: int, argv: PointerTo[list[PointerTo[bytes]]], optstring: PointerTo[bytes],
|
||||||
ret_value: int, optind: int = None) -> None:
|
ret_value: int) -> None:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
def before_exit(self, status: int) -> str:
|
def before_exit(self, status: int) -> str:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|||||||
@@ -102,7 +102,7 @@ def main() -> None:
|
|||||||
entry[ret] = set()
|
entry[ret] = set()
|
||||||
entry[ret].add(tuple(c for c in iter_tree_ok(child)))
|
entry[ret].add(tuple(c for c in iter_tree_ok(child)))
|
||||||
|
|
||||||
allowed_cleanup_functions = ['malloc', 'free', 'freeaddrinfo', 'close', 'exit', 'sigaction']
|
allowed_cleanup_functions = ['malloc', 'free', 'freeaddrinfo', 'close', 'exit']
|
||||||
for call, errors in calls.items():
|
for call, errors in calls.items():
|
||||||
if len(errors) <= 1:
|
if len(errors) <= 1:
|
||||||
continue
|
continue
|
||||||
|
|||||||
1
thesis/.gitignore
vendored
1
thesis/.gitignore
vendored
@@ -2,7 +2,6 @@
|
|||||||
/intro.*
|
/intro.*
|
||||||
/example.*
|
/example.*
|
||||||
/*.pdf
|
/*.pdf
|
||||||
!/*v*.pdf
|
|
||||||
/*.txt
|
/*.txt
|
||||||
/_minted/
|
/_minted/
|
||||||
*.acn
|
*.acn
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
@@ -1,57 +1,20 @@
|
|||||||
|
|
||||||
\chapter{Introduction}\label{ch:introduction}
|
\chapter{Introduction}\label{ch:introduction}
|
||||||
|
|
||||||
Intercepting (also known as Hooking, or Tracing) system or function calls allows one to trace what a given program does.
|
Lorem Ipsum.
|
||||||
This information is useful for security analysis or when testing or verifying a program.
|
|
||||||
This chapter gives a general overview about what the motivation and goal for this work were (Section~\ref{sec:motivation-and-goal}), and what the difference between system calls and function calls is (Section~\ref{sec:definitions}).
|
|
||||||
|
|
||||||
|
\section{TODO: Why intercept?}
|
||||||
|
|
||||||
\section{Motivation and Goal}\label{sec:motivation-and-goal}
|
Lorem Ipsum.
|
||||||
|
|
||||||
When teaching students about Operating Systems, their interfaces, and standard libraries, C is still a widely used language.
|
\section{TODO: Why are current solutions not enough?}
|
||||||
Especially when using Linux.
|
|
||||||
Therefore, it is obvious, why many university courses still require students to write their assignments and exams in C\@.
|
|
||||||
The problem when trying to verify, if students correctly implemented their assignment is that low-level OS constructs (like semaphores, pipes, sockets, memory management) make it hard to run automated tests, because the testing system needs to keep track, set up, and verify the usage of these resources.
|
|
||||||
|
|
||||||
The goal of this work was to find a way to easily intercept system or function calls and to verify if students called the right functions with the right arguments at the right time.
|
Lorem Ipsum.
|
||||||
This restriction in scope allows focusing on simple binary programs without having to think about complex or I/O heavy programs.
|
|
||||||
Furthermore, in this setting the source code of the student's programs is obviously available because this is what they need to deliver.
|
|
||||||
The availability of source code is a key concern when trying to intercept function or system calls, as will be clear in the next chapters.
|
|
||||||
|
|
||||||
|
\section{TODO: Linux/C/ELF call structure}
|
||||||
|
|
||||||
\section{Definitions}\label{sec:definitions}
|
Lorem Ipsum.
|
||||||
|
|
||||||
First, function calls, system calls, and their differences need to be defined.
|
\section{TODO: System Calls vs. Function Calls}\label{sec:system-calls-vs-function-calls}
|
||||||
The following subsections concern these definitions.
|
|
||||||
|
|
||||||
|
Lorem Ipsum.
|
||||||
\subsection{Function Calls}\label{subsec:function-calls}
|
|
||||||
|
|
||||||
Generally, a function in C (and also most other programming languages) is a piece of code that may be called and therefore executed from elsewhere.
|
|
||||||
Functions have zero or more arguments and return a single value.
|
|
||||||
When calling a function, the caller places the return address onto the stack.
|
|
||||||
This address indicates where the function should continue executing when it is finished.
|
|
||||||
|
|
||||||
Functions are used to structure programs, reuse functionality, or expose functionality in libraries.
|
|
||||||
Other languages than C differentiate between functions, methods, procedures, and so on.
|
|
||||||
A function written in the source code is almost always compiled to a function in the resulting binary.
|
|
||||||
|
|
||||||
Intercepting calls to functions would one allow seeing the name of the function, arguments, return value, and return address.
|
|
||||||
|
|
||||||
|
|
||||||
\subsection{System Calls}\label{subsec:system-calls}
|
|
||||||
|
|
||||||
In contrast to functions, system calls are calls to the kernel itself.
|
|
||||||
Many operations on a modern operating system require special privileges, which a simple user-space process does not have.
|
|
||||||
By invoking a system call, the (user-space) process hands control over to the (privileged) kernel and requests an operation to be performed.
|
|
||||||
\cite[Chapter~10]{linuxkernel}
|
|
||||||
|
|
||||||
How exactly these system calls work is architecture and system specific.
|
|
||||||
But generally, the process places the system call number and its arguments in defined registers and then executes a special system call opcode.
|
|
||||||
Then the kernel executes the requested operation and places the return value inside another register, and lastly hands the execution back to the process.
|
|
||||||
\cite[Chapter~10]{linuxkernel}
|
|
||||||
|
|
||||||
Intercepting calls to system calls would one allow seeing the system call number, arguments, and return value.
|
|
||||||
One has to keep in mind that many system-related functionalities are not in fact translated to system calls one-to-one.
|
|
||||||
For example, \texttt{malloc}~\cite{malloc.3} has no dedicated system call, it is managed by the C standard library internally.
|
|
||||||
Many system calls have corresponding wrapper functions in the C standard library (like \texttt{open}, \texttt{close}, \texttt{sem\_wait}).
|
|
||||||
|
|||||||
@@ -1,17 +1,18 @@
|
|||||||
|
|
||||||
\chapter{Intercepting Function Calls}\label{ch:intercepting-function-calls}
|
\chapter{Intercepting Function Calls}\label{ch:intercepting-function-calls}
|
||||||
|
|
||||||
In this chapter, all steps on how to intercept function calls in this work are discussed.
|
In this chapter all steps on how to intercept function calls in this work are discussed.
|
||||||
An example of what the resulting interception looks like may be found in Section~\ref{sec:intercepting-example}.
|
An example of what the resulting interception looks like may be found in Section~\ref{sec:intercepting-example}.
|
||||||
Furthermore, an overview on how to test given programs is presented in Section~\ref{sec:automated-testing-on-intercepted-function-calls}.
|
Furthermore, an overview on how to test given programs is presented in Section~\ref{sec:automated-testing-on-intercepted-function-calls}.
|
||||||
How these function calls may be manipulated is discussed in Chapter~\ref{ch:manipulating-function-calls}.
|
This chapter does not discuss how these function calls may be manipulated in any way.
|
||||||
|
For that see Chapter~\ref{ch:manipulating-function-calls}.
|
||||||
|
|
||||||
|
|
||||||
\section{Identified Methods for Intercepting Function and System Calls}\label{sec:methods-for-intercepting}
|
\section{Identified Methods for Intercepting Function and System Calls}\label{sec:methods-for-intercepting}
|
||||||
|
|
||||||
First, one has to answer the question on \textit{how exactly} to intercept function or system calls.
|
First, one has to answer the question on \textit{how exactly} to intercept function or system calls.
|
||||||
At the beginning of this work, it was not yet determined if the interception of function calls, system calls, or both should be used to achieve the overarching goal (see Section~\ref{sec:motivation-and-goal}).
|
At the beginning of this work it was not yet determined if the interception of function calls, system calls, or both should be used to achieve the overarching goal (see\todo{Goals}).
|
||||||
This first section tries to list all possible and relevant methods on how to intercept function or system calls but does not claim exhaustiveness.
|
This first section tries to list all possible methods on how to intercept function or system calls but does not claim completeness.
|
||||||
The order of the following subsections is roughly based on the thought process on finding the most appropriate method suitable for this work.
|
The order of the following subsections is roughly based on the thought process on finding the most appropriate method suitable for this work.
|
||||||
|
|
||||||
|
|
||||||
@@ -134,7 +135,7 @@ See the gcc(1) Linux manual page~\cite[Section OPTIONS]{gcc.1}:
|
|||||||
This means, by specifying \texttt{-Wl,-{}-wrap=\textit{symbol}} when compiling using gcc,
|
This means, by specifying \texttt{-Wl,-{}-wrap=\textit{symbol}} when compiling using gcc,
|
||||||
all calls from the currently compiled program to \texttt{\textit{symbol}} are redirected to \texttt{\_\_wrap\_\textit{symbol}}.
|
all calls from the currently compiled program to \texttt{\textit{symbol}} are redirected to \texttt{\_\_wrap\_\textit{symbol}}.
|
||||||
To call the real function inside the wrapper, \texttt{\_\_real\_\textit{symbol}} may be used.
|
To call the real function inside the wrapper, \texttt{\_\_real\_\textit{symbol}} may be used.
|
||||||
Listings~\ref{lst:wrap.c} and~\ref{lst:wrap} illustrate this by overriding the \texttt{malloc} function of the C standard library.
|
Listings~\ref{lst:wrap.c} and~\ref{lst:wrap} try to illustrate this by overriding the \texttt{malloc} function of the C standard library.
|
||||||
|
|
||||||
\begin{listing}[htbp]
|
\begin{listing}[htbp]
|
||||||
\inputminted[linenos]{c}{src/listings/wrap.c}
|
\inputminted[linenos]{c}{src/listings/wrap.c}
|
||||||
@@ -158,7 +159,7 @@ Therefore, the source code (or the corresponding \texttt{*.out} files) needs to
|
|||||||
Note, only calls from the targeted source code will be redirected, calls from other libraries won't.
|
Note, only calls from the targeted source code will be redirected, calls from other libraries won't.
|
||||||
|
|
||||||
Theoretically, it should be possible to re-link a given binary without having access to its source code.
|
Theoretically, it should be possible to re-link a given binary without having access to its source code.
|
||||||
But due to other more straight-forward methods (see Subsection~\ref{subsec:preloading}), this has not been investigated further.
|
But due to other more straight-forward methods (see Subsection~\ref{subsec:preloading}), this has not been further investigated.
|
||||||
|
|
||||||
|
|
||||||
\subsection{Preloading using \texttt{LD\_PRELOAD}}\label{subsec:preloading}
|
\subsection{Preloading using \texttt{LD\_PRELOAD}}\label{subsec:preloading}
|
||||||
@@ -187,7 +188,7 @@ See the ld.so(8) Linux manual page~\cite[Section ENVIRONMENT]{ld.so.8}:
|
|||||||
\end{quote}
|
\end{quote}
|
||||||
|
|
||||||
This means, by setting the environment variable \texttt{LD\_PRELOAD}, it is possible to override specific functions.
|
This means, by setting the environment variable \texttt{LD\_PRELOAD}, it is possible to override specific functions.
|
||||||
Listings~\ref{lst:preload.c} and~\ref{lst:preload} illustrate this by overriding the \texttt{malloc} function of the C standard library.
|
Listings~\ref{lst:preload.c} and~\ref{lst:preload} try to illustrate this by overriding the \texttt{malloc} function of the C standard library.
|
||||||
|
|
||||||
\begin{listing}[htbp]
|
\begin{listing}[htbp]
|
||||||
\inputminted[linenos]{c}{src/listings/preload.c}
|
\inputminted[linenos]{c}{src/listings/preload.c}
|
||||||
@@ -216,11 +217,11 @@ Although, one has to be aware that not only function calls inside the targeted b
|
|||||||
\subsection{Conclusion}\label{subsec:methods-for-intercepting-conclusion}
|
\subsection{Conclusion}\label{subsec:methods-for-intercepting-conclusion}
|
||||||
|
|
||||||
During the research on different approaches to intercepting system and function calls,
|
During the research on different approaches to intercepting system and function calls,
|
||||||
it has been found that the most reliable way to achieve the goals of this work (see Section~\ref{sec:motivation-and-goal}) is to intercept function calls instead of system calls.
|
it has been found that the most reliable way to achieve the goals of this work (see \todo{Goals}) is to intercept function calls instead of system calls.
|
||||||
This is because---as long as the programs to test are dynamically linked---, intercepting function calls allows one to intercept many more calls and in a more flexible way.
|
This is because (as long as the programs to test are dynamically linked), intercepting function calls allows one to intercept many more calls and in a more flexible way.
|
||||||
Therefore, from now on this work only considers function calls and no system calls directly.
|
Therefore, from now on this work only considers function calls and no system calls directly.
|
||||||
|
|
||||||
In this work, preloading (see Subsection~\ref{subsec:preloading}) was chosen to be used
|
In this work preloading (see Subsection~\ref{subsec:preloading}) was chosen to be used
|
||||||
because it is simple to use (``clean'' source code, easy to compile and run programs with it) and offers the means to arbitrarily execute code when the intercepted function call is redirected.
|
because it is simple to use (``clean'' source code, easy to compile and run programs with it) and offers the means to arbitrarily execute code when the intercepted function call is redirected.
|
||||||
The following sections concern the next steps in what else is needed to create a powerful ``interceptor''.
|
The following sections concern the next steps in what else is needed to create a powerful ``interceptor''.
|
||||||
|
|
||||||
@@ -230,7 +231,7 @@ The following sections concern the next steps in what else is needed to create a
|
|||||||
After deciding to use the preloading method to intercept function calls, a more detailed plan is needed to continue developing.
|
After deciding to use the preloading method to intercept function calls, a more detailed plan is needed to continue developing.
|
||||||
It was decided to have one single \texttt{intercept.so} file as a resulting artifact which then may be loaded via the \texttt{LD\_PRELOAD} environment variable.
|
It was decided to have one single \texttt{intercept.so} file as a resulting artifact which then may be loaded via the \texttt{LD\_PRELOAD} environment variable.
|
||||||
The easiest and most straightforward way to structure the source code was to put all code in one single C file.
|
The easiest and most straightforward way to structure the source code was to put all code in one single C file.
|
||||||
Listing~\ref{lst:intercept-preload.c} gives an overview of the underlying code structure.
|
Listing~\ref{lst:intercept-preload.c} gives an overview over the grounding code structure.
|
||||||
For each function that should be intercepted, this function simply has to be declared and defined the same way \texttt{malloc} was.
|
For each function that should be intercepted, this function simply has to be declared and defined the same way \texttt{malloc} was.
|
||||||
|
|
||||||
\begin{listing}[htbp]
|
\begin{listing}[htbp]
|
||||||
@@ -243,8 +244,8 @@ For each function that should be intercepted, this function simply has to be dec
|
|||||||
\section{Retrieving Function Argument Values}\label{sec:retrieving-function-argument-values}
|
\section{Retrieving Function Argument Values}\label{sec:retrieving-function-argument-values}
|
||||||
|
|
||||||
Now that the first steps have been done, one needs to think about what exactly to record when intercepting.
|
Now that the first steps have been done, one needs to think about what exactly to record when intercepting.
|
||||||
A simple notification that a given function was called would not be sufficient.
|
A simple notification that a given function was called would be too less.
|
||||||
Within the following subsections, effort is put into getting as much information as possible from each function call.
|
Within the following subsections it is tried to get as much information as possible from each function call.
|
||||||
|
|
||||||
As already mentioned, \texttt{ltrace} uses prototype functions to format its function arguments.
|
As already mentioned, \texttt{ltrace} uses prototype functions to format its function arguments.
|
||||||
This allows \texttt{ltrace} to ``dynamically'' display function arguments for any new or unknown functions without the need for recompilation.
|
This allows \texttt{ltrace} to ``dynamically'' display function arguments for any new or unknown functions without the need for recompilation.
|
||||||
@@ -253,9 +254,9 @@ This allows \texttt{ltrace} to ``dynamically'' display function arguments for an
|
|||||||
However, due to implementation complexity reasons and the need for ``complex'' return types for string/buffer and structure values (see Section~\ref{sec:retrieving-function-return-values}) a statically compiled approach has been used for this work.
|
However, due to implementation complexity reasons and the need for ``complex'' return types for string/buffer and structure values (see Section~\ref{sec:retrieving-function-return-values}) a statically compiled approach has been used for this work.
|
||||||
This means that each function formats its arguments and return values itself without any configuration option.
|
This means that each function formats its arguments and return values itself without any configuration option.
|
||||||
|
|
||||||
The reason for retrieving as much information as possible from each function call is that at a later point in time, it is possible to completely reconstruct the exact function calls and their sequence.
|
The reason for retrieving as much information as possible from each function call is that at a later point in time it is possible to completely reconstruct the exact function calls and their sequence.
|
||||||
This allows analysis on these records to be performed independently of the corresponding execution of the program.
|
This allows analysis on these records to be performed independently of the corresponding execution of the program.
|
||||||
It should always be possible to fully parse the recorded calls without any specific knowledge of specific functions, their argument types, or return value type.
|
It should always be possible for any parser to fully parse the recorded calls without any specific knowledge of specific functions, their argument types, or return value type.
|
||||||
|
|
||||||
|
|
||||||
\subsection{Numbers}\label{subsec:retrieving-numbers}
|
\subsection{Numbers}\label{subsec:retrieving-numbers}
|
||||||
@@ -292,7 +293,7 @@ Example: \texttt{write(3, 0x1234:"Test\textbackslash{}x00ABC", 8)}.
|
|||||||
|
|
||||||
\subsection{Flags}\label{subsec:retrieving-flags}
|
\subsection{Flags}\label{subsec:retrieving-flags}
|
||||||
|
|
||||||
Some functions have one of their arguments dedicated to flags which may be combined by bitwise OR\@.
|
Some functions have one of their arguments dedicated to flags which may be combined by bitwise OR.
|
||||||
These arguments are also of type integer.
|
These arguments are also of type integer.
|
||||||
To distinguish flag arguments from others, a pipe symbol (\texttt{|}) is used after the colon and between the flags.
|
To distinguish flag arguments from others, a pipe symbol (\texttt{|}) is used after the colon and between the flags.
|
||||||
|
|
||||||
@@ -303,7 +304,7 @@ Example: \texttt{open(0x1234:"test.txt", 0102:|O\_CREAT|O\_RDWR|, 0644)}.
|
|||||||
For some functions constants are used.
|
For some functions constants are used.
|
||||||
These constants are typically defined as C macros in the source code.
|
These constants are typically defined as C macros in the source code.
|
||||||
This makes the source code more readable (and portable).
|
This makes the source code more readable (and portable).
|
||||||
Constants are represented as an integer again followed by a colon, this time without any special characters to distinguish them from other types.
|
Constants are represented as an integer again followed by a colon, this time without any special characters to disdinguish them from other types.
|
||||||
|
|
||||||
Example: \texttt{socket(2:AF\_INET, 1:SOCK\_STREAM, 6)}.
|
Example: \texttt{socket(2:AF\_INET, 1:SOCK\_STREAM, 6)}.
|
||||||
|
|
||||||
@@ -319,7 +320,7 @@ Example: \\
|
|||||||
|
|
||||||
\subsection{Pointers to Structures}\label{subsec:retrieving-pointers-to-structures}
|
\subsection{Pointers to Structures}\label{subsec:retrieving-pointers-to-structures}
|
||||||
|
|
||||||
In rare cases, structures (\texttt{struct}) are used as argument types.
|
In rare cases structures (\texttt{struct}) are used as argument types.
|
||||||
Two curly brackets (\texttt{\{\}}) are used to indicate structures.
|
Two curly brackets (\texttt{\{\}}) are used to indicate structures.
|
||||||
Then the field names are displayed plainly, followed by a colon and then the value of that field.
|
Then the field names are displayed plainly, followed by a colon and then the value of that field.
|
||||||
Commas are used to separate the individual fields.
|
Commas are used to separate the individual fields.
|
||||||
@@ -346,15 +347,13 @@ Example (\texttt{read}): \\
|
|||||||
\texttt{return 12; errno 0; buf=0x7fff70:"Hello World!"}, \\
|
\texttt{return 12; errno 0; buf=0x7fff70:"Hello World!"}, \\
|
||||||
\texttt{return -1; errno EINTR}.
|
\texttt{return -1; errno EINTR}.
|
||||||
|
|
||||||
\todo{Explain Examples}
|
|
||||||
|
|
||||||
|
|
||||||
\section{Determining Function Call Location}\label{sec:determining-function-call-location}
|
\section{Determining Function Call Location}\label{sec:determining-function-call-location}
|
||||||
|
|
||||||
Besides argument values and return values, it would be interesting to know from where inside the intercepted program the function call came.
|
Besides from argument values and return values, it would be interesting to know from where inside the intercepted program the function call came from.
|
||||||
At first this seems quite impossible.
|
At first this seems quite impossible.
|
||||||
But a function always knows at least the return address, the address to set the instruction pointer to when the function finishes.
|
But a function always knows at least the return address, the address to set then instruction pointer to when the function finishes.
|
||||||
With this information, it may be estimated where the call to the current function came from.
|
With this information it may be estimated where the call to the current function came from.
|
||||||
|
|
||||||
\subsection{Return Address and Relative Position}\label{subsec:return-address-and-relative-position}
|
\subsection{Return Address and Relative Position}\label{subsec:return-address-and-relative-position}
|
||||||
|
|
||||||
@@ -487,7 +486,7 @@ The shared object currently supports intercepting the following functions:
|
|||||||
|
|
||||||
\section{\texttt{intercept} Command}\label{sec:intercept-command}
|
\section{\texttt{intercept} Command}\label{sec:intercept-command}
|
||||||
|
|
||||||
To make the usage of the aforementioned shared object easier, a simple python script has been put together.
|
To make the usage of the aforementioned shared object more easy, a simple python script has been put together.
|
||||||
This script may be used as a command line tool.
|
This script may be used as a command line tool.
|
||||||
See Listing~\ref{lst:intercept}.
|
See Listing~\ref{lst:intercept}.
|
||||||
|
|
||||||
@@ -552,14 +551,14 @@ This includes the offset relative to the calling binary and a source file and li
|
|||||||
|
|
||||||
\section{Automated Testing on Intercepted Function Calls}\label{sec:automated-testing-on-intercepted-function-calls}
|
\section{Automated Testing on Intercepted Function Calls}\label{sec:automated-testing-on-intercepted-function-calls}
|
||||||
|
|
||||||
The recorded function calls of a program run may now be used to perform checks and tests on them.
|
The recorded function calls of a program run now may be used to perform checks and tests on them.
|
||||||
It is trivially possible to check which functions were called and in what order.
|
It is trivially possible to check which functions were called and in what order.
|
||||||
Furthermore, it is possible to check various pre- and post-conditions for each function call.
|
Furthermore, it is possible to check various pre- and post-conditions for each function call.
|
||||||
This is beneficial because many library functions in C rely on these pre- and post-conditions, which are not enforced by the compiler or in any other way.
|
This is beneficial because many library functions in C rely on these pre- and post-conditions, which are not enforced by the compiler or in any other way.
|
||||||
|
|
||||||
For example, the \texttt{malloc} function has the post-condition that the returned value later needs to be passed to \texttt{free} to avoid memory leaks.
|
For example, the \texttt{malloc} function has the post-condition that the returned value later needs to be passed to \texttt{free} to avoid memory leaks.
|
||||||
The \texttt{free} function, on the other hand, has the pre-condition that the passed value was previously acquired using \texttt{malloc} and has not yet been freed.
|
The \texttt{free} function, on the other hand, has the pre-condition that the passed value was previously acquired using \texttt{malloc} and has not yet been freed.
|
||||||
Any violation of such pre- and post-conditions may be reported as non-compliant behavior.
|
Any violation of such pre- and post-conditions may be reported as incompliant behavior.
|
||||||
\cite{malloc.3}
|
\cite{malloc.3}
|
||||||
|
|
||||||
This means that intercepted function calls allow a tester to check if programmers use library functions in compliance with their specification.
|
This means that intercepted function calls allow a tester to check if programmers use library functions in compliance with their specification.
|
||||||
@@ -567,14 +566,3 @@ Other checks may also include guards to calls to ``forbidden'' functions, or tha
|
|||||||
Another important post-condition of most library functions is the return value, which in most cases indicates success or failure of an operation.
|
Another important post-condition of most library functions is the return value, which in most cases indicates success or failure of an operation.
|
||||||
However, intercepting of calls alone may not be able to verify if a program really checks the return value of a function and acts accordingly.
|
However, intercepting of calls alone may not be able to verify if a program really checks the return value of a function and acts accordingly.
|
||||||
Chapter~\ref{ch:manipulating-function-calls} shows how this problem may be solved.
|
Chapter~\ref{ch:manipulating-function-calls} shows how this problem may be solved.
|
||||||
|
|
||||||
\subsection{Validating Memory Management}\label{subsec:testing-memory-management}
|
|
||||||
|
|
||||||
Lorem Ipsum.
|
|
||||||
(malloc, calloc, realloc, free, getaddrinfo, freeaddrinfo).
|
|
||||||
|
|
||||||
|
|
||||||
\subsection{Validating Resource Management}\label{subsec:validating-resource-management}
|
|
||||||
|
|
||||||
Lorem Ipsum.
|
|
||||||
(open, close, socket, \dots).
|
|
||||||
|
|||||||
@@ -2,9 +2,9 @@
|
|||||||
\chapter{Manipulating Function Calls}\label{ch:manipulating-function-calls}
|
\chapter{Manipulating Function Calls}\label{ch:manipulating-function-calls}
|
||||||
|
|
||||||
This chapter discusses how to manipulate function calls and how this may be used to test programs.
|
This chapter discusses how to manipulate function calls and how this may be used to test programs.
|
||||||
How function calls may be intercepted at all is discussed in Chapter~\ref{ch:intercepting-function-calls}.
|
For how function calls may be intercepted at all, see Chapter~\ref{ch:intercepting-function-calls}.
|
||||||
This chapter builds on the basis of the previous one and expands its functions.
|
This chapter builds on the basis of the previous one and expands its functions.
|
||||||
``Manipulation'' in this context means to change the arguments of a function then calling it with those changed arguments, or skipping the execution of the real function completely and simply returning a given value (``mocking'').
|
``Manipulation'' in this context means to change the arguments of a function then running it normally, or skipping the execution of the real function completely and simply returning a given value (``mocking'').
|
||||||
These techniques allow in-depth testing of programs.
|
These techniques allow in-depth testing of programs.
|
||||||
|
|
||||||
In contrast to simply recording and logging function calls, which may be controlled via environment variables, manipulation of such function calls requires some other process to indicate how to handle each call.
|
In contrast to simply recording and logging function calls, which may be controlled via environment variables, manipulation of such function calls requires some other process to indicate how to handle each call.
|
||||||
@@ -35,21 +35,21 @@ Figure~\ref{fig:control-flow} illustrates the control flow for manipulating func
|
|||||||
\end{call}
|
\end{call}
|
||||||
\end{sdblock}
|
\end{sdblock}
|
||||||
|
|
||||||
\begin{sdblock}{Modified Call}{}
|
\begin{sdblock}{Manipulated Call}{}
|
||||||
\begin{call}{p}{malloc(x)}{i}{return b}
|
\begin{call}{p}{malloc(x)}{i}{return a}
|
||||||
\mess{i}{{``malloc(x)''}}{s}
|
\mess{i}{{``malloc(x)''}}{s}
|
||||||
\mess{s}{``modify y''}{i}
|
\mess{s}{``modify y''}{i}
|
||||||
\begin{call}{i}{malloc(y)}{l}{return b}
|
\begin{call}{i}{malloc(y)}{l}{return a}
|
||||||
\end{call}
|
\end{call}
|
||||||
\mess{i}{``return b''}{s}
|
\mess{i}{``return a''}{s}
|
||||||
\end{call}
|
\end{call}
|
||||||
\end{sdblock}
|
\end{sdblock}
|
||||||
|
|
||||||
\begin{sdblock}{Mocked Call}{}
|
\begin{sdblock}{Mocked Call}{}
|
||||||
\begin{call}{p}{malloc(x)}{i}{return c}
|
\begin{call}{p}{malloc(x)}{i}{return z}
|
||||||
\mess{i}{{``malloc(x)''}}{s}
|
\mess{i}{{``malloc(x)''}}{s}
|
||||||
\mess{s}{``fail'' / ``return c''}{i}
|
\mess{s}{``fail'' / ``return z''}{i}
|
||||||
\mess{i}{``return c''}{s}
|
\mess{i}{``return z''}{s}
|
||||||
\end{call}
|
\end{call}
|
||||||
\end{sdblock}
|
\end{sdblock}
|
||||||
\end{sequencediagram}
|
\end{sequencediagram}
|
||||||
@@ -62,7 +62,7 @@ Figure~\ref{fig:control-flow} illustrates the control flow for manipulating func
|
|||||||
\section{Defining a Protocol}\label{sec:defining-a-protocol}
|
\section{Defining a Protocol}\label{sec:defining-a-protocol}
|
||||||
|
|
||||||
When using a socket to communicate with another process, a protocol definition is needed.
|
When using a socket to communicate with another process, a protocol definition is needed.
|
||||||
This work defines a text-based protocol in which line breaks denote the end of a message.
|
This works defines a text-based protocol in which line breaks denote the end of a message.
|
||||||
The following subsections describe the defined message types.
|
The following subsections describe the defined message types.
|
||||||
|
|
||||||
\subsection{\textit{Init} Message (Client \textrightarrow{} Server)}\label{subsec:init-message}
|
\subsection{\textit{Init} Message (Client \textrightarrow{} Server)}\label{subsec:init-message}
|
||||||
@@ -72,7 +72,7 @@ The client (\texttt{intercept.so}) uses it to identify the running program to th
|
|||||||
|
|
||||||
\subsection{\textit{Call} Message (Client \textrightarrow{} Server)}\label{subsec:call-message}
|
\subsection{\textit{Call} Message (Client \textrightarrow{} Server)}\label{subsec:call-message}
|
||||||
|
|
||||||
For each function call, the client sends this message to the server and waits for a reply (\textit{Action} message).
|
For each function call the client sends this message to the server and waits for a reply (\textit{Action} message).
|
||||||
The contents of this message type correspond to the first line of an intercepted function call (see Section~\ref{sec:automated-testing-on-intercepted-function-calls}).
|
The contents of this message type correspond to the first line of an intercepted function call (see Section~\ref{sec:automated-testing-on-intercepted-function-calls}).
|
||||||
|
|
||||||
\subsection{\textit{Action} Message (Server \textrightarrow{} Client)}\label{subsec:action-message}
|
\subsection{\textit{Action} Message (Server \textrightarrow{} Client)}\label{subsec:action-message}
|
||||||
@@ -91,92 +91,14 @@ The server responds in one of four possible ways:
|
|||||||
|
|
||||||
This message informs the server about the resulting return value.
|
This message informs the server about the resulting return value.
|
||||||
The server does not acknowledge this message.
|
The server does not acknowledge this message.
|
||||||
The contents of this message type correspond to the second line of an intercepted function call (see Section~\ref{sec:automated-testing-on-intercepted-function-calls}).
|
The contents of this message type correspond the second line of an intercepted function call (see Section~\ref{sec:automated-testing-on-intercepted-function-calls}).
|
||||||
|
|
||||||
|
|
||||||
|
\section{Creating a Socket Server in Python}\label{sec:creating-a-socket-server-in-python}
|
||||||
|
|
||||||
|
Lorem Ipsum.
|
||||||
|
|
||||||
|
|
||||||
\section{Automated Testing using Function Call Manipulation}\label{sec:automated-testing-using-function-call-manipulation}
|
\section{Automated Testing using Function Call Manipulation}\label{sec:automated-testing-using-function-call-manipulation}
|
||||||
|
|
||||||
As seen in Figure~\ref{fig:control-flow}, function call manipulation allows for mocking individual calls.
|
Lorem Ipsum.
|
||||||
Mocking may be used to see how the program behaves when individual function calls fail or return an unusual, but valid, value.
|
|
||||||
The simplest way to automatically test programs is to run them multiple times and on each run let a single function call fail.
|
|
||||||
The resulting sequences of function calls may then be combined into a call sequence graph (or tree).
|
|
||||||
By analyzing this call graph, it is possible to decide if a program correctly terminated when faced with a failed function call.
|
|
||||||
This may be the case when the following function calls differ from those which were recorded on a default run (without any mocked function calls).
|
|
||||||
|
|
||||||
|
|
||||||
\subsection{Testing Return Value Checks}\label{subsec:testing-return-value-checks}
|
|
||||||
|
|
||||||
Figure~\ref{fig:call-sequence} shows the simplified and collapsed call sequence graph of the prior example in Section~\ref{sec:intercepting-example}.
|
|
||||||
Each edge between two nodes without any label indicates the next function call on a normal run of the program.
|
|
||||||
Edges labeled with ``fail'' indicate the next function call after a mocked failed call.
|
|
||||||
In reality, there are multiple failing paths, one for each possible error return value; in this example, however, they all yield the same resulting path and have therefore been collapsed.
|
|
||||||
|
|
||||||
To test whether a programmer always checked the return value of a function and acted accordingly, the resulting call sequence graph may now be analyzed.
|
|
||||||
This test seems trivial at first.
|
|
||||||
The simplest approach is to verify that after a failing function call only ``cleanup'' function calls (\texttt{free}, \texttt{close}, \texttt{exit}, \dots) follow.
|
|
||||||
For simple programs, this assumption may hold, but there are many exceptions.
|
|
||||||
For example, what if the program recognizes the failed call correctly as failed but recovers and continues to operate normally?
|
|
||||||
Or what if the ``cleanup'' path is very complex and includes function calls not previously marked as valid cleanup functions?
|
|
||||||
However, for simple programs (like those mentioned in Section~\ref{sec:motivation-and-goal}), the simplest approach from above suffices.
|
|
||||||
|
|
||||||
\begin{figure}
|
|
||||||
\begin{tikzpicture}[node distance=15mm, thick, main/.style = {draw, circle}, text centered]
|
|
||||||
\newcommand{\fncall}[2]{\tiny{\begin{tabular}{c}\normalsize{\texttt{#1}}\\\texttt{#2}\end{tabular}}}
|
|
||||||
\node[main] (1)[elips] {\fncall{getopt}{client+0x1ac5, client.c:186}};
|
|
||||||
\node[main] (2)[elips] [below of=1] {\fncall{getaddrinfo}{client+0x147b, client.c:74}};
|
|
||||||
\node[main] (3)[elips] [below of=2] {\fncall{socket}{client+0x14f2, client.c:81}};
|
|
||||||
\node[main] (4)[elips] [below of=3] {\fncall{connect}{client+0x15f3, client.c:104}};
|
|
||||||
\node[main] (5)[elips] [below of=4] {\fncall{freeaddrinfo}{client+0x1638, client.c:114}};
|
|
||||||
\node[main] (6)[elips] [below of=5] {\fncall{send}{client+0x1f5c, client.c:277}};
|
|
||||||
\node[main] (7)[elips] [below of=6] {\fncall{recv}{client+0x1fa1, client.c:284}};
|
|
||||||
\node[main] (8)[elips] [below of=7] {\fncall{recv}{client+0x2062, client.c:300}};
|
|
||||||
\node[main] (9)[elips] [below of=8] {\fncall{recv}{client+0x2442, client.c:360}};
|
|
||||||
\node[main] (10)[elips] [below of=9] {\fncall{recv}{client+0x2442, client.c:360}};
|
|
||||||
\node[main] (11)[elips] [below of=10] {\fncall{close}{client+0x2489, client.c:375}};
|
|
||||||
\node[main] (12)[elips] [below of=11] {\fncall{exit}{sys+0x0}};
|
|
||||||
|
|
||||||
\node[main] (2f1)[elips] [right=10mm of 2] {\fncall{exit}{sys+0x0}};
|
|
||||||
\node[main] (3f1)[elips] [right=10mm of 3] {\fncall{freeaddrinfo}{client+0x1638, client.c:114}};
|
|
||||||
\node[main] (4f1)[elips] [right=10mm of 4] {\fncall{close}{client+0x1611, client.c:106}};
|
|
||||||
|
|
||||||
\draw[->] (1) -- (2);
|
|
||||||
\draw[->] (2) -- (3);
|
|
||||||
\draw[->] (2) -- node[midway, above, sloped, pos=0.5] {fail} (2f1);
|
|
||||||
\draw[->] (3) -- (4);
|
|
||||||
\draw[->] (3) -- node[midway, above, sloped, pos=0.5] {fail} (3f1);
|
|
||||||
\draw[->] (3f1) -- (2f1);
|
|
||||||
\draw[->] (4) -- (5);
|
|
||||||
\draw[->] (4) -- node[midway, above, sloped, pos=0.5] {fail} (4f1);
|
|
||||||
\draw[->] (4f1) -- (3f1);
|
|
||||||
\draw[->] (5) -- (6);
|
|
||||||
\draw[->] (6) -- (7);
|
|
||||||
\draw[->] (7) -- (8);
|
|
||||||
\draw[->] (8) -- (9);
|
|
||||||
\draw[->] (9) -- (10);
|
|
||||||
\draw[->] (10) -- (11);
|
|
||||||
\draw[->] (11) -- (12);
|
|
||||||
\draw[->] (6) to [out=0,in=-4,looseness=2] node[midway, above, pos=0.05] {fail} (11);
|
|
||||||
\draw[->] (7) to [out=0,in=-2,looseness=1.75] node[midway, above, pos=0.075] {fail} (11);
|
|
||||||
\draw[->] (8) to [out=0,in=0,looseness=1.5] node[midway, above, pos=0.1] {fail} (11);
|
|
||||||
\draw[->] (9) to [out=0,in=2,looseness=1.25] node[midway, above, pos=0.1] {fail} (11);
|
|
||||||
\draw[->] (10) to [out=0,in=3,looseness=1] node[midway, above, pos=0.1] {fail} (11);
|
|
||||||
\end{tikzpicture}
|
|
||||||
\centering
|
|
||||||
\caption{Simplified Call Sequence Graph of \texttt{./client}.}
|
|
||||||
\label{fig:call-sequence}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
|
|
||||||
\subsection{Testing Correct Handling of Interrupts}\label{subsec:testing-interrupts}
|
|
||||||
|
|
||||||
Many functions (like \texttt{read}, \texttt{write}, or \texttt{sem\_wait}) are interruptible by signals.
|
|
||||||
When this happens, they return a value indicating an error and set \texttt{errno} to \texttt{EINTR}.
|
|
||||||
Usually, the program is expected to repeat the call until it gets a real return value or error other than \texttt{EINTR}.
|
|
||||||
Therefore, testing correct handling of interrupts is a different type of test in contrast to general tests on return value checks as seen in Subsection~\ref{subsec:testing-return-value-checks}.
|
|
||||||
|
|
||||||
It is relatively simple to test if a program correctly handles interrupts.
|
|
||||||
On any function call that may yield \texttt{EINTR}, mock the call and return exactly that error.
|
|
||||||
Afterward, check if the same function is called again.
|
|
||||||
To increase confidence in the result, one may repeat this process multiple times.
|
|
||||||
As in the test in Subsection~\ref{subsec:testing-return-value-checks}, the handling of the interrupt may involve calls to other functions, so this method is not always the right choice.
|
|
||||||
But for simple programs, it is entirely sufficient.
|
|
||||||
|
|||||||
4
thesis/src/04.comparison.tex
Normal file
4
thesis/src/04.comparison.tex
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
\chapter{Comparison to similar Solutions}\label{ch:comparison}
|
||||||
|
|
||||||
|
Lorem Ipsum.
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
|
|
||||||
\chapter{Related Work}\label{ch:related-work}
|
|
||||||
|
|
||||||
This chapter gives a rough overview of techniques and methods to intercept or hook system calls and function calls.
|
|
||||||
See also Section~\ref{sec:methods-for-intercepting}.
|
|
||||||
Many methods have already been discussed there.
|
|
||||||
|
|
||||||
|
|
||||||
\section{Function Call Interception}\label{sec:function-call-interception}
|
|
||||||
|
|
||||||
All related work regarding function call interception has already been mentioned in Section~\ref{sec:methods-for-intercepting}.
|
|
||||||
See \texttt{ltrace} (Subsection~\ref{subsec:ltrace}), wrapper functions (Subsection~\ref{subsec:wrapper-functions}), and \texttt{LD\_PRELOAD} (Subsection~\ref{subsec:preloading}).
|
|
||||||
|
|
||||||
|
|
||||||
\section{System Call Interception}\label{sec:system-call-interception}
|
|
||||||
|
|
||||||
This section discusses further related work regarding system call interception.
|
|
||||||
This excludes techniques already discussed in Section~\ref{sec:methods-for-intercepting},
|
|
||||||
like \texttt{ptrace} (Subsection~\ref{subsec:ptrace}), and \texttt{strace} (Subsection~\ref{subsec:strace}).
|
|
||||||
Almost all following methods use binary rewriting to replace system calls with other instructions (except SUD, Subsection~\ref{subsec:syscall-user-dispatch}).
|
|
||||||
This is one of the reasons why they were not mentioned in Section~\ref{sec:methods-for-intercepting}.
|
|
||||||
Another one is that the focus of this work is function call interception, and not system call interception.
|
|
||||||
|
|
||||||
|
|
||||||
\subsection{\texttt{int3} Signaling}\label{subsec:int3-signaling}
|
|
||||||
|
|
||||||
\texttt{int3} is a one-byte instruction (\texttt{0xcc}) that invokes a software interrupt.
|
|
||||||
On Linux, the kernel handles it and raises \texttt{SIGTRAP} to the user-space process that executed \texttt{int3}.
|
|
||||||
The \texttt{int3} signaling technique exploits this behavior to hook system calls; it replaces \texttt{syscall}/\texttt{sysenter} with \texttt{int3} and employs the signal handler for \texttt{SIGTRAP} as the hook function.
|
|
||||||
Since \texttt{int3} is one byte, it can replace an arbitrary instruction without breaking the neighbor instructions.
|
|
||||||
This technique is traditionally used in debuggers to implement breakpoints.
|
|
||||||
However, signal handling incurs a large overhead because it involves context manipulation by the kernel.
|
|
||||||
\cite{zpoline}
|
|
||||||
|
|
||||||
|
|
||||||
\subsection{Syscall User Dispatch (SUD)}\label{subsec:syscall-user-dispatch}
|
|
||||||
|
|
||||||
Syscall User Dispatch (SUD)~\cite{sud} was added in Linux 5.11, and it offers a way to redirect system calls to arbitrary user-space code.
|
|
||||||
For the SUD feature, the kernel implements a hook point at the entry point of system calls.
|
|
||||||
A user-space process can activate SUD via the \texttt{prctl} interface.
|
|
||||||
When SUD is activated, the hook point raises \texttt{SIGSYS} to the user-space process.
|
|
||||||
This mechanism allows a user-space program to leverage the \texttt{SIGSYS} signal handler as the system call hook.
|
|
||||||
However, similarly to the \texttt{int3} signaling technique, SUD imposes a significant performance penalty on the user-space program due to the overhead of the signal handling.
|
|
||||||
\cite{zpoline}
|
|
||||||
|
|
||||||
|
|
||||||
\subsection{zpoline}\label{subsec:zpoline}
|
|
||||||
|
|
||||||
zpoline is a system call hook mechanism for x86-64 CPUs.
|
|
||||||
Binary rewriting is used to replace (two-byte) \texttt{syscall}/\texttt{sysenter} instructions with a (two-byte) \texttt{callq *\%rax} instruction.
|
|
||||||
Because this instruction jumps to \texttt{rax}, where also the syscall number is stored, the trampoline code has to be initialized beginning at virtual address 0.
|
|
||||||
zpoline is exhaustive and achieves very low performance reduction (28--761 times less overhead compared to other exhaustive system call hooking techniques).
|
|
||||||
\cite{zpoline}
|
|
||||||
|
|
||||||
|
|
||||||
\subsection{DataHook}\label{subsec:datahook}
|
|
||||||
|
|
||||||
DataHook is a system call hooking technique for 32-bit programs based on glibc running on x86 or x86-64 machines.
|
|
||||||
It relies on glibc's way of performing system calls, namely a \texttt{call *\%gs:0x10} instruction to call the \texttt{\_\_kernel\_vsyscall} function.
|
|
||||||
The content of \texttt{gs:0x10} is backed up and modified to jump to a given hook function.
|
|
||||||
DataHook is only exhaustive when used on glibc-based programs.
|
|
||||||
It achieves a very low performance reduction (5--1429 times less overhead compared to existing hooking techniques).
|
|
||||||
\cite{datahook}
|
|
||||||
@@ -1,80 +0,0 @@
|
|||||||
|
|
||||||
\chapter{Evaluation}\label{ch:evaluation}
|
|
||||||
|
|
||||||
Lorem Ipsum.
|
|
||||||
|
|
||||||
|
|
||||||
\section{Usefulness for the Operating Systems Course}\label{sec:usefulness}
|
|
||||||
|
|
||||||
Up until recently, the Operating Systems Course (mentioned in Section~\ref{sec:motivation-and-goal}) was split into three exercise blocks:
|
|
||||||
Files, Shared Memory, Semaphores; Related Processes and Inter-Process Communication via Unnamed Pipes; and Sockets.
|
|
||||||
Table~\ref{tab:functions} lists all functions presented in the course and their implementation status in \texttt{intercept.so}.
|
|
||||||
As one may see, simple file stream functions are not currently implemented in \texttt{intercept.so}.
|
|
||||||
This is due to time restrictions on this work and the fact that simple file operations may be tested easily in the conventional way, by checking the resulting output.
|
|
||||||
All other functions have at least interception and mocking (returning, failing) implemented.
|
|
||||||
For some functions the modification of function arguments has been implemented.
|
|
||||||
|
|
||||||
\begin{table}[h!]
|
|
||||||
\centering
|
|
||||||
\begin{threeparttable}
|
|
||||||
\begin{tabular}{ c|l|c }
|
|
||||||
Ex. Block & Functions & Implementation \\
|
|
||||||
\hline
|
|
||||||
- & malloc, calloc, realloc, reallocarray, free & \texttt{icmrf} \\
|
|
||||||
- & getopt & \texttt{ic-rf} \\
|
|
||||||
- & sigaction & \texttt{ic-{}-f} \\
|
|
||||||
- & mmap & \texttt{ic-rf} \\
|
|
||||||
- & munmap & \texttt{icmrf} \\
|
|
||||||
- & close & \texttt{icmrf} \\
|
|
||||||
\hline
|
|
||||||
1 & open, fopen, fdopen & \texttt{-{}-{}-{}-{}-} \\
|
|
||||||
1 & read, pread, write, pwrite & \texttt{ic-rf} \\
|
|
||||||
1 & fread, fgets, fgetc, fwrite, fputs, fputc, fprintf, fseek & \texttt{-{}-{}-{}-{}-} \\
|
|
||||||
1 & ferror, feof, clearerr, fileno, fflush, fclose & \texttt{-{}-{}-{}-{}-} \\
|
|
||||||
1 & getline, getdelim & \texttt{ic-rf} \\
|
|
||||||
1 & shm\_open, ftruncate, shm\_unlink & \texttt{icmrf} \\
|
|
||||||
1 & sem\_open, sem\_close, sem\_unlink & \texttt{icmrf} \\
|
|
||||||
1 & sem\_wait, sem\_post & \texttt{icmrf} \\
|
|
||||||
1 & sem\_trywait, sem\_timedwait & \texttt{icmrf} \\
|
|
||||||
1 & sem\_getvalue, sem\_destroy & \texttt{icmrf} \\
|
|
||||||
\hline
|
|
||||||
2 & fork, wait, waitpid & \texttt{icmrf} \\
|
|
||||||
2 & exec*, fexecve & \texttt{ic-rf} \\
|
|
||||||
2 & exit & \texttt{icm-{}-} \\
|
|
||||||
2 & pipe & \texttt{ic-rf} \\
|
|
||||||
2 & dup, dup2, dup3 & \texttt{icmrf} \\
|
|
||||||
\hline
|
|
||||||
3 & socket, bind, accept, connect & \texttt{ic-rf} \\
|
|
||||||
3 & listen & \texttt{icmrf} \\
|
|
||||||
3 & getaddrinfo & \texttt{ic-rf} \\
|
|
||||||
3 & freeaddrinfo & \texttt{icmrf} \\
|
|
||||||
3 & send, recv & \texttt{ic-rf} \\
|
|
||||||
3 & sendto, sendmsg, recvfrom, recvmsg & \texttt{ic-rf} \\
|
|
||||||
3 & setsockopt & \texttt{-{}-{}-{}-{}-} \\
|
|
||||||
\end{tabular}
|
|
||||||
\begin{tablenotes}
|
|
||||||
\item[\texttt{i}] Function may be intercepted.
|
|
||||||
\item[\texttt{c}] Complex function arguments or return value(s) are recorded fully.
|
|
||||||
\item[\texttt{m}] Function arguments may be modified.
|
|
||||||
\item[\texttt{r}] Function may be mocked using a specified return value.
|
|
||||||
\item[\texttt{f}] Function may be mocked using a specified error value.
|
|
||||||
\end{tablenotes}
|
|
||||||
\caption{List of relevant functions and their implementation status.}
|
|
||||||
\label{tab:functions}
|
|
||||||
\end{threeparttable}
|
|
||||||
\end{table}
|
|
||||||
|
|
||||||
|
|
||||||
\section{Performance}\label{sec:performance}
|
|
||||||
|
|
||||||
Lorem Ipsum.
|
|
||||||
|
|
||||||
|
|
||||||
\subsection{Performance when Intercepting}\label{subsec:performance-intercepting}
|
|
||||||
|
|
||||||
Lorem Ipsum.
|
|
||||||
|
|
||||||
|
|
||||||
\subsection{Performance when Manipulating}\label{subsec:performance-manipulating}
|
|
||||||
|
|
||||||
Lorem Ipsum.
|
|
||||||
12
thesis/src/05.related-work.tex
Normal file
12
thesis/src/05.related-work.tex
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
\chapter{Related Work}\label{ch:related-work}
|
||||||
|
|
||||||
|
Lorem Ipsum.
|
||||||
|
|
||||||
|
What other solutions are available?
|
||||||
|
What are the differences?
|
||||||
|
What are the characteristics?
|
||||||
|
|
||||||
|
|
||||||
|
https://sholtrop.dev/blog/on-intercepting-linux-syscalls/
|
||||||
|
https://github.com/yasukata/zpoline
|
||||||
@@ -2,3 +2,8 @@
|
|||||||
\chapter{Conclusion}\label{ch:conclusion}
|
\chapter{Conclusion}\label{ch:conclusion}
|
||||||
|
|
||||||
Lorem Ipsum.
|
Lorem Ipsum.
|
||||||
|
|
||||||
|
Perhaps do some study/``research'' on performance (CPU/memory/\dots).
|
||||||
|
|
||||||
|
%\attachfile[appearance=false,print=false,mimetype=text/plain,description=intercept.c]{../proj/intercept/src/intercept.c}
|
||||||
|
%\attachfile[appearance=false,print=false,mimetype=text/plain,description=Makefile]{../proj/intercept/Makefile}
|
||||||
|
|||||||
@@ -46,49 +46,7 @@
|
|||||||
publisher = {O'Reilly},
|
publisher = {O'Reilly},
|
||||||
url = {https://litux.nl/mirror/networksecuritytools/0596007949/toc.html},
|
url = {https://litux.nl/mirror/networksecuritytools/0596007949/toc.html},
|
||||||
}
|
}
|
||||||
@book{linuxkernel,
|
|
||||||
author = {Daniel P. Bovet and Marco Cesati},
|
|
||||||
title = {Understanding the Linux Kernel},
|
|
||||||
subtitle = {From I/O Ports to Process Management},
|
|
||||||
edition = {3rd},
|
|
||||||
date = {November 2005},
|
|
||||||
isbn = {978-0-596-00565-8},
|
|
||||||
publisher = {O'Reilly},
|
|
||||||
}
|
|
||||||
@manual{gcc,
|
@manual{gcc,
|
||||||
title = {Using the GNU Compiler Collection (GCC)},
|
title = {Using the GNU Compiler Collection (GCC)},
|
||||||
url = {https://gcc.gnu.org/onlinedocs/gcc/index.html},
|
url = {https://gcc.gnu.org/onlinedocs/gcc/index.html},
|
||||||
}
|
}
|
||||||
@manual{sud,
|
|
||||||
title = {Syscall User Dispatch -- The Linux Kernel documentation},
|
|
||||||
url = {https://docs.kernel.org/admin-guide/syscall-user-dispatch.html},
|
|
||||||
}
|
|
||||||
@inproceedings{zpoline,
|
|
||||||
author = {Kenichi Yasukata and Hajime Tazaki and Pierre-Louis Aublin and Kenta Ishiguro},
|
|
||||||
title = {zpoline: a system call hook mechanism based on binary rewriting},
|
|
||||||
booktitle = {2023 USENIX Annual Technical Conference (USENIX ATC '23)},
|
|
||||||
year = {2023},
|
|
||||||
isbn = {978-1-939133-35-9},
|
|
||||||
address = {Boston, MA},
|
|
||||||
pages = {293--300},
|
|
||||||
url = {https://www.usenix.org/conference/atc23/presentation/yasukata},
|
|
||||||
publisher = {USENIX Association},
|
|
||||||
month = jul,
|
|
||||||
}
|
|
||||||
@article{datahook,
|
|
||||||
author = {Hong, Quan and Li, Jiaqi and Zhang, Wen and Zhai, Lidong},
|
|
||||||
title = {DataHook: An Efficient and Lightweight System Call Hooking Technique without Instruction Modification},
|
|
||||||
year = {2025},
|
|
||||||
issue_date = {July 2025},
|
|
||||||
publisher = {Association for Computing Machinery},
|
|
||||||
address = {New York, NY, USA},
|
|
||||||
volume = {2},
|
|
||||||
number = {ISSTA},
|
|
||||||
url = {https://doi.org/10.1145/3728874},
|
|
||||||
doi = {10.1145/3728874},
|
|
||||||
journal = {Proc. ACM Softw. Eng.},
|
|
||||||
month = jun,
|
|
||||||
articleno = {ISSTA005},
|
|
||||||
numpages = {21},
|
|
||||||
keywords = {DataHook, Hooking technique, Software analysis, Software debugging, System call}
|
|
||||||
}
|
|
||||||
|
|||||||
Binary file not shown.
@@ -41,10 +41,7 @@
|
|||||||
\usepackage{chngcntr}
|
\usepackage{chngcntr}
|
||||||
\counterwithin{listing}{chapter}
|
\counterwithin{listing}{chapter}
|
||||||
\usepackage{pgf-umlsd}
|
\usepackage{pgf-umlsd}
|
||||||
\usepackage{textcomp}
|
%\usepackage{attachfile}
|
||||||
\usepackage{tikz}
|
|
||||||
\usetikzlibrary{shapes}
|
|
||||||
\usepackage[flushleft]{threeparttable}
|
|
||||||
|
|
||||||
\newcommand{\newthreadShift}[4][gray!30]{
|
\newcommand{\newthreadShift}[4][gray!30]{
|
||||||
\newinst[#4]{#2}{#3}
|
\newinst[#4]{#2}{#3}
|
||||||
@@ -54,8 +51,6 @@
|
|||||||
\tikzstyle{instcolor#2}=[fill=#1]
|
\tikzstyle{instcolor#2}=[fill=#1]
|
||||||
}
|
}
|
||||||
|
|
||||||
\tikzset{elips/.style={ellipse,draw,minimum width=2em,minimum height=1.8em,inner ysep=0pt},}
|
|
||||||
|
|
||||||
% Set PDF document properties
|
% Set PDF document properties
|
||||||
\hypersetup{
|
\hypersetup{
|
||||||
pdfpagelayout = TwoPageRight, % How the document is shown in PDF viewers (optional).
|
pdfpagelayout = TwoPageRight, % How the document is shown in PDF viewers (optional).
|
||||||
@@ -92,7 +87,7 @@
|
|||||||
|
|
||||||
% Required data.
|
% Required data.
|
||||||
\setregnumber{12119052}
|
\setregnumber{12119052}
|
||||||
\setdate{01}{08}{2025} % Set date with 3 arguments: {day}{month}{year}.
|
\setdate{01}{06}{2025} % Set date with 3 arguments: {day}{month}{year}.
|
||||||
\settitle{\thesistitle}{Abfangen und Manipulieren von\\System-/Funktionsaufrufen in\\Linux-Systemen} % Sets English and German version of the title (both can be English or German). If your title contains commas, enclose it with additional curvy brackets (i.e., {{your title}}) or define it as a macro as done with \thesistitle.
|
\settitle{\thesistitle}{Abfangen und Manipulieren von\\System-/Funktionsaufrufen in\\Linux-Systemen} % Sets English and German version of the title (both can be English or German). If your title contains commas, enclose it with additional curvy brackets (i.e., {{your title}}) or define it as a macro as done with \thesistitle.
|
||||||
%\setsubtitle{Optional Subtitle of the Thesis}{Optionaler Untertitel der Arbeit} % Sets English and German version of the subtitle (both can be English or German).
|
%\setsubtitle{Optional Subtitle of the Thesis}{Optionaler Untertitel der Arbeit} % Sets English and German version of the subtitle (both can be English or German).
|
||||||
|
|
||||||
@@ -153,15 +148,15 @@
|
|||||||
\input{src/01.introduction}
|
\input{src/01.introduction}
|
||||||
\input{src/02.intercept}
|
\input{src/02.intercept}
|
||||||
\input{src/03.manipulate}
|
\input{src/03.manipulate}
|
||||||
\input{src/04.related-work}
|
\input{src/04.comparison}
|
||||||
\input{src/05.evaluation}
|
\input{src/05.related-work}
|
||||||
\input{src/06.conclusion}
|
\input{src/06.conclusion}
|
||||||
\backmatter
|
\backmatter
|
||||||
|
|
||||||
% Declare the use of AI tools as mentioned in the statement of originality.
|
% Declare the use of AI tools as mentioned in the statement of originality.
|
||||||
% Use either the English aitools or the German kitools.
|
% Use either the English aitools or the German kitools.
|
||||||
\begin{aitools}
|
\begin{aitools}
|
||||||
No generative AI tools were used in and for this work whatsoever.
|
\todo{Enter your text here.}
|
||||||
\end{aitools}
|
\end{aitools}
|
||||||
|
|
||||||
%\begin{kitools}
|
%\begin{kitools}
|
||||||
@@ -176,8 +171,8 @@ No generative AI tools were used in and for this work whatsoever.
|
|||||||
\listoftables % Starred version, i.e., \listoftables*, removes the toc entry.
|
\listoftables % Starred version, i.e., \listoftables*, removes the toc entry.
|
||||||
|
|
||||||
% Use an optional list of algorithms.
|
% Use an optional list of algorithms.
|
||||||
%\listofalgorithms
|
\listofalgorithms
|
||||||
%\addcontentsline{toc}{chapter}{List of Algorithms}
|
\addcontentsline{toc}{chapter}{List of Algorithms}
|
||||||
|
|
||||||
% Use an optional list of listings.
|
% Use an optional list of listings.
|
||||||
\cleardoublepage
|
\cleardoublepage
|
||||||
|
|||||||
Reference in New Issue
Block a user