1
0

Compare commits

..

2 Commits

Author SHA1 Message Date
63e9ca046f thesis: Adapt to feedback from Prof. 2025-08-29 11:58:07 +02:00
d028bfb65d proj: Add author file header 2025-08-29 11:57:13 +02:00
13 changed files with 48 additions and 12 deletions

View File

@@ -1,6 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# File: intercept
# Author: Lorenz Stechauner <e12119052@student.tuwien.ac.at>
# Lorenz Stechauner <lorenz.stechauner@necronda.net>
import argparse import argparse
import subprocess import subprocess
import os import os

View File

@@ -1,4 +1,10 @@
/**
* File: intercept.c
* Author: Lorenz Stechauner <e12119052@student.tuwien.ac.at>
* Lorenz Stechauner <lorenz.stechauner@necronda.net>
*/
#define _GNU_SOURCE #define _GNU_SOURCE
#include <getopt.h> #include <getopt.h>

View File

@@ -1,6 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# File: intercept/__init__.py
# Author: Lorenz Stechauner <e12119052@student.tuwien.ac.at>
# Lorenz Stechauner <lorenz.stechauner@necronda.net>
from __future__ import annotations from __future__ import annotations
from typing import Optional, TypedDict, NamedTuple, NotRequired, BinaryIO from typing import Optional, TypedDict, NamedTuple, NotRequired, BinaryIO
from socketserver import UnixStreamServer, StreamRequestHandler, ThreadingMixIn from socketserver import UnixStreamServer, StreamRequestHandler, ThreadingMixIn

View File

@@ -1,6 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# File: intercept/standard.py
# Author: Lorenz Stechauner <e12119052@student.tuwien.ac.at>
# Lorenz Stechauner <lorenz.stechauner@necronda.net>
from __future__ import annotations from __future__ import annotations
from intercept import * from intercept import *

View File

@@ -1,6 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# File: test-interrupts
# Author: Lorenz Stechauner <e12119052@student.tuwien.ac.at>
# Lorenz Stechauner <lorenz.stechauner@necronda.net>
import os import os
import sys import sys
import argparse import argparse

View File

@@ -1,6 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# File: test-memory
# Author: Lorenz Stechauner <e12119052@student.tuwien.ac.at>
# Lorenz Stechauner <lorenz.stechauner@necronda.net>
import argparse import argparse
import subprocess import subprocess
import os import os

View File

@@ -1,6 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# File: test-return-values
# Author: Lorenz Stechauner <e12119052@student.tuwien.ac.at>
# Lorenz Stechauner <lorenz.stechauner@necronda.net>
import os import os
import sys import sys
import argparse import argparse

View File

@@ -9,8 +9,8 @@ This chapter gives a general overview about what the motivation and goal for thi
\section{Motivation and Goal}\label{sec:motivation-and-goal} \section{Motivation and Goal}\label{sec:motivation-and-goal}
When teaching students about Operating Systems, their interfaces, and standard libraries, C is still a widely used language, especially when using Linux. When teaching students about Operating Systems, their interfaces, and standard libraries, C is still a widely used language, especially when using Linux.
Therefore, it is obvious, why many university courses still require students to write their assignments and exams in C\@. Therefore, it is obvious why many university courses still require students to write their assignments and exams in C\@.
The problem, when trying to verify whether students have correctly implemented their assignment, is that low-level OS constructs (like semaphores, pipes, sockets, memory management) make it hard to run automated tests, because the testing system needs to keep track, set up, and verify the usage of these resources. The problem when trying to verify whether students have correctly implemented their assignment is that low-level OS constructs (like semaphores, pipes, sockets, memory management) make it hard to run automated tests, because the testing system needs to keep track of, set up, and verify the usage of these resources.
The goal of this work was to find a way to easily intercept system or function calls, and to verify if students called the right functions, with the right arguments, at the right time. The goal of this work was to find a way to easily intercept system or function calls, and to verify if students called the right functions, with the right arguments, at the right time.
This restriction in scope allows focusing on simple binary programs without having to think about complex or I/O-heavy programs. This restriction in scope allows focusing on simple binary programs without having to think about complex or I/O-heavy programs.

View File

@@ -252,8 +252,8 @@ This allows \texttt{ltrace} to ``dynamically'' display function arguments for an
However, due to implementation complexity and the need for ``complex'' return types for string/buffer and structure values (see Section~\ref{sec:retrieving-function-return-values}), a statically compiled approach has been used for this work. However, due to implementation complexity and the need for ``complex'' return types for string/buffer and structure values (see Section~\ref{sec:retrieving-function-return-values}), a statically compiled approach has been used for this work.
This means that each function formats its arguments and return values itself without any configuration option. This means that each function formats its arguments and return values itself without any configuration option.
The reason for retrieving as much information as possible from each function call, is that at a later point in time, it is possible to completely reconstruct the exact function calls and their sequence. The reason for retrieving as much information as possible from each function call is that at a later point in time it is possible to completely reconstruct the exact function calls and their sequence.
This allows analysis on these records to be performed independently of the corresponding execution of the program. This allows an analysis on these records to be performed independently of the corresponding execution of the program.
It should always be possible to fully parse the recorded calls without any knowledge of specific functions, their argument types, or their return value types. It should always be possible to fully parse the recorded calls without any knowledge of specific functions, their argument types, or their return value types.

View File

@@ -2,7 +2,7 @@
\chapter{Manipulating Function Calls}\label{ch:manipulating-function-calls} \chapter{Manipulating Function Calls}\label{ch:manipulating-function-calls}
This chapter discusses how to manipulate function calls and how this may be used to test programs. This chapter discusses how to manipulate function calls and how this may be used to test programs.
How function calls may be intercepted at all is discussed in Chapter~\ref{ch:intercepting-function-calls}. How function calls may be intercepted at all has been discussed in Chapter~\ref{ch:intercepting-function-calls}.
This chapter builds on the basis of the previous one and expands its functions. This chapter builds on the basis of the previous one and expands its functions.
In this context, ``manipulation'' means changing the arguments of a function, before calling it with the modified arguments, or skipping the execution of the real function completely and simply returning a given value (``mocking''). In this context, ``manipulation'' means changing the arguments of a function, before calling it with the modified arguments, or skipping the execution of the real function completely and simply returning a given value (``mocking'').
These techniques allow in-depth testing of programs. These techniques allow in-depth testing of programs.
@@ -97,10 +97,10 @@ The contents of this message type correspond to the second line of an intercepte
\section{Automated Testing using Function Call Manipulation}\label{sec:automated-testing-using-function-call-manipulation} \section{Automated Testing using Function Call Manipulation}\label{sec:automated-testing-using-function-call-manipulation}
As seen in Figure~\ref{fig:control-flow}, function call manipulation allows for mocking individual calls. As seen in Figure~\ref{fig:control-flow}, function call manipulation allows for mocking individual calls.
Mocking may be used to see how the program behaves when individual calls to function fail, or return an unusual, but valid, value. Mocking may be used to see how the program behaves when individual calls to a function fail, or return an unusual, but valid, value.
The simplest way to automatically test programs is to run them multiple times, allowing a single function call to fail in each run. The simplest way to automatically test programs is to run them multiple times, allowing a single function call to fail in each run.
The resulting sequences of function calls may now be combined into a call sequence graph (or tree). The resulting sequences of function calls may now be combined into a call sequence graph (or tree).
By analyzing this call graph, it is possible to decide, if a program correctly terminated, when faced with a failed function call. By analyzing this call graph, it is possible to decide whether a program terminated correctly when faced with a failed function call.
This may be the case when the following function calls differ from those which were recorded on a default run (without any mocked function calls). This may be the case when the following function calls differ from those which were recorded on a default run (without any mocked function calls).
@@ -109,11 +109,12 @@ This may be the case when the following function calls differ from those which w
Figure~\ref{fig:call-sequence} shows the simplified and collapsed call sequence graph of the prior example in Section~\ref{sec:intercepting-example}. Figure~\ref{fig:call-sequence} shows the simplified and collapsed call sequence graph of the prior example in Section~\ref{sec:intercepting-example}.
Each edge between two nodes without any label indicates the next function call on a normal run of the program. Each edge between two nodes without any label indicates the next function call on a normal run of the program.
Edges labeled with ``fail'' indicate the next function call after a mocked failed call. Edges labeled with ``fail'' indicate the next function call after a mocked failed call.
In reality, there are multiple failing paths, each for every possible error return value, but in this example they all yield the same resulting path, therefore, they have been collapsed. In reality, there are multiple failing paths, one for each possible error return value.
However, in this example they all yield the same resulting path, and have therefore been collapsed.
To test, if a programmer always checked the return value of a function and acted accordingly, this resulting call sequence graph now may be analyzed. To test whether a programmer always checked the return value of a function and acted accordingly, this resulting call sequence graph may now be analyzed.
At first glance, this test appears trivial. At first glance, this test appears trivial.
The simplest approach is to verify, that after a failing function call, only ``cleanup'' function calls (\texttt{free}, \texttt{close}, \texttt{exit}, \dots) follow. The simplest approach is to verify that after a failing function call only ``cleanup'' function calls (\texttt{free}, \texttt{close}, \texttt{exit}, \dots) follow.
For simple programs, this assumption may hold, but there are many exceptions. For simple programs, this assumption may hold, but there are many exceptions.
For example, what if the program recognizes the failed call correctly as failed but recovers and continues to operate normally? For example, what if the program recognizes the failed call correctly as failed but recovers and continues to operate normally?
Or what if the ``cleanup'' path is very complex and includes function calls not previously marked as valid cleanup functions? Or what if the ``cleanup'' path is very complex and includes function calls not previously marked as valid cleanup functions?

View File

@@ -153,3 +153,5 @@ As seen in Subsection~\ref{subsec:performance-intercepting}, most delay comes no
This also applies to function call manipulation. This also applies to function call manipulation.
The performance degradation heavily depends on the response speed of the used socket. The performance degradation heavily depends on the response speed of the used socket.
Therefore, an explicit performance test on manipulation was deemed unlikely to yield meaningful results and was not carried out. Therefore, an explicit performance test on manipulation was deemed unlikely to yield meaningful results and was not carried out.
\todo{Simple performance test}

View File

@@ -1,6 +1,8 @@
\chapter{Conclusion}\label{ch:conclusion} \chapter{Conclusion}\label{ch:conclusion}
\todo{Start with Goals in OSVU}
This work presented \texttt{intercept.so}, a shared object file intended to be preloaded using \texttt{LD\_PRELOAD}, which may be used to intercept function calls on Linux systems. This work presented \texttt{intercept.so}, a shared object file intended to be preloaded using \texttt{LD\_PRELOAD}, which may be used to intercept function calls on Linux systems.
Furthermore, a supporting Python program, \texttt{intercept}, was presented to make the shared object easier to use. Furthermore, a supporting Python program, \texttt{intercept}, was presented to make the shared object easier to use.
By using preloading to hook or intercept function calls, the overhead and performance degradation remain negligible for the purpose of testing student submissions. By using preloading to hook or intercept function calls, the overhead and performance degradation remain negligible for the purpose of testing student submissions.

View File

@@ -170,6 +170,7 @@
\begin{aitools} \begin{aitools}
No generative AI tools were used in and for this work whatsoever. No generative AI tools were used in and for this work whatsoever.
The only exception was the use of ChatGPT for proofreading and refining of the abstract. The only exception was the use of ChatGPT for proofreading and refining of the abstract.
\todo{Remove}
\end{aitools} \end{aitools}
%\begin{kitools} %\begin{kitools}
@@ -180,7 +181,7 @@
\listoffigures % Starred version, i.e., \listoffigures*, removes the toc entry. \listoffigures % Starred version, i.e., \listoffigures*, removes the toc entry.
% Use an optional list of tables. % Use an optional list of tables.
\cleardoublepage % Start list of tables on the next empty right hand page. %\cleardoublepage % Start list of tables on the next empty right hand page.
\listoftables % Starred version, i.e., \listoftables*, removes the toc entry. \listoftables % Starred version, i.e., \listoftables*, removes the toc entry.
% Use an optional list of algorithms. % Use an optional list of algorithms.
@@ -188,7 +189,7 @@
%\addcontentsline{toc}{chapter}{List of Algorithms} %\addcontentsline{toc}{chapter}{List of Algorithms}
% Use an optional list of listings. % Use an optional list of listings.
\cleardoublepage %\cleardoublepage
\listof{listing}{\listoflistingscaption} \listof{listing}{\listoflistingscaption}
\addcontentsline{toc}{chapter}{\listoflistingscaption} \addcontentsline{toc}{chapter}{\listoflistingscaption}