From 17ffb06306ae886fe28462c09b54fe58ef9bc0b2 Mon Sep 17 00:00:00 2001 From: Lorenz Stechauner Date: Fri, 18 Jul 2025 23:47:24 +0200 Subject: [PATCH] thesis: Complete 2.6 and 2.7 --- proj/intercept/intercept | 5 +- thesis/src/02.intercept.tex | 151 +++++++++++++++++++++++++++++++++++- thesis/src/99.intercept.bib | 10 +++ 3 files changed, 160 insertions(+), 6 deletions(-) diff --git a/proj/intercept/intercept b/proj/intercept/intercept index 2f88813..587791c 100755 --- a/proj/intercept/intercept +++ b/proj/intercept/intercept @@ -21,7 +21,7 @@ def main() -> None: if len(extra) > 0 and extra[0] == '--': extra.pop(0) if len(extra) == 0: - parser.error('command expected after arguments or \'--\'') + parser.error("command expected after arguments or '--'") if args.intercept: intercept = args.intercept @@ -34,7 +34,8 @@ def main() -> None: 'INTERCEPT': intercept, 'INTERCEPT_VERBOSE': '0' if args.sparse else '1', 'INTERCEPT_FUNCTIONS': args.functions or '*', - 'INTERCEPT_LIBRARIES': '*,-/lib*,-/usr/lib*' if args.only_own else args.libraries or '*', + 'INTERCEPT_LIBRARIES': '*,-/lib*,-/usr/lib*' if + args.only_own else args.libraries or '*', }) diff --git a/thesis/src/02.intercept.tex b/thesis/src/02.intercept.tex index d3428cb..e208780 100644 --- a/thesis/src/02.intercept.tex +++ b/thesis/src/02.intercept.tex @@ -226,18 +226,18 @@ because it is simple to use (``clean'' source code, easy to compile and run prog The following sections concern the next steps in what else is needed to create a powerful ``interceptor''. -\section{Fundamental Project Structure}\label{sec:structure} +\section{Fundamental Project Structure}\label{sec:fundameltal-project-structure} After deciding to use the preloading method to intercept function calls, a more detailed plan is needed to continue developing. It was decided to have one single \texttt{intercept.so} file as a resulting artifact which then may be loaded via the \texttt{LD\_PRELOAD} environment variable. 
The easiest and most straightforward way to structure the source code was to put all code in one single C file. -Listing \ref{lst:intecept-preload.c} gives an overview over the grounding code structure. +Listing \ref{lst:intercept-preload.c} gives an overview over the grounding code structure. For each function that should be intercepted, this function simply has to be declared and defined the same way \texttt{malloc} was. \begin{listing}[htbp] \inputminted[linenos]{c}{listings/intercept-preload.c} \caption{Contents of \texttt{intercept-preload.c}.} - \label{lst:intecept-preload.c} + \label{lst:intercept-preload.c} \end{listing} @@ -350,7 +350,150 @@ Example (\texttt{read}): \\ \section{Determining Function Call Location}\label{sec:determining-function-call-location} -Lorem Ipsum. +\todo{} +Besides argument values and return values, it would be interesting to know where inside the intercepted program the function call came from. +At first this seems quite impossible. +But\dots + +\subsection{Return Address and Relative Position}\label{subsec:return-address-and-relative-position} + +\todo{} +See the GCC manual~\cite[Section~7.6]{gcc}: + +\begin{quote} + \begin{description} + \item[\texttt{void *\_\_builtin\_return\_address(unsigned int \textit{level})}] \ \ + + This function returns the return address of the current function, or of one of its callers. + The \textit{level} argument is number of frames to scan up the call stack. + A value of \texttt{0} yields the return address of the current function, a value of \texttt{1} yields the return address of the caller of the current function, and so forth.
+ \lbrack\dots\rbrack + \end{description} +\end{quote} + +\todo{} +See the dladdr(3) Linux manual page~\cite{dladdr.3}: + +\begin{quote} + + \begin{description} + \item[\texttt{int dladdr(const void *addr, Dl\_info *info)}] \ \ + + The function \texttt{dladdr()} determines whether the address specified in \textit{addr} is located in one of the shared objects loaded by the calling application. + If it is, then \texttt{dladdr()} returns information about the shared object and symbol that overlaps \textit{addr}. + This information is returned in a \texttt{Dl\_info} structure: + + \begin{minted}{C} +typedef struct { + const char *dli_fname; /* Pathname of shared object + that contains address */ + void *dli_fbase; /* Base address at which + shared object is loaded */ + const char *dli_sname; /* Name of symbol whose + definition overlaps addr */ + void *dli_saddr; /* Exact address of symbol + named in dli_sname */ +} Dl_info; + \end{minted} + + \lbrack\dots\rbrack + \end{description} +\end{quote} + + +\subsection{Source File and Line Number}\label{subsec:source-file-and-line-number} + +\todo{} +See the OPTIONS section in the readelf(1) Linux manual page~\cite{readelf.1}: + +\begin{quote} + \begin{description} + \item[\texttt{-{}-debug-dump}] + Displays the contents of the DWARF debug sections in the file, if any are present. + [\dots] + The letters and words refer to the following information: + \begin{description} + \item {}[\dots] + \item[\texttt{=rawline}] Displays the contents of the \texttt{.debug\_line} section in a raw format. + \item[\texttt{=decodedline}] Displays the interpreted contents of the \texttt{.debug\_line} section. + \item {}[\dots] + \end{description} + \end{description} +\end{quote} + + +\section{\texttt{intercept.so} Library}\label{sec:intercept.so-library} + +The time has come for putting it all together. +As mentioned in Section~\ref{sec:fundameltal-project-structure}, almost the whole project exists in one source file, \texttt{intercept.c}.
+This file is compiled to \texttt{intercept.so}, which may be preloaded using \texttt{LD\_PRELOAD} and controlled with other environment variables. +These other environment variables are described in the following: + +\begin{description} + \item[\texttt{INTERCEPT}] + This variable has to be set to enable function call interception. + The value decides where to output/print/write/send the recorded function calls. + Values may be \texttt{stdout}, \texttt{stderr}, \texttt{file:\textit{<path>}}, \texttt{unix:\textit{<path>}}. + \item[\texttt{INTERCEPT\_VERBOSE}] + This variable indicates whether string and structure types should be printed fully or empty. + Possible values are \texttt{0} and \texttt{1} (default). + \item[\texttt{INTERCEPT\_FUNCTIONS}] + This variable is used to specify which function calls should be intercepted. + It is a list separated by commas, colons, or semicolons. + Wildcards (\texttt{*}) at the end of function names are possible. + A prefix of \texttt{-} indicates that the following function should not be intercepted. + Example: \texttt{*,-sem\_*} intercepts all functions except those which start with \texttt{sem\_}. + By default, all (implemented) functions are intercepted. + \item[\texttt{INTERCEPT\_LIBRARIES}] + This variable is used to specify which libraries' function calls should be intercepted. + It is a list separated by commas, colons, or semicolons. + Wildcards (\texttt{*}) at the end of library paths are possible. + A prefix of \texttt{-} indicates that the following library path should not be intercepted. + Example: \texttt{*,-/lib*,-/usr/lib*} intercepts only function calls originating from binaries outside \texttt{/lib*} or \texttt{/usr/lib*}, which in most cases is the executed program itself. + By default, function calls from everywhere are intercepted.
+\end{description} + + +\section{\texttt{intercept} Command}\label{sec:intercept-command} + +To make the usage of the aforementioned shared object easier, a simple Python script has been put together. +This script may be used as a command line tool. +See Listing~\ref{lst:intercept}. + +\begin{listing}[htbp] + \inputminted[linenos]{python}{../proj/intercept/intercept} + \caption{Contents of \texttt{intercept}.} + \label{lst:intercept} +\end{listing} + +The synopsis of the command is as follows: +\begin{minted}{text} +intercept [-h] [-F FUNCTIONS] [-s] [-o | -L LIBRARIES] \ + [-l LOG | -i INTERCEPT] [--] COMMAND [ARGS...] +\end{minted} + +\begin{description} + \item[\texttt{-F}, \texttt{-{}-functions}] + A list of functions to intercept. + See Section~\ref{sec:intercept.so-library} for more details. + Default value is \texttt{*}. + \item[\texttt{-s}, \texttt{-{}-sparse}] + Indicates that strings and structures should be printed empty to save bandwidth. + \item[\texttt{-o}, \texttt{-{}-only-own}] + A shorthand for \texttt{-L *,-/lib*,-/usr/lib*}. + This has the effect that only function calls from the executed binary itself are recorded. + \item[\texttt{-L}, \texttt{-{}-libraries}] + A list of library paths to intercept function calls from. + See Section~\ref{sec:intercept.so-library} for more details. + Default value is \texttt{*} (except when \texttt{-o} is present). + \item[\texttt{-l}, \texttt{-{}-log}] + Used to specify in which file the recorded function calls should be logged. + Shorthand for \texttt{-i file:\textit{<path>}}. + \item[\texttt{-i}, \texttt{-{}-intercept}] + Decides where to output/print/write/send the recorded function calls. + Values may be \texttt{stdout}, \texttt{stderr}, \texttt{file:\textit{<path>}}, \texttt{unix:\textit{<path>}}. + See Section~\ref{sec:intercept.so-library} for more details.
+\end{description} \section{Example}\label{sec:intercepting-example} diff --git a/thesis/src/99.intercept.bib b/thesis/src/99.intercept.bib index 9e06d49..9daced0 100644 --- a/thesis/src/99.intercept.bib +++ b/thesis/src/99.intercept.bib @@ -22,6 +22,12 @@ @manual{ltrace.conf.5, title = {ltrace.conf(5) -- ltrace configuration file -- Linux manual pages}, } +@manual{dladdr.3, + title = {dladdr(3) -- Library Functions Manual -- Linux manual pages}, +} +@manual{readelf.1, + title = {READELF(1) -- GNU Development Tools -- Linux manual pages}, +} @book{netsectools2005, author = {Dhanjani, Nitesh and Clarke, Justin}, title = {Network Security Tools}, @@ -31,3 +37,7 @@ publisher = {O'Reilly}, url = {https://litux.nl/mirror/networksecuritytools/0596007949/toc.html}, } +@manual{gcc, + title = {Using the GNU Compiler Collection (GCC)}, + url = {https://gcc.gnu.org/onlinedocs/gcc/index.html}, +}