@phdthesis{Drobczyk2024, author = {Drobczyk, Martin}, title = {Ultra-Wideband Wireless Network for Enhanced Intra-Spacecraft Communication}, doi = {10.25972/OPUS-35956}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-359564}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2024}, abstract = {Wireless communication networks already comprise an integral part of both the private and industrial sectors and are successfully replacing existing wired networks. They enable the development of novel applications and offer greater flexibility and efficiency. Although some efforts are already underway in the aerospace sector to deploy wireless communication networks on board spacecraft, none of these projects have yet succeeded in replacing the hard-wired state-of-the-art architecture for intra-spacecraft communication. The advantages are evident as the reduction of the wiring harness saves time, mass, and costs, and makes the whole integration process more flexible. It also allows for easier scaling when interconnecting different systems. This dissertation deals with the design and implementation of a wireless network architecture to enhance intra-spacecraft communications by breaking with the state-of-the-art standards that have existed in the space industry for decades. The potential and benefits of this novel wireless network architecture are evaluated, an innovative design using ultra-wideband technology is presented. It is combined with a Medium Access Control (MAC) layer tailored for low-latency and deterministic networks supporting even mission-critical applications. As demonstrated by the Wireless Compose experiment on the International Space Station (ISS), this technology is not limited to communications but also enables novel positioning applications. To address the technological challenges, extensive studies have been carried out on electromagnetic compatibility, space radiation, and data robustness. 
The architecture was evaluated from various perspectives and successfully demonstrated in space. Overall, this research highlights how a wireless network can improve and potentially replace existing state-of-the-art communication systems on board spacecraft in future missions. And it will help to adapt and ultimately accelerate the implementation of wireless networks in space systems.}, subject = {Raumfahrttechnik}, language = {en} } @phdthesis{Zink2024, author = {Zink, Johannes}, title = {Algorithms for Drawing Graphs and Polylines with Straight-Line Segments}, doi = {10.25972/OPUS-35475}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-354756}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2024}, abstract = {Graphs provide a key means to model relationships between entities. They consist of vertices representing the entities, and edges representing relationships between pairs of entities. To make people conceive the structure of a graph, it is almost inevitable to visualize the graph. We call such a visualization a graph drawing. Moreover, we have a straight-line graph drawing if each vertex is represented as a point (or a small geometric object, e.g., a rectangle) and each edge is represented as a line segment between its two vertices. A polyline is a very simple straight-line graph drawing, where the vertices form a sequence according to which the vertices are connected by edges. An example of a polyline in practice is a GPS trajectory. The underlying road network, in turn, can be modeled as a graph. This book addresses problems that arise when working with straight-line graph drawings and polylines. In particular, we study algorithms for recognizing certain graphs representable with line segments, for generating straight-line graph drawings, and for abstracting polylines. 
In the first part, we first examine, how and in which time we can decide whether a given graph is a stick graph, that is, whether its vertices can be represented as vertical and horizontal line segments on a diagonal line, which intersect if and only if there is an edge between them. We then consider the visual complexity of graphs. Specifically, we investigate, for certain classes of graphs, how many line segments are necessary for any straight-line graph drawing, and whether three (or more) different slopes of the line segments are sufficient to draw all edges. Last, we study the question, how to assign (ordered) colors to the vertices of a graph with both directed and undirected edges such that no neighboring vertices get the same color and colors are ascending along directed edges. Here, the special property of the considered graph is that the vertices can be represented as intervals that overlap if and only if there is an edge between them. The latter problem is motivated by an application in automated drawing of cable plans with vertical and horizontal line segments, which we cover in the second part. We describe an algorithm that gets the abstract description of a cable plan as input, and generates a drawing that takes into account the special properties of these cable plans, like plugs and groups of wires. We then experimentally evaluate the quality of the resulting drawings. In the third part, we study the problem of abstracting (or simplifying) a single polyline and a bundle of polylines. 
In this problem, the objective is to remove as many vertices as possible from the given polyline(s) while keeping each resulting polyline sufficiently similar to its original course (according to a given similarity measure).}, subject = {Graphenzeichnen}, language = {en} } @phdthesis{Loh2024, author = {Loh, Frank}, title = {Monitoring the Quality of Streaming and {Internet of Things} Applications}, edition = {korrigierte Version}, issn = {1432-8801}, doi = {10.25972/OPUS-35096}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-350969}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2024}, abstract = {The ongoing and evolving usage of networks presents two critical challenges for current and future networks that require attention: (1) the task of effectively managing the vast and continually increasing data traffic and (2) the need to address the substantial number of end devices resulting from the rapid adoption of the Internet of Things. Besides these challenges, there is a mandatory need for energy consumption reduction, a more efficient resource usage, and streamlined processes without losing service quality. 
We comprehensively address these efforts, tackling the monitoring and quality assessment of streaming applications, a leading contributor to the total Internet traffic, as well as conducting an exhaustive analysis of the network performance within a Long Range Wide Area Network (LoRaWAN), one of the rapidly emerging LPWAN solutions.}, subject = {Leistungsbewertung}, language = {en} } @phdthesis{Kobs2024, author = {Kobs, Konstantin}, title = {Think outside the Black Box: Model-Agnostic Deep Learning with Domain Knowledge}, doi = {10.25972/OPUS-34968}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-349689}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2024}, abstract = {Deep Learning (DL) models are trained on a downstream task by feeding (potentially preprocessed) input data through a trainable Neural Network (NN) and updating its parameters to minimize the loss function between the predicted and the desired output. While this general framework has mainly remained unchanged over the years, the architectures of the trainable models have greatly evolved. Even though it is undoubtedly important to choose the right architecture, we argue that it is also beneficial to develop methods that address other components of the training process. We hypothesize that utilizing domain knowledge can be helpful to improve DL models in terms of performance and/or efficiency. Such model-agnostic methods can be applied to any existing or future architecture. Furthermore, the black box nature of DL models motivates the development of techniques to understand their inner workings. Considering the rapid advancement of DL architectures, it is again crucial to develop model-agnostic methods. In this thesis, we explore six principles that incorporate domain knowledge to understand or improve models. They are applied either on the input or output side of the trainable model. Each principle is applied to at least two DL tasks, leading to task-specific implementations. 
To understand DL models, we propose to use Generated Input Data coming from a controllable generation process requiring knowledge about the data properties. This way, we can understand the model's behavior by analyzing how it changes when one specific high-level input feature changes in the generated data. On the output side, Gradient-Based Attribution methods create a gradient at the end of the NN and then propagate it back to the input, indicating which low-level input features have a large influence on the model's prediction. The resulting input features can be interpreted by humans using domain knowledge. To improve the trainable model in terms of downstream performance, data and compute efficiency, or robustness to unwanted features, we explore principles that each address one of the training components besides the trainable model. Input Masking and Augmentation directly modifies the training input data, integrating knowledge about the data and its impact on the model's output. We also explore the use of Feature Extraction using Pretrained Multimodal Models which can be seen as a beneficial preprocessing step to extract useful features. When no training data is available for the downstream task, using such features and domain knowledge expressed in other modalities can result in a Zero-Shot Learning (ZSL) setting, completely eliminating the trainable model. The Weak Label Generation principle produces new desired outputs using knowledge about the labels, giving either a good pretraining or even exclusive training dataset to solve the downstream task. Finally, improving and choosing the right Loss Function is another principle we explore in this thesis. Here, we enrich existing loss functions with knowledge about label interactions or utilize and combine multiple task-specific loss functions in a multitask setting. We apply the principles to classification, regression, and representation tasks as well as to image and text modalities. 
We propose, apply, and evaluate existing and novel methods to understand and improve the model. Overall, this thesis introduces and evaluates methods that complement the development and choice of DL model architectures.}, subject = {Deep learning}, language = {en} } @phdthesis{Loh2024a, author = {Loh, Frank}, title = {Monitoring the Quality of Streaming and {Internet of Things} Applications}, issn = {1432-8801}, doi = {10.25972/OPUS-34783}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-347831}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2024}, abstract = {The ongoing and evolving usage of networks presents two critical challenges for current and future networks that require attention: (1) the task of effectively managing the vast and continually increasing data traffic and (2) the need to address the substantial number of end devices resulting from the rapid adoption of the Internet of Things. Besides these challenges, there is a mandatory need for energy consumption reduction, a more efficient resource usage, and streamlined processes without losing service quality. We comprehensively address these efforts, tackling the monitoring and quality assessment of streaming applications, a leading contributor to the total Internet traffic, as well as conducting an exhaustive analysis of the network performance within a Long Range Wide Area Network (LoRaWAN), one of the rapidly emerging LPWAN solutions.}, subject = {Leistungsbewertung}, language = {en} } @phdthesis{Bleier2023, author = {Bleier, Michael}, title = {Underwater Laser Scanning -- Refractive Calibration, Self-calibration and Mapping for {3D} Reconstruction}, isbn = {978-3-945459-45-4}, doi = {10.25972/OPUS-32269}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-322693}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2023}, abstract = {There is great interest in affordable, precise and reliable metrology underwater: Archaeologists want to document artifacts in situ with high detail. 
In marine research, biologists require the tools to monitor coral growth and geologists need recordings to model sediment transport. Furthermore, for offshore construction projects, maintenance and inspection millimeter-accurate measurements of defects and offshore structures are essential. While the process of digitizing individual objects and complete sites on land is well understood and standard methods, such as Structure from Motion or terrestrial laser scanning, are regularly applied, precise underwater surveying with high resolution is still a complex and difficult task. Applying optical scanning techniques in water is challenging due to reduced visibility caused by turbidity and light absorption. However, optical underwater scanners provide significant advantages in terms of achievable resolution and accuracy compared to acoustic systems. This thesis proposes an underwater laser scanning system and the algorithms for creating dense and accurate 3D scans in water. It is based on laser triangulation and the main optical components are an underwater camera and a cross-line laser projector. The prototype is configured with a motorized yaw axis for capturing scans from a tripod. Alternatively, it is mounted to a moving platform for mobile mapping. The main focus lies on the refractive calibration of the underwater camera and laser projector, the image processing and 3D reconstruction. For highest accuracy, the refraction at the individual media interfaces must be taken into account. This is addressed by an optimization-based calibration framework using a physical-geometric camera model derived from an analytical formulation of a ray-tracing projection model. In addition to scanning underwater structures, this work presents the 3D acquisition of semi-submerged structures and the correction of refraction effects. 
As in-situ calibration in water is complex and time-consuming, the challenge of transferring an in-air scanner calibration to water without re-calibration is investigated, as well as self-calibration techniques for structured light. The system was successfully deployed in various configurations for both static scanning and mobile mapping. An evaluation of the calibration and 3D reconstruction using reference objects and a comparison of free-form surfaces in clear water demonstrate the high accuracy potential in the range of one millimeter to less than one centimeter, depending on the measurement distance. Mobile underwater mapping and motion compensation based on visual-inertial odometry is demonstrated using a new optical underwater scanner based on fringe projection. Continuous registration of individual scans allows the acquisition of 3D models from an underwater vehicle. RGB images captured in parallel are used to create 3D point clouds of underwater scenes in full color. 3D maps are useful to the operator during the remote control of underwater vehicles and provide the building blocks to enable offshore inspection and surveying tasks. The advancing automation of the measurement technology will allow non-experts to use it, significantly reduce acquisition time and increase accuracy, making underwater metrology more cost-effective.}, subject = {Selbstkalibrierung}, language = {en} } @phdthesis{Krenzer2023, author = {Krenzer, Adrian}, title = {Machine learning to support physicians in endoscopic examinations with a focus on automatic polyp detection in images and videos}, doi = {10.25972/OPUS-31911}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-319119}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2023}, abstract = {Deep learning enables enormous progress in many computer vision-related tasks. Artificial Intelligence (AI) steadily yields new state-of-the-art results in the field of detection and classification. 
Thereby AI performance equals or exceeds human performance. Those achievements impacted many domains, including medical applications. One particular field of medical applications is gastroenterology. In gastroenterology, machine learning algorithms are used to assist examiners during interventions. One of the most critical concerns for gastroenterologists is the development of Colorectal Cancer (CRC), which is one of the leading causes of cancer-related deaths worldwide. Detecting polyps in screening colonoscopies is the essential procedure to prevent CRC. Thereby, the gastroenterologist uses an endoscope to screen the whole colon to find polyps during a colonoscopy. Polyps are mucosal growths that can vary in severity. This thesis supports gastroenterologists in their examinations with automated detection and classification systems for polyps. The main contribution is a real-time polyp detection system. This system is ready to be installed in any gastroenterology practice worldwide using open-source software. The system achieves state-of-the-art detection results and is currently evaluated in a clinical trial in four different centers in Germany. The thesis presents two additional key contributions: One is a polyp detection system with extended vision tested in an animal trial. Polyps often hide behind folds or in uninvestigated areas. Therefore, the polyp detection system with extended vision uses an endoscope assisted by two additional cameras to see behind those folds. If a polyp is detected, the endoscopist receives a visual signal. While the detection system handles the additional two camera inputs, the endoscopist focuses on the main camera as usual. The second one are two polyp classification models, one for the classification based on shape (Paris) and the other on surface and texture (NBI International Colorectal Endoscopic (NICE) classification). 
Both classifications help the endoscopist with the treatment of and the decisions about the detected polyp. The key algorithms of the thesis achieve state-of-the-art performance. Outstandingly, the polyp detection system tested on a highly demanding video data set shows an F1 score of 90.25 \% while working in real-time. The results exceed all real-time systems in the literature. Furthermore, the first preliminary results of the clinical trial of the polyp detection system suggest a high Adenoma Detection Rate (ADR). In the preliminary study, all polyps were detected by the polyp detection system, and the system achieved a high usability score of 96.3 (max 100). The Paris classification model achieved an F1 score of 89.35 \% which is state-of-the-art. The NICE classification model achieved an F1 score of 81.13 \%. Furthermore, a large data set for polyp detection and classification was created during this thesis. Therefore a fast and robust annotation system called Fast Colonoscopy Annotation Tool (FastCAT) was developed. The system simplifies the annotation process for gastroenterologists. Thereby the gastroenterologists only annotate key parts of the endoscopic video. Afterward, those video parts are pre-labeled by a polyp detection AI to speed up the process. After the AI has pre-labeled the frames, non-experts correct and finish the annotation. This annotation process is fast and ensures high quality. FastCAT reduces the overall workload of the gastroenterologist on average by a factor of 20 compared to an open-source state-of-art annotation tool.}, subject = {Deep Learning}, language = {en} } @phdthesis{Kanbar2023, author = {Kanbar, Farah}, title = {Asymptotic and Stationary Preserving Schemes for Kinetic and Hyperbolic Partial Differential Equations}, edition = {1. 
Auflage}, publisher = {W{\"u}rzburg University Press}, address = {W{\"u}rzburg}, isbn = {978-3-95826-210-2}, doi = {10.25972/WUP-978-3-95826-211-9}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-301903}, school = {Universit{\"a}t W{\"u}rzburg}, pages = {xiv, 137}, year = {2023}, abstract = {In this thesis, we are interested in numerically preserving stationary solutions of balance laws. We start by developing finite volume well-balanced schemes for the system of Euler equations and the system of MHD equations with gravitational source term. Since fluid models and kinetic models are related, this leads us to investigate AP schemes for kinetic equations and their ability to preserve stationary solutions. Kinetic models typically have a stiff term, thus AP schemes are needed to capture good solutions of the model. For such kinetic models, equilibrium solutions are reached after large time. Thus we need a new technique to numerically preserve stationary solutions for AP schemes. We find a criterion for SP schemes for kinetic equations which states, that AP schemes under a particular discretization are also SP. In an attempt to mimic our result for kinetic equations in the context of fluid models, for the isentropic Euler equations we developed an AP scheme in the limit of the Mach number going to zero. Our AP scheme is proven to have a SP property under the condition that the pressure is a function of the density and the latter is obtained as a solution of an elliptic equation. 
The properties of the schemes we developed and its criteria are validated numerically by various test cases from the literature.}, subject = {Angewandte Mathematik}, language = {en} } @phdthesis{Steininger2023, author = {Steininger, Michael}, title = {Deep Learning for Geospatial Environmental Regression}, doi = {10.25972/OPUS-31312}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-313121}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2023}, abstract = {Environmental issues have emerged especially since humans burned fossil fuels, which led to air pollution and climate change that harm the environment. These issues' substantial consequences evoked strong efforts towards assessing the state of our environment. Various environmental machine learning (ML) tasks aid these efforts. These tasks concern environmental data but are common ML tasks otherwise, i.e., datasets are split (training, validation, test), hyperparameters are optimized on validation data, and test set metrics measure a model's generalizability. This work focuses on the following environmental ML tasks: Regarding air pollution, land use regression (LUR) estimates air pollutant concentrations at locations where no measurements are available based on measured locations and each location's land use (e.g., industry, streets). For LUR, this work uses data from London (modeled) and Zurich (measured). Concerning climate change, a common ML task is model output statistics (MOS), where a climate model's output for a study area is altered to better fit Earth observations and provide more accurate climate data. This work uses the regional climate model (RCM) REMO and Earth observations from the E-OBS dataset for MOS. Another task regarding climate is grain size distribution interpolation where soil properties at locations without measurements are estimated based on the few measured locations. This can provide climate models with soil information, that is important for hydrology. 
For this task, data from Lower Franconia is used. Such environmental ML tasks commonly have a number of properties: (i) geospatiality, i.e., their data refers to locations relative to the Earth's surface. (ii) The environmental variables to estimate or predict are usually continuous. (iii) Data can be imbalanced due to relatively rare extreme events (e.g., extreme precipitation). (iv) Multiple related potential target variables can be available per location, since measurement devices often contain different sensors. (v) Labels are spatially often only sparsely available since conducting measurements at all locations of interest is usually infeasible. These properties present challenges but also opportunities when designing ML methods for such tasks. In the past, environmental ML tasks have been tackled with conventional ML methods, such as linear regression or random forests (RFs). However, the field of ML has made tremendous leaps beyond these classic models through deep learning (DL). In DL, models use multiple layers of neurons, producing increasingly higher-level feature representations with growing layer depth. DL has made previously infeasible ML tasks feasible, improved the performance for many tasks in comparison to existing ML models significantly, and eliminated the need for manual feature engineering in some domains due to its ability to learn features from raw data. To harness these advantages for environmental domains it is promising to develop novel DL methods for environmental ML tasks. This thesis presents methods for dealing with special challenges and exploiting opportunities inherent to environmental ML tasks in conjunction with DL. To this end, the proposed methods explore the following techniques: (i) Convolutions as in convolutional neural networks (CNNs) to exploit reoccurring spatial patterns in geospatial data. (ii) Posing the problems as regression tasks to estimate the continuous variables. 
(iii) Density-based weighting to improve estimation performance for rare and extreme events. (iv) Multi-task learning to make use of multiple related target variables. (v) Semi-supervised learning to cope with label sparsity. Using these techniques, this thesis considers four research questions: (i) Can air pollution be estimated without manual feature engineering? This is answered positively by the introduction of the CNN-based LUR model MapLUR as well as the off-the-shelf LUR solution OpenLUR. (ii) Can colocated pollution data improve spatial air pollution models? Multi-task learning for LUR is developed for this, showing potential for improvements with colocated data. (iii) Can DL models improve the quality of climate model outputs? The proposed DL climate MOS architecture ConvMOS demonstrates this. Additionally, semi-supervised training of multilayer perceptrons (MLPs) for grain size distribution interpolation is presented, which can provide improved input data. (iv) Can DL models be taught to better estimate climate extremes? To this end, density-based weighting for imbalanced regression (DenseLoss) is proposed and applied to the DL architecture ConvMOS, improving climate extremes estimation. These methods show how especially DL techniques can be developed for environmental ML tasks with their special characteristics in mind. 
This allows for better models than previously possible with conventional ML, leading to more accurate assessment and better understanding of the state of our environment.}, subject = {Deep learning}, language = {en} } @phdthesis{Eismann2023, author = {Eismann, Simon}, title = {Performance Engineering of Serverless Applications and Platforms}, doi = {10.25972/OPUS-30313}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-303134}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2023}, abstract = {Serverless computing is an emerging cloud computing paradigm that offers a high-level application programming model with utilization-based billing. It enables the deployment of cloud applications without managing the underlying resources or worrying about other operational aspects. Function-as-a-Service (FaaS) platforms implement serverless computing by allowing developers to execute code on-demand in response to events with continuous scaling while having to pay only for the time used with sub-second metering. Cloud providers have further introduced many fully managed services for databases, messaging buses, and storage that also implement a serverless computing model. Applications composed of these fully managed services and FaaS functions are quickly gaining popularity in both industry and in academia. However, due to this rapid adoption, much information surrounding serverless computing is inconsistent and often outdated as the serverless paradigm evolves. This makes the performance engineering of serverless applications and platforms challenging, as there are many open questions, such as: What types of applications is serverless computing well suited for, and what are its limitations? How should serverless applications be designed, configured, and implemented? Which design decisions impact the performance properties of serverless platforms and how can they be optimized? 
These and many other open questions can be traced back to an inconsistent understanding of serverless applications and platforms, which could present a major roadblock in the adoption of serverless computing. In this thesis, we address the lack of performance knowledge surrounding serverless applications and platforms from multiple angles: we conduct empirical studies to further the understanding of serverless applications and platforms, we introduce automated optimization methods that simplify the operation of serverless applications, and we enable the analysis of design tradeoffs of serverless platforms by extending white-box performance modeling.}, subject = {Leistungsbewertung}, language = {en} } @phdthesis{Nogatz2023, author = {Nogatz, Falco}, title = {Defining and Implementing Domain-Specific Languages with {Prolog}}, doi = {10.25972/OPUS-30187}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-301872}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2023}, abstract = {The landscape of today's programming languages is manifold. With the diversity of applications, the difficulty of adequately addressing and specifying the used programs increases. This often leads to newly designed and implemented domain-specific languages. They enable domain experts to express knowledge in their preferred format, resulting in more readable and concise programs. Due to its flexible and declarative syntax without reserved keywords, the logic programming language Prolog is particularly suitable for defining and embedding domain-specific languages. This thesis addresses the questions and challenges that arise when integrating domain-specific languages into Prolog. We compare the two approaches to define them either externally or internally, and provide assisting tools for each. The grammar of a formal language is usually defined in the extended Backus-Naur form. 
In this work, we handle this formalism as a domain-specific language in Prolog, and define term expansions that allow to translate it into equivalent definite clause grammars. We present the package library(dcg4pt) for SWI-Prolog, which enriches them by an additional argument to automatically process the term's corresponding parse tree. To simplify the work with definite clause grammars, we visualise their application by a web-based tracer. The external integration of domain-specific languages requires the programmer to keep the grammar, parser, and interpreter in sync. In many cases, domain-specific languages can instead be directly embedded into Prolog by providing appropriate operator definitions. In addition, we propose syntactic extensions for Prolog to expand its expressiveness, for instance to state logic formulas with their connectives verbatim. This allows to use all tools that were originally written for Prolog, for instance code linters and editors with syntax highlighting. We present the package library(plammar), a standard-compliant parser for Prolog source code, written in Prolog. It is able to automatically infer from example sentences the required operator definitions with their classes and precedences as well as the required Prolog language extensions. As a result, we can automatically answer the question: Is it possible to model these example sentences as valid Prolog clauses, and how? We discuss and apply the two approaches to internal and external integrations for several domain-specific languages, namely the extended Backus-Naur form, GraphQL, XPath, and a controlled natural language to represent expert rules in if-then form. 
The created toolchain with library(dcg4pt) and library(plammar) yields new application opportunities for static Prolog source code analysis, which we also present.}, subject = {PROLOG }, language = {en} } @phdthesis{Schloer2022, author = {Schl{\"o}r, Daniel}, title = {Detecting Anomalies in Transaction Data}, doi = {10.25972/OPUS-29856}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-298569}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2022}, abstract = {Detecting anomalies in transaction data is an important task with a high potential to avoid financial loss due to irregularities deliberately or inadvertently carried out, such as credit card fraud, occupational fraud in companies or ordering and accounting errors. With ongoing digitization of our world, data-driven approaches, including machine learning, can draw benefit from data with less manual effort and feature engineering. A large variety of machine learning-based anomaly detection methods approach this by learning a precise model of normality from which anomalies can be distinguished. Modeling normality in transactional data, however, requires to capture distributions and dependencies within the data precisely with special attention to numerical dependencies such as quantities, prices or amounts. To implicitly model numerical dependencies, Neural Arithmetic Logic Units have been proposed as neural architecture. In practice, however, these have stability and precision issues. Therefore, we first develop an improved neural network architecture, iNALU, which is designed to better model numerical dependencies as found in transaction data. We compare this architecture to the previous approach and show in several experiments of varying complexity that our novel architecture provides better precision and stability. We integrate this architecture into two generative neural network models adapted for transaction data and investigate how well normal behavior is modeled. 
We show that both architectures can successfully model normal transaction data, with our neural architecture improving generative performance for one model. Since categorical and numerical variables are common in transaction data, but many machine learning methods only process numerical representations, we explore different representation learning techniques to transform categorical transaction data into dense numerical vectors. We extend this approach by proposing an outlier-aware discretization, thus incorporating numerical attributes into the computation of categorical embeddings, and investigate latent spaces, as well as quantitative performance for anomaly detection. Next, we evaluate different scenarios for anomaly detection on transaction data. We extend our iNALU architecture to a neural layer that can model both numerical and non-numerical dependencies and evaluate it in a supervised and one-class setting. We investigate the stability and generalizability of our approach and show that it outperforms a variety of models in the balanced supervised setting and performs comparably in the one-class setting. Finally, we evaluate three approaches to using a generative model as an anomaly detector and compare the anomaly detection performance.}, subject = {Anomalieerkennung}, language = {en} } @phdthesis{Stauffert2022, author = {Stauffert, Jan-Philipp}, title = {Temporal Confounding Effects in Virtual and Extended Reality Systems}, doi = {10.25972/OPUS-29060}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-290609}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2022}, abstract = {Latency is an inherent problem of computing systems. Each computation takes time until the result is available. Virtual reality systems use elaborated computer resources to create virtual experiences. The latency of those systems is often ignored or assumed as small enough to provide a good experience. 
This cumulative thesis is comprised of published peer-reviewed research papers exploring the behaviour and effects of latency. Contrary to the common description of time invariant latency, latency is shown to fluctuate. Few other researchers have looked into this time variant behaviour. This thesis explores time variant latency with a focus on randomly occurring latency spikes. Latency spikes are observed both for small algorithms and as end-to-end latency in complete virtual reality systems. Most latency measurements gather close to the mean latency with potentially multiple smaller clusters of larger latency values and rare extreme outliers. The latency behaviour differs for different implementations of an algorithm. Operating system schedulers and programming language environments such as garbage collectors contribute to the overall latency behaviour. The thesis demonstrates these influences on the example of different implementations of message passing. The plethora of latency sources results in an unpredictable latency behaviour. Measuring and reporting it in scientific experiments is important. This thesis describes established approaches to measuring latency and proposes an enhanced setup to gather detailed information. The thesis proposes to dissect the measured data with a stacked z-outlier-test to separate the clusters of latency measurements for better reporting. Latency in virtual reality applications can degrade the experience in multiple ways. The thesis focuses on cybersickness as a major detrimental effect. An approach to simulate time variant latency is proposed to make latency available as an independent variable in experiments to understand latency's effects. An experiment with modified latency shows that latency spikes can contribute to cybersickness.
A review of related research shows that different time invariant latency behaviour also contributes to cybersickness.}, subject = {Virtuelle Realit{\"a}t}, language = {en} } @phdthesis{Dorin2022, author = {Dorin, Michael}, title = {The Relationship Between Software Complicacy and Software Reliability}, doi = {10.25972/OPUS-28308}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-283085}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2022}, abstract = {An enduring engineering problem is the creation of unreliable software leading to unreliable systems. One reason for this is source code is written in a complicated manner making it too hard for humans to review and understand. Complicated code leads to other issues beyond dependability, such as expanded development efforts and ongoing difficulties with maintenance, ultimately costing developers and users more money. There are many ideas regarding where blame lies in the creation of buggy and unreliable systems. One prevalent idea is the selected life cycle model is to blame. The oft-maligned "waterfall" life cycle model is a particularly popular recipient of blame. In response, many organizations changed their life cycle model in hopes of addressing these issues. Agile life cycle models have become very popular, and they promote communication between team members and end users. In theory, this communication leads to fewer misunderstandings and should lead to less complicated and more reliable code. Changing the life cycle model can indeed address communications issues, which can resolve many problems with understanding requirements. However, most life cycle models do not specifically address coding practices or software architecture. Since lifecycle models do not address the structure of the code, they are often ineffective at addressing problems related to code complicacy.
This dissertation answers several research questions concerning software complicacy, beginning with an investigation of traditional metrics and static analysis to evaluate their usefulness as measurement tools. This dissertation also establishes a new concept in applied linguistics by creating a measurement of software complicacy based on linguistic economy. Linguistic economy describes the efficiencies of speech, and this thesis shows the applicability of linguistic economy to software. Embedded in each topic is a discussion of the ramifications of overly complicated software, including the relationship of complicacy to software faults. Image recognition using machine learning is also investigated as a potential method of identifying problematic source code. The central part of the work focuses on analyzing the source code of hundreds of different projects from different areas. A static analysis was performed on the source code of each project, and traditional software metrics were calculated. Programs were also analyzed using techniques developed by linguists to measure expression and statement complicacy and identifier complicacy. Professional software engineers were also directly surveyed to understand mainstream perspectives. This work shows it is possible to use traditional metrics as indicators of potential project bugginess. This work also discovered it is possible to use image recognition to identify problematic pieces of source code. Finally, this work discovered it is possible to use linguistic methods to determine which statements and expressions are least desirable and more complicated for programmers. This work's principal conclusion is that there are multiple ways to discover traits indicating a project or a piece of source code has characteristics of being buggy. Traditional metrics and static analysis can be used to gain some understanding of software complicacy and bugginess potential.
Linguistic economy demonstrates a new tool for measuring software complicacy, and machine learning can predict where bugs may lie in source code. The significant implication of this work is developers can recognize when a project is becoming buggy and take practical steps to avoid creating buggy projects.}, subject = {Softwareentwicklung}, language = {en} } @phdthesis{Freimann2022, author = {Freimann, Andreas}, title = {Efficient Communication in Networks of Small Low Earth Orbit Satellites and Ground Stations}, isbn = {978-3-945459-41-6}, doi = {10.25972/OPUS-28052}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-280521}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2022}, abstract = {With the miniaturization of satellites a fundamental change took place in the space industry. Instead of single big monolithic satellites nowadays more and more systems are envisaged consisting of a number of small satellites to form cooperating systems in space. The lower costs for development and launch as well as the spatial distribution of these systems enable the implementation of new scientific missions and commercial services. With this paradigm shift new challenges constantly emerge for satellite developers, particularly in the area of wireless communication systems and network protocols. Satellites in low Earth orbits and ground stations form dynamic space-terrestrial networks. The characteristics of these networks differ fundamentally from those of other networks. The resulting challenges with regard to communication system design, system analysis, packet forwarding, routing and medium access control as well as challenges concerning the reliability and efficiency of wireless communication links are addressed in this thesis. The physical modeling of space-terrestrial networks is addressed by analyzing existing satellite systems and communication devices, by evaluating measurements and by implementing a simulator for space-terrestrial networks. 
The resulting system and channel models were used as a basis for the prediction of the dynamic network topologies, link properties and channel interference. These predictions allowed for the implementation of efficient routing and medium access control schemes for space-terrestrial networks. Further, the implementation and utilization of software-defined ground stations is addressed, and a data upload scheme for the operation of small satellite formations is presented.}, subject = {Satellitenfunk}, language = {en} } @phdthesis{Schmitt2022, author = {Schmitt, Norbert}, title = {Measurement, Modeling, and Emulation of Power Consumption of Distributed Systems}, doi = {10.25972/OPUS-27658}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-276582}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2022}, abstract = {Today's cloud data centers consume an enormous amount of energy, and energy consumption will rise in the future. An estimate from 2012 found that data centers consume about 30 billion watts of power, resulting in about 263TWh of energy usage per year. The energy consumption will rise to 1929TWh until 2030. This projected rise in energy demand is fueled by a growing number of services deployed in the cloud. 50\% of enterprise workloads have been migrated to the cloud in the last decade so far. Additionally, an increasing number of devices are using the cloud to provide functionalities and enable data centers to grow. Estimates say more than 75 billion IoT devices will be in use by 2025. The growing energy demand also increases the amount of CO2 emissions. Assuming a CO2-intensity of 200g CO2 per kWh will get us close to 227 billion tons of CO2. This emission is more than the emissions of all energy-producing power plants in Germany in 2020. However, data centers consume energy because they respond to service requests that are fulfilled through computing resources. 
Hence, it is not the users and devices that consume the energy in the data center but the software that controls the hardware. While the hardware is physically consuming energy, it is not always responsible for wasting energy. The software itself plays a vital role in reducing the energy consumption and CO2 emissions of data centers. The scenario of our thesis is, therefore, focused on software development. Nevertheless, we must first show developers that software contributes to energy consumption by providing evidence of its influence. The second step is to provide methods to assess an application's power consumption during different phases of the development process and to allow modern DevOps and agile development methods. We, therefore, need to have an automatic selection of system-level energy-consumption models that can accommodate rapid changes in the source code and application-level models allowing developers to locate power-consuming software parts for constant improvements. Afterward, we need emulation to assess the energy efficiency before the actual deployment.}, subject = {Leistungsbedarf}, language = {en} } @phdthesis{Runge2022, author = {Runge, Isabel Madeleine}, title = {Network Coding for Reliable Data Dissemination in Wireless Sensor Networks}, doi = {10.25972/OPUS-27224}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-272245}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2022}, abstract = {The application of Wireless Sensor Networks (WSNs) with a large number of tiny, cost-efficient, battery-powered sensor nodes that are able to communicate directly with each other poses many challenges. Due to the large number of communicating objects and despite a used CSMA/CA MAC protocol, there may be many signal collisions. In addition, WSNs frequently operate under harsh conditions and nodes are often prone to failure, for example, due to a depleted battery or unreliable components. Thus, nodes or even large parts of the network can fail. 
These aspects lead to reliable data dissemination and data storage being a key issue. Therefore, these issues are addressed herein while keeping latency low, throughput high, and energy consumption reduced. Furthermore, simplicity as well as robustness to changes in conditions are essential here. In order to achieve these aims, a certain amount of redundancy has to be included. This can be realized, for example, by using network coding. Existing approaches, however, often only perform well under certain conditions or for a specific scenario, have to perform a time-consuming initialization, require complex calculations, or do not provide the possibility of early decoding. Therefore, we developed a network coding procedure called Broadcast Growth Codes (BCGC) for reliable data dissemination, which performs well under a broad range of diverse conditions. These can be a high probability of signal collisions, any degree of nodes' mobility, a large number of nodes, or occurring node failures, for example. BCGC do not require complex initialization and only use simple XOR operations for encoding and decoding. Furthermore, decoding can be started as soon as a first packet/codeword has been received. Evaluations by using an in-house implemented network simulator as well as a real-world testbed showed that BCGC enhance reliability and enable to retrieve data dependably despite an unreliable network. 
In terms of latency, throughput, and energy consumption, depending on the conditions and the procedure being compared, BCGC can achieve the same performance or even outperform existing procedures significantly while being robust to changes in conditions and allowing low complexity of the nodes as well as early decoding.}, subject = {Zuverl{\"a}ssigkeit}, language = {en} } @phdthesis{Lesch2022, author = {Lesch, Veronika}, title = {Self-Aware Optimization of Cyber-Physical Systems in Intelligent Transportation and Logistics Systems}, doi = {10.25972/OPUS-27228}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-272285}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2022}, abstract = {In today's world, circumstances, processes, and requirements for systems in general---in this thesis a special focus is given to the context of Cyber-Physical Systems (CPS)---are becoming increasingly complex and dynamic. In order to operate properly in such dynamic environments, systems must adapt to dynamic changes, which has led to the research area of Self-Adaptive Systems (SAS). These systems can deal with changes in their environment and the system itself. In our daily lives, we come into contact with many different self-adaptive systems that are designed to support and improve our way of life. In this work we focus on the two domains Intelligent Transportation Systems (ITS) and logistics as both domains provide complex and adaptable use cases to prototypically apply the contributions of this thesis. However, the contributions are not limited to these areas and can be generalized also to other domains such as the general area of CPS and Internet of Things including smart grids or even intelligent computer networks. In ITS, real-time traffic control is an example adaptive system that monitors the environment, analyzes observations, and plans and executes adaptation actions. Another example is platooning, which is the ability of vehicles to drive with close inter-vehicle distances.
This technology enables an increase in road throughput and safety, which directly addresses the increased infrastructure needs due to increased traffic on the roads. In logistics, the Vehicle Routing Problem (VRP) deals with the planning of road freight transport tours. To cope with the ever-increasing transport volume due to the rise of just-in-time production and online shopping, efficient and correct route planning for transports is important. Further, warehouses play a central role in any company's supply chain and contribute to the logistical success. The processes of storage assignment and order picking are the two main tasks in mezzanine warehouses highly affected by a dynamic environment. Usually, optimization algorithms are applied to find solutions in reasonable computation time. SASes can help address these dynamics by allowing systems to deal with changing demands and constraints. For the application of SASes in the two areas ITS and logistics, the definition of adaptation planning strategies is the key success factor. A wide range of adaptation planning strategies for different domains can be found in the literature, and the operator must select the most promising strategy for the problem at hand. However, the No-Free-Lunch theorem states that the performance of one strategy is not necessarily transferable to other problems. Accordingly, the algorithm selection problem, first defined in 1976, aims to find the best performing algorithm for the current problem. Since then, this problem has been explored more and more, and the machine learning community, for example, considers it a learning problem. The ideas surrounding the algorithm selection problem have been applied in various use cases, but little research has been done to generalize the approaches. Moreover, especially in the field of SASes, the selection of the most appropriate strategy depends on the current situation of the system. 
Techniques for identifying the situation of a system can be found in the literature, such as the use of rules or clustering techniques. This knowledge can then be used to improve the algorithm selection, or in the scope of this thesis, to improve the selection of adaptation planning strategies. In addition, knowledge about the current situation and the performance of strategies in similar previously observed situations provides another opportunity for improvements. This ongoing learning and reasoning about the system and its environment is found in the research area Self-Aware Computing (SeAC). In this thesis, we explore common characteristics of adaptation planning strategies in the domain of ITS and logistics presenting a self-aware optimization framework for adaptation planning strategies. We consider platooning coordination strategies from ITS and optimization techniques from logistics as adaptation planning strategies that can be exchanged during operation to better reflect the current situation. Further, we propose to integrate fairness and uncertainty handling mechanisms directly into the adaptation planning strategies. We then examine the complex structure of the logistics use cases VRP and mezzanine warehouses and identify their systems-of-systems structure. We propose a two-stage approach for vertical or nested systems and propose to consider the impact of intertwining horizontal or coexisting systems. More specifically, we summarize the six main contributions of this thesis as follows: First, we analyze specific characteristics of adaptation planning strategies with a particular focus on ITS and logistics. We use platooning and route planning in highly dynamic environments as representatives of ITS and we use the rich Vehicle Routing Problem (rVRP) and mezzanine warehouses as representatives of the logistics domain. 
Using these case studies, we derive the need for situation-aware optimization of adaptation planning strategies and argue that fairness is an important consideration when applying these strategies in ITS. In logistics, we discuss that these complex systems can be considered as systems-of-systems and this structure affects each subsystem. Hence, we argue that the consideration of these characteristics is a crucial factor for the success of the system. Second, we design a self-aware optimization framework for adaptation planning strategies. The optimization framework is abstracted into a third layer above the application and its adaptation planning system, which allows the concept to be applied to a diverse set of use cases. Further, the Domain Data Model (DDM) used to configure the framework enables the operator to easily apply it by defining the available adaptation planning strategies, parameters to be optimized, and performance measures. The framework consists of four components: (i) Coordination, (ii) Situation Detection, (iii) Strategy Selection, and (iv) Parameter Optimization. While the coordination component receives observations and triggers the other components, the situation detection applies rules or clustering techniques to identify the current situation. The strategy selection uses this knowledge to select the most promising strategy for the current situation, and the parameter optimization applies optimization algorithms to tune the parameters of the strategy. Moreover, we apply the concepts of the SeAC domain and integrate learning and reasoning processes to enable ongoing advancement of the framework. We evaluate our framework using the platooning use case and consider platooning coordination strategies as the adaptation planning strategies to be selected and optimized. Our evaluation shows that the framework is able to select the most appropriate adaptation strategy and learn the situational behavior of the system. 
Third, we argue that fairness aspects, previously identified as an important characteristic of adaptation planning strategies, are best addressed directly as part of the strategies. Hence, focusing on platooning as an example use case, we propose a set of fairness mechanisms to balance positive and negative effects of platooning among all participants in a platoon. We design six vehicle sequence rotation mechanisms that continuously change the leader position among all participants, as this is the position with the least positive effects. We analyze these strategies on roads of different sizes and with different traffic volumes, and show that these mechanisms should also be chosen wisely. Fourth, we address the uncertainty characteristic of adaptation planning strategies and propose a methodology to account for uncertainty and also address it directly as part of the adaptation planning strategies. We address the use case of fueling planning along a route associated with highly dynamic fuel prices and develop six utility functions that account for different aspects of route planning. Further, we incorporate uncertainty measures for dynamic fuel prices by adding penalties for longer travel times or greater distance to the next gas station. Through this approach, we are able to reduce the uncertainty at planning time and obtain a more robust route planning. Fifth, we analyze optimization of nested systems-of-systems for the use case rVRP. Before proposing an approach to deal with the complex structure of the problem, we analyze important constraints and objectives that need to be considered when formulating a real-world rVRP. Then, we propose a two-stage workflow to optimize both systems individually, flexibly, and interchangeably. We apply Genetic Algorithms and Ant Colony Optimization (ACO) to both nested systems and compare the performance of our workflow with state-of-the-art optimization algorithms for this use case. 
In our evaluation, we show that the proposed two-stage workflow is able to handle the complex structure of the problem and consider all real-world constraints and objectives. Finally, we study coexisting systems-of-systems by optimizing typical processes in mezzanine warehouses. We first define which ergonomic and economic constraints and objectives must be considered when addressing a real-world problem. Then, we analyze the interrelatedness of the storage assignment and order picking problems; we identify opportunities to design optimization approaches that optimize all objectives and aim for a good overall system performance, taking into account the interdependence of both systems. We use the NSGA-II for storage assignment and Ant Colony Optimization (ACO) for order picking and adapt them to the specific requirements of horizontal systems-of-systems. In our evaluation, we compare our approaches to state-of-the-art approaches in mezzanine warehouses and show that our proposed approaches increase the system performance. Our proposed approaches provide important contributions to both academic research and practical applications. To the best of our knowledge, we are the first to design a self-aware optimization framework for adaptation planning strategies that integrates situation-awareness, algorithm selection, parameter tuning, as well as learning and reasoning. Our evaluation of platooning coordination shows promising results for the application of the framework. Moreover, our proposed strategies to compensate for negative effects of platooning represent an important milestone, which could lead to higher acceptance of this technology in society and support its future adoption in the real world. The proposed methodology and utility functions that address uncertainty are an important step to improving the capabilities of SAS in an increasingly turbulent environment. 
Similarly, our contributions to systems-of-systems optimization are major contributions to the state of logistics and systems-of-systems research. Finally, we select real-world use cases for the application of our approaches and cooperate with industrial partners, which highlights the practical relevance of our contributions. The reduction of manual effort and required expert knowledge in our self-aware optimization framework is a milestone in bridging the gap between academia and practice. One of our partners integrated the two-stage approach to tackling the rVRP into its software system, improving both time to solution and solution quality. In conclusion, the contributions of this thesis have spawned several research projects such as a long-term industrial project on optimizing tours and routes in parcel delivery funded by Bayerisches Verbundforschungsprogramm (BayVFP) - Digitalisierung and further collaborations, opening up many promising avenues for future research.}, subject = {Mehrkriterielle Optimierung}, language = {en} } @phdthesis{Geissler2022, author = {Gei{\ss}ler, Stefan}, title = {Performance Evaluation of Next-Generation Data Plane Architectures and their Components}, issn = {1432-8801}, doi = {10.25972/OPUS-26015}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-260157}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2022}, abstract = {In this doctoral thesis we cover the performance evaluation of next generation data plane architectures, comprised of complex software as well as programmable hardware components that allow fine granular configuration. In the scope of the thesis we propose mechanisms to monitor the performance of singular components and model key performance indicators of software based packet processing solutions. We present novel approaches towards network abstraction that allow the integration of heterogeneous data plane technologies into a singular network while maintaining total transparency between control and data plane.
Finally, we investigate a full, complex system consisting of multiple software-based solutions and perform a detailed performance analysis. We employ simulative approaches to investigate overload control mechanisms that allow efficient operation under adverse conditions. The contributions of this work build the foundation for future research in the areas of network softwarization and network function virtualization.}, subject = {Leistungsbewertung}, language = {en} } @phdthesis{Grohmann2022, author = {Grohmann, Johannes Sebastian}, title = {Model Learning for Performance Prediction of Cloud-native Microservice Applications}, doi = {10.25972/OPUS-26160}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-261608}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2022}, abstract = {One consequence of the recent coronavirus pandemic is increased demand and use of online services around the globe. At the same time, performance requirements for modern technologies are becoming more stringent as users become accustomed to higher standards. These increased performance and availability requirements, coupled with the unpredictable usage growth, are driving an increasing proportion of applications to run on public cloud platforms as they promise better scalability and reliability. With data centers already responsible for about one percent of the world's power consumption, optimizing resource usage is of paramount importance. Simultaneously, meeting the increasing and changing resource and performance requirements is only possible by optimizing resource management without introducing additional overhead. This requires the research and development of new modeling approaches to understand the behavior of running applications with minimal information. However, the emergence of modern software paradigms makes it increasingly difficult to derive such models and renders previous performance modeling techniques infeasible.
Modern cloud applications are often deployed as a collection of fine-grained and interconnected components called microservices. Microservice architectures offer massive benefits but also have broad implications for the performance characteristics of the respective systems. In addition, the microservices paradigm is typically paired with a DevOps culture, resulting in frequent application and deployment changes. Such applications are often referred to as cloud-native applications. In summary, the increasing use of ever-changing cloud-hosted microservice applications introduces a number of unique challenges for modeling the performance of modern applications. These include the amount, type, and structure of monitoring data, frequent behavioral changes, or infrastructure variabilities. This violates common assumptions of the state of the art and opens a research gap for our work. In this thesis, we present five techniques for automated learning of performance models for cloud-native software systems. We achieve this by combining machine learning with traditional performance modeling techniques. Unlike previous work, our focus is on cloud-hosted and continuously evolving microservice architectures, so-called cloud-native applications. Therefore, our contributions aim to solve the above challenges to deliver automated performance models with minimal computational overhead and no manual intervention. Depending on the cloud computing model, privacy agreements, or monitoring capabilities of each platform, we identify different scenarios where performance modeling, prediction, and optimization techniques can provide great benefits. Specifically, the contributions of this thesis are as follows: Monitorless: Application-agnostic prediction of performance degradations. To manage application performance with only platform-level monitoring, we propose Monitorless, the first truly application-independent approach to detecting performance degradation. 
We use machine learning to bridge the gap between platform-level monitoring and application-specific measurements, eliminating the need for application-level monitoring. Monitorless creates a single and holistic resource saturation model that can be used for heterogeneous and untrained applications. Results show that Monitorless infers resource-based performance degradation with 97\% accuracy. Moreover, it can achieve similar performance to typical autoscaling solutions, despite using less monitoring information. SuanMing: Predicting performance degradation using tracing. We introduce SuanMing to mitigate performance issues before they impact the user experience. This contribution is applied in scenarios where tracing tools enable application-level monitoring. SuanMing predicts explainable causes of expected performance degradations and prevents performance degradations before they occur. Evaluation results show that SuanMing can predict and pinpoint future performance degradations with an accuracy of over 90\%. SARDE: Continuous and autonomous estimation of resource demands. We present SARDE to learn application models for highly variable application deployments. This contribution focuses on the continuous estimation of application resource demands, a key parameter of performance models. SARDE represents an autonomous ensemble estimation technique. It dynamically and continuously optimizes, selects, and executes an ensemble of approaches to estimate resource demands in response to changes in the application or its environment. Through continuous online adaptation, SARDE efficiently achieves an average resource demand estimation error of 15.96\% in our evaluation. DepIC: Learning parametric dependencies from monitoring data. DepIC utilizes feature selection techniques in combination with an ensemble regression approach to automatically identify and characterize parametric dependencies. 
Although parametric dependencies can massively improve the accuracy of performance models, DepIC is the first approach to automatically learn such parametric dependencies from passive monitoring data streams. Our evaluation shows that DepIC achieves 91.7\% precision in identifying dependencies and reduces the characterization prediction error by 30\% compared to the best individual approach. Baloo: Modeling the configuration space of databases. To study the impact of different configurations within distributed DBMSs, we introduce Baloo. Our last contribution models the configuration space of databases considering measurement variabilities in the cloud. More specifically, Baloo dynamically estimates the required benchmarking measurements and automatically builds a configuration space model of a given DBMS. Our evaluation of Baloo on a dataset consisting of 900 configuration points shows that the framework achieves a prediction error of less than 11\% while saving up to 80\% of the measurement effort. Although the contributions themselves are orthogonally aligned, taken together they provide a holistic approach to performance management of modern cloud-native microservice applications. Our contributions are a significant step forward as they specifically target novel and cloud-native software development and operation paradigms, surpassing the capabilities and limitations of previous approaches. In addition, the research presented in this paper also has a significant impact on the industry, as the contributions were developed in collaboration with research teams from Nokia Bell Labs, Huawei, and Google. 
Overall, our solutions open up new possibilities for managing and optimizing cloud applications and improve cost and energy efficiency.}, subject = {Cloud Computing}, language = {en} } @phdthesis{Gall2022, author = {Gall, Dominik}, title = {Increasing the effectiveness of human-computer interfaces for mental health interventions}, doi = {10.25972/OPUS-23012}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-230120}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2022}, abstract = {Human-computer interfaces have the potential to support mental health practitioners in alleviating mental distress. Adaption of this technology in practice is, however, slow. We provide means to extend the design space of human-computer interfaces for mitigating mental distress. To this end, we suggest three complementary approaches: using presentation technology, using virtual environments, and using communication technology to facilitate social interaction. We provide new evidence that elementary aspects of presentation technology affect the emotional processing of virtual stimuli, that perception of our environment affects the way we assess our environment, and that communication technologies affect social bonding between users. By showing how interfaces modify emotional reactions and facilitate social interaction, we provide converging evidence that human-computer interfaces can help alleviate mental distress. 
These findings may advance the goal of adapting technological means to the requirements of mental health practitioners.}, subject = {Mensch-Maschine-Kommunikation}, language = {en} } @phdthesis{Zuefle2022, author = {Z{\"u}fle, Marwin Otto}, title = {Proactive Critical Event Prediction based on Monitoring Data with Focus on Technical Systems}, doi = {10.25972/OPUS-25575}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-255757}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2022}, abstract = {The importance of proactive and timely prediction of critical events is steadily increasing, whether in the manufacturing industry or in private life. In the past, machines in the manufacturing industry were often maintained based on a regular schedule or threshold violations, which is no longer competitive as it causes unnecessary costs and downtime. In contrast, the predictions of critical events in everyday life are often much more concealed and hardly noticeable to the private individual, unless the critical event occurs. For instance, our electricity provider has to ensure that we, as end users, are always supplied with sufficient electricity, or our favorite streaming service has to guarantee that we can watch our favorite series without interruptions. For this purpose, they have to constantly analyze what the current situation is, how it will develop in the near future, and how they have to react in order to cope with future conditions without causing power outages or video stalling. In order to analyze the performance of a system, monitoring mechanisms are often integrated to observe characteristics that describe the workload and the state of the system and its environment. Reactive systems typically employ thresholds, utility functions, or models to determine the current state of the system. However, such reactive systems cannot proactively estimate future events, but only as they occur. 
In the case of critical events, reactive determination of the current system state is futile, whereas a proactive system could have predicted this event in advance and enabled timely countermeasures. To achieve proactivity, the system requires estimates of future system states. Given the gap between design time and runtime, it is typically not possible to use expert knowledge to a priori model all situations a system might encounter at runtime. Therefore, prediction methods must be integrated into the system. Depending on the available monitoring data and the complexity of the prediction task, either time series forecasting in combination with thresholding or more sophisticated machine and deep learning models have to be trained. Although numerous forecasting methods have been proposed in the literature, these methods have their advantages and disadvantages depending on the characteristics of the time series under consideration. Therefore, expert knowledge is required to decide which forecasting method to choose. However, since the time series observed at runtime cannot be known at design time, such expert knowledge cannot be implemented in the system. In addition to selecting an appropriate forecasting method, several time series preprocessing steps are required to achieve satisfactory forecasting accuracy. In the literature, this preprocessing is often done manually, which is not practical for autonomous computing systems, such as Self-Aware Computing Systems. Several approaches have also been presented in the literature for predicting critical events based on multivariate monitoring data using machine and deep learning. However, these approaches are typically highly domain-specific, such as financial failures, bearing failures, or product failures. Therefore, they require in-depth expert knowledge. For this reason, these approaches cannot be fully automated and are not transferable to other use cases. 
Thus, the literature lacks generalizable end-to-end workflows for modeling, detecting, and predicting failures that require only little expert knowledge. To overcome these shortcomings, this thesis presents a system model for meta-self-aware prediction of critical events based on the LRA-M loop of Self-Aware Computing Systems. Building upon this system model, this thesis provides six further contributions to critical event prediction. While the first two contributions address critical event prediction based on univariate data via time series forecasting, the three subsequent contributions address critical event prediction for multivariate monitoring data using machine and deep learning algorithms. Finally, the last contribution addresses the update procedure of the system model. Specifically, the seven main contributions of this thesis can be summarized as follows: First, we present a system model for meta self-aware prediction of critical events. To handle both univariate and multivariate monitoring data, it offers univariate time series forecasting for use cases where a single observed variable is representative of the state of the system, and machine learning algorithms combined with various preprocessing techniques for use cases where a large number of variables are observed to characterize the system's state. However, the two different modeling alternatives are not disjoint, as univariate time series forecasts can also be included to estimate future monitoring data as additional input to the machine learning models. Finally, a feedback loop is incorporated to monitor the achieved prediction quality and trigger model updates. We propose a novel hybrid time series forecasting method for univariate, seasonal time series, called Telescope. To this end, Telescope automatically preprocesses the time series, performs a kind of divide-and-conquer technique to split the time series into multiple components, and derives additional categorical information. 
It then forecasts the components and categorical information separately using a specific state-of-the-art method for each component. Finally, Telescope recombines the individual predictions. As Telescope performs both preprocessing and forecasting automatically, it represents a complete end-to-end approach to univariate seasonal time series forecasting. Experimental results show that Telescope achieves enhanced forecast accuracy, more reliable forecasts, and a substantial speedup. Furthermore, we apply Telescope to the scenario of predicting critical events for virtual machine auto-scaling. Here, results show that Telescope considerably reduces the average response time and significantly reduces the number of service level objective violations. For the automatic selection of a suitable forecasting method, we introduce two frameworks for recommending forecasting methods. The first framework extracts various time series characteristics to learn the relationship between them and forecast accuracy. In contrast, the other framework divides the historical observations into internal training and validation parts to estimate the most appropriate forecasting method. Moreover, this framework also includes time series preprocessing steps. Comparisons between the proposed forecasting method recommendation frameworks and the individual state-of-the-art forecasting methods and the state-of-the-art forecasting method recommendation approach show that the proposed frameworks considerably improve the forecast accuracy. With regard to multivariate monitoring data, we first present an end-to-end workflow to detect critical events in technical systems in the form of anomalous machine states. The end-to-end design includes raw data processing, phase segmentation, data resampling, feature extraction, and machine tool anomaly detection. 
In addition, the workflow does not rely on profound domain knowledge or specific monitoring variables, but merely assumes standard machine monitoring data. We evaluate the end-to-end workflow using data from a real CNC machine. The results indicate that conventional frequency analysis does not detect the critical machine conditions well, while our workflow detects the critical events very well with an F1-score of almost 91\%. To predict critical events rather than merely detecting them, we compare different modeling alternatives for critical event prediction in the use case of time-to-failure prediction of hard disk drives. Given that failure records are typically significantly less frequent than instances representing the normal state, we employ different oversampling strategies. Next, we compare the prediction quality of binary class modeling with downscaled multi-class modeling. Furthermore, we integrate univariate time series forecasting into the feature generation process to estimate future monitoring data. Finally, we model the time-to-failure using not only classification models but also regression models. The results suggest that multi-class modeling provides the overall best prediction quality with respect to practical requirements. In addition, we prove that forecasting the features of the prediction model significantly improves the critical event prediction quality. We propose an end-to-end workflow for predicting critical events of industrial machines. Again, this approach does not rely on expert knowledge except for the definition of monitoring data, and therefore represents a generalizable workflow for predicting critical events of industrial machines. The workflow includes feature extraction, feature handling, target class mapping, and model learning with integrated hyperparameter tuning via a grid-search technique. 
Drawing on the result of the previous contribution, the workflow models the time-to-failure prediction in terms of multiple classes, where we compare different labeling strategies for multi-class classification. The evaluation using real-world production data of an industrial press demonstrates that the workflow is capable of predicting six different time-to-failure windows with a macro F1-score of 90\%. When scaling the time-to-failure classes down to a binary prediction of critical events, the F1-score increases to above 98\%. Finally, we present four update triggers to assess when critical event prediction models should be re-trained during on-line application. Such re-training is required, for instance, due to concept drift. The update triggers introduced in this thesis take into account the elapsed time since the last update, the prediction quality achieved on the current test data, and the prediction quality achieved on the preceding test data. We compare the different update strategies with each other and with the static baseline model. The results demonstrate the necessity of model updates during on-line application and suggest that the update triggers that consider both the prediction quality of the current and preceding test data achieve the best trade-off between prediction quality and number of updates required. We are convinced that the contributions of this thesis constitute significant impulses for the academic research community as well as for practitioners. First of all, to the best of our knowledge, we are the first to propose a fully automated, end-to-end, hybrid, component-based forecasting method for seasonal time series that also includes time series preprocessing. Due to the combination of reliably high forecast accuracy and reliably low time-to-result, it offers many new opportunities in applications requiring accurate forecasts within a fixed time period in order to take timely countermeasures. 
In addition, the promising results of the forecasting method recommendation systems provide new opportunities to enhance forecasting performance for all types of time series, not just seasonal ones. Furthermore, we are the first to expose the deficiencies of the prior state-of-the-art forecasting method recommendation system. Concerning the contributions to critical event prediction based on multivariate monitoring data, we have already collaborated closely with industrial partners, which supports the practical relevance of the contributions of this thesis. The automated end-to-end design of the proposed workflows that do not demand profound domain or expert knowledge represents a milestone in bridging the gap between academic theory and industrial application. Finally, the workflow for predicting critical events in industrial machines is currently being operationalized in a real production system, underscoring the practical impact of this thesis.}, subject = {Prognose}, language = {en} } @phdthesis{Kryven2022, author = {Kryven, Myroslav}, title = {Optimizing Crossings in Circular-Arc Drawings and Circular Layouts}, isbn = {978-3-95826-174-7}, doi = {10.25972/WUP-978-3-95826-175-4}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-245960}, school = {Universit{\"a}t W{\"u}rzburg}, pages = {viii, 129}, year = {2022}, abstract = {A graph is an abstract network that represents a set of objects, called vertices, and relations between these objects, called edges. Graphs can model various networks. For example, a social network where the vertices correspond to users of the network and the edges represent relations between the users. To better see the structure of a graph it is helpful to visualize it. The research field of visualizing graphs is called Graph Drawing. A standard visualization is a node-link diagram in the Euclidean plane. 
In such a representation the vertices are drawn as points in the plane and edges are drawn as Jordan curves between every two vertices connected by an edge. Edge crossings decrease the readability of a drawing, therefore, Crossing Optimization is a fundamental problem in Graph Drawing. Graphs that can be drawn with few crossings are called beyond-planar graphs. The topic that deals with definition and analysis of beyond-planar graphs is called Beyond Planarity and it is an important and relatively new research area in Graph Drawing. In general, beyond-planar graphs possess drawings where edge crossings are restricted in some way. For example, the number of crossings may be bounded by a constant independent of the size of the graph. Crossings can also be restricted locally by, for example, restricting the number of crossings per edge, restricting the number of pairwise crossing edges, or bounding the crossing angle of two edges in the drawing from below. This PhD thesis defines and analyses beyond-planar graph classes that arise from such local restrictions on edge crossings.}, subject = {Graphenzeichnen}, language = {en} } @phdthesis{Dombrovski2022, author = {Dombrovski, Veaceslav}, title = {Software Framework to Support Operations of Nanosatellite Formations}, isbn = {978-3-945459-38-6}, doi = {10.25972/OPUS-24931}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-249314}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2022}, abstract = {Since the first CubeSat launch in 2003, the hardware and software complexity of the nanosatellites was continuously increasing. To keep up with the continuously increasing mission complexity and to retain the primary advantages of a CubeSat mission, a new approach for the overall space and ground software architecture and protocol configuration is elaborated in this work. 
The aim of this thesis is to propose a uniform software and protocol architecture as a basis for software development, test, simulation and operation of multiple pico-/nanosatellites based on ultra-low power components. In contrast to single-CubeSat missions, current and upcoming nanosatellite formation missions require faster and more straightforward development, pre-flight testing and calibration procedures as well as simultaneous operation of multiple satellites. A dynamic and decentral Compass mission network was established in multiple active CubeSat missions, consisting of uniformly accessible nodes. Compass middleware was elaborated to unify the communication and functional interfaces between all involved mission-related software and hardware components. All systems can access each other via dynamic routes to perform service-based M2M communication. With the proposed model-based communication approach, all states, abilities and functionalities of a system are accessed in a uniform way. The Tiny scripting language was designed to allow dynamic code execution on ultra-low power components as a basis for constraint-based in-orbit scheduler and experiment execution. The implemented Compass Operations front-end enables far-reaching monitoring and control capabilities of all ground and space systems. Its integrated constraint-based operations task scheduler allows the recording of complex satellite operations, which are conducted automatically during the overpasses. 
The outcome of this thesis became an enabling technology for UWE-3, UWE-4 and NetSat CubeSat missions.}, subject = {Kleinsatellit}, language = {en} } @phdthesis{Flederer2021, author = {Flederer, Frank}, title = {CORFU - An Extended Model-Driven Framework for Small Satellite Software with Code Feedback}, doi = {10.25972/OPUS-24981}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-249817}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2021}, abstract = {Corfu is a framework for satellite software, not only for the onboard part but also for the ground. Developing software with Corfu follows an iterative model-driven approach. The basis of the process is an engineering model. Engineers formally describe the basic structure of the onboard software in configuration files, which build the engineering model. In the first step, Corfu verifies the model at different levels. Not only syntactically and semantically but also on a higher level such as the scheduling. Based on the model, Corfu generates a software scaffold, which follows an application-centric approach. Software images onboard consist of a list of applications connected through communication channels called topics. Corfu's generic and generated code covers this fundamental communication, telecommand, and telemetry handling. All users have to do is inherit from a generated class and implement the behavior in overridden methods. For each application, the generator creates an abstract class with pure virtual methods. Those methods are callback functions, e.g., for handling telecommands or executing code in threads. However, from the model, one cannot foresee the software implementation by users. Therefore, as an innovation compared to other frameworks, Corfu introduces feedback from the user code back to the model. In this way, we extend the engineering model with information about functions/methods, their invocations, their stack usage, and information about events and telemetry emission. 
Indeed, it would be possible to add further information extraction for additional use cases. We extract the information in two ways: assembly and source code analysis. The assembly analysis collects information about the stack usage of functions and methods. On the one side, Corfu uses the gathered information to accomplish additional verification steps, e.g., checking if stack usages exceed stack sizes of threads. On the other side, we use the gathered information to improve the performance of onboard software. In a use case, we show how the compiled binary and bandwidth towards the ground is reducible by exploiting source code information at run-time.}, subject = {FRAMEWORK}, language = {en} } @phdthesis{Schiemenz2021, author = {Schiemenz, Fabian}, title = {Covariance and Uncertainty Realism for Low Earth Orbiting Satellites via Quantification of Dominant Force Model Uncertainties}, doi = {10.25972/OPUS-24947}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-249474}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2021}, abstract = {The safety of future spaceflight depends on space surveillance and space traffic management, as the density of objects in Earth orbit has reached a level that requires collision avoidance maneuvers to be performed on a regular basis to avoid a mission or, in the context of human space flight, life-endangering threat. Driven by enhanced sensor systems capable of detecting centimeter-sized debris, megaconstellations and satellite miniaturization, the space debris problem has revealed many parallels to the plastic waste in our oceans, however with much less visibility to the eye. Future catalog sizes are expected to increase drastically, making it even more important to detect potentially dangerous encounters as early as possible. 
Due to the limited number of monitoring sensors, continuous observation of all objects is impossible, resulting in the need to predict the orbital paths and their uncertainty via models to perform collision risk assessment and space object catalog maintenance. For many years the uncertainty models used for orbit determination neglected any uncertainty in the astrodynamic force models, thereby implicitly assuming them to be flawless descriptions of the true space environment. This assumption is known to result in overly optimistic uncertainty estimates, which in turn complicate collision risk analysis. The keynote of this doctoral thesis is to establish uncertainty realism for low Earth orbiting satellites via a physically connected quantification of the dominant force model uncertainties, particularly multiple sources of atmospheric density uncertainty and orbital gravity uncertainty. The resulting process noise models are subsequently integrated into classical and state of the art orbit determination algorithms. Their positive impact is demonstrated via numerical orbit determination simulations and a collision risk assessment study using all non-restricted objects in the official United States space catalogs. 
It is shown that the consideration of atmospheric density uncertainty and gravity uncertainty significantly improves the quality of the orbit determination and thus makes a contribution to future spaceflight safety by increasing the reliability of the uncertainty estimates used for collision risk assessment.}, subject = {Space Debris}, language = {en} } @phdthesis{Poehner2021, author = {P{\"o}hner, Nicolai}, title = {Educational robotics competitions as out-of-school learning setting for STEM education: An empirical study on students' learning of problem solving skills through participation in the World Robot Olympiad}, doi = {10.25972/OPUS-24317}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-243179}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2021}, abstract = {Educational robotics is an innovative approach to teaching and learning a variety of different concepts and skills as well as motivating students in the field of Science, Technology, Engineering, and Mathematics (STEM) education. This especially applies to educational robotics competitions such as, for example, the FIRST LEGO League, the RoboCup Junior, or the World Robot Olympiad as out-of-school and goal-oriented approach to educational robotics. These competitions have gained greatly in popularity in recent years and thousands of students participate in these competitions worldwide each year. Moreover, the corresponding technology became more accessible for teachers and students to use it in their classrooms and has arguably a high potential to impact the nature of science education at all levels. One skill, which is said to be benefitting from educational robotics, is problem solving. This thesis understands problem solving skills as engineering design skills (in contrast to scientific inquiry). 
Problem solving skills count as important skills as demanded by industry leaders and policy makers in the context of 21st century skills, which are relevant for students to be well-prepared for their future working life in today's world, shaped by an ongoing process of automation, globalization, and digitalization. The overall aim of this thesis is to try to answer the question if educational robotics competitions such as the World Robot Olympiad (WRO) have a positive impact on students' learning in terms of their problem solving skills (as part of 21st century skills). In detail, this thesis focuses on a) if students can improve their problem solving skills through participation in educational robotics competitions, b) how this skill development is accomplished, and c) the teachers' support of their students during their learning process in the competition. The corresponding empirical studies were conducted throughout the seasons of 2018 and 2019 of the WRO in Germany. The results show overall positive effects of the participation in the WRO on students' learning of problem solving skills. They display an increase of students' problem solving skills, which is not moderated by other variables such as the competition's category or age group, the students' gender or experience, or the success of the teams at the competition. Moreover, the results indicate that students develop their problem solving skills by using a systematic engineering design process and sophisticated problem solving strategies. Lastly, the teacher's role in the educational robotics competitions as manager and guide (in terms of the constructionist learning theory) of the students' learning process (especially regarding the affective level) is underlined by the results of this thesis. 
All in all, this thesis contributes to the research gap concerning the lack of systematic evaluation of educational robotics to promote students' learning by providing more (methodologically) sophisticated research on this topic. Thereby, this thesis follows the call for more rigorous (quantitative) research by the educational robotics community, which is necessary to validate the impact of educational robotics.}, subject = {Fachdidaktik}, language = {en} } @phdthesis{Moldovan2021, author = {Moldovan, Christian}, title = {Performance Modeling of Mobile Video Streaming}, issn = {1432-8801}, doi = {10.25972/OPUS-22871}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-228715}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2021}, abstract = {In the past two decades, there has been a trend to move from traditional television to Internet-based video services. With video streaming becoming one of the most popular applications in the Internet and the current state of the art in media consumption, quality expectations of consumers are increasing. Low quality videos are no longer considered acceptable in contrast to some years ago due to the increased sizes and resolution of devices. If the high expectations of the users are not met and a video is delivered in poor quality, they often abandon the service. Therefore, Internet Service Providers (ISPs) and video service providers are facing the challenge of providing seamless multimedia delivery in high quality. Currently, during peak hours, video streaming causes almost 58\% of the downstream traffic on the Internet. With higher mobile bandwidth, mobile video streaming has also become commonplace. According to the 2019 Cisco Visual Networking Index, in 2022 79\% of mobile traffic will be video traffic and, according to Ericsson, by 2025 video is forecasted to make up 76\% of total Internet traffic. 
Ericsson further predicts that in 2024 over 1.4 billion devices will be subscribed to 5G, which will offer a downlink data rate of 100 Mbit/s in dense urban environments. One of the most important goals of ISPs and video service providers is for their users to have a high Quality of Experience (QoE). The QoE describes the degree of delight or annoyance a user experiences when using a service or application. In video streaming the QoE depends on how seamless a video is played and whether there are stalling events or quality degradations. These characteristics of a transmitted video are described as the application layer Quality of Service (QoS). In general, the QoS is defined as "the totality of characteristics of a telecommunications service that bear on its ability to satisfy stated and implied needs of the user of the service" by the ITU. The network layer QoS describes the performance of the network and is decisive for the application layer QoS. In Internet video, typically a buffer is used to store downloaded video segments to compensate for network fluctuations. If the buffer runs empty, stalling occurs. If the available bandwidth decreases temporarily, the video can still be played out from the buffer without interruption. There are different policies and parameters that determine how large the buffer is, at what buffer level to start the video, and at what buffer level to resume playout after stalling. These have to be finely tuned to achieve the highest QoE for the user. If the bandwidth decreases for a longer time period, a limited buffer will deplete and stalling can not be avoided. An important research question is how to configure the buffer optimally for different users and situations. In this work, we tackle this question using analytic models and measurement studies. With HTTP Adaptive Streaming (HAS), the video players have the capability to adapt the video bit rate at the client side according to the available network capacity. 
This way the depletion of the video buffer and thus stalling can be avoided. In HAS, the quality in which the video is played and the number of quality switches also have an impact on the QoE. Thus, an important problem is the adaptation of video streaming so that these parameters are optimized. In a shared WiFi network, multiple video users share a single bottleneck link and compete for bandwidth. In such a scenario, it is important that resources are allocated to users in a way that all can have a similar QoE. In this work, we therefore investigate the possible fairness gain when moving from network fairness towards application-layer QoS fairness. In mobile scenarios, the energy and data consumption of the user device are limited resources and they must be managed besides the QoE. Therefore, it is also necessary to investigate solutions that conserve these resources in mobile devices. But how can resources be conserved without sacrificing application layer QoS? As an example for such a solution, this work presents a new probabilistic adaptation algorithm that uses abandonment statistics for its decision making, aiming at minimizing the resource consumption while maintaining high QoS. With current protocol developments such as 5G, bandwidths are increasing, latencies are decreasing and networks are becoming more stable, leading to higher QoS. This allows for new real time data intensive applications such as cloud gaming, virtual reality and augmented reality applications to become feasible on mobile devices which pose completely new research questions. The high energy consumption of such applications still remains an issue as the energy capacity of devices is currently not increasing as quickly as the available data rates. 
In this work we compare the optimal performance of different strategies for adaptive 360-degree video streaming.}, subject = {Video{\"u}bertragung}, language = {en} } @phdthesis{Ifflaender2021, author = {Iffl{\"a}nder, Lukas}, title = {Attack-aware Security Function Management}, doi = {10.25972/OPUS-22421}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-224211}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2021}, abstract = {Over the last decades, cybersecurity has become an increasingly important issue. Between 2019 and 2011 alone, the losses from cyberattacks in the United States grew by 6217\%. At the same time, attacks became not only more intensive but also more and more versatile and diverse. Cybersecurity has become everyone's concern. Today, service providers require sophisticated and extensive security infrastructures comprising many security functions dedicated to various cyberattacks. Still, attacks become more violent to a level where infrastructures can no longer keep up. Simply scaling up is no longer sufficient. To address this challenge, in a whitepaper, the Cloud Security Alliance (CSA) proposed multiple work packages for security infrastructure, leveraging the possibilities of Software-defined Networking (SDN) and Network Function Virtualization (NFV). Security functions require a more sophisticated modeling approach than regular network functions. Notably, the property to drop packets deemed malicious has a significant impact on Security Service Function Chains (SSFCs)---service chains consisting of multiple security functions to protect against multiple attack vectors. Under attack, the order of these chains influences the end-to-end system performance depending on the attack type. Unfortunately, it is hard to predict the attack composition at system design time. Thus, we make a case for dynamic attack-aware SSFC reordering. 
Also, we tackle the issues of the lack of integration between security functions and the surrounding network infrastructure, the insufficient use of short term CPU frequency boosting, and the lack of Intrusion Detection and Prevention Systems (IDPS) against database ransomware attacks. Current works focus on characterizing the performance of security functions and their behavior under overload without considering the surrounding infrastructure. Other works aim at replacing security functions using network infrastructure features but do not consider integrating security functions within the network. Further publications deal with using SDN for security or how to deal with new vulnerabilities introduced through SDN. However, they do not take security function performance into account. NFV is a popular field for research dealing with frameworks, benchmarking methods, the combination with SDN, and implementing security functions as Virtualized Network Functions (VNFs). Research in this area brought forth the concept of Service Function Chains (SFCs) that chain multiple network functions after one another. Nevertheless, they still do not consider the specifics of security functions. The mentioned CSA whitepaper proposes many valuable ideas but leaves their realization open to others. This thesis presents solutions to increase the performance of single security functions using SDN, performance modeling, a framework for attack-aware SSFC reordering, a solution to make better use of CPU frequency boosting, and an IDPS against database ransomware. Specifically, the primary contributions of this work are: • We present approaches to dynamically bypass Intrusion Detection Systems (IDS) in order to increase their performance without reducing the security level. To this end, we develop and implement three SDN-based approaches (two dynamic and one static). 
We evaluate the proposed approaches regarding security and performance and show that they significantly increase the performance compared to an inline IDS without significant security deficits. We show that using software switches can further increase the performance of the dynamic approaches up to a point where they can eliminate any throughput drawbacks when using the IDS. • We design a DDoS Protection System (DPS) against TCP SYN flood attacks in the form of a VNF that works inside an SDN-enabled network. This solution eliminates known scalability and performance drawbacks of existing solutions for this attack type. Then, we evaluate this solution showing that it correctly handles the connection establishment and present solutions for an observed issue. Next, we evaluate the performance showing that our solution increases performance up to three times. Parallelization and parameter tuning yields another 76\% performance boost. Based on these findings, we discuss optimal deployment strategies. • We introduce the idea of attack-aware SSFC reordering and explain its impact in a theoretical scenario. Then, we discuss the required information to perform this process. We validate our claim of the importance of the SSFC order by analyzing the behavior of single security functions and SSFCs. Based on the results, we conclude that there is a massive impact on the performance up to three orders of magnitude, and we find contradicting optimal orders for different workloads. Thus, we demonstrate the need for dynamic reordering. Last, we develop a model for SSFC regarding traffic composition and resource demands. We classify the traffic into multiple classes and model the effect of single security functions on the traffic and their generated resource demands as functions of the incoming network traffic. Based on our model, we propose three approaches to determine optimal orders for reordering. 
• We implement a framework for attack-aware SSFC reordering based on this knowledge. The framework places all security functions inside an SDN-enabled network and reorders them using SDN flows. Our evaluation shows that the framework can enforce all routes as desired. It correctly adapts to all attacks and returns to the original state after the attacks cease. We find possible security issues at the moment of reordering and present solutions to eliminate them. • Next, we design and implement an approach to load balance servers while taking into account their ability to go into a state of Central Processing Unit (CPU) frequency boost. To this end, the approach collects temperature information from available hosts and places services on the host that can attain the boosted mode the longest. We evaluate this approach and show its effectiveness. For high load scenarios, the approach increases the overall performance and the performance per watt. Even better results show up for low load workloads, where not only all performance metrics improve but also the temperatures and total power consumption decrease. • Last, we design an IDPS protecting against database ransomware attacks that comprise multiple queries to attain their goal. Our solution models these attacks using a Colored Petri Net (CPN). A proof-of-concept implementation shows that our approach is capable of detecting attacks without creating false positives for benign scenarios. Furthermore, our solution creates only a small performance impact. Our contributions can help to improve the performance of security infrastructures. We see multiple application areas from data center operators over software and hardware developers to security and performance researchers. Most of the above-listed contributions found use in several research publications. Regarding future work, we see the need to better integrate SDN-enabled security functions and SSFC reordering in data center networks. 
Future SSFC should discriminate between different traffic types, and security frameworks should support automatically learning models for security functions. We see the need to consider energy efficiency when regarding SSFCs and take CPU boosting technologies into account when designing performance models as well as placement, scaling, and deployment strategies. Last, for a faster adaptation against recent ransomware attacks, we propose machine-assisted learning for database IDPS signatures.}, subject = {Software-defined networking}, language = {en} } @phdthesis{Strohmeier2021, author = {Strohmeier, Michael}, title = {FARN - A Novel UAV Flight Controller for Highly Accurate and Reliable Navigation}, doi = {10.25972/OPUS-22313}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-223136}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2021}, abstract = {This thesis describes the functional principle of FARN, a novel flight controller for Unmanned Aerial Vehicles (UAVs) designed for mission scenarios that require highly accurate and reliable navigation. The required precision is achieved by combining low-cost inertial sensors and Ultra-Wide Band (UWB) radio ranging with raw and carrier phase observations from the Global Navigation Satellite System (GNSS). The flight controller is developed within the scope of this work regarding the mission requirements of two research projects, and successfully applied under real conditions. FARN includes a GNSS compass that allows a precise heading estimation even in environments where the conventional heading estimation based on a magnetic compass is not reliable. The GNSS compass combines the raw observations of two GNSS receivers with FARN's real-time capable attitude determination. Thus, especially the deployment of UAVs in Arctic environments within the project for ROBEX is possible despite the weak horizontal component of the Earth's magnetic field. 
Additionally, FARN allows centimeter-accurate relative positioning of multiple UAVs in real-time. This enables precise flight maneuvers within a swarm, but also the execution of cooperative tasks in which several UAVs have a common goal or are physically coupled. A drone defense system based on two cooperative drones that act in a coordinated manner and carry a commonly suspended net to capture a potentially dangerous drone in mid-air was developed in conjunction with the project MIDRAS. Within this thesis, both theoretical and practical aspects are covered regarding UAV development with an emphasis on the fields of signal processing, guidance and control, electrical engineering, robotics, computer science, and programming of embedded systems. Furthermore, this work aims to provide a condensed reference for further research in the field of UAVs. The work describes and models the utilized UAV platform, the propulsion system, the electronic design, and the utilized sensors. After establishing mathematical conventions for attitude representation, the actual core of the flight controller, namely the embedded ego-motion estimation and the principle control architecture are outlined. Subsequently, based on basic GNSS navigation algorithms, advanced carrier phase-based methods and their coupling to the ego-motion estimation framework are derived. Additionally, various implementation details and optimization steps of the system are described. The system is successfully deployed and tested within the two projects. 
After a critical examination and evaluation of the developed system, existing limitations and possible improvements are outlined.}, subject = {Drohne}, language = {en} } @phdthesis{Oberdoerfer2021, author = {Oberd{\"o}rfer, Sebastian}, title = {Better Learning with Gaming: Knowledge Encoding and Knowledge Learning Using Gamification}, doi = {10.25972/OPUS-21970}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-219707}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2021}, abstract = {Computer games are highly immersive, engaging, and motivating learning environments. By providing a tutorial at the start of a new game, players learn the basics of the game's underlying principles as well as practice how to successfully play the game. During the actual gameplay, players repetitively apply this knowledge, thus improving it due to repetition. Computer games also challenge players with a constant stream of new challenges which increase in difficulty over time. As a result, computer games even require players to transfer their knowledge to master these new challenges. A computer game consists of several game mechanics. Game mechanics are the rules of a computer game and encode the game's underlying principles. They create the virtual environments, generate a game's challenges and allow players to interact with the game. Game mechanics also can encode real world knowledge. This knowledge may be acquired by players via gameplay. However, the actual process of knowledge encoding and knowledge learning using game mechanics has not been thoroughly defined yet. This thesis therefore proposes a theoretical model to define the knowledge learning using game mechanics: the Gamified Knowledge Encoding. The model is applied to design a serious game for affine transformations, i.e., GEtiT, and to predict the learning outcome of playing a computer game that encodes orbital mechanics in its game mechanics, i.e., Kerbal Space Program. 
To assess the effects of different visualization technologies on the overall learning outcome, GEtiT visualizes the gameplay in desktop-3D and immersive virtual reality. The model's applicability for effective game design as well as GEtiT's overall design are evaluated in a usability study. The learning outcome of playing GEtiT and Kerbal Space Program is assessed in four additional user studies. The studies' results validate the use of the Gamified Knowledge Encoding for the purpose of developing effective serious games and to predict the learning outcome of existing serious games. GEtiT and Kerbal Space Program yield a similar training effect but a higher motivation to tackle the assignments in comparison to a traditional learning method. In conclusion, this thesis expands the understanding of using game mechanics for an effective learning of knowledge. The presented results are of high importance for researchers, educators, and developers as they also provide guidelines for the development of effective serious games.}, subject = {Serious game}, language = {en} } @phdthesis{Dose2021, author = {Dose, Titus}, title = {Balance Problems for Integer Circuits and Separations of Relativized Conjectures on Incompleteness in Promise Classes}, doi = {10.25972/OPUS-22220}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-222209}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2021}, abstract = {This thesis is divided into two parts. In the first part we contribute to a working program initiated by Pudl{\'a}k (2017) who lists several major complexity theoretic conjectures relevant to proof complexity and asks for oracles that separate pairs of corresponding relativized conjectures. Among these conjectures are: - \(\mathsf{CON}\) and \(\mathsf{SAT}\): coNP (resp., NP) does not contain complete sets that have P-optimal proof systems. - \(\mathsf{CON}^{\mathsf{N}}\): coNP does not contain complete sets that have optimal proof systems. 
- \(\mathsf{TFNP}\): there do not exist complete total polynomial search problems (also known as total NP search problems). - \(\mathsf{DisjNP}\) and \(\mathsf{DisjCoNP}\): There do not exist complete disjoint NP pairs (coNP pairs). - \(\mathsf{UP}\): UP does not contain complete problems. - \(\mathsf{NP}\cap\mathsf{coNP}\): \(\mathrm{NP}\cap\mathrm{coNP}\) does not contain complete problems. - \(\mathrm{P}\ne\mathrm{NP}\). We construct several of the oracles that Pudl{\'a}k asks for. In the second part we investigate the computational complexity of balance problems for \(\{-,\cdot\}\)-circuits computing finite sets of natural numbers (note that \(-\) denotes the set difference). These problems naturally build on problems for integer expressions and integer circuits studied by Stockmeyer and Meyer (1973), McKenzie and Wagner (2007), and Gla{\ss}er et al. (2010). Our work shows that the balance problem for \(\{-,\cdot\}\)-circuits is undecidable which is the first natural problem for integer circuits or related constraint satisfaction problems that admits only one arithmetic operation and is proven to be undecidable. Starting from this result we precisely characterize the complexity of balance problems for proper subsets of \(\{-,\cdot\}\). These problems turn out to be complete for one of the classes L, NL, and NP.}, subject = {NP-vollst{\"a}ndiges Problem}, language = {en} } @phdthesis{Loeffler2021, author = {L{\"o}ffler, Andre}, title = {Constrained Graph Layouts: Vertices on the Outer Face and on the Integer Grid}, edition = {1. 
Auflage}, publisher = {W{\"u}rzburg University Press}, address = {W{\"u}rzburg}, isbn = {978-3-95826-146-4}, doi = {10.25972/WUP-978-3-95826-147-1}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-215746}, school = {W{\"u}rzburg University Press}, pages = {viii, 161}, year = {2021}, abstract = {Constraining graph layouts -- that is, restricting the placement of vertices and the routing of edges to obey certain constraints -- is common practice in graph drawing. In this book, we discuss algorithmic results on two different restriction types: placing vertices on the outer face and on the integer grid. For the first type, we look into the outer k-planar and outer k-quasi-planar graphs, as well as giving a linear-time algorithm to recognize full and closed outer k-planar graphs using Monadic Second-order Logic. For the second type, we consider the problem of transferring a given planar drawing onto the integer grid while preserving the original drawing's topology; we also generalize a variant of Cauchy's rigidity theorem for orthogonal polyhedra of genus 0 to those of arbitrary genus.}, subject = {Graphenzeichnen}, language = {en} } @phdthesis{Bauer2021, author = {Bauer, Andr{\'e}}, title = {Automated Hybrid Time Series Forecasting: Design, Benchmarking, and Use Cases}, doi = {10.25972/OPUS-22025}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-220255}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2021}, abstract = {These days, we are living in a digitalized world. Both our professional and private lives are pervaded by various IT services, which are typically operated using distributed computing systems (e.g., cloud environments). Due to the high level of digitalization, the operators of such systems are confronted with fast-paced and changing requirements. In particular, cloud environments have to cope with load fluctuations and respective rapid and unexpected changes in the computing resource demands. 
To face this challenge, so-called auto-scalers, such as the threshold-based mechanism in Amazon Web Services EC2, can be employed to enable elastic scaling of the computing resources. However, despite this opportunity, business-critical applications are still run with highly overprovisioned resources to guarantee a stable and reliable service operation. This strategy is pursued due to the lack of trust in auto-scalers and the concern that inaccurate or delayed adaptations may result in financial losses. To adapt the resource capacity in time, the future resource demands must be "foreseen", as reacting to changes once they are observed introduces an inherent delay. In other words, accurate forecasting methods are required to adapt systems proactively. A powerful approach in this context is time series forecasting, which is also applied in many other domains. The core idea is to examine past values and predict how these values will evolve as time progresses. According to the "No-Free-Lunch Theorem", there is no algorithm that performs best for all scenarios. Therefore, selecting a suitable forecasting method for a given use case is a crucial task. Simply put, each method has its benefits and drawbacks, depending on the specific use case. The choice of the forecasting method is usually based on expert knowledge, which cannot be fully automated, or on trial-and-error. In both cases, this is expensive and prone to error. Although auto-scaling and time series forecasting are established research fields, existing approaches cannot fully address the mentioned challenges: (i) In our survey on time series forecasting, we found that publications on time series forecasting typically consider only a small set of (mostly related) methods and evaluate their performance on a small number of time series with only a few error measures while providing no information on the execution time of the studied methods. 
Therefore, such articles cannot be used to guide the choice of an appropriate method for a particular use case; (ii) Existing open-source hybrid forecasting methods that take advantage of at least two methods to tackle the "No-Free-Lunch Theorem" are computationally intensive, poorly automated, designed for a particular data set, or they lack a predictable time-to-result. Methods exhibiting a high variance in the time-to-result cannot be applied for time-critical scenarios (e.g., auto-scaling), while methods tailored to a specific data set introduce restrictions on the possible use cases (e.g., forecasting only annual time series); (iii) Auto-scalers typically scale an application either proactively or reactively. Even though some hybrid auto-scalers exist, they lack sophisticated solutions to combine reactive and proactive scaling. For instance, resources are only released proactively while resource allocation is entirely done in a reactive manner (inherently delayed); (iv) The majority of existing mechanisms do not take the provider's pricing scheme into account while scaling an application in a public cloud environment, which often results in excessive charged costs. Even though some cost-aware auto-scalers have been proposed, they only consider the current resource demands, neglecting their development over time. For example, resources are often shut down prematurely, even though they might be required again soon. 
To address the mentioned challenges and the shortcomings of existing work, this thesis presents three contributions: (i) The first contribution---a forecasting benchmark---addresses the problem of limited comparability between existing forecasting methods; (ii) The second contribution---Telescope---provides an automated hybrid time series forecasting method addressing the challenge posed by the "No-Free-Lunch Theorem"; (iii) The third contribution---Chamulteon---provides a novel hybrid auto-scaler for coordinated scaling of applications comprising multiple services, leveraging Telescope to forecast the workload intensity as a basis for proactive resource provisioning. In the following, the three contributions of the thesis are summarized: Contribution I - Forecasting Benchmark To establish a level playing field for evaluating the performance of forecasting methods in a broad setting, we propose a novel benchmark that automatically evaluates and ranks forecasting methods based on their performance in a diverse set of evaluation scenarios. The benchmark comprises four different use cases, each covering 100 heterogeneous time series taken from different domains. The data set was assembled from publicly available time series and was designed to exhibit much higher diversity than existing forecasting competitions. Besides proposing a new data set, we introduce two new measures that describe different aspects of a forecast. We applied the developed benchmark to evaluate Telescope. Contribution II - Telescope To provide a generic forecasting method, we introduce a novel machine learning-based forecasting approach that automatically retrieves relevant information from a given time series. More precisely, Telescope automatically extracts intrinsic time series features and then decomposes the time series into components, building a forecasting model for each of them. 
Each component is forecast by applying a different method and then the final forecast is assembled from the forecast components by employing a regression-based machine learning algorithm. In more than 1300 hours of experiments benchmarking 15 competing methods (including approaches from Uber and Facebook) on 400 time series, Telescope outperformed all methods, exhibiting the best forecast accuracy coupled with a low and reliable time-to-result. Compared to the competing methods that exhibited, on average, a forecast error (more precisely, the symmetric mean absolute forecast error) of 29\%, Telescope exhibited an error of 20\% while being 2556 times faster. In particular, the methods from Uber and Facebook exhibited an error of 48\% and 36\%, and were 7334 and 19 times slower than Telescope, respectively. Contribution III - Chamulteon To enable reliable auto-scaling, we present a hybrid auto-scaler that combines proactive and reactive techniques to scale distributed cloud applications comprising multiple services in a coordinated and cost-effective manner. More precisely, proactive adaptations are planned based on forecasts of Telescope, while reactive adaptations are triggered based on actual observations of the monitored load intensity. To solve occurring conflicts between reactive and proactive adaptations, a complex conflict resolution algorithm is implemented. Moreover, when deployed in public cloud environments, Chamulteon reviews adaptations with respect to the cloud provider's pricing scheme in order to minimize the charged costs. In more than 400 hours of experiments evaluating five competing auto-scaling mechanisms in scenarios covering five different workloads, four different applications, and three different cloud environments, Chamulteon exhibited the best auto-scaling performance and reliability while at the same time reducing the charged costs. 
The competing methods provided insufficient resources for (on average) 31\% of the experimental time; in contrast, Chamulteon cut this time to 8\% and the SLO (service level objective) violations from 18\% to 6\% while using up to 15\% less resources and reducing the charged costs by up to 45\%. The contributions of this thesis can be seen as major milestones in the domain of time series forecasting and cloud resource management. (i) This thesis is the first to present a forecasting benchmark that covers a variety of different domains with a high diversity between the analyzed time series. Based on the provided data set and the automatic evaluation procedure, the proposed benchmark contributes to enhance the comparability of forecasting methods. The benchmarking results for different forecasting methods enable the selection of the most appropriate forecasting method for a given use case. (ii) Telescope provides the first generic and fully automated time series forecasting approach that delivers both accurate and reliable forecasts while making no assumptions about the analyzed time series. Hence, it eliminates the need for expensive, time-consuming, and error-prone procedures, such as trial-and-error searches or consulting an expert. This opens up new possibilities especially in time-critical scenarios, where Telescope can provide accurate forecasts with a short and reliable time-to-result. Although Telescope was applied for this thesis in the field of cloud computing, there is absolutely no limitation regarding the applicability of Telescope in other domains, as demonstrated in the evaluation. Moreover, Telescope, which was made available on GitHub, is already used in a number of interdisciplinary data science projects, for instance, predictive maintenance in an Industry 4.0 context, heart failure prediction in medicine, or as a component of predictive models of beehive development. 
(iii) In the context of cloud resource management, Chamulteon is a major milestone for increasing the trust in cloud auto-scalers. The complex resolution algorithm enables reliable and accurate scaling behavior that reduces losses caused by excessive resource allocation or SLO violations. In other words, Chamulteon provides reliable online adaptations minimizing charged costs while at the same time maximizing user experience.}, subject = {Zeitreihenanalyse}, language = {en} } @phdthesis{SchauerMarinRodrigues2020, author = {Schauer Marin Rodrigues, Johannes}, title = {Detecting Changes and Finding Collisions in 3D Point Clouds : Data Structures and Algorithms for Post-Processing Large Datasets}, isbn = {978-3-945459-32-4}, doi = {10.25972/OPUS-21428}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-214285}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2020}, abstract = {Affordable prices for 3D laser range finders and mature software solutions for registering multiple point clouds in a common coordinate system paved the way for new areas of application for 3D point clouds. Nowadays we see 3D laser scanners being used not only by digital surveying experts but also by law enforcement officials, construction workers or archaeologists. Whether the purpose is digitizing factory production lines, preserving historic sites as digital heritage or recording environments for gaming or virtual reality applications -- it is hard to imagine a scenario in which the final point cloud must also contain the points of "moving" objects like factory workers, pedestrians, cars or flocks of birds. For most post-processing tasks, moving objects are undesirable not least because moving objects will appear in scans multiple times or are distorted due to their motion relative to the scanner rotation. The main contributions of this work are two postprocessing steps for already registered 3D point clouds. 
The first method is a new change detection approach based on a voxel grid which allows partitioning the input points into static and dynamic points using explicit change detection and subsequently removing the latter for a "cleaned" point cloud. The second method uses this cleaned point cloud as input for detecting collisions between points of the environment point cloud and a point cloud of a model that is moved through the scene. Our approach on explicit change detection is compared to the state of the art using multiple datasets including the popular KITTI dataset. We show how our solution achieves similar or better F1-scores than an existing solution while at the same time being faster. To detect collisions we do not produce a mesh but approximate the raw point cloud data by spheres or cylindrical volumes. We show how our data structures allow efficient nearest neighbor queries that make our CPU-only approach comparable to a massively-parallel algorithm running on a GPU. The utilized algorithms and data structures are discussed in detail. All our software is freely available for download under the terms of the GNU General Public License. Most of the datasets used in this thesis are freely available as well. We provide shell scripts that allow one to directly reproduce the quantitative results shown in this thesis for easy verification of our findings.}, subject = {Punktwolke}, language = {en} } @phdthesis{Borchert2020, author = {Borchert, Kathrin Johanna}, title = {Estimating Quality of Experience of Enterprise Applications - A Crowdsourcing-based Approach}, issn = {1432-8801}, doi = {10.25972/OPUS-21697}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-216978}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2020}, abstract = {Nowadays, employees have to work with applications, technical services, and systems every day for hours. 
Hence, performance degradation of such systems might be perceived negatively by the employees, increase frustration, and might also have a negative effect on their productivity. The assessment of the application's performance in order to provide a smooth operation of the application is part of the application management. Within this process it is not sufficient to assess the system performance solely on technical performance parameters, e.g., response or loading times. These values have to be set into relation to the perceived performance quality on the user's side - the quality of experience (QoE). This dissertation focuses on the monitoring and estimation of the QoE of enterprise applications. As building models to estimate the QoE requires quality ratings from the users as ground truth, one part of this work addresses methods to collect such ratings. Besides the evaluation of approaches to improve the quality of results of tasks and studies completed on crowdsourcing platforms, a general concept for monitoring and estimating QoE in enterprise environments is presented. Here, relevant design dimensions of subjective studies are identified and their impact on the QoE is evaluated and discussed. By considering the findings, a methodology for collecting quality ratings from employees during their regular work is developed. The method is realized by implementing a tool to conduct short surveys and deployed in a cooperating company. As a foundation for learning QoE estimation models, this work investigates the relationship between user-provided ratings and technical performance parameters. This analysis is based on a data set collected in a user study in a cooperating company during a time span of 1.5 years. 
Finally, two QoE estimation models are introduced and their performance is evaluated.}, subject = {Quality of Experience}, language = {en} } @phdthesis{Borchers2020, author = {Borchers, Kai}, title = {Decentralized and Pulse-based Clock Synchronization in SpaceWire Networks for Time-triggered Data Transfers}, doi = {10.25972/OPUS-21560}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-215606}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2020}, abstract = {Time-triggered communication is widely used throughout several industry domains, primarily for reliable and real-time capable data transfers. However, existing time-triggered technologies are designed for terrestrial usage and not directly applicable to space applications due to the harsh environment. Instead, specific hardware must be developed to deal with thermal, mechanical, and especially radiation effects. SpaceWire, as an event-triggered communication technology, has been used for years in a large number of space missions. Its moderate complexity, heritage, and transmission rates up to 400 MBits/s are among its main advantages and often without alternatives for on-board computing systems of spacecraft. At present, real-time data transfers are either achieved by prioritization inside SpaceWire routers or by applying a simplified time-triggered approach. These solutions imply problems either if they are used inside distributed on-board computing systems or in case networks with more than a single router are required. This work provides a solution for the real-time problem by developing a novel clock synchronization approach. This approach is focused on being compatible with distributed system structures and allows time-triggered data transfers. A significant difference to existing technologies is the remote clock estimation by the use of pulses. 
They are transferred over the network and remove the need for latency accumulation, which allows the incorporation of standardized SpaceWire equipment. Additionally, local clocks are controlled decentralized and provide different correction capabilities in order to handle oscillator induced uncertainties. All these functionalities are provided by a developed Network Controller (NC), able to isolate the attached network and to control accesses.}, subject = {Daten{\"u}bertragung}, language = {en} } @phdthesis{Wick2020, author = {Wick, Christoph}, title = {Optical Medieval Music Recognition}, doi = {10.25972/OPUS-21434}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-214348}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2020}, abstract = {In recent years, great progress has been made in the area of Artificial Intelligence (AI) due to the possibilities of Deep Learning which steadily yielded new state-of-the-art results especially in many image recognition tasks. Currently, in some areas, human performance is achieved or already exceeded. This great development already had an impact on the area of Optical Music Recognition (OMR) as several novel methods relying on Deep Learning succeeded in specific tasks. Musicologists are interested in large-scale musical analysis and in publishing digital transcriptions in a collection enabling to develop tools for searching and data retrieving. The application of OMR promises to simplify and thus speed-up the transcription process by either providing fully-automatic or semi-automatic approaches. This thesis focuses on the automatic transcription of Medieval music with a focus on square notation which poses a challenging task due to complex layouts, highly varying handwritten notations, and degradation. 
However, since handwritten music notations are quite complex to read, even for an experienced musicologist, it is to be expected that even with new techniques of OMR manual corrections are required to obtain the transcriptions. This thesis presents several new approaches and open source software solutions for layout analysis and Automatic Text Recognition (ATR) for early documents and for OMR of Medieval manuscripts providing state-of-the-art technology. Fully Convolutional Networks (FCN) are applied for the segmentation of historical manuscripts and early printed books, to detect staff lines, and to recognize neume notations. The ATR engine Calamari is presented which allows for ATR of early prints and also the recognition of lyrics. Configurable CNN/LSTM-network architectures which are trained with the segmentation-free CTC-loss are applied to the sequential recognition of text but also monophonic music. Finally, a syllable-to-neume assignment algorithm is presented which represents the final step to obtain a complete transcription of the music. The evaluations show that the performance of any algorithm is highly dependent on the material at hand and the number of training instances. The presented staff line detection correctly identifies staff lines and staves with an $F_1$-score of above $99.5\%$. The symbol recognition yields a diplomatic Symbol Accuracy Rate (dSAR) of above $90\%$ by counting the number of correct predictions in the symbol sequence normalized by its length. The ATR of lyrics achieved a Character Accuracy Rate (CAR) (equivalently the number of correct predictions normalized by the sentence length) of above $93\%$ trained on 771 lyric lines of Medieval manuscripts and of 99.89\% when training on around 3.5 million lines of contemporary printed fonts. The assignment of syllables and their corresponding neumes reached $F_1$-scores of up to $99.2\%$. 
A direct comparison to previously published performances is difficult due to different materials and metrics. However, estimations show that the reported values of this thesis exceed the state-of-the-art in the area of square notation. A further goal of this thesis is to enable musicologists without technical background to apply the developed algorithms in a complete workflow by providing a user-friendly and comfortable Graphical User Interface (GUI) encapsulating the technical details. For this purpose, this thesis presents the web-application OMMR4all. Its fully-functional workflow includes the proposed state-of-the-art machine-learning algorithms and optionally allows for a manual intervention at any stage to correct the output preventing error propagation. To simplify the manual (post-) correction, OMMR4all provides an overlay-editor that superimposes the annotations with a scan of the original manuscripts so that errors can easily be spotted. The workflow is designed to be iteratively improvable by training better models as soon as new Ground Truth (GT) is available.}, subject = {Neumenschrift}, language = {en} } @phdthesis{Reul2020, author = {Reul, Christian}, title = {An Intelligent Semi-Automatic Workflow for Optical Character Recognition of Historical Printings}, doi = {10.25972/OPUS-20923}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-209239}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2020}, abstract = {Optical Character Recognition (OCR) on historical printings is a challenging task mainly due to the complexity of the layout and the highly variant typography. Nevertheless, in the last few years great progress has been made in the area of historical OCR resulting in several powerful open-source tools for preprocessing, layout analysis and segmentation, Automatic Text Recognition (ATR) and postcorrection. 
Their major drawback is that they only offer limited applicability by non-technical users like humanist scholars, in particular when it comes to the combined use of several tools in a workflow. Furthermore, depending on the material, these tools are usually not able to fully automatically achieve sufficiently low error rates, let alone perfect results, creating a demand for an interactive postcorrection functionality which, however, is generally not incorporated. This thesis addresses these issues by presenting an open-source OCR software called OCR4all which combines state-of-the-art OCR components and continuous model training into a comprehensive workflow. While a variety of materials can already be processed fully automatically, books with more complex layouts require manual intervention by the users. This is mostly due to the fact that the required Ground Truth (GT) for training stronger mixed models (for segmentation as well as text recognition) is not available, yet, neither in the desired quantity nor quality. To deal with this issue in the short run, OCR4all offers better recognition capabilities in combination with a very comfortable Graphical User Interface (GUI) that allows error corrections not only in the final output, but already in early stages to minimize error propagation. In the long run this constant manual correction produces large quantities of valuable, high quality training material which can be used to improve fully automatic approaches. Further on, extensive configuration capabilities are provided to set the degree of automation of the workflow and to make adaptations to the carefully selected default parameters for specific printings, if necessary. The architecture of OCR4all allows for an easy integration (or substitution) of newly developed tools for its main components by supporting standardized interfaces like PageXML, thus aiming at continual higher automation for historical printings. 
In addition to OCR4all, several methodical extensions in the form of accuracy improving techniques for training and recognition are presented. Most notably an effective, sophisticated, and adaptable voting methodology using a single ATR engine, a pretraining procedure, and an Active Learning (AL) component are proposed. Experiments showed that combining pretraining and voting significantly improves the effectiveness of book-specific training, reducing the obtained Character Error Rates (CERs) by more than 50\%. The proposed extensions were further evaluated during two real world case studies: First, the voting and pretraining techniques are transferred to the task of constructing so-called mixed models which are trained on a variety of different fonts. This was done by using 19th century Fraktur script as an example, resulting in a considerable improvement over a variety of existing open-source and commercial engines and models. Second, the extension from ATR on raw text to the adjacent topic of typography recognition was successfully addressed by thoroughly indexing a historical lexicon that heavily relies on different font types in order to encode its complex semantic structure. During the main experiments on very complex early printed books even users with minimal or no experience were able to not only comfortably deal with the challenges presented by the complex layout, but also to recognize the text with manageable effort and great quality, achieving excellent CERs below 0.5\%. 
Furthermore, the fully automated application on 19th century novels showed that OCR4all (average CER of 0.85\%) can considerably outperform the commercial state-of-the-art tool ABBYY Finereader (5.3\%) on moderate layouts if suitably pretrained mixed ATR models are available.}, subject = {Optische Zeichenerkennung}, language = {en} } @phdthesis{Krug2020, author = {Krug, Markus}, title = {Techniques for the Automatic Extraction of Character Networks in German Historic Novels}, doi = {10.25972/OPUS-20918}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-209186}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2020}, abstract = {Recent advances in Natural Language Processing (NLP) allow for a fully automatic extraction of character networks for an incoming text. These networks serve as a compact and easy to grasp representation of literary fiction. They offer an aggregated view of the text, which can be used during distant reading approaches for the analysis of literary hypotheses. In their core, the networks consist of nodes, which represent literary characters, and edges, which represent relations between characters. For an automatic extraction of such a network, the first step is the detection of the references of all fictional entities that are of importance for a text. References to the fictional entities appear in the form of names, noun phrases and pronouns and prior to this work, no components capable of automatic detection of character references were available. Existing tools are only capable of detecting proper nouns, a subset of all character references. When evaluated on the task of detecting proper nouns in the domain of literary fiction, they still underperform at an F1-score of just about 50\%. 
This thesis uses techniques from the field of semi-supervised learning, such as Distant supervision and Generalized Expectations, and improves the results of an existing tool to about 82\%, when evaluated on all three categories in literary fiction, but without the need for annotated data in the target domain. However, since this quality is still not sufficient, the decision to annotate DROC, a corpus comprising 90 fragments of German novels, was made. This resulted in a new general purpose annotation environment titled ATHEN, as well as annotated data that spans about 500,000 tokens in total. Using this data, the combination of supervised algorithms and a tailored rule based algorithm, which in combination are able to exploit both local consistencies as well as global consistencies, yields an algorithm with an F1-score of about 93\%. This component is referred to as the Kallimachos tagger. A character network cannot directly display references, however; instead, they need to be clustered so that all references that belong to a real world or fictional entity are grouped together. This process, widely known as coreference resolution, is a hard problem in the focus of research for more than half a century. This work experimented with adaptations of classical feature based machine learning, with a dedicated rule based algorithm and with modern techniques of Deep Learning, but no approach can surpass 55\% B-Cubed F1, when evaluated on DROC. Due to this barrier, many researchers do not use a fully-fledged coreference resolution when they extract character networks, but only focus on a more forgiving subset -- the names. For novels such as Alice's Adventures in Wonderland by Lewis Carroll, this would however only result in a network in which many important characters are missing. 
In order to integrate important characters into the network that are not named by the author, this work makes use of automatic detection of speaker and addressees for direct speech utterances (all entities involved in a dialog are considered to be of importance). This problem is by itself not an easy task, however the most successful system analysed in this thesis is able to correctly determine the speaker to about 85\% of the utterances as well as about 65\% of the addressees. This speaker information can not only help to identify the most dominant characters, but also serves as a way to model the relations between entities. During the span of this work, components have been developed to model relations between characters using speaker attribution, using co-occurrences as well as by the usage of true interactions, for which yet again a dataset was annotated using ATHEN. Furthermore, since relations between characters are usually typed, a component for the extraction of a typed relation was developed. Similar to the experiments for the character reference detection, a combination of a rule based and a Maximum Entropy classifier yielded the best overall results, with the extraction of family relations showing a score of about 80\% and the quality of love relations with a score of about 50\%. For family relations, a kernel for a Support Vector Machine was developed that even exceeded the scores of the combined approach but is behind on the other labels. In addition, this work presents new ways to evaluate automatically extracted networks without the need of domain experts, instead it relies on the usage of expert summaries. It also refrains from the uses of social network analysis for the evaluation, but instead presents ranked evaluations using Precision@k and the Spearman Rank correlation coefficient for the evaluation of the nodes and edges of the network. 
An analysis using these metrics showed that the central characters of a novel are contained with high probability but the quality drops rather fast if more than five entities are analyzed. The quality of the edges is mainly dominated by the quality of the coreference resolution and the correlation coefficient between gold edges and system edges therefore varies between 30 and 60\%. All developed components are aggregated alongside a large set of other preprocessing modules in the Kallimachos pipeline and can be reused without any restrictions.}, subject = {Textanalyse}, language = {en} } @phdthesis{Roth2020, author = {Roth, Daniel}, title = {Intrapersonal, Interpersonal, and Hybrid Interactions in Virtual Reality}, doi = {10.25972/OPUS-18862}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-188627}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2020}, abstract = {Virtual reality and related media and communication technologies have a growing impact on professional application fields and our daily life. Virtual environments have the potential to change the way we perceive ourselves and how we interact with others. In comparison to other technologies, virtual reality allows for the convincing display of a virtual self-representation, an avatar, to oneself and also to others. This is referred to as user embodiment. Avatars can be of varying realism and abstraction in their appearance and in the behaviors they convey. Such user-embodying interfaces, in turn, can impact the perception of the self as well as the perception of interactions. For researchers, designers, and developers it is of particular interest to understand these perceptual impacts, to apply them to therapy, assistive applications, social platforms, or games, for example. The present thesis investigates and relates these impacts with regard to three areas: intrapersonal effects, interpersonal effects, and effects of social augmentations provided by the simulation. 
With regard to intrapersonal effects, we specifically explore which simulation properties impact the illusion of owning and controlling a virtual body, as well as a perceived change in body schema. Our studies lead to the construction of an instrument to measure these dimensions and our results indicate that these dimensions are especially affected by the level of immersion, the simulation latency, as well as the level of personalization of the avatar. With regard to interpersonal effects we compare physical and user-embodied social interactions, as well as different degrees of freedom in the replication of nonverbal behavior. Our results suggest that functional levels of interaction are maintained, whereas aspects of presence can be affected by avatar-mediated interactions, and collaborative motor coordination can be disturbed by immersive simulations. Social interaction is composed of many unknown symbols and harmonic patterns that define our understanding and interpersonal rapport. For successful virtual social interactions, a mere replication of physical world behaviors to virtual environments may seem feasible. However, the potential of mediated social interactions goes beyond this mere replication. In a third vein of research, we propose and evaluate alternative concepts on how computers can be used to actively engage in mediating social interactions, namely hybrid avatar-agent technologies. Specifically, we investigated the possibilities to augment social behaviors by modifying and transforming user input according to social phenomena and behavior, such as nonverbal mimicry, directed gaze, joint attention, and grouping. Based on our results we argue that such technologies could be beneficial for computer-mediated social interactions such as to compensate for lacking sensory input and disturbances in data transmission or to increase aspects of social presence by visual substitution or amplification of social behaviors. 
Based on related work and presented findings, the present thesis proposes the perspective of considering computers as social mediators. Concluding from prototypes and empirical studies, the potential of technology to be an active mediator of social perception with regard to the perception of the self, as well as the perception of social interactions may benefit our society by enabling further methods for diagnosis, treatment, and training, as well as the inclusion of individuals with social disorders. To this regard, we discuss implications for our society and ethical aspects. This thesis extends previous empirical work and further presents novel instruments, concepts, and implications to open up new perspectives for the development of virtual reality, mixed reality, and augmented reality applications.}, subject = {Virtuelle Realit{\"a}t}, language = {en} } @phdthesis{Dietrich2019, author = {Dietrich, Georg}, title = {Ad Hoc Information Extraction in a Clinical Data Warehouse with Case Studies for Data Exploration and Consistency Checks}, doi = {10.25972/OPUS-18464}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-184642}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2019}, abstract = {The importance of Clinical Data Warehouses (CDW) has increased significantly in recent years as they support or enable many applications such as clinical trials, data mining, and decision making. CDWs integrate Electronic Health Records which still contain a large amount of text data, such as discharge letters or reports on diagnostic findings in addition to structured and coded data like ICD-codes of diagnoses. Existing CDWs hardly support features to gain information covered in texts. Information extraction methods offer a solution for this problem but they have a high and long development effort, which can only be carried out by computer scientists. Moreover, such systems only exist for a few medical domains. 
This paper presents a method empowering clinicians to extract information from texts on their own. Medical concepts can be extracted ad hoc from e.g. discharge letters, thus physicians can work promptly and autonomously. The proposed system achieves these improvements by efficient data storage, preprocessing, and with powerful query features. Negations in texts are recognized and automatically excluded; in addition, the context of information is determined and undesired facts are filtered, such as historical events or references to other persons (family history). Context-sensitive queries ensure the semantic integrity of the concepts to be extracted. A new feature not available in other CDWs is to query numerical concepts in texts and even filter them (e.g. BMI > 25). The retrieved values can be extracted and exported for further analysis. This technique is implemented within the efficient architecture of the PaDaWaN CDW and evaluated with comprehensive and complex tests. The results outperform similar approaches reported in the literature. Ad hoc IE determines the results in a few (milli)seconds and a user-friendly GUI enables interactive working, allowing flexible adaptation of the extraction. In addition, the applicability of this system is demonstrated in three real-world applications at the W{\"u}rzburg University Hospital (UKW). Several drug trend studies are replicated: Findings of five studies on high blood pressure, atrial fibrillation and chronic renal failure can be partially or completely confirmed in the UKW. Another case study evaluates the prevalence of heart failure in inpatient hospitals using an algorithm that extracts information with ad hoc IE from discharge letters and echocardiogram reports (e.g. LVEF < 45) and other sources of the hospital information system. This study reveals that the use of ICD codes leads to a significant underestimation (31\%) of the true prevalence of heart failure. 
The third case study evaluates the consistency of diagnoses by comparing structured ICD-10-coded diagnoses with the diagnoses described in the diagnostic section of the discharge letter. These diagnoses are extracted from texts with ad hoc IE, using synonyms generated with a novel method. The developed approach can extract diagnoses from the discharge letter with a high accuracy and furthermore it can prove the degree of consistency between the coded and reported diagnoses.}, subject = {Information Extraction}, language = {en} } @phdthesis{Peng2019, author = {Peng, Dongliang}, title = {An Optimization-Based Approach for Continuous Map Generalization}, edition = {1. Auflage}, publisher = {W{\"u}rzburg University Press}, address = {W{\"u}rzburg}, isbn = {978-3-95826-104-4}, doi = {10.25972/WUP-978-3-95826-105-1}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-174427}, school = {Universit{\"a}t W{\"u}rzburg}, pages = {xv, 132}, year = {2019}, abstract = {Maps are the main tool to represent geographical information. Geographical information is usually scale-dependent, so users need to have access to maps at different scales. In our digital age, the access is realized by zooming. As discrete changes during the zooming tend to distract users, smooth changes are preferred. This is why some digital maps are trying to make the zooming as continuous as they can. The process of producing maps at different scales with smooth changes is called continuous map generalization. In order to produce maps of high quality, cartographers often take into account additional requirements. These requirements are transferred to models in map generalization. Optimization for map generalization is important not only because it finds optimal solutions in the sense of the models, but also because it helps us to evaluate the quality of the models. Optimization, however, becomes more delicate when we deal with continuous map generalization. 
In this area, there are requirements not only for a specific map but also for relations between maps at different scales. This thesis is about continuous map generalization based on optimization. First, we show the background of our research topics. Second, we find optimal sequences for aggregating land-cover areas. We compare the A$^{\!\star}$ algorithm and integer linear programming in completing this task. Third, we continuously generalize county boundaries to provincial boundaries based on compatible triangulations. We morph between the two sets of boundaries, using dynamic programming to compute the correspondence. Fourth, we continuously generalize buildings to built-up areas by aggregating and growing. In this work, we group buildings with the help of a minimum spanning tree. Fifth, we define vertex trajectories that allow us to morph between polylines. We require that both the angles and the edge lengths change linearly over time. As it is impossible to fulfill all of these requirements simultaneously, we mediate between them using least-squares adjustment. Sixth, we discuss the performance of some commonly used data structures for a specific spatial problem. Seventh, we conclude this thesis and present open problems.}, subject = {Generalisierung}, language = {en} } @phdthesis{Walter2019, author = {Walter, J{\"u}rgen Christian}, title = {Automation in Software Performance Engineering Based on a Declarative Specification of Concerns}, doi = {10.25972/OPUS-18090}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-180904}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2019}, abstract = {Software performance is of particular relevance to software system design, operation, and evolution because it has a significant impact on key business indicators. During the life-cycle of a software system, its implementation, configuration, and deployment are subject to multiple changes that may affect the end-to-end performance characteristics. 
Consequently, performance analysts continually need to provide answers to and act based on performance-relevant concerns. To ensure a desired level of performance, software performance engineering provides a plethora of methods, techniques, and tools for measuring, modeling, and evaluating performance properties of software systems. However, the answering of performance concerns is subject to a significant semantic gap between the level on which performance concerns are formulated and the technical level on which performance evaluations are actually conducted. Performance evaluation approaches come with different strengths and limitations concerning, for example, accuracy, time-to-result, or system overhead. For the involved stakeholders, it can be an elaborate process to reasonably select, parameterize and correctly apply performance evaluation approaches, and to filter and interpret the obtained results. An additional challenge is that available performance evaluation artifacts may change over time, which requires to switch between different measurement-based and model-based performance evaluation approaches during the system evolution. At model-based analysis, the effort involved in creating performance models can also outweigh their benefits. To overcome the deficiencies and enable an automatic and holistic evaluation of performance throughout the software engineering life-cycle requires an approach that: (i) integrates multiple types of performance concerns and evaluation approaches, (ii) automates performance model creation, and (iii) automatically selects an evaluation methodology tailored to a specific scenario. This thesis presents a declarative approach —called Declarative Performance Engineering (DPE)— to automate performance evaluation based on a human-readable specification of performance-related concerns. To this end, we separate the definition of performance concerns from their solution. 
The primary scientific contributions presented in this thesis are: A declarative language to express performance-related concerns and a corresponding processing framework: We provide a language to specify performance concerns independent of a concrete performance evaluation approach. Besides the specification of functional aspects, the language allows to include non-functional tradeoffs optionally. To answer these concerns, we provide a framework architecture and a corresponding reference implementation to process performance concerns automatically. It allows to integrate arbitrary performance evaluation approaches and is accompanied by reference implementations for model-based and measurement-based performance evaluation. Automated creation of architectural performance models from execution traces: The creation of performance models can be subject to significant efforts outweighing the benefits of model-based performance evaluation. We provide a model extraction framework that creates architectural performance models based on execution traces, provided by monitoring tools. The framework separates the derivation of generic information from model creation routines. To derive generic information, the framework combines state-of-the-art extraction and estimation techniques. We isolate object creation routines specified in a generic model builder interface based on concepts present in multiple performance-annotated architectural modeling formalisms. To create model extraction for a novel performance modeling formalism, developers only need to write object creation routines instead of creating model extraction software from scratch when reusing the generic framework. Automated and extensible decision support for performance evaluation approaches: We present a methodology and tooling for the automated selection of a performance evaluation approach tailored to the user concerns and application scenario. 
To this end, we propose to decouple the complexity of selecting a performance evaluation approach for a given scenario by providing solution approach capability models and a generic decision engine. The proposed capability meta-model enables to describe functional and non-functional capabilities of performance evaluation approaches and tools at different granularities. In contrast to existing tree-based decision support mechanisms, the decoupling approach allows to easily update characteristics of solution approaches as well as appending new rating criteria and thereby stay abreast of evolution in performance evaluation tooling and system technologies. Time-to-result estimation for model-based performance prediction: The time required to execute a model-based analysis plays an important role in different decision processes. For example, evaluation scenarios might require the prediction results to be available in a limited period of time such that the system can be adapted in time to ensure the desired quality of service. We propose a method to estimate the time-to-result for model-based performance prediction based on model characteristics and analysis parametrization. We learn a prediction model using performance-relevant features that we determined using statistical tests. We implement the approach and demonstrate its practicability by applying it to analyze a simulation-based multi-step performance evaluation approach for a representative architectural performance modeling formalism. We validate each of the contributions based on representative case studies. The evaluation of automatic performance model extraction for two case study systems shows that the resulting models can accurately predict the performance behavior. Prediction accuracy errors are below 3\% for resource utilization and mostly less than 20\% for service response time. 
The separate evaluation of the reusability shows that the presented approach lowers the implementation efforts for automated model extraction tools by up to 91\%. Based on two case studies applying measurement-based and model-based performance evaluation techniques, we demonstrate the suitability of the declarative performance engineering framework to answer multiple kinds of performance concerns customized to non-functional goals. Subsequently, we discuss reduced efforts in applying performance analyses using the integrated and automated declarative approach. Also, the evaluation of the declarative framework reviews benefits and savings integrating performance evaluation approaches into the declarative performance engineering framework. We demonstrate the applicability of the decision framework for performance evaluation approaches by applying it to depict existing decision trees. Then, we show how we can quickly adapt to the evolution of performance evaluation methods which is challenging for static tree-based decision support systems. At this, we show how to cope with the evolution of functional and non-functional capabilities of performance evaluation software and explain how to integrate new approaches. Finally, we evaluate the accuracy of the time-to-result estimation for a set of machine learning algorithms and different training datasets. The predictions exhibit a mean percentage error below 20\%, which can be further improved by including performance evaluations of the considered model into the training data. The presented contributions represent a significant step towards an integrated performance engineering process that combines the strengths of model-based and measurement-based performance evaluation. The proposed performance concern language in conjunction with the processing framework significantly reduces the complexity of applying performance evaluations for all stakeholders. 
Thereby it enables performance awareness throughout the software engineering life-cycle. The proposed performance concern language removes the semantic gap between the level on which performance concerns are formulated and the technical level on which performance evaluations are actually conducted by the user.}, subject = {Software}, language = {en} } @phdthesis{Niebler2019, author = {Niebler, Thomas}, title = {Extracting and Learning Semantics from Social Web Data}, doi = {10.25972/OPUS-17866}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-178666}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2019}, abstract = {Making machines understand natural language is a dream of mankind that has existed for a very long time. Early attempts at programming machines to converse with humans in a supposedly intelligent way relied on phrase lists and simple keyword matching. However, such approaches cannot provide semantically adequate answers, as they do not consider the specific meaning of the conversation. Thus, if we want to enable machines to actually understand language, we need to be able to access semantically relevant background knowledge. For this, it is possible to query so-called ontologies, which are large networks containing knowledge about real-world entities and their semantic relations. However, creating such ontologies is a tedious task, as often extensive expert knowledge is required. Thus, we need to find ways to automatically construct and update ontologies that fit human intuition of semantics and semantic relations. More specifically, we need to determine semantic entities and find relations between them. While this is usually done on large corpora of unstructured text, previous work has shown that we can at least facilitate the first issue of extracting entities by considering special data such as tagging data or human navigational paths. 
Here, we do not need to detect the actual semantic entities, as they are already provided because of the way those data are collected. Thus we can mainly focus on the problem of assessing the degree of semantic relatedness between tags or web pages. However, there exist several issues which need to be overcome, if we want to approximate human intuition of semantic relatedness. For this, it is necessary to represent words and concepts in a way that allows easy and highly precise semantic characterization. This also largely depends on the quality of data from which these representations are constructed. In this thesis, we extract semantic information from both tagging data created by users of social tagging systems and human navigation data in different semantic-driven social web systems. Our main goal is to construct high quality and robust vector representations of words which can then be used to measure the relatedness of semantic concepts. First, we show that navigation in the social media systems Wikipedia and BibSonomy is driven by a semantic component. After this, we discuss and extend methods to model the semantic information in tagging data as low-dimensional vectors. Furthermore, we show that tagging pragmatics influences different facets of tagging semantics. We then investigate the usefulness of human navigational paths in several different settings on Wikipedia and BibSonomy for measuring semantic relatedness. Finally, we propose a metric-learning based algorithm to adapt pre-trained word embeddings to datasets containing human judgment of semantic relatedness. This work contributes to the field of studying semantic relatedness between words by proposing methods to extract semantic relatedness from web navigation, learn high-quality and low-dimensional word representations from tagging data, and to learn semantic relatedness from any kind of vector representation by exploiting human feedback. 
Applications first and foremost lie in ontology learning for the Semantic Web, but also semantic search or query expansion.}, subject = {Semantik}, language = {en} } @phdthesis{vonKistowski2019, author = {von Kistowski, J{\'o}akim Gunnarsson}, title = {Measuring, Rating, and Predicting the Energy Efficiency of Servers}, doi = {10.25972/OPUS-17847}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-178478}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2019}, abstract = {Energy efficiency of computing systems has become an increasingly important issue over the last decades. In 2015, data centers were responsible for 2\% of the world's greenhouse gas emissions, which is roughly the same as the amount produced by air travel. In addition to these environmental concerns, power consumption of servers in data centers results in significant operating costs, which increase by at least 10\% each year. To address this challenge, the U.S. EPA and other government agencies are considering the use of novel measurement methods in order to label the energy efficiency of servers. The energy efficiency and power consumption of a server is subject to a great number of factors, including, but not limited to, hardware, software stack, workload, and load level. This huge number of influencing factors makes measuring and rating of energy efficiency challenging. It also makes it difficult to find an energy-efficient server for a specific use-case. Among others, server provisioners, operators, and regulators would profit from information on the servers in question and on the factors that affect those servers' power consumption and efficiency. However, we see a lack of measurement methods and metrics for energy efficiency of the systems under consideration. Even assuming that a measurement methodology existed, making decisions based on its results would be challenging. Power prediction methods that make use of these results would aid in decision making. 
They would enable potential server customers to make better purchasing decisions and help operators predict the effects of potential reconfigurations. Existing energy efficiency benchmarks cannot fully address these challenges, as they only measure single applications at limited sets of load levels. In addition, existing efficiency metrics are not helpful in this context, as they are usually a variation of the simple performance per power ratio, which is only applicable to single workloads at a single load level. Existing data center efficiency metrics, on the other hand, express the efficiency of the data center space and power infrastructure, not focusing on the efficiency of the servers themselves. Power prediction methods for not-yet-available systems that could make use of the results provided by a comprehensive power rating methodology are also lacking. Existing power prediction models for hardware designers have a very fine level of granularity and detail that would not be useful for data center operators. This thesis presents a measurement and rating methodology for energy efficiency of servers and an energy efficiency metric to be applied to the results of this methodology. We also design workloads, load intensity and distribution models, and mechanisms that can be used for energy efficiency testing. Based on this, we present power prediction mechanisms and models that utilize our measurement methodology and its results for power prediction. Specifically, the six major contributions of this thesis are: We present a measurement methodology and metrics for energy efficiency rating of servers that use multiple, specifically chosen workloads at different load levels for a full system characterization. We evaluate the methodology and metric with regard to their reproducibility, fairness, and relevance. 
We investigate the power and performance variations of test results and show fairness of the metric through a mathematical proof and a correlation analysis on a set of 385 servers. We evaluate the metric's relevance by showing the relationships that can be established between metric results and third-party applications. We create models and extraction mechanisms for load profiles that vary over time, as well as load distribution mechanisms and policies. The models are designed to be used to define arbitrary dynamic load intensity profiles that can be leveraged for benchmarking purposes. The load distribution mechanisms place workloads on computing resources in a hierarchical manner. Our load intensity models can be extracted in less than 0.2 seconds and our resulting models feature a median modeling error of 12.7\% on average. In addition, our new load distribution strategy can save up to 10.7\% of power consumption on a single server node. We introduce an approach to create small-scale workloads that emulate the power consumption-relevant behavior of large-scale workloads by approximating their CPU performance counter profile, and we introduce TeaStore, a distributed, micro-service-based reference application. TeaStore can be used to evaluate power and performance model accuracy, elasticity of cloud auto-scalers, and the effectiveness of power saving mechanisms for distributed systems. We show that we are capable of emulating the power consumption behavior of realistic workloads with a mean deviation less than 10\% and down to 0.2 watts (1\%). We demonstrate the use of TeaStore in the context of performance model extraction and cloud auto-scaling also showing that it may generate workloads with different effects on the power consumption of the system under consideration. We present a method for automated selection of interpolation strategies for performance and power characterization. 
We also introduce a configuration approach for polynomial interpolation functions of varying degrees that improves prediction accuracy for system power consumption for a given system utilization. We show that, in comparison to regression, our automated interpolation method selection and configuration approach improves modeling accuracy by 43.6\% if additional reference data is available and by 31.4\% if it is not. We present an approach for explicit modeling of the impact a virtualized environment has on power consumption and a method to predict the power consumption of a software application. Both methods use results produced by our measurement methodology to predict the respective power consumption for servers that are otherwise not available to the person making the prediction. Our methods are able to predict power consumption reliably for multiple hypervisor configurations and for the target application workloads. Application workload power prediction features a mean average absolute percentage error of 9.5\%. Finally, we propose an end-to-end modeling approach for predicting the power consumption of component placements at run-time. The model can also be used to predict the power consumption at load levels that have not yet been observed on the running system. We show that we can predict the power consumption of two different distributed web applications with a mean absolute percentage error of 2.2\%. In addition, we can predict the power consumption of a system at a previously unobserved load level and component distribution with an error of 1.2\%. The contributions of this thesis already show a significant impact in science and industry. The presented efficiency rating methodology, including its metric, have been adopted by the U.S. EPA in the latest version of the ENERGY STAR Computer Server program. They are also being considered by additional regulatory agencies, including the EU Commission and the China National Institute of Standardization. 
In addition, the methodology's implementation and the underlying methodology itself have already found use in several research publications. Regarding future work, we see a need for new workloads targeting specialized server hardware. At the moment, we are witnessing a shift in execution hardware to specialized machine learning chips, general purpose GPU computing, FPGAs being embedded into compute servers, etc. To ensure that our measurement methodology remains relevant, workloads covering these areas are required. Similarly, power prediction models must be extended to cover these new scenarios.}, subject = {Benchmarking}, language = {en} } @phdthesis{Bangert2019, author = {Bangert, Philip}, title = {Magnetic Attitude Control of Miniature Satellites and its Extension towards Orbit Control using an Electric Propulsion System}, isbn = {978-3-945459-28-7}, issn = {1868-7474}, doi = {10.25972/OPUS-17702}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-177020}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2019}, abstract = {The attitude and orbit control system of pico- and nano-satellites to date is one of the bottlenecks for future scientific and commercial applications. A performance increase while keeping with the satellites' restrictions will enable new space missions especially for the smallest of the CubeSat classes. This work addresses methods to measure and improve the satellite's attitude pointing and orbit control performance based on advanced sensor data analysis and optimized on-board software concepts. These methods are applied to spaceborne satellites and future CubeSat missions to demonstrate their validity. An in-orbit calibration procedure for a typical CubeSat attitude sensor suite is developed and applied to the UWE-3 satellite in space. Subsequently, a method to estimate the attitude determination accuracy without the help of an external reference sensor is developed. 
Using this method, it is shown that the UWE-3 satellite achieves an in-orbit attitude determination accuracy of about 2°. An advanced data analysis of the attitude motion of a miniature satellite is used in order to estimate the main attitude disturbance torque in orbit. It is shown, that the magnetic disturbance is by far the most significant contribution for miniature satellites and a method to estimate the residual magnetic dipole moment of a satellite is developed. Its application to three CubeSats currently in orbit reveals that magnetic disturbances are a common issue for this class of satellites. The dipole moments measured are between 23.1mAm² and 137.2mAm². In order to autonomously estimate and counteract this disturbance in future missions an on-board magnetic dipole estimation algorithm is developed. The autonomous neutralization of such disturbance torques together with the simplification of attitude control for the satellite operator is the focus of a novel on-board attitude control software architecture. It incorporates disturbance torques acting on the satellite and automatically optimizes the control output. Its application is demonstrated in space on board of the UWE-3 satellite through various attitude control experiments of which the results are presented here. The integration of a miniaturized electric propulsion system will enable CubeSats to perform orbit control and, thus, open up new application scenarios. The in-orbit characterization, however, poses the problem of precisely measuring very low thrust levels in the order of µN. A method to measure this thrust based on the attitude dynamics of the satellite is developed and evaluated in simulation. It is shown, that the demonstrator mission UWE-4 will be able to measure these thrust levels with a high accuracy of 1\% for thrust levels higher than 1µN. 
The orbit control capabilities of UWE-4 using its electric propulsion system are evaluated and a hybrid attitude control system making use of the satellite's magnetorquers and the electric propulsion system is developed. It is based on the flexible attitude control architecture mentioned before and thrust vector pointing accuracies of better than 2° can be achieved. This results in a thrust delivery of more than 99\% of the desired acceleration in the target direction.}, subject = {Satellit}, language = {en} } @phdthesis{Metter2019, author = {Metter, Christopher Valentin}, title = {Resilience, Availabilty, and Serviceability Evaluation in Software-defined Networks}, issn = {1432-8801}, doi = {10.25972/OPUS-17678}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-176788}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2019}, abstract = {With the introduction of Software-defined Networking (SDN) in the late 2000s, not only a new research field has been created, but a paradigm shift was initiated in the broad field of networking. The programmable network control by SDN is a big step, but also a stumbling block for many of the established network operators and vendors. As with any new technology the question about the maturity and the production-readiness of it arises. Therefore, this thesis picks specific features of SDN and analyzes its performance, reliability, and availability in scenarios that can be expected in production deployments. The first SDN topic is the performance impact of application traffic in the data plane on the control plane. Second, reliability and availability concerns of SDN deployments are exemplarily analyzed by evaluating the detection performance of a common SDN controller. 
Thirdly, the performance of P4, a technology that enhances SDN, or better its impact of certain control operations on the processing performance is evaluated.}, subject = {Leistungsbewertung}, language = {en} } @phdthesis{Albert2019, author = {Albert, Michael}, title = {Intelligent analysis of medical data in a generic telemedicine infrastructure}, isbn = {978-3-945459-26-3}, doi = {10.25972/OPUS-17421}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-174213}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2019}, abstract = {Telemedicine uses telecommunication and information technology to provide health care services over spatial distances. In the upcoming demographic changes towards an older average population age, especially rural areas suffer from a decreasing doctor to patient ratio as well as a limited amount of available medical specialists in acceptable distance. These areas could benefit the most from telemedicine applications as they are known to improve access to medical services, medical expertise and can also help to mitigate critical or emergency situations. Although the possibilities of telemedicine applications exist in the entire range of healthcare, current systems focus on one specific disease while using dedicated hardware to connect the patient with the supervising telemedicine center. This thesis describes the development of a telemedical system which follows a new generic design approach. This bridges the gap of existing approaches that only tackle one specific application. The proposed system on the contrary aims at supporting as many diseases and use cases as possible by taking all the stakeholders into account at the same time. To address the usability and acceptance of the system it is designed to use standardized hardware like commercial medical sensors and smartphones for collecting medical data of the patients and transmitting them to the telemedical center. 
The smartphone can also act as interface to the patient for health questionnaires or feedback. The system can handle the collection and transport of medical data, analysis and visualization of the data as well as providing a real time communication with video and audio between the users. On top of the generic telemedical framework the issue of scalability is addressed by integrating a rule-based analysis tool for the medical data. Rules can be easily created by medical personnel via a visual editor and can be personalized for each patient. The rule-based analysis tool is extended by multiple options for visualization of the data, mechanisms to handle complex rules and options for performing actions like raising alarms or sending automated messages. It is sometimes hard for the medical experts to formulate their knowledge into rules and there may be information in the medical data that is not yet known. This is why a machine learning module was integrated into the system. It uses the incoming medical data of the patients to learn new rules that are then presented to the medical personnel for inspection. This is in line with European legislation where the human still needs to be in charge of such decisions. Overall, we were able to show the benefit of the generic approach by evaluating it in three completely different medical use cases derived from specific application needs: monitoring of COPD (chronic obstructive pulmonary disease) patients, support of patients performing dialysis at home and councils of intensive-care experts. In addition the system was used for a non-medical use case: monitoring and optimization of industrial machines and robots. In all of the mentioned cases, we were able to prove the robustness of the generic approach with real users of the corresponding domain. 
This is why we can propose this approach for future development of telemedical systems.}, subject = {Telemedizin}, language = {en} } @phdthesis{Lange2019, author = {Lange, Stanislav}, title = {Optimization of Controller Placement and Information Flow in Softwarized Networks}, issn = {1432-8801}, doi = {10.25972/OPUS-17457}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-174570}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2019}, abstract = {The Software Defined Networking (SDN) paradigm offers network operators numerous improvements in terms of flexibility, scalability, as well as cost efficiency and vendor independence. However, in order to maximize the benefit from these features, several new challenges in areas such as management and orchestration need to be addressed. This dissertation makes contributions towards three key topics from these areas. Firstly, we design, implement, and evaluate two multi-objective heuristics for the SDN controller placement problem. Secondly, we develop and apply mechanisms for automated decision making based on the Pareto frontiers that are returned by the multi-objective optimizers. Finally, we investigate and quantify the performance benefits for the SDN control plane that can be achieved by integrating information from external entities such as Network Management Systems (NMSs) into the control loop. Our evaluation results demonstrate the impact of optimizing various parameters of softwarized networks at different levels and are used to derive guidelines for an efficient operation.}, subject = {Leistungsbewertung}, language = {en} }