@comment{
  Bibliography export: four PhD theses, three working papers (techreport)
  and three journal articles. All non-ASCII characters are written as LaTeX
  escapes so the file works with classic 8-bit BibTeX as well as Biber.
  Words in titles that must keep their capitalisation are brace-protected.

  NOTE(review): the three techreport entries have no institution field,
  which the techreport type requires; the publishing institution is not
  stated in the data -- TODO confirm and add.
  NOTE(review): the techreport entries carry the workshop name in both
  series and journal; journal is not a techreport field and is presumably
  an export artefact -- kept unchanged, verify before relying on it.
  NOTE(review): pages = 5 / pages = 4 on the techreport entries look like
  page counts rather than page ranges -- confirm (biblatex: pagetotal).
  NOTE(review): SchittkowskiZilloberZotemantel1994 is an article without a
  journal field and with a placeholder abstract -- source data incomplete.
}

@phdthesis{Schloer2022,
  author   = {Schl{\"o}r, Daniel},
  title    = {Detecting Anomalies in Transaction Data},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2022},
  doi      = {10.25972/OPUS-29856},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-298569},
  abstract = {Detecting anomalies in transaction data is an important task with a high potential to avoid financial loss due to irregularities deliberately or inadvertently carried out, such as credit card fraud, occupational fraud in companies or ordering and accounting errors. With ongoing digitization of our world, data-driven approaches, including machine learning, can draw benefit from data with less manual effort and feature engineering. A large variety of machine learning-based anomaly detection methods approach this by learning a precise model of normality from which anomalies can be distinguished. Modeling normality in transactional data, however, requires to capture distributions and dependencies within the data precisely with special attention to numerical dependencies such as quantities, prices or amounts. To implicitly model numerical dependencies, Neural Arithmetic Logic Units have been proposed as neural architecture. In practice, however, these have stability and precision issues. Therefore, we first develop an improved neural network architecture, iNALU, which is designed to better model numerical dependencies as found in transaction data. We compare this architecture to the previous approach and show in several experiments of varying complexity that our novel architecture provides better precision and stability. We integrate this architecture into two generative neural network models adapted for transaction data and investigate how well normal behavior is modeled. We show that both architectures can successfully model normal transaction data, with our neural architecture improving generative performance for one model. Since categorical and numerical variables are common in transaction data, but many machine learning methods only process numerical representations, we explore different representation learning techniques to transform categorical transaction data into dense numerical vectors. We extend this approach by proposing an outlier-aware discretization, thus incorporating numerical attributes into the computation of categorical embeddings, and investigate latent spaces, as well as quantitative performance for anomaly detection. Next, we evaluate different scenarios for anomaly detection on transaction data. We extend our iNALU architecture to a neural layer that can model both numerical and non-numerical dependencies and evaluate it in a supervised and one-class setting. We investigate the stability and generalizability of our approach and show that it outperforms a variety of models in the balanced supervised setting and performs comparably in the one-class setting. Finally, we evaluate three approaches to using a generative model as an anomaly detector and compare the anomaly detection performance.},
  subject  = {Anomalieerkennung},
  language = {en},
}

@article{SchittkowskiZilloberZotemantel1994,
  author   = {Schittkowski, K. and Zillober, Christian and Zotemantel, R.},
  title    = {Numerical comparison of nonlinear programming algorithms for structural optimization},
  year     = {1994},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-31971},
  abstract = {No abstract available},
  language = {en},
}

@phdthesis{Stauffert2022,
  author   = {Stauffert, Jan-Philipp},
  title    = {Temporal Confounding Effects in Virtual and Extended Reality Systems},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2022},
  doi      = {10.25972/OPUS-29060},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-290609},
  abstract = {Latency is an inherent problem of computing systems. Each computation takes time until the result is available. Virtual reality systems use elaborated computer resources to create virtual experiences. The latency of those systems is often ignored or assumed as small enough to provide a good experience. This cumulative thesis is comprised of published peer reviewed research papers exploring the behaviour and effects of latency. Contrary to the common description of time invariant latency, latency is shown to fluctuate. Few other researchers have looked into this time variant behaviour. This thesis explores time variant latency with a focus on randomly occurring latency spikes. Latency spikes are observed both for small algorithms and as end to end latency in complete virtual reality systems. Most latency measurements gather close to the mean latency with potentially multiple smaller clusters of larger latency values and rare extreme outliers. The latency behaviour differs for different implementations of an algorithm. Operating system schedulers and programming language environments such as garbage collectors contribute to the overall latency behaviour. The thesis demonstrates these influences on the example of different implementations of message passing. The plethora of latency sources result in an unpredictable latency behaviour. Measuring and reporting it in scientific experiments is important. This thesis describes established approaches to measuring latency and proposes an enhanced setup to gather detailed information. The thesis proposes to dissect the measured data with a stacked z-outlier-test to separate the clusters of latency measurements for better reporting. Latency in virtual reality applications can degrade the experience in multiple ways. The thesis focuses on cybersickness as a major detrimental effect. An approach to simulate time variant latency is proposed to make latency available as an independent variable in experiments to understand latency's effects. An experiment with modified latency shows that latency spikes can contribute to cybersickness. A review of related research shows that different time invariant latency behaviour also contributes to cybersickness.},
  subject  = {Virtuelle Realit{\"a}t},
  language = {en},
}

@techreport{GrigorjewSchumannDiederichetal.2023,
  author   = {Grigorjew, Alexej and Schumann, Lukas Kilian and Diederich, Philip and Ho{\ss}feld, Tobias and Kellerer, Wolfgang},
  title    = {Understanding the Performance of Different Packet Reception and Timestamping Methods in {Linux}},
  type     = {Working Paper},
  series   = {KuVS Fachgespr{\"a}ch - W{\"u}rzburg Workshop on Modeling, Analysis and Simulation of Next-Generation Communication Networks 2023 (WueWoWAS'23)},
  journal  = {KuVS Fachgespr{\"a}ch - W{\"u}rzburg Workshop on Modeling, Analysis and Simulation of Next-Generation Communication Networks 2023 (WueWoWAS'23)},
  pages    = {5},
  year     = {2023},
  doi      = {10.25972/OPUS-32206},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-322064},
  abstract = {This document briefly presents some renowned packet reception techniques for network packets in Linux systems. Further, it compares their performance when measuring packet timestamps with respect to throughput and accuracy. Both software and hardware timestamps are compared, and various parameters are examined, including frame size, link speed, network interface card, and CPU load. The results indicate that hardware timestamping offers significantly better accuracy with no downsides, and that packet reception techniques that avoid system calls offer superior measurement throughput.},
  language = {en},
}

@phdthesis{Freimann2022,
  author   = {Freimann, Andreas},
  title    = {Efficient Communication in Networks of Small Low {Earth} Orbit Satellites and Ground Stations},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2022},
  isbn     = {978-3-945459-41-6},
  doi      = {10.25972/OPUS-28052},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-280521},
  abstract = {With the miniaturization of satellites a fundamental change took place in the space industry. Instead of single big monolithic satellites nowadays more and more systems are envisaged consisting of a number of small satellites to form cooperating systems in space. The lower costs for development and launch as well as the spatial distribution of these systems enable the implementation of new scientific missions and commercial services. With this paradigm shift new challenges constantly emerge for satellite developers, particularly in the area of wireless communication systems and network protocols. Satellites in low Earth orbits and ground stations form dynamic space-terrestrial networks. The characteristics of these networks differ fundamentally from those of other networks. The resulting challenges with regard to communication system design, system analysis, packet forwarding, routing and medium access control as well as challenges concerning the reliability and efficiency of wireless communication links are addressed in this thesis. The physical modeling of space-terrestrial networks is addressed by analyzing existing satellite systems and communication devices, by evaluating measurements and by implementing a simulator for space-terrestrial networks. The resulting system and channel models were used as a basis for the prediction of the dynamic network topologies, link properties and channel interference. These predictions allowed for the implementation of efficient routing and medium access control schemes for space-terrestrial networks. Further, the implementation and utilization of software-defined ground stations is addressed, and a data upload scheme for the operation of small satellite formations is presented.},
  subject  = {Satellitenfunk},
  language = {en},
}

@phdthesis{Becker2018,
  author   = {Becker, Martin},
  title    = {Understanding Human Navigation using {Bayesian} Hypothesis Comparison},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2018},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-163522},
  abstract = {Understanding human navigation behavior has implications for a wide range of application scenarios. For example, insights into geo-spatial navigation in urban areas can impact city planning or public transport. Similarly, knowledge about navigation on the web can help to improve web site structures or service experience. In this work, we focus on a hypothesis-driven approach to address the task of understanding human navigation: We aim to formulate and compare ideas --- for example stemming from existing theory, literature, intuition, or previous experiments --- based on a given set of navigational observations. For example, we may compare whether tourists exploring a city walk ``short distances'' before taking their next photo vs. they tend to ``travel long distances between points of interest'', or whether users browsing Wikipedia ``navigate semantically'' vs. ``click randomly''. For this, the Bayesian method HypTrails has recently been proposed. However, while HypTrails is a straightforward and flexible approach, several major challenges remain: i) HypTrails does not account for heterogeneity (e.g., incorporating differently behaving user groups such as tourists and locals is not possible), ii) HypTrails does not support the user in conceiving novel hypotheses when confronted with a large set of possibly relevant background information or influence factors, e.g., points of interest, popularity of locations, time of the day, or user properties, and finally iii) formulating hypotheses can be technically challenging depending on the application scenario (e.g., due to continuous observations or temporal constraints). In this thesis, we address these limitations by introducing various novel methods and tools and explore a wide range of case studies. In particular, our main contributions are the methods MixedTrails and SubTrails which specifically address the first two limitations: MixedTrails is an approach for hypothesis comparison that extends the previously proposed HypTrails method to allow formulating and comparing heterogeneous hypotheses (e.g., incorporating differently behaving user groups). SubTrails is a method that supports hypothesis conception by automatically discovering interpretable subgroups with exceptional navigation behavior. In addition, our methodological contributions also include several tools consisting of a distributed implementation of HypTrails, a web application for visualizing geo-spatial human navigation in the context of background information, as well as a system for collecting, analyzing, and visualizing mobile participatory sensing data. Furthermore, we conduct case studies in many application domains, which encompass --- among others --- geo-spatial navigation based on photos from the photo-sharing platform Flickr, browsing behavior on the social tagging system BibSonomy, and task choosing behavior on a commercial crowdsourcing platform. In the process, we develop approaches to cope with application specific subtleties (like continuous observations and temporal constraints). The corresponding studies illustrate the variety of domains and facets in which navigation behavior can be studied and, thus, showcase the expressiveness, applicability, and flexibility of our methods. Using these methods, we present new aspects of navigational phenomena which ultimately help to better understand the multi-faceted characteristics of human navigation behavior.},
  subject  = {Bayes-Verfahren},
  language = {en},
}

@techreport{ElsayedRizk2022,
  author   = {Elsayed, Karim and Rizk, Amr},
  title    = {Response Times in Time-to-Live Caching Hierarchies under Random Network Delays},
  type     = {Working Paper},
  series   = {W{\"u}rzburg Workshop on Next-Generation Communication Networks (WueWoWas'22)},
  journal  = {W{\"u}rzburg Workshop on Next-Generation Communication Networks (WueWoWas'22)},
  pages    = {4},
  year     = {2022},
  doi      = {10.25972/OPUS-28084},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-280843},
  abstract = {Time-to-Live (TTL) caches decouple the occupancy of objects in cache through object-specific validity timers. State-of-the-art techniques provide exact methods for the calculation of object-specific hit probabilities given entire cache hierarchies with random inter-cache network delays. The system hit probability is a provider-centric metric as it relates to the origin offload, i.e., the decrease in the number of requests that are served by the content origin server. In this paper we consider a user-centric metric, i.e., the response time, which is shown to be structurally different from the system hit probability. Equipped with the state-of-the-art exact modeling technique using Markov-arrival processes we derive expressions for the expected object response time and pave a way for its optimization under network delays.},
  subject  = {Datennetz},
  language = {en},
}

@techreport{AlfredssonKasslerVestinetal.2022,
  author   = {Alfredsson, Rebecka and Kassler, Andreas and Vestin, Jonathan and Pieska, Marcus and Amend, Markus},
  title    = {Accelerating a Transport Layer based {5G} Multi-Access Proxy on {SmartNIC}},
  type     = {Working Paper},
  series   = {W{\"u}rzburg Workshop on Next-Generation Communication Networks (WueWoWas'22)},
  journal  = {W{\"u}rzburg Workshop on Next-Generation Communication Networks (WueWoWas'22)},
  pages    = {4},
  year     = {2022},
  doi      = {10.25972/OPUS-28079},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-280798},
  abstract = {Utilizing multiple access technologies such as 5G, 4G, and Wi-Fi within a coherent framework is currently standardized by 3GPP within 5G ATSSS. Indeed, distributing packets over multiple networks can lead to increased robustness, resiliency and capacity. A key part of such a framework is the multi-access proxy, which transparently distributes packets over multiple paths. As the proxy needs to serve thousands of customers, scalability and performance are crucial for operator deployments. In this paper, we leverage recent advancements in data plane programming, implement a multi-access proxy based on the MP-DCCP tunneling approach in P4 and hardware accelerate it by deploying the pipeline on a smartNIC. This is challenging due to the complex scheduling and congestion control operations involved. We present our pipeline and data structures design for congestion control and packet scheduling state management. Initial measurements in our testbed show that packet latency is in the range of 25 $\mu$s demonstrating the feasibility of our approach.},
  subject  = {Datennetz},
  language = {en},
}

@article{BencurovaShityakovSchaacketal.2022,
  author   = {Bencurova, Elena and Shityakov, Sergey and Schaack, Dominik and Kaltdorf, Martin and Sarukhanyan, Edita and Hilgarth, Alexander and Rath, Christin and Montenegro, Sergio and Roth, G{\"u}nter and Lopez, Daniel and Dandekar, Thomas},
  title    = {Nanocellulose composites as smart devices with chassis, light-directed {DNA} Storage, engineered electronic properties, and chip integration},
  journal  = {Frontiers in Bioengineering and Biotechnology},
  volume   = {10},
  year     = {2022},
  issn     = {2296-4185},
  doi      = {10.3389/fbioe.2022.869111},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-283033},
  abstract = {The rapid development of green and sustainable materials opens up new possibilities in the field of applied research. Such materials include nanocellulose composites that can integrate many components into composites and provide a good chassis for smart devices. In our study, we evaluate four approaches for turning a nanocellulose composite into an information storage or processing device: 1) nanocellulose can be a suitable carrier material and protect information stored in DNA. 2) Nucleotide-processing enzymes (polymerase and exonuclease) can be controlled by light after fusing them with light-gating domains; nucleotide substrate specificity can be changed by mutation or pH change (read-in and read-out of the information). 3) Semiconductors and electronic capabilities can be achieved: we show that nanocellulose is rendered electronic by iodine treatment replacing silicon including microstructures. Nanocellulose semiconductor properties are measured, and the resulting potential including single-electron transistors (SET) and their properties are modeled. Electric current can also be transported by DNA through G-quadruplex DNA molecules; these as well as classical silicon semiconductors can easily be integrated into the nanocellulose composite. 4) To elaborate upon miniaturization and integration for a smart nanocellulose chip device, we demonstrate pH-sensitive dyes in nanocellulose, nanopore creation, and kinase micropatterning on bacterial membranes as well as digital PCR micro-wells. Future application potential includes nano-3D printing and fast molecular processors (e.g., SETs) integrated with DNA storage and conventional electronics. This would also lead to environment-friendly nanocellulose chips for information processing as well as smart nanocellulose composites for biomedical applications and nano-factories.},
  language = {en},
}

@article{KrenzerMakowskiHekaloetal.2022,
  author   = {Krenzer, Adrian and Makowski, Kevin and Hekalo, Amar and Fitting, Daniel and Troya, Joel and Zoller, Wolfram G. and Hann, Alexander and Puppe, Frank},
  title    = {Fast machine learning annotation in the medical domain: a semi-automated video annotation tool for gastroenterologists},
  journal  = {BioMedical Engineering OnLine},
  volume   = {21},
  number   = {1},
  year     = {2022},
  doi      = {10.1186/s12938-022-01001-x},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-300231},
  abstract = {Background Machine learning, especially deep learning, is becoming more and more relevant in research and development in the medical domain. For all the supervised deep learning applications, data is the most critical factor in securing successful implementation and sustaining the progress of the machine learning model. Especially gastroenterological data, which often involves endoscopic videos, are cumbersome to annotate. Domain experts are needed to interpret and annotate the videos. To support those domain experts, we generated a framework. With this framework, instead of annotating every frame in the video sequence, experts are just performing key annotations at the beginning and the end of sequences with pathologies, e.g., visible polyps. Subsequently, non-expert annotators supported by machine learning add the missing annotations for the frames in-between. Methods In our framework, an expert reviews the video and annotates a few video frames to verify the object's annotations for the non-expert. In a second step, a non-expert has visual confirmation of the given object and can annotate all following and preceding frames with AI assistance. After the expert has finished, relevant frames will be selected and passed on to an AI model. This information allows the AI model to detect and mark the desired object on all following and preceding frames with an annotation. Therefore, the non-expert can adjust and modify the AI predictions and export the results, which can then be used to train the AI model. Results Using this framework, we were able to reduce workload of domain experts on average by a factor of 20 on our data. This is primarily due to the structure of the framework, which is designed to minimize the workload of the domain expert. Pairing this framework with a state-of-the-art semi-automated AI model enhances the annotation speed further. Through a prospective study with 10 participants, we show that semi-automated annotation using our tool doubles the annotation speed of non-expert annotators compared to a well-known state-of-the-art annotation tool. Conclusion In summary, we introduce a framework for fast expert annotation for gastroenterologists, which reduces the workload of the domain expert considerably while maintaining a very high annotation quality. The framework incorporates a semi-automated annotation system utilizing trained object detection models. The software and framework are open-source.},
  language = {en},
}