@article{SteiningerAbelZiegleretal.2023,
  author    = {Steininger, Michael and Abel, Daniel and Ziegler, Katrin and Krause, Anna and Paeth, Heiko and Hotho, Andreas},
  title     = {{ConvMOS}: climate model output statistics with deep learning},
  journal   = {Data Mining and Knowledge Discovery},
  volume    = {37},
  number    = {1},
  pages     = {136--166},
  year      = {2023},
  issn      = {1384-5810},
  doi       = {10.1007/s10618-022-00877-6},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-324213},
  abstract  = {Climate models are the tool of choice for scientists researching climate change. Like all models they suffer from errors, particularly systematic and location-specific representation errors. One way to reduce these errors is model output statistics (MOS) where the model output is fitted to observational data with machine learning. In this work, we assess the use of convolutional Deep Learning climate MOS approaches and present the ConvMOS architecture which is specifically designed based on the observation that there are systematic and location-specific errors in the precipitation estimates of climate models. We apply ConvMOS models to the simulated precipitation of the regional climate model REMO, showing that a combination of per-location model parameters for reducing location-specific errors and global model parameters for reducing systematic errors is indeed beneficial for MOS performance. We find that ConvMOS models can reduce errors considerably and perform significantly better than three commonly used MOS approaches and plain ResNet and U-Net models in most cases. Our results show that non-linear MOS models underestimate the number of extreme precipitation events, which we alleviate by training models specialized towards extreme precipitation events with the imbalanced regression method DenseLoss. While we consider climate MOS, we argue that aspects of ConvMOS may also be beneficial in other domains with geospatial data, such as air pollution modeling or weather forecasts.},
  subject   = {Klima},
  language  = {en}
}
@article{WienrichCarolusMarkusetal.2023,
  author    = {Wienrich, Carolin and Carolus, Astrid and Markus, Andr{\'e} and Augustin, Yannik and Pfister, Jan and Hotho, Andreas},
  title     = {Long-term effects of perceived friendship with intelligent voice assistants on usage behavior, user experience, and social perceptions},
  journal   = {Computers},
  volume    = {12},
  number    = {4},
  year      = {2023},
  issn      = {2073-431X},
  doi       = {10.3390/computers12040077},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-313552},
  abstract  = {Social patterns and roles can develop when users talk to intelligent voice assistants (IVAs) daily. The current study investigates whether users assign different roles to devices and how this affects their usage behavior, user experience, and social perceptions. Since social roles take time to establish, we equipped 106 participants with Alexa or Google assistants and some smart home devices and observed their interactions for nine months. We analyzed diverse subjective (questionnaire) and objective data (interaction data). By combining social science and data science analyses, we identified two distinct clusters---users who assigned a friendship role to IVAs over time and users who did not. Interestingly, these clusters exhibited significant differences in their usage behavior, user experience, and social perceptions of the devices. For example, participants who assigned a role to IVAs attributed more friendship to them used them more frequently, reported more enjoyment during interactions, and perceived more empathy for IVAs. In addition, these users had distinct personal requirements, for example, they reported more loneliness. This study provides valuable insights into the role-specific effects and consequences of voice assistants. Recent developments in conversational language models such as ChatGPT suggest that the findings of this study could make an important contribution to the design of dialogic human-AI interactions.},
  language  = {en}
}
@article{RackFernandoYalcinetal.2023,
  author    = {Rack, Christian and Fernando, Tamara and Yalcin, Murat and Hotho, Andreas and Latoschik, Marc Erich},
  title     = {Who is {Alyx}? {A} new behavioral biometric dataset for user identification in {XR}},
  journal   = {Frontiers in Virtual Reality},
  volume    = {4},
  year      = {2023},
  issn      = {2673-4192},
  doi       = {10.3389/frvir.2023.1272234},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-353979},
  abstract  = {Introduction: This paper addresses the need for reliable user identification in Extended Reality (XR), focusing on the scarcity of public datasets in this area. Methods: We present a new dataset collected from 71 users who played the game "Half-Life: Alyx" on an HTC Vive Pro for 45 min across two separate sessions. The dataset includes motion and eye-tracking data, along with physiological data from a subset of 31 users. Benchmark performance is established using two state-of-the-art deep learning architectures, Convolutional Neural Networks (CNN) and Gated Recurrent Units (GRU). Results: The best model achieved a mean accuracy of 95\% for user identification within 2 min when trained on the first session and tested on the second. Discussion: The dataset is freely available and serves as a resource for future research in XR user identification, thereby addressing a significant gap in the field. Its release aims to facilitate advancements in user identification methods and promote reproducibility in XR research.},
  language  = {en}
}
@article{HentschelKobsHotho2022,
  author    = {Hentschel, Simon and Kobs, Konstantin and Hotho, Andreas},
  title     = {{CLIP} knows image aesthetics},
  journal   = {Frontiers in Artificial Intelligence},
  volume    = {5},
  year      = {2022},
  issn      = {2624-8212},
  doi       = {10.3389/frai.2022.976235},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-297150},
  abstract  = {Most Image Aesthetic Assessment (IAA) methods use a pretrained ImageNet classification model as a base to fine-tune. We hypothesize that content classification is not an optimal pretraining task for IAA, since the task discourages the extraction of features that are useful for IAA, e.g., composition, lighting, or style. On the other hand, we argue that the Contrastive Language-Image Pretraining (CLIP) model is a better base for IAA models, since it has been trained using natural language supervision. Due to the rich nature of language, CLIP needs to learn a broad range of image features that correlate with sentences describing the image content, composition, environments, and even subjective feelings about the image. While it has been shown that CLIP extracts features useful for content classification tasks, its suitability for tasks that require the extraction of style-based features like IAA has not yet been shown. We test our hypothesis by conducting a three-step study, investigating the usefulness of features extracted by CLIP compared to features obtained from the last layer of a comparable ImageNet classification model. In each step, we get more computationally expensive. First, we engineer natural language prompts that let CLIP assess an image's aesthetic without adjusting any weights in the model. To overcome the challenge that CLIP's prompting only is applicable to classification tasks, we propose a simple but effective strategy to convert multiple prompts to a continuous scalar as required when predicting an image's mean aesthetic score. Second, we train a linear regression on the AVA dataset using image features obtained by CLIP's image encoder. The resulting model outperforms a linear regression trained on features from an ImageNet classification model. It also shows competitive performance with fully fine-tuned networks based on ImageNet, while only training a single layer.
Finally, by fine-tuning CLIP's image encoder on the AVA dataset, we show that CLIP only needs a fraction of training epochs to converge, while also performing better than a fine-tuned ImageNet model. Overall, our experiments suggest that CLIP is better suited as a base model for IAA methods than ImageNet pretrained networks.},
  language  = {en}
}
@article{WienrichCarolusRothIsigkeitetal.2022,
  author    = {Wienrich, Carolin and Carolus, Astrid and Roth-Isigkeit, David and Hotho, Andreas},
  title     = {Inhibitors and enablers to explainable {AI} success: a systematic examination of explanation complexity and individual characteristics},
  journal   = {Multimodal Technologies and Interaction},
  volume    = {6},
  number    = {12},
  year      = {2022},
  issn      = {2414-4088},
  doi       = {10.3390/mti6120106},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-297288},
  abstract  = {With the increasing adaptability and complexity of advisory artificial intelligence (AI)-based agents, the topics of explainable AI and human-centered AI are moving close together. Variations in the explanation itself have been widely studied, with some contradictory results. These could be due to users' individual differences, which have rarely been systematically studied regarding their inhibiting or enabling effect on the fulfillment of explanation objectives (such as trust, understanding, or workload). This paper aims to shed light on the significance of human dimensions (gender, age, trust disposition, need for cognition, affinity for technology, self-efficacy, attitudes, and mind attribution) as well as their interplay with different explanation modes (no, simple, or complex explanation). Participants played the game Deal or No Deal while interacting with an AI-based agent. The agent gave advice to the participants on whether they should accept or reject the deals offered to them. As expected, giving an explanation had a positive influence on the explanation objectives. However, the users' individual characteristics particularly reinforced the fulfillment of the objectives. The strongest predictor of objective fulfillment was the degree of attribution of human characteristics. The more human characteristics were attributed, the more trust was placed in the agent, advice was more likely to be accepted and understood, and important needs were satisfied during the interaction. Thus, the current work contributes to a better understanding of the design of explanations of an AI-based agent system that takes into account individual characteristics and meets the demand for both explainable and human-centered agent systems.},
  language  = {en}
}
@article{SteiningerKobsDavidsonetal.2021,
  author    = {Steininger, Michael and Kobs, Konstantin and Davidson, Padraig and Krause, Anna and Hotho, Andreas},
  title     = {Density-based weighting for imbalanced regression},
  journal   = {Machine Learning},
  volume    = {110},
  number    = {8},
  pages     = {2187--2211},
  year      = {2021},
  issn      = {1573-0565},
  doi       = {10.1007/s10994-021-06023-5},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-269177},
  abstract  = {In many real world settings, imbalanced data impedes model performance of learning algorithms, like neural networks, mostly for rare cases. This is especially problematic for tasks focusing on these rare occurrences. For example, when estimating precipitation, extreme rainfall events are scarce but important considering their potential consequences. While there are numerous well studied solutions for classification settings, most of them cannot be applied to regression easily. Of the few solutions for regression tasks, barely any have explored cost-sensitive learning which is known to have advantages compared to sampling-based methods in classification tasks. In this work, we propose a sample weighting approach for imbalanced regression datasets called DenseWeight and a cost-sensitive learning approach for neural network regression with imbalanced data called DenseLoss based on our weighting scheme. DenseWeight weights data points according to their target value rarities through kernel density estimation (KDE). DenseLoss adjusts each data point's influence on the loss according to DenseWeight, giving rare data points more influence on model training compared to common data points. We show on multiple differently distributed datasets that DenseLoss significantly improves model performance for rare data points through its density-based weighting scheme. Additionally, we compare DenseLoss to the state-of-the-art method SMOGN, finding that our method mostly yields better performance. Our approach provides more control over model training as it enables us to actively decide on the trade-off between focusing on common or rare cases through a single hyperparameter, allowing the training of better models for rare data points.},
  language  = {en}
}
@article{KoopmannStubbemannKapaetal.2021,
  author    = {Koopmann, Tobias and Stubbemann, Maximilian and Kapa, Matthias and Paris, Michael and Buenstorf, Guido and Hanika, Tom and Hotho, Andreas and J{\"a}schke, Robert and Stumme, Gerd},
  title     = {Proximity dimensions and the emergence of collaboration: a {HypTrails} study on {German} {AI} research},
  journal   = {Scientometrics},
  volume    = {126},
  number    = {12},
  pages     = {9847--9868},
  year      = {2021},
  issn      = {1588-2861},
  doi       = {10.1007/s11192-021-03922-1},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-269831},
  abstract  = {Creation and exchange of knowledge depends on collaboration. Recent work has suggested that the emergence of collaboration frequently relies on geographic proximity. However, being co-located tends to be associated with other dimensions of proximity, such as social ties or a shared organizational environment. To account for such factors, multiple dimensions of proximity have been proposed, including cognitive, institutional, organizational, social and geographical proximity. Since they strongly interrelate, disentangling these dimensions and their respective impact on collaboration is challenging. To address this issue, we propose various methods for measuring different dimensions of proximity. We then present an approach to compare and rank them with respect to the extent to which they indicate co-publications and co-inventions. We adapt the HypTrails approach, which was originally developed to explain human navigation, to co-author and co-inventor graphs. We evaluate this approach on a subset of the German research community, specifically academic authors and inventors active in research on artificial intelligence (AI). We find that social proximity and cognitive proximity are more important for the emergence of collaboration than geographic proximity.},
  language  = {en}
}
@article{SchloerRingHotho2020,
  author    = {Schl{\"o}r, Daniel and Ring, Markus and Hotho, Andreas},
  title     = {{iNALU}: Improved Neural Arithmetic Logic Unit},
  journal   = {Frontiers in Artificial Intelligence},
  volume    = {3},
  year      = {2020},
  issn      = {2624-8212},
  doi       = {10.3389/frai.2020.00071},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-212301},
  abstract  = {Neural networks have to capture mathematical relationships in order to learn various tasks. They approximate these relations implicitly and therefore often do not generalize well. The recently proposed Neural Arithmetic Logic Unit (NALU) is a novel neural architecture which is able to explicitly represent the mathematical relationships by the units of the network to learn operations such as summation, subtraction or multiplication. Although NALUs have been shown to perform well on various downstream tasks, an in-depth analysis reveals practical shortcomings by design, such as the inability to multiply or divide negative input values or training stability issues for deeper networks. We address these issues and propose an improved model architecture. We evaluate our model empirically in various settings from learning basic arithmetic operations to more complex functions. Our experiments indicate that our model solves stability issues and outperforms the original NALU model in means of arithmetic precision and convergence.},
  language  = {en}
}
@article{DavidsonDuekingZinneretal.2020,
  author    = {Davidson, Padraig and D{\"u}king, Peter and Zinner, Christoph and Sperlich, Billy and Hotho, Andreas},
  title     = {Smartwatch-Derived Data and Machine Learning Algorithms Estimate Classes of Ratings of Perceived Exertion in Runners: A Pilot Study},
  journal   = {Sensors},
  volume    = {20},
  number    = {9},
  year      = {2020},
  issn      = {1424-8220},
  doi       = {10.3390/s20092637},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-205686},
  abstract  = {The rating of perceived exertion (RPE) is a subjective load marker and may assist in individualizing training prescription, particularly by adjusting running intensity. Unfortunately, RPE has shortcomings (e.g., underreporting) and cannot be monitored continuously and automatically throughout a training sessions. In this pilot study, we aimed to predict two classes of RPE ($\leq$15 "Somewhat hard to hard" on Borg's 6-20 scale vs. RPE $>$15) in runners by analyzing data recorded by a commercially-available smartwatch with machine learning algorithms. Twelve trained and untrained runners performed long-continuous runs at a constant self-selected pace to volitional exhaustion. Untrained runners reported their RPE each kilometer, whereas trained runners reported every five kilometers. The kinetics of heart rate, step cadence, and running velocity were recorded continuously ( 1 Hz ) with a commercially-available smartwatch (Polar V800). We trained different machine learning algorithms to estimate the two classes of RPE based on the time series sensor data derived from the smartwatch. Predictions were analyzed in different settings: accuracy overall and per runner type; i.e., accuracy for trained and untrained runners independently. We achieved top accuracies of 84.8 \% for the whole dataset, 81.8 \% for the trained runners, and 86.1 \% for the untrained runners. We predict two classes of RPE with high accuracy using machine learning and smartwatch data. This approach might aid in individualizing training prescriptions.},
  language  = {en}
}
@article{RingLandesHotho2018,
  author    = {Ring, Markus and Landes, Dieter and Hotho, Andreas},
  title     = {Detection of slow port scans in flow-based network traffic},
  journal   = {PLoS ONE},
  volume    = {13},
  number    = {9},
  pages     = {e0204507, 1--18},
  year      = {2018},
  doi       = {10.1371/journal.pone.0204507},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-226305},
  abstract  = {Frequently, port scans are early indicators of more serious attacks. Unfortunately, the detection of slow port scans in company networks is challenging due to the massive amount of network data. This paper proposes an innovative approach for preprocessing flow-based data which is specifically tailored to the detection of slow port scans. The preprocessing chain generates new objects based on flow-based data aggregated over time windows while taking domain knowledge as well as additional knowledge about the network structure into account. The computed objects are used as input for the further analysis. Based on these objects, we propose two different approaches for detection of slow port scans. One approach is unsupervised and uses sequential hypothesis testing whereas the other approach is supervised and uses classification algorithms. We compare both approaches with existing port scan detection algorithms on the flow-based CIDDS-001 data set. Experiments indicate that the proposed approaches achieve better detection rates and exhibit less false alarms than similar algorithms.},
  language  = {en}
}