@article{SteiningerKobsDavidsonetal.2021, author = {Steininger, Michael and Kobs, Konstantin and Davidson, Padraig and Krause, Anna and Hotho, Andreas}, title = {Density-based weighting for imbalanced regression}, series = {Machine Learning}, volume = {110}, journal = {Machine Learning}, number = {8}, issn = {1573-0565}, doi = {10.1007/s10994-021-06023-5}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-269177}, pages = {2187-2211}, year = {2021}, abstract = {In many real world settings, imbalanced data impedes model performance of learning algorithms, like neural networks, mostly for rare cases. This is especially problematic for tasks focusing on these rare occurrences. For example, when estimating precipitation, extreme rainfall events are scarce but important considering their potential consequences. While there are numerous well studied solutions for classification settings, most of them cannot be applied to regression easily. Of the few solutions for regression tasks, barely any have explored cost-sensitive learning which is known to have advantages compared to sampling-based methods in classification tasks. In this work, we propose a sample weighting approach for imbalanced regression datasets called DenseWeight and a cost-sensitive learning approach for neural network regression with imbalanced data called DenseLoss based on our weighting scheme. DenseWeight weights data points according to their target value rarities through kernel density estimation (KDE). DenseLoss adjusts each data point's influence on the loss according to DenseWeight, giving rare data points more influence on model training compared to common data points. We show on multiple differently distributed datasets that DenseLoss significantly improves model performance for rare data points through its density-based weighting scheme. Additionally, we compare DenseLoss to the state-of-the-art method SMOGN, finding that our method mostly yields better performance. Our approach provides more control over model training as it enables us to actively decide on the trade-off between focusing on common or rare cases through a single hyperparameter, allowing the training of better models for rare data points.}, language = {en} } @article{SteiningerAbelZiegleretal.2023, author = {Steininger, Michael and Abel, Daniel and Ziegler, Katrin and Krause, Anna and Paeth, Heiko and Hotho, Andreas}, title = {ConvMOS: climate model output statistics with deep learning}, series = {Data Mining and Knowledge Discovery}, volume = {37}, journal = {Data Mining and Knowledge Discovery}, number = {1}, issn = {1384-5810}, doi = {10.1007/s10618-022-00877-6}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-324213}, pages = {136-166}, year = {2023}, abstract = {Climate models are the tool of choice for scientists researching climate change. Like all models they suffer from errors, particularly systematic and location-specific representation errors. One way to reduce these errors is model output statistics (MOS) where the model output is fitted to observational data with machine learning. In this work, we assess the use of convolutional Deep Learning climate MOS approaches and present the ConvMOS architecture which is specifically designed based on the observation that there are systematic and location-specific errors in the precipitation estimates of climate models. We apply ConvMOS models to the simulated precipitation of the regional climate model REMO, showing that a combination of per-location model parameters for reducing location-specific errors and global model parameters for reducing systematic errors is indeed beneficial for MOS performance. We find that ConvMOS models can reduce errors considerably and perform significantly better than three commonly used MOS approaches and plain ResNet and U-Net models in most cases. Our results show that non-linear MOS models underestimate the number of extreme precipitation events, which we alleviate by training models specialized towards extreme precipitation events with the imbalanced regression method DenseLoss. While we consider climate MOS, we argue that aspects of ConvMOS may also be beneficial in other domains with geospatial data, such as air pollution modeling or weather forecasts.}, subject = {Klima}, language = {en} }