@phdthesis{Kobs2024, author = {Kobs, Konstantin}, title = {Think outside the Black Box: Model-Agnostic Deep Learning with Domain Knowledge}, doi = {10.25972/OPUS-34968}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-349689}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2024}, abstract = {Deep Learning (DL) models are trained on a downstream task by feeding (potentially preprocessed) input data through a trainable Neural Network (NN) and updating its parameters to minimize the loss function between the predicted and the desired output. While this general framework has mainly remained unchanged over the years, the architectures of the trainable models have greatly evolved. Even though it is undoubtedly important to choose the right architecture, we argue that it is also beneficial to develop methods that address other components of the training process. We hypothesize that utilizing domain knowledge can be helpful to improve DL models in terms of performance and/or efficiency. Such model-agnostic methods can be applied to any existing or future architecture. Furthermore, the black box nature of DL models motivates the development of techniques to understand their inner workings. Considering the rapid advancement of DL architectures, it is again crucial to develop model-agnostic methods. In this thesis, we explore six principles that incorporate domain knowledge to understand or improve models. They are applied either on the input or the output side of the trainable model. Each principle is applied to at least two DL tasks, leading to task-specific implementations. To understand DL models, we propose to use Generated Input Data coming from a controllable generation process requiring knowledge about the data properties. This way, we can understand the model's behavior by analyzing how it changes when one specific high-level input feature changes in the generated data. On the output side, Gradient-Based Attribution methods create a gradient at the end of the NN and then propagate it back to the input, indicating which low-level input features have a large influence on the model's prediction. The resulting input features can be interpreted by humans using domain knowledge. To improve the trainable model in terms of downstream performance, data and compute efficiency, or robustness to unwanted features, we explore principles that each address one of the training components besides the trainable model. Input Masking and Augmentation directly modifies the training input data, integrating knowledge about the data and its impact on the model's output. We also explore the use of Feature Extraction using Pretrained Multimodal Models, which can be seen as a beneficial preprocessing step to extract useful features. When no training data is available for the downstream task, using such features and domain knowledge expressed in other modalities can result in a Zero-Shot Learning (ZSL) setting, completely eliminating the trainable model. The Weak Label Generation principle produces new desired outputs using knowledge about the labels, providing either a good pretraining dataset or even an exclusive training dataset for solving the downstream task. Finally, improving and choosing the right Loss Function is another principle we explore in this thesis. 
We apply the principles to classification, regression, and representation tasks as well as to image and text modalities. We propose, apply, and evaluate existing and novel methods to understand and improve the model. Overall, this thesis introduces and evaluates methods that complement the development and choice of DL model architectures.}, subject = {Deep learning}, language = {en} } @phdthesis{Somody2023, author = {Somody, Joseph Christian Campbell}, title = {Leveraging deep learning for identification and structural determination of novel protein complexes from \(in\) \(situ\) electron cryotomography of \(Mycoplasma\) \(pneumoniae\)}, doi = {10.25972/OPUS-31344}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-313447}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2023}, abstract = {The holy grail of structural biology is to study a protein in situ, and this goal has been fast approaching since the resolution revolution and the achievement of atomic resolution. A cell's interior is not a dilute environment, and proteins have evolved to fold and function as needed in that environment; as such, an investigation of a cellular component should ideally include the full complexity of the cellular environment. Imaging whole cells in three dimensions using electron cryotomography is the best method to accomplish this goal, but it comes with a limitation on sample thickness and produces noisy data that are not amenable to direct analysis. This thesis establishes a novel workflow to systematically analyse whole-cell electron cryotomography data in three dimensions and to find and identify instances of protein complexes in the data, setting up the determination of their structure and identity for success. Mycoplasma pneumoniae is a very small parasitic bacterium with fewer than 700 protein-coding genes, is thin enough and small enough to be imaged in large quantities by electron cryotomography, and can grow directly on the grids used for imaging, making it ideal for exploratory studies in structural proteomics. As part of the workflow, a methodology for training deep-learning-based particle-picking models is established. As a proof of principle, a dataset of whole-cell Mycoplasma pneumoniae tomograms is used with this workflow to characterize a novel membrane-associated complex observed in the data. Ultimately, 25,431 such particles are picked from 353 tomograms and refined to a density map with a resolution of 11 {\AA}. Making good use of orthogonal datasets to filter the search space and verify results, structures were predicted for candidate proteins and checked for suitable fit in the density map. In the end, with this approach, nine proteins were found to be part of the complex, which appears to be associated with chaperone activity and to interact with the translocon machinery. Visual proteomics refers to the ultimate potential of in situ electron cryotomography: the comprehensive interpretation of tomograms. The workflow presented here is demonstrated to help in reaching that potential.}, subject = {Kryoelektronenmikroskopie}, language = {en} } @phdthesis{Steininger2023, author = {Steininger, Michael}, title = {Deep Learning for Geospatial Environmental Regression}, doi = {10.25972/OPUS-31312}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-313121}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2023}, abstract = {Environmental issues have emerged especially since humans began burning fossil fuels, which has led to air pollution and climate change that harm the environment. 
The substantial consequences of these issues have evoked strong efforts towards assessing the state of our environment. Various environmental machine learning (ML) tasks aid these efforts. These tasks concern environmental data but are common ML tasks otherwise, i.e., datasets are split (training, validation, test), hyperparameters are optimized on validation data, and test set metrics measure a model's generalizability. This work focuses on the following environmental ML tasks: Regarding air pollution, land use regression (LUR) estimates air pollutant concentrations at locations where no measurements are available based on measured locations and each location's land use (e.g., industry, streets). For LUR, this work uses data from London (modeled) and Zurich (measured). Concerning climate change, a common ML task is model output statistics (MOS), where a climate model's output for a study area is altered to better fit Earth observations and provide more accurate climate data. This work uses the regional climate model (RCM) REMO and Earth observations from the E-OBS dataset for MOS. Another task regarding climate is grain size distribution interpolation, where soil properties at locations without measurements are estimated based on the few measured locations. This can provide climate models with soil information, which is important for hydrology. For this task, data from Lower Franconia is used. Such environmental ML tasks commonly have a number of properties: (i) geospatiality, i.e., their data refers to locations relative to the Earth's surface. (ii) The environmental variables to estimate or predict are usually continuous. (iii) Data can be imbalanced due to relatively rare extreme events (e.g., extreme precipitation). (iv) Multiple related potential target variables can be available per location, since measurement devices often contain different sensors. (v) Labels are often only sparsely available in space, since conducting measurements at all locations of interest is usually infeasible. These properties present challenges but also opportunities when designing ML methods for such tasks. In the past, environmental ML tasks have been tackled with conventional ML methods, such as linear regression or random forests (RFs). However, the field of ML has made tremendous leaps beyond these classic models through deep learning (DL). In DL, models use multiple layers of neurons, producing increasingly higher-level feature representations with growing layer depth. DL has made previously infeasible ML tasks feasible, significantly improved the performance for many tasks in comparison to existing ML models, and eliminated the need for manual feature engineering in some domains due to its ability to learn features from raw data. To harness these advantages for environmental domains, it is promising to develop novel DL methods for environmental ML tasks. This thesis presents methods for dealing with special challenges and exploiting opportunities inherent to environmental ML tasks in conjunction with DL. To this end, the proposed methods explore the following techniques: (i) Convolutions as in convolutional neural networks (CNNs) to exploit recurring spatial patterns in geospatial data. (ii) Posing the problems as regression tasks to estimate the continuous variables. (iii) Density-based weighting to improve estimation performance for rare and extreme events. (iv) Multi-task learning to make use of multiple related target variables. (v) Semi-supervised learning to cope with label sparsity. 
Using these techniques, this thesis considers four research questions: (i) Can air pollution be estimated without manual feature engineering? This is answered positively by the introduction of the CNN-based LUR model MapLUR as well as the off-the-shelf LUR solution OpenLUR. (ii) Can colocated pollution data improve spatial air pollution models? Multi-task learning for LUR is developed for this, showing potential for improvements with colocated data. (iii) Can DL models improve the quality of climate model outputs? The proposed DL climate MOS architecture ConvMOS demonstrates this. Additionally, semi-supervised training of multilayer perceptrons (MLPs) for grain size distribution interpolation is presented, which can provide improved input data. (iv) Can DL models be taught to better estimate climate extremes? To this end, density-based weighting for imbalanced regression (DenseLoss) is proposed and applied to the DL architecture ConvMOS, improving the estimation of climate extremes. These methods show how DL techniques in particular can be developed for environmental ML tasks with their special characteristics in mind. This allows for better models than were previously possible with conventional ML, leading to a more accurate assessment and a better understanding of the state of our environment.}, subject = {Deep learning}, language = {en} } @article{SteiningerAbelZiegleretal.2023, author = {Steininger, Michael and Abel, Daniel and Ziegler, Katrin and Krause, Anna and Paeth, Heiko and Hotho, Andreas}, title = {ConvMOS: climate model output statistics with deep learning}, series = {Data Mining and Knowledge Discovery}, volume = {37}, journal = {Data Mining and Knowledge Discovery}, number = {1}, issn = {1384-5810}, doi = {10.1007/s10618-022-00877-6}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-324213}, pages = {136-166}, year = {2023}, abstract = {Climate models are the tool of choice for scientists researching climate change. Like all models, they suffer from errors, particularly systematic and location-specific representation errors. One way to reduce these errors is model output statistics (MOS), where the model output is fitted to observational data with machine learning. In this work, we assess the use of convolutional Deep Learning climate MOS approaches and present the ConvMOS architecture, which is specifically designed based on the observation that there are systematic and location-specific errors in the precipitation estimates of climate models. We apply ConvMOS models to the simulated precipitation of the regional climate model REMO, showing that a combination of per-location model parameters for reducing location-specific errors and global model parameters for reducing systematic errors is indeed beneficial for MOS performance. We find that ConvMOS models can reduce errors considerably and perform significantly better than three commonly used MOS approaches and plain ResNet and U-Net models in most cases. Our results show that non-linear MOS models underestimate the number of extreme precipitation events, which we alleviate by training models specialized towards extreme precipitation events with the imbalanced regression method DenseLoss. 
While we consider climate MOS, we argue that aspects of ConvMOS may also be beneficial in other domains with geospatial data, such as air pollution modeling or weather forecasting.}, subject = {Klima}, language = {en} } @phdthesis{Philipp2023, author = {Philipp, Marius Balthasar}, title = {Quantifying the Effects of Permafrost Degradation in Arctic Coastal Environments via Satellite Earth Observation}, doi = {10.25972/OPUS-34563}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-345634}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2023}, abstract = {Permafrost degradation is observed all over the world as a consequence of climate change and the associated Arctic amplification, which has severe implications for the environment. Landslides, increased rates of surface deformation, a rising likelihood of infrastructure damage, amplified coastal erosion rates, and the potential turnover of permafrost from a carbon sink to a carbon source are examples of implications linked to the thawing of frozen ground material. In this context, satellite earth observation is a potent tool for the identification and continuous monitoring of relevant processes and features on a cheap, long-term, spatially explicit, and operational basis, up to a circumpolar scale. A total of 325 articles published in 30 different international journals during the past two decades were investigated on the basis of studied environmental foci, remote sensing platforms, sensor combinations, applied spatio-temporal resolutions, and study locations in an extensive review of past achievements, current trends, and future potentials and challenges of satellite earth observation for permafrost-related analyses. The development over time of the analysed environmental subjects, the utilized sensors and platforms, and the number of annually published articles is addressed in detail. Studies linked to atmospheric features and processes, such as the release of greenhouse gas emissions, appear to be strongly under-represented. Investigations of the spatial distribution of study locations revealed distinct study clusters across the Arctic. At the same time, large sections of the continuous permafrost domain are only poorly covered and remain to be investigated in detail. A general trend towards increasing attention to satellite earth observation of permafrost and related processes and features was observed; the overall number of published articles has more than doubled since 2015. New sources of satellite data, such as the Sentinel satellites and the Methane Remote Sensing LiDAR Mission (Merlin), as well as novel methodological approaches, such as data fusion and deep learning, will likely improve our understanding of the thermal state and distribution of permafrost and the effects of its degradation. Furthermore, cloud-based big data processing platforms (e.g., Google Earth Engine (GEE)) will further enable sophisticated and long-term analyses on increasingly large scales and at high spatial resolutions. In this thesis, a specific focus was placed on Arctic permafrost coasts, which feature increasing vulnerability to environmental parameters, such as the thawing of frozen ground, and are therefore associated with amplified erosion rates. In particular, a novel monitoring framework for quantifying Arctic coastal erosion rates within the permafrost domain at high spatial resolution and on a circum-Arctic scale is presented in this thesis. 
Challenging illumination conditions and frequent cloud cover restrict the applicability of optical satellite imagery in Arctic regions. In order to overcome these limitations, Synthetic Aperture RADAR (SAR) data derived from Sentinel-1 (S1), which is largely independent of sun illumination and weather conditions, was utilized. Annual SAR composites covering the months June-September were combined with a Deep Learning (DL) framework and a Change Vector Analysis (CVA) approach to generate both a high-quality, circum-Arctic coastline product and a coastal change product that highlights areas of erosion and build-up. Annual composites in the form of standard deviation (sd) and median backscatter were computed and used as inputs for both the DL framework and the CVA coastal change quantification. The final DL-based coastline product covered a total of 161,600 km of Arctic coastline and featured a median accuracy of ±6.3 m relative to the manually digitized reference data. Annual coastal change quantification between 2017 and 2021 indicated erosion rates of up to 67 m per year for some areas based on 400 m coastal segments. In total, 12.24\% of the investigated coastline featured an average erosion rate of 3.8 m per year, which corresponds to 17.83 km2 of annually eroded land area. Multiple quality layers associated with both products, the generated DL-coastline and the coastal change rates, are provided on a pixel basis to further assess the accuracy and applicability of the proposed data, methods, and products. Lastly, the extracted circum-Arctic erosion rates were utilized as a basis in an experimental framework for estimating the amount of permafrost and carbon loss as a result of eroding permafrost coastlines. Information on permafrost fraction, Active Layer Thickness (ALT), soil carbon content, and surface elevation was combined with the aforementioned erosion rates. While the proposed experimental framework provides a valuable outline for quantifying the volume loss of frozen ground and carbon release, extensive validation of the utilized environmental products and of the resulting volume loss numbers based on 200 m segments is necessary. Furthermore, data of higher spatial resolution and information on carbon content at greater soil depths are required for more accurate estimates.}, subject = {Dauerfrostboden}, language = {en} } @article{MuellerLeppichGeissetal.2023, author = {M{\"u}ller, Konstantin and Leppich, Robert and Geiß, Christian and Borst, Vanessa and Pelizari, Patrick Aravena and Kounev, Samuel and Taubenb{\"o}ck, Hannes}, title = {Deep neural network regression for normalized digital surface model generation with Sentinel-2 imagery}, series = {IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing}, volume = {16}, journal = {IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing}, issn = {1939-1404}, doi = {10.1109/JSTARS.2023.3297710}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-349424}, pages = {8508-8519}, year = {2023}, abstract = {In recent history, normalized digital surface models (nDSMs) have been steadily gaining importance as a means to solve large-scale geographic problems. High-resolution surface models are valuable, as they can provide detailed information for a specific area. However, high-resolution measurements are time-consuming and costly. Only a few approaches exist to create high-resolution nDSMs for extensive areas. 
This article explores approaches to extract high-resolution nDSMs from low-resolution Sentinel-2 data, allowing us to derive large-scale models. We thereby utilize the advantages of Sentinel-2 being open access, having global coverage, and providing steady updates through a high repetition rate. Several deep learning models are trained to bridge the gap in producing high-resolution surface maps from low-resolution input data. With U-Net as a base architecture, we extend the capabilities of our model by integrating tailored multiscale encoders with differently sized kernels in the convolution as well as conformed self-attention inside the skip connection gates. Using pixelwise regression, our U-Net base models can achieve a mean height error of approximately 2 m. Moreover, through our enhancements to the model architecture, we reduce the model error by more than 7\%.}, language = {en} } @article{LeubeGustafssonLassmannetal.2022, author = {Leube, Julian and Gustafsson, Johan and Lassmann, Michael and Salas-Ramirez, Maikol and Tran-Gia, Johannes}, title = {Analysis of a deep learning-based method for generation of SPECT projections based on a large Monte Carlo simulated dataset}, series = {EJNMMI Physics}, volume = {9}, journal = {EJNMMI Physics}, issn = {2197-7364}, doi = {10.1186/s40658-022-00476-w}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-300697}, year = {2022}, abstract = {Background: In recent years, a lot of effort has been put into the enhancement of medical imaging using artificial intelligence. However, limited patient data in combination with the unavailability of a ground truth often pose a challenge to a systematic validation of such methodologies. The goal of this work was to investigate, based on a large dataset of realistic SPECT simulations, a recently proposed method for the artificial intelligence-based generation of synthetic SPECT projections for acceleration of the image acquisition process. Methods: A database of 10,000 SPECT projection datasets of heterogeneous activity distributions of randomly placed random shapes was simulated for a clinical SPECT/CT system using the SIMIND Monte Carlo program. Synthetic projections at fixed angular increments from a set of input projections at evenly distributed angles were generated by different u-shaped convolutional neural networks (u-nets). These u-nets differed in the noise realization used for the training data, the number of input projections, the projection angle increment, and the number of training/validation datasets. Synthetic projections were generated for 500 test projection datasets for each u-net, and a quantitative analysis was performed using statistical hypothesis tests based on the structural similarity index measure and the normalized root-mean-squared error. Additional simulations with varying detector orbits were performed on a subset of the dataset to study the effect of the detector orbit on the performance of the methodology. For verification of the results, the u-nets were applied to Jaszczak and NEMA physical phantom data obtained on a clinical SPECT/CT system. Results: No statistically significant differences were observed between u-nets trained with different noise realizations. In contrast, a statistically significant deterioration was found for training with a small subset (400 datasets) of the 10,000 simulated projection datasets in comparison with using a large subset (9500 datasets) for training. 
The good agreement between synthetic (i.e., u-net-generated) and simulated projections before adding noise demonstrates a denoising effect. Finally, the physical phantom measurements show that our findings also apply to projections measured on a clinical SPECT/CT system. Conclusion: Our study shows the large potential of u-nets for accelerating SPECT/CT imaging. In addition, our analysis numerically reveals a denoising effect when generating synthetic projections with a u-net. Of clinical interest, the methodology has proven robust against camera orbit deviations within a clinically realistic range. Lastly, we found that a small number of training samples (e.g., ~400 datasets) may not be sufficient for reliable generalization of the u-net.}, language = {en} } @unpublished{HeidenreichGassenmaierAnkenbrandetal.2021, author = {Heidenreich, Julius F. and Gassenmaier, Tobias and Ankenbrand, Markus J. and Bley, Thorsten A. and Wech, Tobias}, title = {Self-configuring nnU-net pipeline enables fully automatic infarct segmentation in late enhancement MRI after myocardial infarction}, edition = {accepted version}, doi = {10.1016/j.ejrad.2021.109817}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-323418}, year = {2021}, abstract = {Purpose: To fully automatically derive quantitative parameters from late gadolinium enhancement (LGE) cardiac MR (CMR) in patients with myocardial infarction and to investigate whether phase-sensitive reconstructions, magnitude reconstructions, or a combination of both result in the best segmentation accuracy. Methods: In this retrospective single-center study, a convolutional neural network with a U-Net architecture and a self-configuring framework ("nnU-net") was trained for segmentation of the left ventricular myocardium and the infarct zone in LGE-CMR. A database of 170 examinations from 78 patients with a history of myocardial infarction was assembled. Separate fitting of the model was performed using phase-sensitive inversion recovery (PSIR), the magnitude reconstruction (MAG), or both contrasts as input channels. Manual labelling served as ground truth. In a subset of 10 patients, the performance of the trained models was evaluated and quantitatively compared by determining the S{\o}rensen-Dice similarity coefficient (DSC) and by comparing the volumes of the infarct zone with the manual ground truth using Pearson's r correlation and Bland-Altman analysis. Results: The model achieved high similarity coefficients for myocardium and scar tissue. No significant difference was observed between using PSIR, the magnitude reconstruction, or both contrasts as input (PSIR and MAG; mean DSC: 0.83 ± 0.03 for myocardium and 0.72 ± 0.08 for scars). A strong correlation for infarct zone volumes was observed between the manual and the model-based approach (r = 0.96), with a significant underestimation of the volumes obtained from the neural network. Conclusion: The self-configuring nnU-net achieves predictions in strong agreement with manual segmentation, demonstrating its potential as a promising tool for fully automatic quantitative evaluation of LGE-CMR.}, language = {en} }