@article{WoznickiLaquaAlHajetal.2023,
  author    = {Woznicki, Piotr and Laqua, Fabian Christopher and Al-Haj, Adam and Bley, Thorsten and Baeßler, Bettina},
  title     = {Addressing challenges in radiomics research: systematic review and repository of open-access cancer imaging datasets},
  series = {Insights into Imaging},
  volume    = {14},
  journal   = {Insights into Imaging},
  issn      = {1869-4101},
  doi       = {10.1186/s13244-023-01556-w},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-357936},
  year      = {2023},
  abstract  = {Objectives Open-access cancer imaging datasets have become integral for evaluating novel AI approaches in radiology. However, their use in quantitative analysis with radiomics features presents unique challenges, such as incomplete documentation, low visibility, non-uniform data formats, data inhomogeneity, and complex preprocessing. These issues may cause problems with reproducibility and standardization in radiomics studies. Methods We systematically reviewed imaging datasets with public copyright licenses, published up to March 2023 across four large online cancer imaging archives. We included only datasets with tomographic images (CT, MRI, or PET), segmentations, and clinical annotations, specifically identifying those suitable for radiomics research. Reproducible preprocessing and feature extraction were performed for each dataset to enable their easy reuse. Results We discovered 29 datasets with corresponding segmentations and labels in the form of health outcomes, tumor pathology, staging, imaging-based scores, genetic markers, or repeated imaging. We compiled a repository encompassing 10,354 patients and 49,515 scans. Of the 29 datasets, 15 were licensed under Creative Commons licenses, allowing both non-commercial and commercial usage and redistribution, while others featured custom or restricted licenses. Studies spanned from the early 1990s to 2021, with the majority concluding after 2013. Seven different formats were used for the imaging data. Preprocessing and feature extraction were successfully performed for each dataset. Conclusion RadiomicsHub is a comprehensive public repository with radiomics features derived from a systematic review of public cancer imaging datasets. By converting all datasets to a standardized format and ensuring reproducible and traceable processing, RadiomicsHub addresses key reproducibility and standardization challenges in radiomics. Critical relevance statement This study critically addresses the challenges associated with locating, preprocessing, and extracting quantitative features from open-access datasets, to facilitate more robust and reliable evaluations of radiomics models. Key points - Through a systematic review, we identified 29 cancer imaging datasets suitable for radiomics research. - A public repository with collection overview and radiomics features, encompassing 10,354 patients and 49,515 scans, was compiled. - Most datasets can be shared, used, and built upon freely under a Creative Commons license. - All 29 identified datasets have been converted into a common format to enable reproducible radiomics feature extraction.},
  language  = {en}
}
@article{LimanMayFetteetal.2023,
  author    = {Liman, Leon and May, Bernd and Fette, Georg and Krebs, Jonathan and Puppe, Frank},
  title     = {Using a clinical data warehouse to calculate and present key metrics for the radiology department: implementation and performance evaluation},
  series = {JMIR Medical Informatics},
  volume    = {11},
  journal   = {JMIR Medical Informatics},
  issn      = {2291-9694},
  doi       = {10.2196/41808},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-349411},
  year      = {2023},
  abstract  = {Background: Due to the importance of radiologic examinations, such as X-rays or computed tomography scans, for many clinical diagnoses, the optimal use of the radiology department is 1 of the primary goals of many hospitals. Objective: This study aims to calculate the key metrics of this use by creating a radiology data warehouse solution, where data from radiology information systems (RISs) can be imported and then queried using a query language as well as a graphical user interface (GUI). Methods: Using a simple configuration file, the developed system allowed for the processing of radiology data exported from any kind of RIS into a Microsoft Excel, comma-separated value (CSV), or JavaScript Object Notation (JSON) file. These data were then imported into a clinical data warehouse. Additional values based on the radiology data were calculated during this import process by implementing 1 of several provided interfaces. Afterward, the query language and GUI of the data warehouse were used to configure and calculate reports on these data. For the most common types of requested reports, a web interface was created to view their numbers as graphics. Results: The tool was successfully tested with the data of 4 different German hospitals from 2018 to 2021, with a total of 1,436,111 examinations. The user feedback was good, since all their queries could be answered if the available data were sufficient. The initial processing of the radiology data for using them with the clinical data warehouse took (depending on the amount of data provided by each hospital) between 7 minutes and 1 hour 11 minutes. Calculating 3 reports of different complexities on the data of each hospital was possible in 1-3 seconds for reports with up to 200 individual calculations and in up to 1.5 minutes for reports with up to 8200 individual calculations. Conclusions: A system was developed with the main advantage of being generic concerning the export of different RISs as well as concerning the configuration of queries for various reports. The queries could be configured easily using the GUI of the data warehouse, and their results could be exported into the standard formats Excel and CSV for further processing.},
  language  = {en}
}
@article{WoznickiLaquaBleyetal.2022,
  author    = {Woznicki, Piotr and Laqua, Fabian and Bley, Thorsten and Baeßler, Bettina},
  title     = {AutoRadiomics: a framework for reproducible radiomics research},
  series = {Frontiers in Radiology},
  volume    = {2},
  journal   = {Frontiers in Radiology},
  issn      = {2673-8740},
  doi       = {10.3389/fradi.2022.919133},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-284813},
  year      = {2022},
  abstract  = {Purpose Machine learning based on radiomics features has seen huge success in a variety of clinical applications. However, the need for standardization and reproducibility has been increasingly recognized as a necessary step for future clinical translation. We developed a novel, intuitive open-source framework to facilitate all data analysis steps of a radiomics workflow in an easy and reproducible manner and evaluated it by reproducing classification results in eight available open-source datasets from different clinical entities. Methods The framework performs image preprocessing, feature extraction, feature selection, modeling, and model evaluation, and can automatically choose the optimal parameters for a given task. All analysis steps can be reproduced with a web application, which offers an interactive user interface and does not require programming skills. We evaluated our method in seven different clinical applications using eight public datasets: six datasets from the recently published WORC database, and two prostate MRI datasets—Prostate MRI and Ultrasound With Pathology and Coordinates of Tracked Biopsy (Prostate-UCLA) and PROSTATEx. Results In the analyzed datasets, AutoRadiomics successfully created and optimized models using radiomics features. For WORC datasets, we achieved AUCs ranging from 0.56 for lung melanoma metastases detection to 0.93 for liposarcoma detection and thereby managed to replicate the previously reported results. No significant overfitting between training and test sets was observed. For the prostate cancer detection task, results were better in the PROSTATEx dataset (AUC = 0.73 for prostate and 0.72 for lesion mask) than in the Prostate-UCLA dataset (AUC 0.61 for prostate and 0.65 for lesion mask), with external validation results varying from AUC = 0.51 to AUC = 0.77. Conclusion AutoRadiomics is a robust tool for radiomic studies, which can be used as a comprehensive solution, one of the analysis steps, or an exploratory tool. Its wide applicability was confirmed by the results obtained in the diverse analyzed datasets. The framework, as well as code for this analysis, are publicly available under https://github.com/pwoznicki/AutoRadiomics.},
  language  = {en}
}