% Journal article (JMIR Medical Informatics, vol. 11, 2023): radiology key metrics computed from a clinical data warehouse.
@article{LimanMayFetteetal.2023,
  author    = {Liman, Leon and May, Bernd and Fette, Georg and Krebs, Jonathan and Puppe, Frank},
  title     = {Using a clinical data warehouse to calculate and present key metrics for the radiology department: implementation and performance evaluation},
  journal   = {JMIR Medical Informatics},
  volume    = {11},
  issn      = {2291-9694},
  doi       = {10.2196/41808},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-349411},
  year      = {2023},
  abstract  = {Background: Due to the importance of radiologic examinations, such as X-rays or computed tomography scans, for many clinical diagnoses, the optimal use of the radiology department is 1 of the primary goals of many hospitals. Objective: This study aims to calculate the key metrics of this use by creating a radiology data warehouse solution, where data from radiology information systems (RISs) can be imported and then queried using a query language as well as a graphical user interface (GUI). Methods: Using a simple configuration file, the developed system allowed for the processing of radiology data exported from any kind of RIS into a Microsoft Excel, comma-separated value (CSV), or JavaScript Object Notation (JSON) file. These data were then imported into a clinical data warehouse. Additional values based on the radiology data were calculated during this import process by implementing 1 of several provided interfaces. Afterward, the query language and GUI of the data warehouse were used to configure and calculate reports on these data. For the most common types of requested reports, a web interface was created to view their numbers as graphics. Results: The tool was successfully tested with the data of 4 different German hospitals from 2018 to 2021, with a total of 1,436,111 examinations. The user feedback was good, since all their queries could be answered if the available data were sufficient. The initial processing of the radiology data for using them with the clinical data warehouse took (depending on the amount of data provided by each hospital) between 7 minutes and 1 hour 11 minutes. Calculating 3 reports of different complexities on the data of each hospital was possible in 1-3 seconds for reports with up to 200 individual calculations and in up to 1.5 minutes for reports with up to 8200 individual calculations.
Conclusions: A system was developed with the main advantage of being generic concerning the export of different RISs as well as concerning the configuration of queries for various reports. The queries could be configured easily using the GUI of the data warehouse, and their results could be exported into the standard formats Excel and CSV for further processing.},
  language  = {en}
}
% Journal article (Applied Intelligence, vol. 53, no. 8, 2023): the KIETA pipeline for extracting key insights from tables.
@article{KempfKrugPuppe2023,
  author    = {Kempf, Sebastian and Krug, Markus and Puppe, Frank},
  title     = {{KIETA}: Key-insight extraction from scientific tables},
  journal   = {Applied Intelligence},
  volume    = {53},
  number    = {8},
  issn      = {0924-669X},
  doi       = {10.1007/s10489-022-03957-8},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-324180},
  pages     = {9513--9530},
  year      = {2023},
  abstract  = {An important but very time consuming part of the research process is literature review. An already large and nevertheless growing ground set of publications as well as a steadily increasing publication rate continue to worsen the situation. Consequently, automating this task as far as possible is desirable. Experimental results of systems are key-insights of high importance during literature review and usually represented in form of tables. Our pipeline KIETA exploits these tables to contribute to the endeavor of automation by extracting them and their contained knowledge from scientific publications. The pipeline is split into multiple steps to guarantee modularity as well as analyzability, and agnosticism regarding the specific scientific domain up until the knowledge extraction step, which is based upon an ontology. Additionally, a dataset of corresponding articles has been manually annotated with information regarding table and knowledge extraction. Experiments show promising results that signal the possibility of an automated system, while also indicating limits of extracting knowledge from tables without any context.},
  language  = {en}
}
% Journal article (Algorithms, vol. 16, no. 3, 2023): line-level layout recognition of early printed documents.
@article{FischerHarteltPuppe2023,
  author    = {Fischer, Norbert and Hartelt, Alexander and Puppe, Frank},
  title     = {Line-level layout recognition of historical documents with background knowledge},
  journal   = {Algorithms},
  volume    = {16},
  number    = {3},
  issn      = {1999-4893},
  doi       = {10.3390/a16030136},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-310938},
  year      = {2023},
  abstract  = {Digitization and transcription of historic documents offer new research opportunities for humanists and are the topics of many edition projects. However, manual work is still required for the main phases of layout recognition and the subsequent optical character recognition (OCR) of early printed documents. This paper describes and evaluates how deep learning approaches recognize text lines and can be extended to layout recognition using background knowledge. The evaluation was performed on five corpora of early prints from the 15th and 16th Centuries, representing a variety of layout features. While the main text with standard layouts could be recognized in the correct reading order with a precision and recall of up to 99.9\%, also complex layouts were recognized at a rate as high as 90\% by using background knowledge, the full potential of which was revealed if many pages of the same source were transcribed.},
  language  = {en}
}