@phdthesis{Atzmueller2006, author = {Atzm{\"u}ller, Martin}, title = {Knowledge-Intensive Subgroup Mining - Techniques for Automatic and Interactive Discovery}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-21004}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2006}, abstract = {Data mining has proved its significance in various domains and applications. As an important subfield of the general data mining task, subgroup mining can be used, e.g., for marketing purposes in business domains, or for quality profiling and analysis in medical domains. The goal is to efficiently discover novel, potentially useful and ultimately interesting knowledge. However, in real-world situations these requirements often cannot be fulfilled, e.g., if the applied methods do not scale to large data sets, if too many results are presented to the user, or if many of the discovered patterns are already known to the user. This thesis proposes a combination of several techniques to cope with these problems: We discuss automatic methods, including heuristic and exhaustive approaches, and especially present the novel SD-Map algorithm for exhaustive subgroup discovery that is fast and effective. For an interactive approach we describe techniques for subgroup introspection and analysis, and we present advanced visualization methods, e.g., the zoomtable, which directly shows the most important parameters of a subgroup and can be used for optimization and exploration. We also describe various visualizations for subgroup comparison and evaluation in order to support the user during these essential steps. Furthermore, we propose to include possibly available background knowledge that is easy to formalize into the mining process. We can utilize the knowledge in many ways: to focus the search process, to restrict the search space, and ultimately to increase the efficiency of the discovery method. We especially present background knowledge to be applied for filtering the elements of the problem domain, for constructing abstractions, for aggregating values of attributes, and for the post-processing of the discovered set of patterns. Finally, the techniques are combined into a knowledge-intensive process supporting both automatic and interactive methods for subgroup mining. The practical significance of the proposed approach strongly depends on the available tools. We introduce the VIKAMINE system as a highly integrated environment for knowledge-intensive active subgroup mining. Also, we present an evaluation consisting of two parts: With respect to objective evaluation criteria, i.e., comparing the efficiency and the effectiveness of the subgroup discovery methods, we provide an experimental evaluation using generated data. For that task we present a novel data generator that allows a simple and intuitive specification of the data characteristics. The results of the experimental evaluation indicate that, on data sets similar to those of the intended application, the novel SD-Map method outperforms the other described algorithms in terms of efficiency, and also surpasses the heuristic methods with respect to precision and recall. Subjective evaluation criteria include the user acceptance, the benefit of the approach, and the interestingness of the results. We present five case studies utilizing the presented techniques: the approach has been successfully implemented in medical and technical applications using real-world data sets.
The method was very well accepted by the users, who were able to discover novel, useful, and interesting knowledge.}, subject = {Data Mining}, language = {en} } @unpublished{Nassourou2012, author = {Nassourou, Mohamadou}, title = {Towards a Knowledge-Based Learning System for The Quranic Text}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-70003}, year = {2012}, abstract = {In this research, an attempt to create a knowledge-based learning system for the Quranic text has been made. The knowledge base is made up of the Quranic text along with detailed information about each chapter and verse, and some rules. The system offers the possibility to study the Quran through web-based interfaces, implementing novel visualization techniques for browsing, querying, consulting, and testing the acquired knowledge. Additionally, the system possesses knowledge acquisition facilities for maintaining the knowledge base.}, subject = {Wissensbanksystem}, language = {en} } @unpublished{Nassourou2011, author = {Nassourou, Mohamadou}, title = {Philosophical and Computational Approaches for Estimating and Visualizing Months of Revelations of Quranic Chapters}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-65784}, year = {2011}, abstract = {The question of why the Quran's structure does not follow its chronology of revelation is a recurring one. Some Islamic scholars such as [1] have answered the question using hadiths, as well as other philosophical reasons based on internal evidence of the Quran itself. Unfortunately, many are still wondering about this issue today. Muslims believe that the Quran is a summary and a copy of the content of a preserved tablet called Lawhul-Mahfuz located in heaven. Logically speaking, this suggests that the arrangement of the verses and chapters is expected to be similar to that of the Lawhul-Mahfuz. As for the arrangement of the verses in each chapter, there is unanimity that it was carried out by the Prophet himself under the guidance of the Angel Gabriel with the recommendation of God. But concerning the ordering of the chapters, there are reports about some divergences [3] among the Prophet's companions as to which chapter should precede which one. This paper argues that Quranic chapters might have been arranged according to months and seasons of revelation. In fact, based on some verses of the Quran, it is defensible that the Lawhul-Mahfuz itself is understood to have been structured in terms of the months of the year. In this study, philosophical and mathematical arguments for computing chapters' months of revelation are discussed, and the result is displayed on an interactive scatter plot.}, subject = {Text Mining}, language = {en} } @article{UrbanRemmeleDittrichetal.2020, author = {Urban, Lara and Remmele, Christian W. and Dittrich, Marcus and Schwarz, Roland F. and M{\"u}ller, Tobias}, title = {covRNA: discovering covariate associations in large-scale gene expression data}, series = {BMC Research Notes}, volume = {13}, journal = {BMC Research Notes}, doi = {10.1186/s13104-020-04946-1}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-229258}, year = {2020}, abstract = {Objective: The biological interpretation of gene expression measurements is a challenging task. While ordination methods are routinely used to identify clusters of samples or co-expressed genes, these methods do not take sample or gene annotations into account.
We aim to provide a tool that allows users of all backgrounds to assess and visualize the intrinsic correlation structure of complex annotated gene expression data and discover the covariates that jointly affect expression patterns. Results: The Bioconductor package covRNA provides a convenient and fast interface for testing and visualizing complex relationships between sample and gene covariates mediated by gene expression data in an entirely unsupervised setting. The relationships between sample and gene covariates are tested by statistical permutation tests and visualized by ordination. The methods are inspired by the fourthcorner and RLQ analyses used in ecological research for the analysis of species abundance data, which we modified to make them suitable for the distributional characteristics of both RNA-Seq read counts and microarray intensities, and to provide a high-performance parallelized implementation for the analysis of large-scale gene expression data on multi-core computational systems. covRNA provides additional modules for unsupervised gene filtering and plotting functions to ensure a smooth and coherent analysis workflow.}, language = {en} } @phdthesis{Wawrowsky2007, author = {Wawrowsky, Kolja Alexander}, title = {Analysis and Visualization in Multidimensional Microscopy}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-23867}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2007}, abstract = {The life sciences are currently undergoing a paradigm shift toward computer-aided discovery. Discoveries in the life sciences were historically made either by direct observation or as a result of chemical assays. Today we see a growing shift toward computer-aided analysis and visualization. This gradual shift is also taking place in microscopy. Multidimensional laser scanning microscopy can acquire very complex multichannel data from fixed or live specimens. New probes such as visible fluorescent proteins let us observe the expression of genes and track protein localization. Ion-sensitive dyes change intensity with the concentration of ions in the cell. The laser scanning confocal microscope allows us to record these processes in three dimensions over time. This work demonstrates the application of software analysis to multidimensional microscopy data. We introduce methods for volume investigation, ion flux analysis, and molecular modeling. The visualization methods are based on a multidimensional data model to accommodate complex datasets. The software uses vector processing and multiple processors to accelerate volume rendering and achieve interactive performance. The algorithms are based on human visual perception and allow the observer a wide range of mixed render modes. The software was used to reconstruct pituitary development in zebrafish and to observe the degeneration of neurons after injury in a mouse model. Calcium indicator dyes have long been used to study calcium fluxes. We optimized the imaging method to minimize impact on the cell. Live cells were imaged continuously for 45 minutes and subjected to increasing doses of a drug. We correlated the amplitude of calcium oscillations with increasing doses of a drug and obtained single-cell dose-response curves. Because this method is very sensitive and measures single-cell responses, it has potential in drug discovery and characterization. Microtubules form a dynamic cytoskeleton, which is responsible for cell shape and intracellular transport and has an integral role in mitosis. Lateral interactions are a hallmark of microtubule organization.
Microtubules are bundled by proteins into dense structures. To estimate the contribution of this bundling process, we created a fractal model of microtubule organization. This model demonstrates that the morphology of complex microtubule arrays can be explained by bundling alone. In summary, we showed that advances in software for visualization, data analysis, and modeling lead to new discoveries.}, subject = {Konfokale Mikroskopie}, language = {en} }
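
The covRNA abstract (UrbanRemmeleDittrichetal.2020 above) describes testing associations between sample covariates and gene covariates, mediated by the expression matrix, via permutation tests in the spirit of the fourth-corner analysis. The following is a minimal generic sketch of that idea only; it is not the covRNA API, and the toy data, function name, and statistic are illustrative assumptions.

# Minimal fourth-corner-style permutation test (illustrative sketch, not the covRNA API).
import numpy as np

rng = np.random.default_rng(0)

n_samples, n_genes = 40, 200
X = rng.poisson(5.0, size=(n_samples, n_genes)).astype(float)  # expression matrix (samples x genes)
r = rng.integers(0, 2, size=n_samples).astype(float)           # sample covariate, e.g. treatment group
q = rng.normal(size=n_genes)                                    # gene covariate, e.g. a numeric annotation

def fourth_corner_stat(expr, sample_cov, gene_cov):
    # Cross-covariance of the sample and gene covariates mediated by expression;
    # centering the expression columns removes the effect of a constant offset in sample_cov.
    expr_centered = expr - expr.mean(axis=0)
    return float(sample_cov @ expr_centered @ gene_cov)

observed = fourth_corner_stat(X, r, q)

# Null distribution: permute the sample covariate while keeping expression and gene covariate fixed.
n_perm = 999
null = np.array([fourth_corner_stat(X, rng.permutation(r), q) for _ in range(n_perm)])
p_value = (1 + np.sum(np.abs(null) >= abs(observed))) / (n_perm + 1)
print(f"observed statistic = {observed:.2f}, permutation p-value = {p_value:.3f}")

The two-sided p-value uses the usual add-one correction so that it never reaches zero for a finite number of permutations.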
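
The Wawrowsky2007 abstract above mentions deriving single-cell dose-response curves from calcium oscillation amplitudes measured at increasing drug doses. The sketch below illustrates one conventional way to fit such a curve, assuming a four-parameter Hill equation; the example data, parameter values, and function names are hypothetical and do not come from the thesis.

# Illustrative sketch: fitting a Hill-type dose-response curve to one cell's
# oscillation amplitudes with SciPy (assumed model, not the thesis implementation).
import numpy as np
from scipy.optimize import curve_fit

def hill(dose, bottom, top, ec50, slope):
    # Four-parameter Hill equation: baseline, plateau, half-maximal dose, and slope.
    return bottom + (top - bottom) / (1.0 + (ec50 / dose) ** slope)

# Hypothetical example data: drug doses and one cell's normalized oscillation amplitudes.
doses = np.array([0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0])
amplitudes = np.array([0.05, 0.08, 0.20, 0.45, 0.80, 0.95, 1.00])

# Initial guesses: observed baseline and plateau, a mid-range EC50, and a unit slope.
p0 = [amplitudes.min(), amplitudes.max(), np.median(doses), 1.0]
params, _ = curve_fit(hill, doses, amplitudes, p0=p0, maxfev=10000)
bottom, top, ec50, slope = params
print(f"EC50 = {ec50:.3g}, Hill slope = {slope:.2f}")

Repeating the fit per cell yields the kind of single-cell dose-response curves the abstract refers to.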