% PhD thesis by Markus Johannes Ankenbrand (2018); abstract in English.
% The dash in the title is written as a double hyphen so LaTeX sets an en dash.
@phdthesis{Ankenbrand2018,
  author   = {Ankenbrand, Markus Johannes},
  title    = {Squeezing more information out of biological data -- development and application of bioinformatic tools for ecology, evolution and genomics},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-156344},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2018},
  abstract = {New experimental methods have drastically accelerated the pace and quantity at which biological data is generated. High-throughput DNA sequencing is one of the pivotal new technologies. It offers a number of novel applications in various fields of biology, including ecology, evolution, and genomics. However, together with those opportunities many new challenges arise. Specialized algorithms and software are required to cope with the amount of data, often requiring substantial training in bioinformatic methods. Another way to make those data accessible to non-bioinformaticians is the development of programs with intuitive user interfaces. In my thesis I developed analyses and programs to tackle current problems with high-throughput data in biology. In the field of ecology this covers the establishment of the bioinformatic workflow for pollen DNA meta-barcoding. Furthermore, I developed an application that facilitates the analysis of ecological communities in the context of their traits. Information from multiple public databases have been aggregated and can now be mapped automatically to existing community tables for interactive inspection. In evolution the new data are used to reconstruct phylogenetic trees from multiple genes. I developed the tool bcgTree to automate this process for bacteria. Many plant genomes have been sequenced in current years. Sequencing reads of those projects also contain data from the chloroplasts. The tool chloroExtractor supports the targeted extraction and analysis of the chloroplast genome.
    To compare the structure of multiple genomes specialized software is required for calculation and visualization of the relationships. I developed AliTV to address this. In contrast to existing programs for this task it allows interactive adjustments of produced graphics. Thus, facilitating the discovery of biologically relevant information. Another application I developed helps to analyze transcriptomes even if no reference genome is present. This is achieved by aggregating the different pieces of information, like functional annotation and expression level, for each transcript in a web platform. Scientists can then search, filter, subset, and visualize the transcriptome. Together the methods and tools expedite insights into biological systems that were not possible before.},
  language = {en},
}

% PhD thesis by Daniela Beisser (2011); abstract in English.
@phdthesis{Beisser2011,
  author   = {Beisser, Daniela},
  title    = {Integrated functional analysis of biological networks},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-70150},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2011},
  abstract = {In recent years high-throughput experiments provided a vast amount of data from all areas of molecular biology, including genomics, transcriptomics, proteomics and metabolomics. Its analysis using bioinformatics methods has developed accordingly, towards a systematic approach to understand how genes and their resulting proteins give rise to biological form and function. They interact with each other and with other molecules in highly complex structures, which are explored in network biology. The in-depth knowledge of genes and proteins obtained from high-throughput experiments can be complemented by the architecture of molecular networks to gain a deeper understanding of biological processes. This thesis provides methods and statistical analyses for the integration of molecular data into biological networks and the identification of functional modules, as well as its application to distinct biological data.
    The integrated network approach is implemented as a software package, termed BioNet, for the statistical language R. The package includes the statistics for the integration of transcriptomic and functional data with biological networks, the scoring of nodes and edges of these networks as well as methods for subnetwork search and visualisation. The exact algorithm is extensively tested in a simulation study and outperforms existing heuristic methods for the calculation of this NP-hard problem in accuracy and robustness. The variability of the resulting solutions is assessed on perturbed data, mimicking random or biased factors that obscure the biological signal, generated for the integrated data and the network. An optimal, robust module can be calculated using a consensus approach, based on a resampling method. It summarizes optimally an ensemble of solutions in a robust consensus module with the estimated variability indicated by confidence values for the nodes and edges. The approach is subsequently applied to two gene expression data sets. The first application analyses gene expression data for acute lymphoblastic leukaemia (ALL) and differences between the subgroups with and without an oncogenic BCR/ABL gene fusion. In a second application gene expression and survival data from diffuse large B-cell lymphomas are examined. The identified modules include and extend already existing gene lists and signatures by further significant genes and their interactions. The most important novelty is that these genes are determined and visualised in the context of their interactions as a functional module and not as a list of independent and unrelated transcripts. In a third application the integrative network approach is used to trace changes in tardigrade metabolism to identify pathways responsible for their extreme resistance to environmental changes and endurance in an inactive tun state.
    For the first time a metabolic network approach is proposed to detect shifts in metabolic pathways, integrating transcriptome and metabolite data. Concluding, the presented integrated network approach is an adequate technique to unite high-throughput experimental data for single molecules and their intermolecular dependencies. It is flexible to apply on diverse data, ranging from gene expression changes over metabolite abundances to protein modifications in a combination with a suitable molecular network. The exact algorithm is accurate and robust in comparison to heuristic approaches and delivers an optimal, robust solution in form of a consensus module with confidence values. By the integration of diverse sources of information and a simultaneous inspection of a molecular event from different points of view, new and exhaustive insights into biological processes can be acquired.},
  subject  = {Bioinformatik},
  language = {en},
}

% PhD thesis by Roland Schwarz (2008); title and abstract in German.
% Nouns in the title are brace-protected so that sentence-casing styles keep
% their capital letters. Sharp s is written as a LaTeX escape, matching the
% escaped umlauts used throughout this file.
@phdthesis{Schwarz2008,
  author   = {Schwarz, Roland},
  title    = {Modellierung von {Metabolismus}, {Transkriptom} und {Zellentwicklung} bei {Arabidopsis}, {Listerien} und anderen {Organismen}},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-27622},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2008},
  abstract = {Im gleichen Ma{\ss}e wie informatisches Wissen mehr und mehr in den wissenschaftlichen Alltag aller Lebenswissenschaften Einzug gehalten hat, hat sich der Schwerpunkt bioinformatischer Forschung in st{\"a}rker mathematisch und informatisch-orientierte Themengebiete verschoben. Bioinformatik heute ist mehr als die computergest{\"u}tzte Verarbeitung gro{\ss}er Mengen an biologischen Daten, sondern hat einen entscheidenden Fokus auf der Modellierung komplexer biologischer Systeme. Zur Anwendung kommen hierbei insbesondere Theorien aus dem Bereich der Stochastik und Statistik, des maschinellen Lernens und der theoretischen Informatik.
    In der vorliegenden Dissertation beschreibe ich in Fallstudien die systematische Modellierung biologischer Systeme aus einem informatisch - mathematischen Standpunkt unter Anwendung von Verfahren aus den genannten Teilbereichen und auf unterschiedlichen Ebenen biologischer Abstraktion. Ausgehend von der Sequenzinformation {\"u}ber Transkriptom, Metabolom und deren regulatorischer Interaktion hin zur Modellierung von Populationseffekten werden hierbei aktuelle biologische Fragestellungen mit mathematisch - informatischen Modellen und einer Vielzahl experimenteller Daten kombiniert. Ein besonderer Augenmerk liegt dabei auf dem Vorgang der Modellierung und des Modellbegriffs als solchem im Rahmen moderner bioinformatischer Forschung. Im Detail umfassen die Projekte (mehrere Publikationen) die Entwicklung eines neuen Ansatzes zur Einbettung und Visualisierung von Multiplen Sequenz- und Sequenz-Strukturalignments, illustriert am Beispiel eines Hemagglutininalignments unterschiedlicher H5N1 Varianten, sowie die Modellierung des Transkriptoms von A. thaliana, bei welchem mit Hilfe einer kernelisierten nicht-parametrischen Metaanalyse neue, an der Infektionsabwehr beteiligten, Gene ausfindig gemacht werden konnten. Desweiteren ist uns mit Hilfe unserer Software YANAsquare eine detaillierte Untersuchung des Metabolismus von L. monocytogenes unter Aktivierung des Transkriptionsfaktors prfA gelungen, dessen Vorhersagen durch experimentelle 13C Isotopologstudien belegt werden konnten. In einem Anschlu{\ss}projekt war der Zusammenhang zwischen Regulation des Metabolismus durch Regulation der Genexpression und der Fluxverteilung des metabolischen Steady-State-Netzwerks das Ziel. Die Modellierung eines komplexen organismischen Ph{\"a}notyps, der Zellgr{\"o}{\ss}enentwicklung der Diatomee Pseudo-nitzschia delicatissima, schlie{\ss}t die Untersuchungen ab.},
  subject  = {Bioinformatik},
  language = {de},
}