@phdthesis{Beisser2011, author = {Beisser, Daniela}, title = {Integrated functional analysis of biological networks}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-70150}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2011}, abstract = {In recent years high-throughput experiments provided a vast amount of data from all areas of molecular biology, including genomics, transcriptomics, proteomics and metabolomics. Its analysis using bioinformatics methods has developed accordingly, towards a systematic approach to understand how genes and their resulting proteins give rise to biological form and function. They interact with each other and with other molecules in highly complex structures, which are explored in network biology. The in-depth knowledge of genes and proteins obtained from high-throughput experiments can be complemented by the architecture of molecular networks to gain a deeper understanding of biological processes. This thesis provides methods and statistical analyses for the integration of molecular data into biological networks and the identification of functional modules, as well as its application to distinct biological data. The integrated network approach is implemented as a software package, termed BioNet, for the statistical language R. The package includes the statistics for the integration of transcriptomic and functional data with biological networks, the scoring of nodes and edges of these networks as well as methods for subnetwork search and visualisation. The exact algorithm is extensively tested in a simulation study and outperforms existing heuristic methods for the calculation of this NP-hard problem in accuracy and robustness. The variability of the resulting solutions is assessed on perturbed data, mimicking random or biased factors that obscure the biological signal, generated for the integrated data and the network. An optimal, robust module can be calculated using a consensus approach, based on a resampling method. It summarizes optimally an ensemble of solutions in a robust consensus module with the estimated variability indicated by confidence values for the nodes and edges. The approach is subsequently applied to two gene expression data sets. The first application analyses gene expression data for acute lymphoblastic leukaemia (ALL) and differences between the subgroups with and without an oncogenic BCR/ABL gene fusion. In a second application gene expression and survival data from diffuse large B-cell lymphomas are examined. The identified modules include and extend already existing gene lists and signatures by further significant genes and their interactions. The most important novelty is that these genes are determined and visualised in the context of their interactions as a functional module and not as a list of independent and unrelated transcripts. In a third application the integrative network approach is used to trace changes in tardigrade metabolism to identify pathways responsible for their extreme resistance to environmental changes and endurance in an inactive tun state. For the first time a metabolic network approach is proposed to detect shifts in metabolic pathways, integrating transcriptome and metabolite data. Concluding, the presented integrated network approach is an adequate technique to unite high-throughput experimental data for single molecules and their intermolecular dependencies. It is flexible to apply on diverse data, ranging from gene expression changes over metabolite abundances to protein modifications in a combination with a suitable molecular network. The exact algorithm is accurate and robust in comparison to heuristic approaches and delivers an optimal, robust solution in form of a consensus module with confidence values. By the integration of diverse sources of information and a simultaneous inspection of a molecular event from different points of view, new and exhaustive insights into biological processes can be acquired.}, subject = {Bioinformatik}, language = {en} }