% Putze2009: German-language PhD thesis (Universitaet Wuerzburg, 2009).
% Nouns in the title are brace-protected so that sentence-casing styles keep
% their capitals; non-ASCII characters are written as LaTeX escapes to match
% the escaped umlauts used throughout the entry.
@phdthesis{Putze2009,
  author   = {Putze, Johannes},
  title    = {Studien zur {Verbreitung} und genetischen {Struktur} des {Colibactin-Genclusters} in {Enterobacteriaceae}},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2009},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-47259},
  language = {de},
  subject  = {Enterobacteriaceae},
  abstract = {Horizontaler Gentransfer zwischen Bakterien - sogar zwischen verschiedenen Spezies - ist ein wichtiger Mechanismus f{\"u}r den Austausch genetischer Information. Dies kann dem Rezipienten einen selektiven Vorteil verleihen, z. B. durch die schnelle Aneignung von Genclustern, die f{\"u}r Pathogenit{\"a}ts- oder Fitnessfaktoren kodieren. Die Variabilit{\"a}t bakterieller Genome durch Aneignung und Inkorporation genetischen Materials in das Genom tr{\"a}gt somit erheblich zur Evolution von Bakterien bei. Bakterielle Genome neigen allerdings dazu, nutzlose genetische Information zu verlieren und daher kann horizontal erworbener DNA h{\"a}ufig eine distinkte biologische Funktion zugeordnet werden. Das Colibactin-Gencluster, welches zuerst in Escherichia coli gefunden wurde, weist mehrere Charakteristika einer horizontal erworbenen genomischen Insel auf. Die Gr{\"o}{\ss}e dieser genomischen Insel betr{\"a}gt 54 kb und sie umfasst 20 offene Leseraster (ORFs), von denen acht f{\"u}r putative Polyketidsynthasen (PKS), nichtribosomale Peptidsynthasen (NRPS) und Hybride dieser kodieren. Colibactin {\"u}bt einen zytopathischen Effekt (CPE) auf eukaryotische Zellen in vitro aus. Nach Kokultivierung Colibactin-Gencluster-positiven Bakterien mit eukaryotischen Zellen kommt es zu DNA Doppelstrang Br{\"u}chen, Zellzyklus-Arrest in der G2-Phase, Megalozytose und schlie{\ss}lich zum Zelltod. Diese Effekte sind mit denen des Zyklomodulins {\quotedblbase}Cytolethal Distending Toxin{\textquotedblleft} (CDT) vergleichbar, allerdings konnte die biologische Funktion des Colibactins in vivo bisher nicht aufgekl{\"a}rt werden.
Das Colibactin-Gencluster wurde bisher nur in Escherichia coli St{\"a}mmen der phylogenetischen Gruppe B2 als individuelle genomische Insel, integriert im tRNA-asnW-Gen, vorgefunden. Im Rahmen dieser Arbeit konnte das Colibactin-Gencluster auch in E. coli der phylogenetischen Gruppe B1 und in Citrobacter koseri, Enterobacter aerogenes und Klebsiella pneumoniae subsp. pneumoniae nachgewiesen werden. In diesen Bakterienst{\"a}mmen ist das Colibactin-Gencluster Teil eines genetischen Elements, das {\"A}hnlichkeit zu integrativen und konjugativen Elementen (ICE) aus E. coli und K. pneumoniae aufweist. Im Gegensatz zur hochkonservierten Integrationsstelle des Colibactin-Genclusters in tRNA-asnW in E. coli der phylogenetischen Gruppe B2 konnte die Integrationsstelle dieses ICE in E. coli der Gruppe B1 in tRNA-asnU bestimmt werden. In Bakterienst{\"a}mmen der Spezies K. pneumoniae subsp. pneumoniae wurden vier verschiedene Integrationsstellen in f{\"u}nf analysierten St{\"a}mmen identifiziert. Neben der Studien zur Verbreitung und chromosomalen Integration des Colibactin-Genclusters wurden Kolonisierungsstudien im murinen streptomycinbehandelten Intestinaltrakt mit E. coli Stamm Nissle 1917 durchgef{\"u}hrt, um eine m{\"o}gliche Funktion des Colibactins im Darmtrakt n{\"a}her zu untersuchen. Weder in nicht-kompetitiven noch in kompetitiven Versuchsdurchf{\"u}hrungen konnte dabei ein Kolonisierungsvorteil durch Colibactin nachgewiesen werden. Die Ergebnisse dieser Arbeit haben gezeigt, dass das Colibactin-Gencluster in verschiedenen Spezies der Enterobacteriaceae vorhanden und funktional ist. Das Auftreten dieses sowohl als individuelle genomische Insel als auch als Teil eines ICE veranschaulicht die genetische Plastizit{\"a}t dieses Elements und die Bedeutung des horizontalen Transfers genetischen Materials.
Die biologische Funktion des Colibactins in vivo bleibt weiterhin unklar und k{\"o}nnte sowohl die bakterielle Fitness als auch die Virulenz beeinflussen.},
}

% Friedrich2009: English-language PhD thesis (Universitaet Wuerzburg, 2009).
% The genus name in the title is brace-protected against style recasing.
@phdthesis{Friedrich2009,
  author   = {Friedrich, Torben},
  title    = {New statistical Methods of Genome-Scale Data Analysis in Life Science -- Applications to enterobacterial Diagnostics, Meta-Analysis of {Arabidopsis} thaliana Gene Expression and functional Sequence Annotation},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2009},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-39858},
  language = {en},
  subject  = {Genomik},
  abstract = {Recent progresses and developments in molecular biology provide a wealth of new but insufficiently characterised data. This fund comprises amongst others biological data of genomic DNA, protein sequences, 3-dimensional protein structures as well as profiles of gene expression. In the present work, this information is used to develop new methods for the characterisation and classification of organisms and whole groups of organisms as well as to enhance the automated gain and transfer of information. The first two presented approaches (chapters 4 und 5) focus on the medically and scientifically important enterobacteria. Its impact in medicine and molecular biology is founded in versatile mechanisms of infection, their fundamental function as a commensal inhabitant of the intestinal tract and their use as model organisms as they are easy to cultivate. Despite many studies on single pathogroups with clinical distinguishable pathologies, the genotypic factors that contribute to their diversity are still partially unknown. The comprehensive genome comparison described in Chapter 4 was conducted with numerous enterobacterial strains, which cover nearly the whole range of clinically relevant diversity.
The genome comparison constitutes the basis of a characterisation of the enterobacterial gene pool, of a reconstruction of evolutionary processes and of comprehensive analysis of specific protein families in enterobacterial subgroups. Correspondence analysis, which is applied for the first time in this context, yields qualitative statements to bacterial subgroups and the respective, exclusively present protein families. Specific protein families were identified for the three major subgroups of enterobacteria namely the genera Yersinia and Salmonella as well as to the group of Shigella and E. coli by applying statistical tests. In conclusion, the genome comparison-based methods provide new starting points to infer specific genotypic traits of bacterial groups from the transfer of functional annotation. Due to the high medical importance of enterobacterial isolates their classification according to pathogenicity has been in focus of many studies. The microarray technology offers a fast, reproducible and standardisable means of bacterial typing and has been proved in bacterial diagnostics, risk assessment and surveillance. The design of the diagnostic microarray of enterobacteria described in chapter 5 is based on the availability of numerous enterobacterial genome sequences. A novel probe selection strategy based on the highly efficient algorithm of string search, which considers both coding and non-coding regions of genomic DNA, enhances pathogroup detection. This principle reduces the risk of incorrect typing due to restrictions to virulence-associated capture probes. Additional capture probes extend the spectrum of applications of the microarray to simultaneous diagnostic or surveillance of antimicrobial resistance. Comprehensive test hybridisations largely confirm the reliability of the selected capture probes and its ability to robustly classify enterobacterial strains according to pathogenicity.
Moreover, the tests constitute the basis of the training of a regression model for the classification of pathogroups and hybridised amounts of DNA. The regression model features a continuous learning capacity leading to an enhancement of the prediction accuracy in the process of its application. A fraction of the capture probes represents intergenic DNA and hence confirms the relevance of the underlying strategy. Interestingly, a large part of the capture probes represents poorly annotated genes suggesting the existence of yet unconsidered factors with importance to the formation of respective virulence phenotypes. Another major field of microarray applications is gene expression analysis. The size of gene expression databases rapidly increased in recent years. Although they provide a wealth of expression data, it remains challenging to integrate results from different studies. In chapter 6 the methodology of an unsupervised meta-analysis of genome-wide A. thaliana gene expression data sets is presented, which yields novel insights in function and regulation of genes. The application of kernel-based principal component analysis in combination with hierarchical clustering identified three major groups of contrasts each sharing overlapping expression profiles. Genes associated with two groups are known to play important roles in Indol-3 acetic acid (IAA) mediated plant growth and development as well as in pathogen defence. Yet uncharacterised serine-threonine kinases could be assigned to novel functions in pathogen defence by meta-analysis. In general, hidden interrelation between genes regulated under different conditions could be unravelled by the described approach. HMMs are applied to the functional characterisation of proteins or the detection of genes in genome sequences.
Although HMMs are technically mature and widely applied in computational biology, I demonstrate the methodical optimisation with respect to the modelling accuracy on biological data with various distributions of sequence lengths. The subunits of these models, the states, are associated with a certain holding time being the link to length distributions of represented sequences. An adaptation of simple HMM topologies to bell-shaped length distributions described in chapter 7 was achieved by serial chain-linking of single states, while residing in the class of conventional HMMs. The impact of an optimisation of HMM topologies was underlined by performance evaluations with differently adjusted HMM topologies. In summary, a general methodology was introduced to improve the modelling behaviour of HMMs by topological optimisation with maximum likelihood and a fast and easily implementable moment estimator. Chapter 8 describes the application of HMMs to the prediction of interaction sites in protein domains. As previously demonstrated, these sites are not trivial to predict because of varying degree in conservation of their location and type within the domain family. The prediction of interaction sites in protein domains is achieved by a newly defined HMM topology, which incorporates both sequence and structure information. Posterior decoding is applied to the prediction of interaction sites providing additional information of the probability of an interaction for all sequence positions. The implementation of interaction profile HMMs (ipHMMs) is based on the well established profile HMMs and inherits its known efficiency and sensitivity. The large-scale prediction of interaction sites by ipHMMs explained protein dysfunctions caused by mutations that are associated to inheritable diseases like different types of cancer or muscular dystrophy. As already demonstrated by profile HMMs, the ipHMMs are suitable for large-scale applications.
Overall, the HMM-based method enhances the prediction quality of interaction sites and improves the understanding of the molecular background of inheritable diseases. With respect to current and future requirements I provide large-scale solutions for the characterisation of biological data in this work. All described methods feature a highly portable character, which allows for the transfer to related topics or organisms, respectively. Special emphasis was put on the knowledge transfer facilitated by a steadily increasing wealth of biological information. The applied and developed statistical methods largely provide learning capacities and hence benefit from the gain of knowledge resulting in increased prediction accuracies and reliability.},
}