% PhD thesis record (Universitaet Wuerzburg, 2021).
% The abstract is kept as published, except that the species estimate is
% written as 10^{12} (the exponent had been flattened to "1012") and the
% percentage range uses an en-dash.
@phdthesis{Maistrenko2021,
  author   = {Maistrenko, Oleksandr},
  title    = {Pangenome analysis of bacteria and its application in metagenomics},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2021},
  doi      = {10.25972/OPUS-21499},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-214996},
  abstract = {The biosphere harbors a large quantity and diversity of microbial organisms that can thrive in all environments. Estimates of the total number of microbial species reach up to $10^{12}$, of which less than 15,000 have been characterized to date. It has been challenging to delineate phenotypically, evolutionary and ecologically meaningful lineages such as for example, species, subspecies and strains. Even within recognized species, gene content can vary considerably between sublineages (for example strains), a problem that can be addressed by analyzing pangenomes, defined as the non-redundant set of genes within a phylogenetic clade, as evolutionary units. Species considered to be ecologically and evolutionary coherent units, however to date it is still not fully understood what are primary habitats and ecological niches of many prokaryotic species and how environmental preferences drive their genomic diversity. Majority of comparative genomics studies focused on a single prokaryotic species in context of clinical relevance and ecology. With accumulation of sequencing data due to genomics and metagenomics, it is now possible to investigate trends across many species, which will facilitate understanding of pangenome evolution, species and subspecies delineation.
The major aims of this thesis were 1) to annotate habitat preferences of prokaryotic species and strains; 2) investigate to what extent these environmental preferences drive genomic diversity of prokaryotes and to what extent phylogenetic constraints limit this diversification; 3) explore natural nucleotide identity thresholds to delineate species in bacteria in metagenomics gene catalogs; 4) explore species delineation for applications in subspecies and strain delineation in metagenomics. The first part of the thesis describes methods to infer environmental preferences of microbial species. This data is a prerequisite for the analyses performed in the second part of the thesis which explores how the structure of bacterial pangenomes is predetermined by past evolutionary history and how is it linked to environmental preferences of the species. The main finding in this subchapter that habitat preferences explained up to 49\% of the variance for pangenome structure, compared to 18\% by phylogenetic inertia. In general, this trend indicates that phylogenetic inertia does not limit evolution of pangenome size and diversity, but that convergent evolution may overcome phylogenetic constraints. In this project we show that core genome size is associated with higher environmental ubiquity of species. It is likely this is due to the fact that species need to have more versatile genomes and most necessary genes need to be present in majority of genomes of that species to be highly prevalent. Taken together these findings may be useful for future predictive analyses of ecological niches in newly discovered species. The third part of the thesis explores data-driven, operational species boundaries. I show that homologous genes from the same species from different genomes tend to share at least 95\% of nucleotide identity, while different species within the same genus have lower nucleotide identity.
This is in line with other studies showing that genome-wide natural species boundary might be in range of 90--95\% of nucleotide identity. Finally, the fourth part of the thesis discusses how challenges in species delineation are relevant for the identification of meaningful within-species groups, followed by a discussion on how advancements in species delineation can be applied for classification of within-species genomic diversity in the age of metagenomics.},
  subject  = {Pangenom},
  language = {en},
}

% Journal article in eLife, volume 8 (2019), article number e42693.
% The page range uses an en-dash; the journal name is stored once, in the
% journal field (the export also repeated it in a series field).
% NOTE(review): third author's given name corrected from "Luiis" to "Luis";
% confirm against the publisher record.
@article{SchmidtHaywardCoelhoetal.2019,
  author   = {Schmidt, Thomas S. B. and Hayward, Matthew R. and Coelho, Luis P. and Li, Simone S. and Costea, Paul I. and Voigt, Anita Y. and Wirbel, Jakob and Maistrenko, Oleksandr M. and Alves, Renato J. C. and Bergsten, Emma and de Beaufort, Carine and Sobhani, Iradj and Heintz-Buschart, Anna and Sunagawa, Shinichi and Zeller, Georg and Wilmes, Paul and Bork, Peer},
  title    = {Extensive transmission of microbes along the gastrointestinal tract},
  journal  = {eLife},
  volume   = {8},
  pages    = {e42693, 1--18},
  year     = {2019},
  doi      = {10.7554/eLife.42693},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-228954},
  abstract = {The gastrointestinal tract is abundantly colonized by microbes, yet the translocation of oral species to the intestine is considered a rare aberrant event, and a hallmark of disease. By studying salivary and fecal microbial strain populations of 310 species in 470 individuals from five countries, we found that transmission to, and subsequent colonization of, the large intestine by oral microbes is common and extensive among healthy individuals. We found evidence for a vast majority of oral species to be transferable, with increased levels of transmission in colorectal cancer and rheumatoid arthritis patients and, more generally, for species described as opportunistic pathogens.
This establishes the oral cavity as an endogenous reservoir for gut microbial strains, and oral-fecal transmission as an important process that shapes the gastrointestinal microbiome in health and disease.},
  subject  = {Barrier},
  language = {en},
}