% Leimbach2017: PhD thesis record; the url field holds an nbn-resolving.de URN resolver link.
% The species name in the title is set with \emph (text italics, brace-protected against
% sentence-casing styles) rather than math mode. Underscores in the abstract are escaped so
% that styles which print the abstract do not raise a TeX error.
@phdthesis{Leimbach2017,
  author   = {Leimbach, Andreas},
  title    = {Genomics of pathogenic and commensal \emph{Escherichia coli}},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2017},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-154539},
  abstract = {High-throughput sequencing (HTS) has revolutionized bacterial genomics. Its unparalleled sensitivity has opened the door to analyzing bacterial evolution and population genomics, dispersion of mobile genetic elements (MGEs), and within-host adaptation of pathogens, such as Escherichia coli. One of the defining characteristics of intestinal pathogenic E. coli (IPEC) pathotypes is a specific repertoire of virulence factors (VFs). Many of these IPEC VFs are used as typing markers in public health laboratories to monitor outbreaks and guide treatment options. Instead, extraintestinal pathogenic E. coli (ExPEC) isolates are genotypically diverse and harbor a varied set of VFs -- the majority of which also function as fitness factors (FFs) for gastrointestinal colonization. The aim of this thesis was the genomic characterization of pathogenic and commensal E. coli with respect to their virulence- and antibiotic resistance-associated gene content as well as phylogenetic background. In order to conduct the comparative analyses, I created a database of E. coli VFs, ecoli\_VF\_collection, with a focus on ExPEC virulence-associated proteins (Leimbach, 2016b). Furthermore, I wrote a suite of scripts and pipelines, bac-genomics-scripts, that are useful for bacterial genomics (Leimbach, 2016a). This compilation includes tools for assembly and annotation as well as comparative genomics analyses, like multi-locus sequence typing (MLST), assignment of Clusters of Orthologous Groups (COG) categories, searching for protein homologs, detection of genomic regions of difference (RODs), and calculating pan-genome-wide association statistics.
    Using these tools we were able to determine the prevalence of 18 autotransporters (ATs) in a large, phylogenetically heterogeneous strain panel and demonstrate that many AT proteins are not associated with E. coli pathotypes. According to multivariate analyses and statistics the distribution of AT variants is instead significantly dependent on phylogenetic lineages. As a consequence, ATs are not suitable to serve as pathotype markers (Zude et al., 2014). During the German Shiga toxin-producing E. coli (STEC) outbreak in 2011, the largest to date, we were one of the teams capable of analyzing the genomic features of two isolates. Based on MLST and detection of orthologous proteins to known E. coli reference genomes the close phylogenetic relationship and overall genome similarity to enteroaggregative E. coli (EAEC) 55989 was revealed. In particular, we identified VFs of both STEC and EAEC pathotypes, most importantly the prophage-encoded Shiga toxin (Stx) and the pAA-type plasmid harboring aggregative adherence fimbriae. As a result, we could show that the epidemic was caused by an unusual hybrid pathotype of the O104:H4 serotype. Moreover, we detected the basis of the antibiotic multi-resistant phenotype on an extended-spectrum beta-lactamase (ESBL) plasmid through comparisons to reference plasmids. With this information we proposed an evolutionary horizontal gene transfer (HGT) model for the possible emergence of the pathogen (Brzuszkiewicz et al., 2011). Similarly to ExPEC, E. coli isolates of bovine mastitis are genotypically and phenotypically highly diverse and many studies struggled to determine a positive association of putative VFs. Instead the general E. coli pathogen-associated molecular pattern (PAMP), lipopolysaccharide (LPS), is implicated as a deciding factor for intramammary inflammation.
    Nevertheless, a mammary pathogenic E. coli (MPEC) pathotype was proposed presumably encompassing strains more adapted to elicit bovine mastitis with virulence traits differentiating them from commensals. We sequenced eight E. coli isolates from udder serous exudate and six fecal commensals (Leimbach et al., 2016). Two mastitis isolate genomes were closed to a finished-grade quality (Leimbach et al., 2015). The genomic sequence of mastitis-associated E. coli (MAEC) strain 1303 was used to elucidate the biosynthesis gene cluster of its O70 LPS O-antigen. We analyzed the phylogenetic genealogy of our strain panel plus eleven bovine-associated E. coli reference strains and found that commensal or MAEC could not be unambiguously allocated to specific phylogroups within a core genome tree of reference E. coli. A thorough gene content analysis could not identify functional convergence of either commensal or MAEC, instead both have only very few gene families enriched in either pathotype. Most importantly, gene content and ecoli\_VF\_collection analyses showed that no virulence determinants are significantly associated with MAEC in comparison to bovine fecal commensals, disproving the MPEC hypothesis. The genetic repertoire of bovine-associated E. coli, again, is dominated by phylogenetic background. This is also mostly the case for large virulence-associated E. coli gene cluster previously associated with mastitis. Correspondingly, MAEC are facultative and opportunistic pathogens recruited from the bovine commensal gastrointestinal microbiota (Leimbach et al., 2017). Thus, E. coli mastitis should be prevented rather than treated, as antibiotics and vaccines have not proven effective. Although traditional E. coli pathotypes serve a purpose for diagnostics and treatment, it is clear that the current typing system is an oversimplification of E. coli's genomic plasticity. Whole genome sequencing (WGS) revealed many nuances of pathogenic E. coli, including emerging hybrid or heteropathogenic pathotypes.
    Diagnostic and public health microbiology need to embrace the future by implementing HTS techniques to target patient care and infection control more efficiently.},
  subject  = {Escherichia coli},
  language = {en},
}