@comment{
  PhD theses on bacterial genomics and transcriptomics (2017 to 2023).
  Layout: one field per line. Abstracts are kept on a single line each.
  Acronyms and genus names in titles are brace-protected so that
  sentence-casing bibliography styles do not lowercase them.
  The url fields hold the URN resolver links as given in the source records.
}

@phdthesis{Horn2017,
  author   = {Horn, Hannes},
  title    = {Analysis and interpretation of (meta-)genomic data from host-associated microorganisms},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2017},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-152035},
  abstract = {Host-microbe interactions are the key to understand why and how microbes inhabit specific environments. With the scientific fields of microbial genomics and metagenomics, evolving on an unprecedented scale, one is able to gain insights in these interactions on a molecular and ecological level. The goal of this PhD thesis was to make (meta-)genomic data accessible, integrate it in a comparative manner and to gain comprehensive taxonomic and functional insights into bacterial strains and communities derived from two different environments: the phyllosphere of Arabidopsis thaliana and the mesohyl interior of marine sponges. This thesis focused first on the de novo assembly of bacterial genomes. A 5-step protocol was developed, each step including a quality control. The examination of different assembly software in a comparative way identified SPAdes as most suitable. The protocol enables the user to choose the best tailored assembly. Contamination issues were solved by an initial filtering of the data and methods normally used for the binning of metagenomic datasets. This step is missed in many published assembly pipelines. The described protocol offers assemblies of high quality ready for downstream analysis. Subsequently, assemblies generated with the developed protocol were annotated and explored in terms of their function. In a first study, the genome of a phyllosphere bacterium, Williamsia sp. ARP1, was analyzed, offering many adaptions to the leaf habitat: it can deal with temperature shifts, react to oxygen species, produces mycosporins as protection against UV-light, and is able to uptake photosynthates. Further, its taxonomic position within the Actinomycetales was inferred from 16S rRNA and comparative genomics showing the close relation between the genera Williamsia and Gordonia. In a second study, six sponge-derived actinomycete genomes were investigated for secondary metabolism. By use of state-of-the-art software, these strains exhibited numerous gene clusters, mostly linked to polyketide synthases, non-ribosomal peptide synthesis, terpenes, fatty acids and saccharides. Subsequent predictions on these clusters offered a great variety of possible produced compounds with antibiotic, antifungal or anti-cancer activity. These analyses highlight the potential for the synthesis of natural products and the use of genomic data as screening toolkit. In a last study, three sponge-derived and one seawater metagenomes were functionally compared. Different signatures regarding the microbial composition and GC-distribution were observed between the two environments. With a focus on bacterial defense systems, the data indicates a pronounced repertoire of sponge associated bacteria for bacterial defense systems, in particular, Clustered Regularly Interspaced Short Palindromic Repeats, restriction modification system, DNA phosphorothioation and phage growth limitation. In addition, characterizing genes for secondary metabolite cluster differed between sponge and seawater microbiomes. Moreover, a variety of Type I polyketide synthases were only found within the sponge microbiomes. With that, metagenomics are shown to be a useful tool for the screening of secondary metabolite genes. Furthermore, enriched defense systems are highlighted as feature of sponge-associated microbes and marks them as a selective trait.},
  subject  = {Bakterien},
  language = {en},
}

@phdthesis{Sharan2017,
  author   = {Sharan, Malvika},
  title    = {Bio-computational identification and characterization of {RNA}-binding proteins in bacteria},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2017},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-153573},
  abstract = {RNA-binding proteins (RBPs) have been extensively studied in eukaryotes, where they post-transcriptionally regulate many cellular events including RNA transport, translation, and stability. Experimental techniques, such as cross-linking and co-purification followed by either mass spectrometry or RNA sequencing has enabled the identification and characterization of RBPs, their conserved RNA-binding domains (RBDs), and the regulatory roles of these proteins on a genome-wide scale. These developments in quantitative, high-resolution, and high-throughput screening techniques have greatly expanded our understanding of RBPs in human and yeast cells. In contrast, our knowledge of number and potential diversity of RBPs in bacteria is comparatively poor, in part due to the technical challenges associated with existing global screening approaches developed in eukaryotes. Genome- and proteome-wide screening approaches performed in silico may circumvent these technical issues to obtain a broad picture of the RNA interactome of bacteria and identify strong RBP candidates for more detailed experimental study. Here, I report APRICOT (``Analyzing Protein RNA Interaction by Combined Output Technique''), a computational pipeline for the sequence-based identification and characterization of candidate RNA-binding proteins encoded in the genomes of all domains of life using RBDs known from experimental studies. The pipeline identifies functional motifs in protein sequences of an input proteome using position-specific scoring matrices and hidden Markov models of all conserved domains available in the databases and then statistically score them based on a series of sequence-based features. Subsequently, APRICOT identifies putative RBPs and characterizes them according to functionally relevant structural properties. APRICOT performed better than other existing tools for the sequence-based prediction on the known RBP data sets. The applications and adaptability of the software was demonstrated on several large bacterial RBP data sets including the complete proteome of Salmonella Typhimurium strain SL1344. APRICOT reported 1068 Salmonella proteins as RBP candidates, which were subsequently categorized using the RBDs that have been reported in both eukaryotic and bacterial proteins. A set of 131 strong RBP candidates was selected for experimental confirmation and characterization of RNA-binding activity using RNA co-immunoprecipitation followed by high-throughput sequencing (RIP-Seq) experiments. Based on the relative abundance of transcripts across the RIP-Seq libraries, a catalogue of enriched genes was established for each candidate, which shows the RNA-binding potential of 90\% of these proteins. Furthermore, the direct targets of few of these putative RBPs were validated by means of cross-linking and co-immunoprecipitation (CLIP) experiments. This thesis presents the computational pipeline APRICOT for the global screening of protein primary sequences for potential RBPs in bacteria using RBD information from all kingdoms of life. Furthermore, it provides the first bio-computational resource of putative RBPs in Salmonella, which could now be further studied for their biological and regulatory roles. The command line tool and its documentation are available at https://malvikasharan.github.io/APRICOT/.},
  language = {en},
}

@phdthesis{Bischler2018,
  author   = {Bischler, Thorsten David},
  title    = {Data mining and software development for {RNA-seq}-based approaches in bacteria},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2018},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-166108},
  abstract = {RNA sequencing (RNA-seq) has in recent years become the preferred method for gene expression analysis and whole transcriptome annotation. While initial RNA-seq experiments focused on eukaryotic messenger RNAs (mRNAs), which can be purified from the cellular ribonucleic acid (RNA) pool with relative ease, more advanced protocols had to be developed for sequencing of microbial transcriptomes. The resulting RNA-seq data revealed an unexpected complexity of bacterial transcriptomes and the requirement for specific analysis methods, which in many cases is not covered by tools developed for processing of eukaryotic data. The aim of this thesis was the development and application of specific data analysis methods for different RNA-seq-based approaches used to gain insights into transcription and gene regulatory processes in prokaryotes. The differential RNA sequencing (dRNA-seq) approach allows for transcriptional start site (TSS) annotation by differentiating between primary transcripts with a 5'-triphosphate (5'-PPP) and processed transcripts with a 5'-monophosphate (5'-P). This method was applied in combination with an automated TSS annotation tool to generate global transcriptome maps for Escherichia coli (E. coli) and Helicobacter pylori (H. pylori). In the E. coli study we conducted different downstream analyses to gain a deeper understanding of the nature and properties of transcripts in our TSS map. Here, we focused especially on putative antisense RNAs (asRNAs), an RNA class transcribed from the opposite strand of known protein-coding genes with the potential to regulate corresponding sense transcripts. Besides providing a set of putative asRNAs and experimental validation of candidates via Northern analysis, we analyzed and discussed different sources of variation in RNA-seq data. The aim of the H. pylori study was to provide a detailed description of the dRNA-seq approach and its application to a bacterial model organism. It includes information on experimental protocols and requirements for data analysis to generate a genome-wide TSS map. We show how the included TSS can be used to identify and analyze transcriptome and regulatory features and discuss challenges in terms of library preparation protocols, sequencing platforms, and data analysis including manual and automated TSS annotation. The TSS maps and associated transcriptome data from both H. pylori and E. coli were made available for visualization in an easily accessible online browser. Furthermore, a modified version of dRNA-seq was used to identify transcriptome targets of the RNA pyrophosphohydrolase (RppH) in H. pylori. RppH initiates 5'-end-dependent degradation of transcripts by converting the 5'-PPP of primary transcripts to a 5'-P. I developed an analysis method, which uses data from complementary DNA (cDNA) libraries specific for transcripts carrying a 5'-PPP, 5'-P or both, to specifically identify transcripts modified by RppH. For this, the method assessed the 5'-phosphorylation state and cellular concentration of transcripts in rppH deletion in comparison to strains with the intact gene. Several of the identified potential RppH targets were further validated via half-life measurements and quantification of their 5'-phosphorylation state in wild-type and mutant cells. Our findings suggest an important role for RppH in post-transcriptional gene regulation in H. pylori and related organisms. In addition, we applied two RNA-seq-based approaches, RNA immunoprecipitation followed by sequencing (RIP-seq) and cross-linking immunoprecipitation followed by sequencing (CLIP-seq), to identify transcripts bound by Hfq and CsrA, two RNA-binding proteins (RBPs) with an important role in post-transcriptional regulation. For RIP-seq-based identification of CsrA binding regions in Campylobacter jejuni (C. jejuni), we used annotation-based analysis and, in addition, a self-developed peak calling method based on a sliding window approach. Both methods revealed flaA mRNA, encoding the major flagellin, as the main target and functional analysis of identified targets showed a significant enrichment of genes involved in flagella biosynthesis. Further experimental analysis revealed the role of flaA mRNA in post-transcriptional regulation. In comparison to RIP-seq, CLIP-seq allows mapping of RBP binding sites with a higher resolution. To identify these sites an approach called ``block-based peak calling'' was developed and resulting peaks were used to identify sequence and structural constraints required for interaction of Hfq and CsrA with Salmonella transcripts. Overall, the different RNA-seq-based approaches described in this thesis together with their associated analysis pipelines extended our knowledge on the transcriptional repertoire and modes of post-transcriptional regulation in bacteria. The global TSS maps, including further characterized asRNA candidates, putative RppH targets, and identified RBP interactomes will likely trigger similar global studies in the same or different organisms or will be used as a resource for closer examination of these features.},
  subject  = {Bakterien},
  language = {en},
}

@phdthesis{Fasemore2023,
  author   = {Fasemore, Akinyemi Mandela},
  title    = {Genomic and internet based analysis of \textit{{Coxiella} burnetii}},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2023},
  doi      = {10.25972/OPUS-29663},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-296639},
  abstract = {Coxiella burnetii, a Gram negative obligate intracellular bacterium, is the causative agent of Q fever. It has a world wide distribution and has been documented to be capable of causing infections in several domestic animals, livestock species, and human beings. Outbreaks of Q fever are still being observed in livestock across animal farms in Europe, and primary transmission to humans still occurs especially in animal handlers. Public health authorities in some countries like Germany are required by law to report human acute cases denoting the significance of the challenge posed by C. burnetii to public health. In this thesis, I have developed a platform alongside methods to address the challenges of genomic analyses of C. burnetii for typing purposes. Identification of C. burnetii isolates is an important task in the laboratory as well as in the clinics and genotyping is a reliable method to identify and characterize known and novel isolates. Therefore, I designed and implemented several methods to facilitate the genotyping analyses of C. burnetii genomes in silico via a web platform. As genotyping is a data intensive process, I also included additional features such as visualization methods and databases for interpretation and storage of obtained results. I also developed a method to profile the resistome of C. burnetii isolates using a machine learning approach. Data about antibiotic resistance in C. burnetii are scarce majorly due to its lifestyle and the difficulty of cultivation in laboratory media. Alternative methods that rely on homology identification of resistance genes are also inefficient in C. burnetii, hence, I opted for a novel approach that has been shown to be promising in other bacteria species. The applied method relied on an artificial neural network as well as amino acid composition of position specific scoring matrix profile for feature extraction. The resulting model achieved an accuracy of $\approx 0.96$ on test data and the overall performance was significantly higher in comparison to existing models. Finally, I analyzed two new C. burnetii isolates obtained from an outbreak in Germany, I compared the genome to the RSA 493 reference isolate and found extensive deletions across the genome landscape. This work has provided a new digital infrastructure to analyze and characterize C. burnetii genomes that was not in existence before and it has also made a significant contribution to the existing information about antibiotic resistance genes in C. burnetii.},
  language = {en},
}