% Bibliography records: two BMC Genomics articles followed by four PhD theses.
% Layout: one field per line; each abstract is kept on a single line.
% Conventions used below:
%   - accented letters are written as brace-wrapped LaTeX accent commands
%   - organism names and acronyms in titles are brace-protected against recasing
%   - BMC Genomics article numbers are stored in the pages field
%   - LaTeX special characters inside abstracts are escaped

@article{daCruzRodriguezCasuriagaSantinaqueetal.2016,
  author   = {da Cruz, Irene and Rodr{\'\i}guez-Casuriaga, Rosana and Santi{\~n}aque, Frederico F. and Far{\'\i}as, Joaquina and Curti, Gianni and Capoano, Carlos A. and Folle, Gustavo A. and Benavente, Ricardo and Sotelo-Silveira, Jos{\'e} Roberto and Geisinger, Adriana},
  title    = {Transcriptome analysis of highly purified mouse spermatogenic cell populations: gene expression signatures switch from meiotic-to postmeiotic-related processes at pachytene stage},
  journal  = {BMC Genomics},
  volume   = {17},
  pages    = {294},
  year     = {2016},
  doi      = {10.1186/s12864-016-2618-1},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-164574},
  abstract = {Background Spermatogenesis is a complex differentiation process that involves the successive and simultaneous execution of three different gene expression programs: mitotic proliferation of spermatogonia, meiosis, and spermiogenesis. Testicular cell heterogeneity has hindered its molecular analyses. Moreover, the characterization of short, poorly represented cell stages such as initial meiotic prophase ones (leptotene and zygotene) has remained elusive, despite their crucial importance for understanding the fundamentals of meiosis. Results We have developed a flow cytometry-based approach for obtaining highly pure stage-specific spermatogenic cell populations, including early meiotic prophase. Here we combined this methodology with next generation sequencing, which enabled the analysis of meiotic and postmeiotic gene expression signatures in mouse with unprecedented reliability. Interestingly, we found that a considerable number of genes involved in early as well as late meiotic processes are already on at early meiotic prophase, with a high proportion of them being expressed only for the short time lapse of lepto-zygotene stages. Besides, we observed a massive change in gene expression patterns during medium meiotic prophase (pachytene) when mostly genes related to spermiogenesis and sperm function are already turned on. This indicates that the transcriptional switch from meiosis to post-meiosis takes place very early, during meiotic prophase, thus disclosing a higher incidence of post-transcriptional regulation in spermatogenesis than previously reported. Moreover, we found that a good proportion of the differential gene expression in spermiogenesis corresponds to up-regulation of genes whose expression starts earlier, at pachytene stage; this includes transition protein-and protamine-coding genes, which have long been claimed to switch on during spermiogenesis. In addition, our results afford new insights concerning X chromosome meiotic inactivation and reactivation. Conclusions This work provides for the first time an overview of the time course for the massive onset and turning off of the meiotic and spermiogenic genetic programs. Importantly, our data represent a highly reliable information set about gene expression in pure testicular cell populations including early meiotic prophase, for further data mining towards the elucidation of the molecular bases of male reproduction in mammals.},
  language = {en},
}

@article{BabskiHaasNaetherSchindleretal.2016,
  author   = {Babski, Julia and Haas, Karina A. and N{\"a}ther-Schindler, Daniela and Pfeiffer, Friedhelm and F{\"o}rstner, Konrad U. and Hammelmann, Matthias and Hilker, Rolf and Becker, Anke and Sharma, Cynthia M. and Marchfelder, Anita and Soppa, J{\"o}rg},
  title    = {Genome-wide identification of transcriptional start sites in the haloarchaeon {Haloferax} volcanii based on differential {RNA-Seq} ({dRNA-Seq})},
  journal  = {BMC Genomics},
  volume   = {17},
  pages    = {629},
  year     = {2016},
  doi      = {10.1186/s12864-016-2920-y},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-164553},
  abstract = {Background Differential RNA-Seq (dRNA-Seq) is a recently developed method of performing primary transcriptome analyses that allows for the genome-wide mapping of transcriptional start sites (TSSs) and the identification of novel transcripts. Although the transcriptomes of diverse bacterial species have been characterized by dRNA-Seq, the transcriptome analysis of archaeal species is still rather limited. Therefore, we used dRNA-Seq to characterize the primary transcriptome of the model archaeon Haloferax volcanii. Results Three independent cultures of Hfx. volcanii grown under optimal conditions to the mid-exponential growth phase were used to determine the primary transcriptome and map the 5'-ends of the transcripts. In total, 4749 potential TSSs were detected. A position weight matrix (PWM) was derived for the promoter predictions, and the results showed that 64 \% of the TSSs were preceded by stringent or relaxed basal promoters. Of the identified TSSs, 1851 belonged to protein-coding genes. Thus, fewer than half (46 \%) of the 4040 protein-coding genes were expressed under optimal growth conditions. Seventy-two percent of all protein-coding transcripts were leaderless, which emphasized that this pathway is the major pathway for translation initiation in haloarchaea. A total of 2898 of the TSSs belonged to potential non-coding RNAs, which accounted for an unexpectedly high fraction (61 \%) of all transcripts. Most of the non-coding TSSs had not been previously described (2792) and represented novel sequences (59 \% of all TSSs). A large fraction of the potential novel non-coding transcripts were cis-antisense RNAs (1244 aTSSs). A strong negative correlation between the levels of antisense transcripts and cognate sense mRNAs was found, which suggested that the negative regulation of gene expression via antisense RNAs may play an important role in haloarchaea. The other types of novel non-coding transcripts corresponded to internal transcripts overlapping with mRNAs (1153 iTSSs) and intergenic small RNA (sRNA) candidates (395 TSSs). Conclusion This study provides a comprehensive map of the primary transcriptome of Hfx. volcanii grown under optimal conditions. Fewer than half of all protein-coding genes have been transcribed under these conditions. Unexpectedly, more than half of the detected TSSs belonged to several classes of non-coding RNAs. Thus, RNA-based regulation appears to play a more important role in haloarchaea than previously anticipated.},
  language = {en},
}

@phdthesis{Bischler2018,
  author   = {Bischler, Thorsten David},
  title    = {Data mining and software development for {RNA-seq-based} approaches in bacteria},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2018},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-166108},
  abstract = {RNA sequencing (RNA-seq) has in recent years become the preferred method for gene expression analysis and whole transcriptome annotation. While initial RNA-seq experiments focused on eukaryotic messenger RNAs (mRNAs), which can be purified from the cellular ribonucleic acid (RNA) pool with relative ease, more advanced protocols had to be developed for sequencing of microbial transcriptomes. The resulting RNA-seq data revealed an unexpected complexity of bacterial transcriptomes and the requirement for specific analysis methods, which in many cases is not covered by tools developed for processing of eukaryotic data. The aim of this thesis was the development and application of specific data analysis methods for different RNA-seq-based approaches used to gain insights into transcription and gene regulatory processes in prokaryotes. The differential RNA sequencing (dRNA-seq) approach allows for transcriptional start site (TSS) annotation by differentiating between primary transcripts with a 5'-triphosphate (5'-PPP) and processed transcripts with a 5'-monophosphate (5'-P). This method was applied in combination with an automated TSS annotation tool to generate global transcriptome maps for Escherichia coli (E. coli) and Helicobacter pylori (H. pylori). In the E. coli study we conducted different downstream analyses to gain a deeper understanding of the nature and properties of transcripts in our TSS map. Here, we focused especially on putative antisense RNAs (asRNAs), an RNA class transcribed from the opposite strand of known protein-coding genes with the potential to regulate corresponding sense transcripts. Besides providing a set of putative asRNAs and experimental validation of candidates via Northern analysis, we analyzed and discussed different sources of variation in RNA-seq data. The aim of the H. pylori study was to provide a detailed description of the dRNA-seq approach and its application to a bacterial model organism. It includes information on experimental protocols and requirements for data analysis to generate a genome-wide TSS map. We show how the included TSS can be used to identify and analyze transcriptome and regulatory features and discuss challenges in terms of library preparation protocols, sequencing platforms, and data analysis including manual and automated TSS annotation. The TSS maps and associated transcriptome data from both H. pylori and E. coli were made available for visualization in an easily accessible online browser. Furthermore, a modified version of dRNA-seq was used to identify transcriptome targets of the RNA pyrophosphohydrolase (RppH) in H. pylori. RppH initiates 5'-end-dependent degradation of transcripts by converting the 5'-PPP of primary transcripts to a 5'-P. I developed an analysis method, which uses data from complementary DNA (cDNA) libraries specific for transcripts carrying a 5'-PPP, 5'-P or both, to specifically identify transcripts modified by RppH. For this, the method assessed the 5'-phosphorylation state and cellular concentration of transcripts in rppH deletion in comparison to strains with the intact gene. Several of the identified potential RppH targets were further validated via half-life measurements and quantification of their 5'-phosphorylation state in wild-type and mutant cells. Our findings suggest an important role for RppH in post-transcriptional gene regulation in H. pylori and related organisms. In addition, we applied two RNA-seq-based approaches, RNA immunoprecipitation followed by sequencing (RIP-seq) and cross-linking immunoprecipitation followed by sequencing (CLIP-seq), to identify transcripts bound by Hfq and CsrA, two RNA-binding proteins (RBPs) with an important role in post-transcriptional regulation. For RIP-seq-based identification of CsrA binding regions in Campylobacter jejuni (C. jejuni), we used annotation-based analysis and, in addition, a self-developed peak calling method based on a sliding window approach. Both methods revealed flaA mRNA, encoding the major flagellin, as the main target and functional analysis of identified targets showed a significant enrichment of genes involved in flagella biosynthesis. Further experimental analysis revealed the role of flaA mRNA in post-transcriptional regulation. In comparison to RIP-seq, CLIP-seq allows mapping of RBP binding sites with a higher resolution. To identify these sites an approach called ``block-based peak calling'' was developed and resulting peaks were used to identify sequence and structural constraints required for interaction of Hfq and CsrA with Salmonella transcripts. Overall, the different RNA-seq-based approaches described in this thesis together with their associated analysis pipelines extended our knowledge on the transcriptional repertoire and modes of post-transcriptional regulation in bacteria. The global TSS maps, including further characterized asRNA candidates, putative RppH targets, and identified RBP interactomes will likely trigger similar global studies in the same or different organisms or will be used as a resource for closer examination of these features.},
  subject  = {Bakterien},
  language = {en},
}

@phdthesis{Gupta2018,
  author   = {Gupta, Shishir Kumar},
  title    = {Re-annotation of {Camponotus} floridanus Genome and Characterization of Innate Immunity Transcriptome Responses to Bacterial Infections},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2018},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-140168},
  abstract = {The sequencing of several ant genomes within the last six years open new research avenues for understanding not only the genetic basis of social species but also the complex systems such as immune responses in general. Similar to other social insects, ants live in cooperative colonies, often in high densities and with genetically identical or closely related individuals. The contact behaviours and crowd living conditions allow the disease to spread rapidly through colonies. Nevertheless, ants can efficiently combat infections by using diverse and effective immune mechanisms. However, the components of the immune system of carpenter ant Camponotus floridanus and also the factors in bacteria that facilitate infection are not well understood. To form a better view of the immune repository and study the C. floridanus immune responses against the bacteria, experimental data from Illumina sequencing and mass-spectrometry (MS) data of haemolymph in normal and infectious conditions were analysed and integrated with the several bioinformatics approaches. Briefly, the tasks were accomplished in three levels. First, the C. floridanus genome was re-annotated for the improvement of the existing annotation using the computational methods and transcriptomics data. Using the homology based methods, the extensive survey of literature, and mRNA expression profiles, the immune repository of C. floridanus were established. Second, large-scale protein-protein interactions (PPIs) and signalling network of C. floridanus were reconstructed and analysed and further the infection induced functional modules in the networks were detected by mapping of the expression data over the networks. In addition, the interactions of the immune components with the bacteria were identified by reconstructing inter-species PPIs networks and the interactions were validated by literature. Third, the stage-specific MS data of larvae and worker ants were analysed and the differences in the immune response were reported. Concisely, all the three omics levels resulted to multiple findings, for instance, re-annotation and transcriptome profiling resulted in the overall improvement of structural and functional annotation and detection of alternative splicing events, network analysis revealed the differentially expressed topologically important proteins and the active functional modules, MS data analysis revealed the stage specific differences in C. floridanus immune responses against bacterial pathogens. Taken together, starting from re-annotation of C. floridanus genome, this thesis provides a transcriptome and proteome level characterization of ant C. floridanus, particularly focusing on the immune system responses to pathogenic bacteria from a biological and a bioinformatics point of view. This work can serve as a model for the integration of omics data focusing on the immuno-transcriptome of insects.},
  subject  = {Camponotus floridanus},
  language = {en},
}

@phdthesis{Dugar2016,
  author   = {Dugar, Gaurav},
  title    = {Comparative transcriptomics and post-transcriptional regulation in \emph{Campylobacter jejuni}},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2016},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-146180},
  abstract = {The transcriptome is defined as the set of all RNA molecules transcribed in a cell. These include protein-coding messenger RNAs (mRNAs) as well as non-coding RNAs, such as ribosomal RNAs (rRNAs), transfer RNAs (tRNAs), and small non-coding RNAs (sRNAs). sRNAs are known to play an important role in regulating gene expression and virulence in pathogens. In this thesis, the transcriptome of the food-borne pathogen Campylobacter jejuni was characterized at single nucleotide resolution by use of next-generation sequencing approaches. The first genome of a C. jejuni strain was published in the year 2000. However, its transcriptome remained uncharacterized at large. C. jejuni can survive in a variety of ecological niches and hosts. However, how strain-specific transcriptional changes contribute to such adaptation is not known. In this study, the global transcriptome maps of four closely related C. jejuni strains were defined using a differential RNA-seq (dRNA-seq) approach. This analysis also included a novel automated method to annotate the transcriptional start sites (TSS) at a genome-wide scale. Next, the transcriptomes of four strains were simultaneously mapped and compared by the use of a common coordinate system derived from whole-genome alignment, termed as SuperGenome. This approach helped to refine the promoter maps by comparison of TSS within strains. Most of the TSS were found to be conserved among all four strains, but some single-nucleotide-polymorphisms (SNPs) around promoter regions led to strain-specific transcriptional output. Most of these SNPs altered transcription only slightly, but some others led to a complete abrogation of transcription leading to differential molecular phenotypes. These in turn might help the strains to adapt to their specific host or microniche. The transcriptome also unveiled a plethora of sRNAs, some of which were conserved among the four strains while others were strain specific. Furthermore, a Cas9-dependent minimal type-II CRISPR-Cas system with only three Cas genes and multiple promoters to drive the transcription of the CRISPR locus was also characterized in C. jejuni using the dRNA-seq dataset. Apart from sRNAs, the role of global RNA binding proteins (RBPs) is also unclear in C. jejuni. Aided by the global transcriptome data, the role of RBPs in post-transcriptional regulation of C. jejuni was studied at a global scale. Two of the most widely studied RNA binding proteins in bacteria are Hfq and CsrA. The RNA interactome of the translational regulator CsrA was defined using another global deep-sequencing technique that combines co-immunoprecipitation (coIP) with RNA sequencing (RIP-seq). Using this interactome dataset, the direct targets of this widespread global post-transcriptional regulator were defined, revealing a significant enrichment for mRNAs encoding genes involved in flagella biosynthesis. Unlike Gammaproteobacteria, where sRNAs such as CsrB/C, antagonize CsrA activity, no sRNAs were enriched in the CsrA-coIP in C. jejuni, indicating absence of any sRNA antagonists and novel modes of CsrA activity regulation. Instead, the CsrA regulatory pathway revealed flaA mRNA, encoding the major flagellin, as a dual-function mRNA. flaA mRNA was the main target of CsrA but it also served to antagonize CsrA activity along with the protein antagonist FliW previously identified in the Gram-positive bacterium Bacillus subtilis. Furthermore, this regulatory mRNA was also shown in this thesis to localize to the poles of elongating C. jejuni cells in a translation-dependent manner. It was also shown that this localization is dependent on the CsrA-FliW regulon, which controls the translation of flaA mRNA. The role and mechanism of flaA mRNA localization or mRNA localization in general is not yet clear in bacteria when compared to their eukaryotic counterparts. Overall, this study provides first insights into riboregulation of the bacterial pathogen C. jejuni. The work presented in this thesis unveils several novel modes of riboregulation in C. jejuni, which could be applicable more generally. Moreover, this study also lays out several unsolved intriguing questions, which may pave the way for interesting studies to come.},
  subject  = {Campylobacter jejuni},
  language = {en},
}

@phdthesis{Schulz2003,
  author   = {Schulz, Heidi},
  title    = {Towards a comprehensive description of the human retinal transcriptome: identification and characterization of differentially expressed genes},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2003},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-7278},
  abstract = {The human retina is a multilayered neuroectodermal tissue specialized in the transformation of light energy into electric impulses which can be transmitted to the brain where they are perceived as vision. Since the retina is easily accessible and functional aspects are directly recordable, the study of this tissue has been at the forefront of neuroscience research for over a century. Studies have revealed that the distinct functions of the retina require a large degree of differentiation which is achieved by the coordinated function of approximately 55 different cell types. The highly structured anatomy and the functional differentiation of the retina is a result of its distinctive transcriptome and proteome. Due to the complexity of the retina it has been difficult to estimate the number of genes actively transcribed in this tissue. Great efforts in the elucidation of retinal disease genes have led to the identification of 139 retina disease loci with 90 of the corresponding genes cloned thus far. In contrast to the success in the hereditary disorders, efforts to identify the genetic factors conferring manifestations known as age-related macular degeneration (AMD) have revealed sparse results. AMD is a retinal disease affecting a significant percentage of the older population. This disorder is likely due to exogenic as well as genetic factors. To further our understanding of retinal physiology and facilitate the identification of genes underlying retinal degenerations, particularly AMD, our efforts concentrated on the systematic analysis of the retinal transcriptome. Since approximately half of all retinal degeneration-associated genes identified to date are preferentially expressed in retina, it is plausible that the investigation of gene expression profiles and the identification of retina-expressed transcripts could be an important starting point for characterizing candidate genes for the retinal diseases. The expressed sequence tags approach included the assessment of all retinal expressed sequence tags (EST) clusters indexed in the UniGene database and of 1080 single-pass ESTs derived from an in-house generated human retina suppression subtracted hybridization (SSH) cDNA library. In total, 6603 EST clusters were evaluated during this thesis and detailed in-silico analysis was performed on 750 EST clusters. The expression of the genes was evaluated using reverse transcriptase-polymerase chain reaction (RT-PCR), followed by confirmation using quantitative reverse transcriptase-polymerase chain reaction (qRT-PCR), as well as conventional and virtual Northern blot analysis. The expression profiling of 337 selected EST clusters led to the identification of 111 transcripts, of which 60 are specific or abundant to the retina, 3 are expressed at high levels in the retinal pigment epithelium (RPE), and 48 are expressed in brain as well as in retina. The EST approach used to select candidate transcripts allowed us to assess the effectiveness of the two available resources, the UniGene database and the retinal SSH (retSSH) cDNA library. From the results obtained, it is evident that the generation of suppression subtracted libraries to identify cell-specific transcripts constitutes the most straight-forward and efficient strategy. In addition to the high percentage of candidate genes that are identified from an SSH cDNA library, it has the added benefit that genes expressed at low levels can be identified. Furthermore, comparison of our retina-enriched gene set with previously published studies demonstrated only limited overlap of the identified genes further confirming the valuable source of retinal genes from our retinal SSH cDNA library. The effort of our and other groups has resulted in the establishment of the full-length coding sequence of 55 of the 111 genes uniquely or preferentially expressed in the retina. Using various methods such as bioinformatical analysis, EST assembly, cDNA library screening, and rapid amplification of cDNA ends (RACE) a number of genes were cloned in the scope of this thesis including C1orf32, C4orf11, C7orf9, C12orf7, C14orf29, DAPL1, and GRM7. Bioinformatic analyses and cDNA library screening were used to isolate the full-length cDNA sequence and determine the genomic organization of C7orf9, also identified as RFRP. This 1190 bp retina-specific transcript from chromosome 7p15.3 encodes a precursor protein for at least two small neuropeptides, referred to as RFRP-1 and RFRP-3. Since C7orf9 is localized in the critical region for dominant cystoid macular dystrophy (CYMD) its role in the pathology was investigated. Southern blot analysis and sequencing of samples from two affected individuals of the original pedigree used to localize the disease gene excluded the gene from involvement in this disease. Multiple isoforms of the C12orf7 gene were assembled from a number of clones identified from library screenings, PCR amplifications, and RACE experiments. The gene variants, transcribed from chromosome 12q13.13, have been found to be expressed exclusively in retina. Because of the multiple alternative splicing of the gene, we can only speculate about the nature of the protein it encodes. The longest transcript, which includes all six exons plus the last intervening sequence, encodes a 471 aa protein which contains a nuclear localization signal and five ankyrin repeats. The existence of many isoforms is also observed in mouse suggesting that they may have a relevant role in cellular physiology. Five novel splice variants of the glutamate metabotropic receptor 7 (GRM7) resulting from the use of alternative 3'-end exons were identified and characterized. One of the novel variants, GRM7\_v3, encodes a 924 aa protein and is therefore the longest putative GRM7 protein reported to date. Even though they are not retina-specific, the isoforms are preferentially expressed in the nervous system. Although the functional properties of the specific carboxyl-termini are still unclear, it is known that axon targeting of GRM7\_v1 is mediated by the last 60 aa of the protein. Hence the novel isoforms may direct the protein to specific subcellular localizations. The C1orf32 gene, preferentially expressed in retina, is organized in 10 exons and is transcribed from chromosome 1q24.1. Bioinformatic analyses of the 639 aa putative protein not only identified the mouse and rat orthologous genes but also the LISCH7 gene as a potential member of the same family. Since the LISCH7 protein has been shown to function as a low density lipoprotein receptor, the C1orf32 protein may be involved in retinal lipid homeostasis. Disturbances in lipid metabolism have been proposed as one of the pathways involved in AMD etiology. Thus, the role of C1orf32 in this complex disease should be investigated. Expression analyses of the death-associated protein-like 1 (DAPL1) gene revealed that it is expressed in both the retina and the RPE at high levels. The 552 bp transcript encodes a 107 aa putative protein and is transcribed from chromosome 2q24.1. In-silico analyses identified an additional 12 related proteins from various species which share high similarity constituting a novel protein family. The similarity to the death-associated-protein (DAP) is particularly interesting since this protein has been found to be indispensable for programmed cell death. Therefore, DAPL1 is an excellent candidate for retinal disease as apoptosis is generally the ultimate cause in retinal degeneration. The retina-specific C4orf11 and C14orf29 genes localized on chromosome 4q21.22 and 14q22.1, respectively, are both transcribed in more than one isoform. The encoded proteins do not contain any known domains but because of their retina-specific expression they may be important for proper retinal physiology. As part of the long-term goals of the project, several of the cloned genes are being genotyped to construct single nucleotide polymorphism (SNP) maps. Projects to investigate haplotype frequencies of candidate genes in large cohorts of controls and AMD patients are ongoing. Thus, by establishing a collection of 111 genes expressed exclusively or preferentially in the retina, the present work has laid the foundation for future research in retinal diseases.},
  subject  = {Netzhaut},
  language = {en},
}