% PhD thesis record: Juerges (2022), iTiSS and GRAND-SLAM tools for herpesvirus transcriptomics.
@phdthesis{Juerges2022,
  author   = {J{\"u}rges, Christopher Sebastian},
  title    = {Algorithmic methods for elucidating the transcriptomic landscape of herpesviruses},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2022},
  doi      = {10.25972/OPUS-27282},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-272825},
  subject  = {Herpesviren},
  language = {en},
  abstract = {Transcription describes the process of converting the information contained in DNA into RNA. Although, tremendous progress has been made in recent decades to uncover this complex mechanism, it is still not fully understood. Given the advances and reduction in cost of high-throughput sequencing experiments, more and more data have been generated to help elucidating this complex process. Importantly, these sequencing experiments produce massive amounts of data that are incomprehensible in their raw form for humans. Further, sequencing techniques are not always 100\% accurate and are subject to a certain degree of variability and, in special cases, they might introduce technical artifacts. Thus, computational and statistical methods are indispensable to uncover the information buried in these datasets. In this thesis, I worked with multiple high throughput datasets from herpes simplex virus 1 (HSV-1) and human cytomegalovirus (HCMV) infections. During the last decade, it has became clear that a gene might not have a single, but multiple sites at which transcription initiates. These multiple transcription start sites (TiSS) demonstrated to have regulatory effects on the gene itself depending on which TiSS is used. Specialized experimental approaches were developed to help identify TiSS (TiSS-profiling). In order to facilitate the identification of all potential TiSS that are used for cell type- and condition-specific transcription, I developed the tool iTiSS. 
By using a new general enrichment-based approach to predict TiSS, iTiSS proved to be applicable in integrated studies and made it less prone to false positives compared to other TiSS-calling tools. Another improvement in recent years was made in metabolic labeling experiments such as SLAM-seq. Here, they removed the time consuming and laborious step of physically separating new from old RNA in the samples. This was achieved by inducing specific nucleotide conversions in newly synthesized RNA that are later visible in the data. Consequently, the separation of new and old RNA is now done computationally and, hence, tools are needed that accurately quantify these fold-changes. My second tool that I developed, called GRAND-SLAM proved to be capable to accomplish this task and outperform competing programs. As both of my tools, iTiSS and GRAND-SLAM are not specifically tailored to my own goals, but could also facilitate the research of other groups in this field, I made them publicly available on GitHub. I applied my tools to datasets generated in our lab as well as to publicly available data sets from HSV-1 and HCMV, respectively. For HSV-1, I was able to predict and validate TiSS with nucleotide precision using iTiSS. This has lead to the most comprehensive annotation for HSV-1 to date, which now serves as the fundamental basis of any future transcriptomic research on HSV-1. By combining both my tools, I was further able to uncover parts of the highly complex gene kinetics in HCMV and to resolve the limitations caused by the densely packed genome of HCMV. With the ever-increasing advances in sequencing techniques and their decrease in cost, the amounts of data produced will continue to rise massively in the future. Additionally, more and more specialized omics approaches are appearing, calling for new tools to leverage their full information potential. 
Consequently, it has become apparent that specialized computational tools such as iTiSS and GRAND-SLAM are needed and will become an essential and indispensable part of the analysis.},
}