% Bibliography records for publications on ITS2 / ITS1 sequence-structure analysis.
% Conventions used below:
%   - one field per line, brace-delimited values, citation keys left unchanged;
%   - non-ASCII characters are written as LaTeX escapes so the records stay 7-bit clean;
%   - acronyms in titles are brace-protected against sentence-casing styles;
%   - for article-number journals the article id (eNNNN) is stored in the pages field.

% Single spelling of the journal name shared by all PLOS ONE records.
@string{plosone = {PLOS ONE}}

% Journal article: green algal phylogeny reconstructed from ITS2 sequence-structure data.
@article{BuchheimKellerKoetschanetal.2011,
  author   = {Buchheim, Mark A. and Keller, Alexander and Koetschan, Christian and F{\"o}rster, Frank and Merget, Benjamin and Wolf, Matthias},
  title    = {Internal Transcribed Spacer 2 (nu {ITS2} {rRNA}) Sequence-Structure Phylogenetics: Towards an Automated Reconstruction of the Green Algal Tree of Life},
  journal  = plosone,
  volume   = {6},
  number   = {2},
  pages    = {e16931},
  year     = {2011},
  doi      = {10.1371/journal.pone.0016931},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-140866},
  abstract = {Background: Chloroplast-encoded genes (matK and rbcL) have been formally proposed for use in DNA barcoding efforts targeting embryophytes. Extending such a protocol to chlorophytan green algae, though, is fraught with problems including non homology (matK) and heterogeneity that prevents the creation of a universal PCR toolkit (rbcL). Some have advocated the use of the nuclear-encoded, internal transcribed spacer two (ITS2) as an alternative to the traditional chloroplast markers. However, the ITS2 is broadly perceived to be insufficiently conserved or to be confounded by introgression or biparental inheritance patterns, precluding its broad use in phylogenetic reconstruction or as a DNA barcode. A growing body of evidence has shown that simultaneous analysis of nucleotide data with secondary structure information can overcome at least some of the limitations of ITS2. The goal of this investigation was to assess the feasibility of an automated, sequence-structure approach for analysis of ITS2 data from a large sampling of phylum Chlorophyta. Methodology/Principal Findings: Sequences and secondary structures from 591 chlorophycean, 741 trebouxiophycean and 938 ulvophycean algae, all obtained from the ITS2 Database, were aligned using a sequence structure-specific scoring matrix. Phylogenetic relationships were reconstructed by Profile Neighbor-Joining coupled with a sequence structure-specific, general time reversible substitution model. Results from analyses of the ITS2 data were robust at multiple nodes and showed considerable congruence with results from published phylogenetic analyses. Conclusions/Significance: Our observations on the power of automated, sequence-structure analyses of ITS2 to reconstruct phylum-level phylogenies of the green algae validate this approach to assessing diversity for large sets of chlorophytan taxa. Moreover, our results indicate that objections to the use of ITS2 for DNA barcoding should be weighed against the utility of an automated, data analysis approach with demonstrated power to reconstruct evolutionary patterns for highly divergent lineages.},
  language = {en},
}

% Journal article: description of the ITS2 Database, version III.
@article{KoetschanFoersterKelleretal.2010,
  author   = {Koetschan, Christian and F{\"o}rster, Frank and Keller, Alexander and Schleicher, Tina and Ruderisch, Benjamin and Schwarz, Roland and M{\"u}ller, Tobias and Wolf, Matthias and Schultz, J{\"o}rg},
  title    = {The {ITS2} Database {III}---sequences and structures for phylogeny},
  journal  = {Nucleic Acids Research},
  volume   = {38},
  pages    = {D275--D279},
  year     = {2010},
  doi      = {10.1093/nar/gkp966},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-68390},
  abstract = {The internal transcribed spacer 2 (ITS2) is a widely used phylogenetic marker. In the past, it has mainly been used for species level classifications. Nowadays, a wider applicability becomes apparent. Here, the conserved structure of the RNA molecule plays a vital role. We have developed the ITS2 Database (http://its2.bioapps.biozentrum.uni-wuerzburg.de) which holds information about sequence, structure and taxonomic classification of all ITS2 in GenBank. In the new version, we use Hidden Markov models (HMMs) for the identification and delineation of the ITS2 resulting in a major redesign of the annotation pipeline. This allowed the identification of more than 160 000 correct full length and more than 50 000 partial structures. In the web interface, these can now be searched with a modified BLAST considering both sequence and structure, enabling rapid taxon sampling. Novel sequences can be annotated using the HMM based approach and modelled according to multiple template structures. Sequences can be searched for known and newly identified motifs. Together, the database and the web server build an exhaustive resource for ITS2 based phylogenetic analyses.},
  subject  = {Biologie},
  language = {en},
}

% PhD thesis; the title is English, the stored abstract is German.
@phdthesis{Koetschan2012,
  author   = {Koetschan, Christian},
  title    = {The Eukaryotic {ITS2} Database -- A workbench for modelling {RNA} sequence-structure evolution},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2012},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-73128},
  abstract = {In den vergangenen Jahren etablierte sich der Marker {\quotedblbase}internal transcribed spacer 2{\textquotedblleft} (ITS2) zu einem h{\"a}ufig genutzten Werkzeug in der molekularen Phylogenetik der Eukaryoten. Seine schnell evolvierende Sequenz eignet sich bestens f{\"u}r den Einsatz in niedrigeren phylogenetischen Ebenen. Die ITS2 faltet jedoch auch in eine sehr konservierte Sekund{\"a}rstruktur. Diese erm{\"o}glicht die Unterscheidung weit entfernter Arten. Eine Kombination aus beiden in einer Sequenzstrukturanalyse verbessert die Aufl{\"o}sung des Markers und erm{\"o}glicht die Rekonstruktion von robusteren B{\"a}umen auf h{\"o}herer taxonomischer Breite. Jedoch war die Durchf{\"u}hrung solch einer Analyse, die die Nutzung unterschiedlichster Programme und Datenbanken voraussetzte, f{\"u}r den klassischen Biologen nicht einfach durchf{\"u}hrbar. Um diese H{\"u}rde zu umgehen, habe ich den {\quotedblbase}ITS2 Workbench{\textquotedblleft} entwickelt, eine im Internet nutzbare Arbeitsplattform zur automatisierten sequenzstrukturbasierten phylogenetischen Analyse basierend auf der ITS2 (http://its2.bioapps.biozentrum.uni-wuerzburg.de). Die Entwicklung begann mit der L{\"a}ngenoptimierung unterschiedlicher {\quotedblbase}Hidden Markov Model{\textquotedblleft} (HMM)-Topologien, die erfolgreich auf ein Modell zur Sequenzstrukturvorhersage der ITS2 angewandt wurden. Hierbei wird durch die Analyse von Sequenzbestandteilen in Kombination mit der L{\"a}ngenverteilung verschiedener Helixregionen die Struktur vorhergesagt. Anschlie{\ss}end konnte ich HMMs auch bei der Sequenzstrukturgenerierung einsetzen um die ITS2 innerhalb einer gegebenen Sequenz zu lokalisieren. Dieses neu implementierte Verfahren verdoppelte die Anzahl vorhergesagter Strukturen und verk{\"u}rzte die Laufzeit auf wenige Tage. Zusammen mit weiteren Optimierungen des Homologiemodellierungsprozesses kann ich nun ersch{\"o}pfend Sekund{\"a}rstrukturen in mehreren Iterationen vorhersagen. Diese Optimierungen liefern derzeit 380.000 annotierte Sequenzen einschlie{\ss}lich 288.000 Strukturvorhersagen. Um diese Strukturen f{\"u}r die Berechnung von Alignments und phylogenetischen B{\"a}umen zu verwenden habe ich das R-Paket {\quotedblbase}treeforge{\textquotedblleft} entwickelt. Es erm{\"o}glicht die Generierung von Sequenzstrukturalignments auf bis zu vier unterschiedlich kodierten Alphabeten. Damit k{\"o}nnen erstmals auch strukturelle Basenpaarungen in die Alignmentberechnung mit einbezogen werden, die eine Sch{\"a}tzung neuer Scorematrizen voraussetzten. Das R-Paket erm{\"o}glicht zus{\"a}tzlich die Rekonstruktion von {\quotedblbase}Maximum Parsimony{\textquotedblleft}, {\quotedblbase}Maximum Likelihood{\textquotedblleft} und {\quotedblbase}Neighbour Joining{\textquotedblleft} B{\"a}umen auf allen vier Alphabeten mittels weniger Zeilen Programmcode. Das Paket wurde eingesetzt, um die noch umstrittene Phylogenie der {\quotedblbase}chlorophyceae{\textquotedblleft} zu rekonstruieren und k{\"o}nnte in zuk{\"u}nftigen Versionen des ITS2 workbench verwendet werden. Die ITS2 Plattform basiert auf einer modernen und sehr umfangreichen Web 2.0 Oberfl{\"a}che und beinhaltet neuste AJAX und Web-Service Technologien. Sie umfasst die HMM basierte Sequenzannotation, Strukturvorhersage durch Energieminimierung bzw. Homologiemodellierung, Alignmentberechnung und Baumrekonstruktion basierend auf einem flexiblen Datenpool, der {\"A}nderungen am Datensatz automatisch aktualisiert. Zus{\"a}tzlich wird eine Detektion von Sequenzmotiven erm{\"o}glicht, die zur Kontrolle von Annotation und Strukturvorhersage dienen kann. Eine BLAST basierte Suche auf Sequenz- und Strukturebene bietet zus{\"a}tzlich eine Vereinfachung des Taxonsamplings. Alle Funktionen sowie die Nutzung der ITS2 Webseite sind in einer kurzen Videoanleitung dargestellt. Die Plattform l{\"a}sst jedoch nur eine bestimmte Gr{\"o}{\ss}e von Datens{\"a}tzen zu. Dies liegt vor allem an der erheblichen Rechenleistung, die bei diesen Berechnungen ben{\"o}tigt wird. Um die Funktion dieses Verfahrens auch auf gro{\ss}en Datenmengen zu demonstrieren, wurde eine voll automatisierte Rekonstruktion des Gr{\"u}nalgenbaumes (Chlorophyta) durchgef{\"u}hrt. Diese erfolgreiche, auf dem ITS2 Marker basierende Studie spricht f{\"u}r die Sequenz-Strukturanalyse auf weiteren Daten in der Phylogenetik. Hier bietet der ITS2 Workbench den idealen Ausgangspunkt.},
  subject  = {Ribosomale RNA},
  language = {en},
}

% Journal article: the ITS2 Database workbench; 61 is the issue, e3806 the article id.
@article{MergetKoetschanHackletal.2012,
  author   = {Merget, Benjamin and Koetschan, Christian and Hackl, Thomas and F{\"o}rster, Frank and Dandekar, Thomas and M{\"u}ller, Tobias and Schultz, J{\"o}rg and Wolf, Matthias},
  title    = {The {ITS2} Database},
  journal  = {Journal of Visualized Experiments},
  number   = {61},
  pages    = {e3806},
  year     = {2012},
  doi      = {10.3791/3806},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-124600},
  abstract = {The internal transcribed spacer 2 (ITS2) has been used as a phylogenetic marker for more than two decades. As ITS2 research mainly focused on the very variable ITS2 sequence, it confined this marker to low-level phylogenetics only. However, the combination of the ITS2 sequence and its highly conserved secondary structure improves the phylogenetic resolution and allows phylogenetic inference at multiple taxonomic ranks, including species delimitation. The ITS2 Database presents an exhaustive dataset of internal transcribed spacer 2 sequences from NCBI GenBank accurately reannotated. Following an annotation by profile Hidden Markov Models (HMMs), the secondary structure of each sequence is predicted. First, it is tested whether a minimum energy based fold (direct fold) results in a correct, four helix conformation. If this is not the case, the structure is predicted by homology modeling. In homology modeling, an already known secondary structure is transferred to another ITS2 sequence, whose secondary structure was not able to fold correctly in a direct fold. The ITS2 Database is not only a database for storage and retrieval of ITS2 sequence-structures. It also provides several tools to process your own ITS2 sequences, including annotation, structural prediction, motif detection and BLAST search on the combined sequence-structure information. Moreover, it integrates trimmed versions of 4SALE and ProfDistS for multiple sequence-structure alignment calculation and Neighbor Joining tree reconstruction. Together they form a coherent analysis pipeline from an initial set of sequences to a phylogeny based on sequence and secondary structure. In a nutshell, this workbench simplifies first phylogenetic analyses to only a few mouse-clicks, while additionally providing tools and data for comprehensive large-scale analyses.},
  language = {en},
}

% Journal article: ITS1 secondary structure in the anaerobic fungi.
@article{KoetschanKittelmannLuetal.2014,
  author   = {Koetschan, Christian and Kittelmann, Sandra and Lu, Jingli and Al-Halbouni, Djamila and Jarvis, Graeme N. and M{\"u}ller, Tobias and Wolf, Matthias and Janssen, Peter H.},
  title    = {Internal Transcribed Spacer 1 Secondary Structure Analysis Reveals a Common Core throughout the Anaerobic Fungi ({Neocallimastigomycota})},
  journal  = plosone,
  volume   = {9},
  number   = {3},
  pages    = {e91928},
  year     = {2014},
  doi      = {10.1371/journal.pone.0091928},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-117058},
  abstract = {The internal transcribed spacer (ITS) is a popular barcode marker for fungi and in particular the ITS1 has been widely used for the anaerobic fungi (phylum Neocallimastigomycota). A good number of validated reference sequences of isolates as well as a large number of environmental sequences are available in public databases. Its highly variable nature predisposes the ITS1 for low level phylogenetics; however, it complicates the establishment of reproducible alignments and the reconstruction of stable phylogenetic trees at higher taxonomic levels (genus and above). Here, we overcame these problems by proposing a common core secondary structure of the ITS1 of the anaerobic fungi employing a Hidden Markov Model-based ITS1 sequence annotation and a helix-wise folding approach. We integrated the additional structural information into phylogenetic analyses and present for the first time an automated sequence-structure-based taxonomy of the ITS1 of the anaerobic fungi. The methodology developed is transferable to the ITS1 of other fungal groups, and the robust taxonomy will facilitate and improve high-throughput anaerobic fungal community structure analysis of samples from various environments.},
  language = {en},
}