% Keller (2010): PhD thesis, University of Wuerzburg, on ITS2 secondary-structure phylogenetics.
% The acronym in the title is brace-protected so sentence-casing styles keep it upper-case.
@phdthesis{Keller2010,
  author   = {Keller, Alexander},
  title    = {Secondary (and tertiary) structure of the {ITS2} and its application for phylogenetic tree reconstructions and species identification},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2010},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-56151},
  subject  = {Phylogenie},
  language = {en},
  abstract = {Biodiversity may be investigated and explored by the means of genetic sequence information and molecular phylogenetics. Yet, with ribosomal genes, information for phylogenetic studies may not only be retained from the primary sequence, but also from the secondary structure. Software that is able to cope with two dimensional data and designed to answer taxonomic questions has been recently developed and published as a new scientific pipeline. This thesis is concerned with expanding this pipeline by a tool that facilitates the annotation of a ribosomal region, namely the ITS2. We were also able to show that this states a crucial step for secondary structure phylogenetics and for data allocation of the ITS2-database. This resulting freely available tool determines high quality annotations. In a further study, the complete phylogenetic pipeline has been evaluated on a theoretical basis in a comprehensive simulation study. We were able to show that both, the accuracy and the robustness of phylogenetic trees are largely improved by the approach. The second major part of this thesis concentrates on case studies that applied this pipeline to resolve questions in taxonomy and ecology. We were able to determine several independent phylogenies within the green algae that further corroborate the idea that secondary structures improve the obtainable phylogenetic signal, but now from a biological perspective. This approach was applicable in studies on the species and genus level, but due to the conservation of the secondary structure also for investigations on the deeper level of taxonomy.
An additional case study with blue butterflies indicates that this approach is not restricted to plants, but may also be used for metazoan phylogenies. The importance of high quality phylogenetic trees is indicated by two ecological studies that have been conducted. By integrating secondary structure phylogenetics, we were able to answer questions about the evolution of ant-plant interactions and of communities of bacteria residing on different plant tissues. Finally, we speculate how phylogenetic methods with RNA may be further enhanced by integration of the third dimension. This has been a speculative idea that was supplemented with a small phylogenetic example, however it shows that the great potential of structural phylogenetics has not been fully exploited yet. Altogether, this thesis comprises aspects of several different biological disciplines, which are evolutionary biology and biodiversity research, community and invasion ecology as well as molecular and structural biology. Further, it is complemented by statistical approaches and development of informatical software. All these different research areas are combined by the means of bioinformatics as the central connective link into one comprehensive thesis.},
}

% Vershenya (2010): PhD thesis, University of Wuerzburg, on quantitative and qualitative analyses of in-paralogs.
% Abstract text is kept as published except for repaired extraction artifacts and misspellings.
@phdthesis{Vershenya2010,
  author   = {Vershenya, Stanislav},
  title    = {Quantitative and qualitative analyses of in-paralogs},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2010},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-51358},
  subject  = {Duplikation},
  language = {en},
  abstract = {In our analysis I was interested in the gene duplications, with focus on in-paralogs. In-paralogs are gene duplicates which arose after species split. Here I analysed the in-paralogs quantitatively, as well as qualitatively. For quantitative analysis genomes of 21 species were taken. Most of them have vastly different lifestyles with maximum evolutionary distance between them 1100 million years.
Species included mammals, fish, insects and worm, plus some other chordates. All the species were pairwised analysed by the Inparanoid software, and in-paralogs matrix were built representing number of in-paralogs in all vs. all manner. Based on the in-paralogs matrix I tried to reconstruct the evolutionary tree using in-paralog numbers as evolutionary distance. If all 21 species were used the resulting tree was very far from real one: a lot of species were misplaced. However if the number was reduced to 12, all of the species were placed correctly with only difference being wrong insect and fish clusters switched. Then to in-paralogs matrix the neighbour-net algorithm was applied. The resulting "net" tree showed the species with fast or slow duplications rates compared to the others. We could identify species with very high or very low duplications frequencies and it correlates with known occurrences of the whole genome duplications. As the next step I built the graphs for every single species showing the correlation between their in-paralogs number and evolutionary distance. As we have 21 species, graph for every species is built using 20 points. Coordinates of the points are set using the evolutionary distance to that particular species and in-paralogs number. In mammals with increasing the distance from speciation the in-paralogs number also increased, however not in linear fashion. In fish and insects the graph close to zero is just the same in mammals' case. However, after reaching the evolutionary distances more than 800 million years the number of in-paralogs is beginning to decrease. We also made a simulation of gene duplications for all 21 species and all the splits according to the fossil and molecular clock data from literature. In our simulation duplication frequency was minimal closer to the past and maximum in the near-present time. Resulting curves had the same shape the experimental data ones.
In case of fish and insect for simulation the duplication rate coefficient even had to be set negative in order to repeat experimental curve shape. To the duplication rate coefficient in our simulation contribute 2 criteria: gene duplications and gene losses. As gene duplication is stochastical process it should always be a constant. So the changing in the coefficient should be solely explained by the increasing gene loss of old genes. The processes are explained by the evolution model with high gene duplication and loss ratio. The drop in number of in-paralogs is probably due to the BLAST algorithm. It is observed in comparing highly divergent species and BLAST cannot find the orthologs so precisely anymore. In the second part of my work I concentrated more on the specific function of in-paralogs. Because such analysis is time-consuming it could be done on the limited number species. Here I used three insects: Drosophila melanogaster (fruit fly), Anopheles gambiae (mosquito) and Apis mellifera (honeybee). After Inparanoid analyses and I listed the cluster of orthologs. Functional analyses of all listed genes were done using GO annotations and also KEGG PATHWAY database. We found, that the gene duplication pattern is unique for each species and that this uniqueness is reflected through the differences in functional classes of duplicated genes. The preferences for some classes reflect the evolutionary trends of the last 350 million years and allow assumptions on the role of those genes duplications in the lifestyle of species. Furthermore, the observed gene duplications allowed me to find connections between genomic changes and their phenotypic manifestations. For example I found duplications within carbohydrate metabolism reflecting feed pattern adaptation, within photo- and olfactory-receptors indicating sensing adaptation and within troponin indicating adaptations in the development.
Despite these species specific differences, found high correlations between the independently duplicated genes between the species. This might hint for a "pool" of genes preferentially duplicated. Taken together, the observed duplication patterns reflect the adaptational process and provide us another link to the field of genomic zoology.},
}