% Journal article: draft genome of Polygonum cuspidatum.
% Frontiers journals identify papers by article number; it is kept in "pages"
% (1274, matching the DOI suffix 01274).
@article{ZhangZhengZhengetal.2019,
  author   = {Zhang, Yonghong and Zheng, Lanlan and Zheng, Yan and Zhou, Chao and Huang, Ping and Xiao, Xiao and Zhao, Yongheng and Hao, Xincai and Hu, Zhubing and Chen, Qinhua and Li, Hongliang and Wang, Xuanbin and Fukushima, Kenji and Wang, Guodong and Li, Chen},
  title    = {Assembly and Annotation of a Draft Genome of the Medicinal Plant {Polygonum} cuspidatum},
  journal  = {Frontiers in Plant Science},
  volume   = {10},
  pages    = {1274},
  year     = {2019},
  issn     = {1664-462X},
  doi      = {10.3389/fpls.2019.01274},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-189279},
  abstract = {Polygonum cuspidatum (Japanese knotweed, also known as Huzhang in Chinese), a plant that produces bioactive components such as stilbenes and quinones, has long been recognized as important in traditional Chinese herbal medicine. To better understand the biological features of this plant and to gain genetic insight into the biosynthesis of its natural products, we assembled a draft genome of P. cuspidatum using Illumina sequencing technology. The draft genome is ca. 2.56 Gb long, with 71.54\% of the genome annotated as transposable elements. Integrated gene prediction suggested that the P. cuspidatum genome encodes 55,075 functional genes, including 6,776 gene families that are conserved in the five eudicot species examined and 2,386 that are unique to P. cuspidatum. Among the functional genes identified, 4,753 are predicted to encode transcription factors. We traced the gene duplication history of P. cuspidatum and determined that it has undergone two whole-genome duplication events about 65 and 6.6 million years ago. Roots are considered the primary medicinal tissue, and transcriptome analysis identified 2,173 genes that were expressed at higher levels in roots compared to aboveground tissues. Detailed phylogenetic analysis demonstrated expansion of the gene family encoding stilbene synthase and chalcone synthase enzymes in the phenylpropanoid metabolic pathway, which is associated with the biosynthesis of resveratrol, a pharmacologically important stilbene. Analysis of the draft genome identified 7 abscisic acid and water deficit stress-induced protein-coding genes and 14 cysteine-rich transmembrane module genes predicted to be involved in stress responses. The draft de novo genome assembly produced in this study represents a valuable resource for the molecular characterization of medicinal compounds in P. cuspidatum, the improvement of this important medicinal plant, and the exploration of its abiotic stress resistance.},
  language = {en}
}

% Journal article: draft genome of the Gila Topminnow.
% NOTE(review): 404 was exported as the issue "number"; the DOI suffix 00404
% indicates it is the article number, so it is stored in "pages" like the
% entry above. Confirm against the publisher record.
@article{MateosKangKloppetal.2019,
  author   = {Mateos, Mariana and Kang, Du and Klopp, Christophe and Parrinello, Hugues and Garc{\'\i}a-Olaz{\'a}bal, Mateo and Schumer, Molly and Jue, Nathaniel K. and Guiguen, Yann and Schartl, Manfred},
  title    = {Draft genome assembly and annotation of the {Gila} {Topminnow} {Poeciliopsis} occidentalis},
  journal  = {Frontiers in Ecology and Evolution},
  volume   = {7},
  pages    = {404},
  year     = {2019},
  issn     = {2296-701X},
  doi      = {10.3389/fevo.2019.00404},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-190339},
  abstract = {No abstract available.},
  language = {en}
}

% Doctoral thesis (Universitaet Wuerzburg, 2016) on the Venus flytrap genome.
% "subject" is a repository keyword field that standard styles ignore.
@phdthesis{Hackl2016,
  author   = {Hackl, Thomas},
  title    = {A draft genome for the {Venus} flytrap, {Dionaea} muscipula: Evaluation of assembly strategies for a complex Genome - Development of novel approaches and bioinformatics solutions},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2016},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-133149},
  abstract = {The Venus flytrap, \textit{Dionaea muscipula}, with its carnivorous life-style and its highly specialized snap-traps has fascinated biologists since the days of Charles Darwin. The goal of the \textit{D. muscipula} genome project is to gain comprehensive insights into the genomic landscape of this remarkable plant. The genome of the diploid Venus flytrap with an estimated size between 2.6 Gbp to 3.0 Gbp is comparatively large and comprises more than 70 \% of repetitive regions. Sequencing and assembly of genomes of this scale are even with state-of-the-art technology and software challenging. Initial sequencing and assembly of the genome was performed by the BGI (Beijing Genomics Institute) in 2011 resulting in a 3.7 Gbp draft assembly. I started my work with thorough assessment of the delivered assembly and data. My analysis showed that the BGI assembly is highly fragmented and at the same time artificially inflated due to overassembly of repetitive sequences. Furthermore, it only comprises about one third of the expected genes in full-length, rendering it inadequate for downstream analysis. In the following I sought to optimize the sequencing and assembly strategy to obtain an assembly of higher completeness and contiguity by improving data quality and assembly procedure and by developing tailored bioinformatics tools. Issues with technical biases and high levels of heterogeneity in the original data set were solved by sequencing additional short read libraries from high quality non-polymorphic DNA samples. To address contiguity and heterozygosity I examined numerous alternative assembly software packages and strategies and eventually identified ALLPATHS-LG as the most suited program for assembling the data at hand. Moreover, by utilizing digital normalization to reduce repetitive reads, I was able to substantially reduce computational demands while at the same time significantly increasing contiguity of the assembly. To improve repeat resolution and scaffolding, I started to explore the novel PacBio long read sequencing technology. Raw PacBio reads exhibit high error rates of 15 \% impeding their use for assembly. To overcome this issue, I developed the PacBio hybrid correction pipeline proovread (Hackl et al., 2014). proovread uses high coverage Illumina read data in an iterative mapping-based consensus procedure to identify and remove errors present in raw PacBio reads. In terms of sensitivity and accuracy, proovread outperforms existing software. In contrast to other correction programs, which are incapable of handling data sets of the size of the D. muscipula project, proovread's flexible design allows for the efficient distribution of work load on high-performance computing clusters, thus enabling the correction of the Venus flytrap PacBio data set. Next to the assembly process itself, also the assessment of the large de novo draft assemblies, particularly with respect to coverage by available sequencing data, is difficult. While typical evaluation procedures rely on computationally extensive mapping approaches, I developed and implemented a set of tools that utilize k-mer coverage and derived values to efficiently compute coverage landscapes of large-scale assemblies and in addition allow for automated visualization of the obtained information in comprehensive plots. Using the developed tools to analyze preliminary assemblies and by combining my findings regarding optimizations of the assembly process, I was ultimately able to generate a high quality draft assembly for D. muscipula. I further refined the assembly by removal of redundant contigs resulting from separate assembly of heterozygous regions and additional scaffolding and gapclosing using corrected PacBio data. The final draft assembly comprises $86 \times 10^{3}$ scaffolds and has a total size of 1.45 Gbp. The difference to the estimated genome size is well explained by collapsed repeats. At the same time, the assembly exhibits high fractions of full-length gene models, corroborating the interpretation that the obtained draft assembly provides a complete and comprehensive reference for further exploration of the fascinating biology of the Venus flytrap.},
  subject  = {Venusfliegenfalle},
  language = {en}
}