% Journal articles exported from an institutional repository (urn:nbn resolver URLs).
%
% Conventions used in this file:
%   * one field per line; order: author, title, journal, volume, number, pages,
%     year, issn, doi, url, language, abstract
%   * accented characters in names are written as LaTeX escapes wrapped in a
%     brace group (e.g. M{\"u}nch) so that classic BibTeX sorts and labels them
%     correctly
%   * page ranges use a double hyphen (en dash)
%   * acronyms and proper nouns in titles are brace-protected against
%     sentence-casing by the bibliography style
%   * abstracts are kept verbatim from the export; LaTeX specials are escaped

article-key CaliskanDangwalDandekar2023
@article{CaliskanDangwalDandekar2023,
  author   = {Caliskan, Aylin and Dangwal, Seema and Dandekar, Thomas},
  title    = {Metadata integrity in bioinformatics: bridging the gap between data and knowledge},
  journal  = {Computational and Structural Biotechnology Journal},
  volume   = {21},
  pages    = {4895--4913},
  year     = {2023},
  issn     = {2001-0370},
  doi      = {10.1016/j.csbj.2023.10.006},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-349990},
  language = {en},
  abstract = {In the fast-evolving landscape of biomedical research, the emergence of big data has presented researchers with extraordinary opportunities to explore biological complexities. In biomedical research, big data imply also a big responsibility. This is not only due to genomics data being sensitive information but also due to genomics data being shared and re-analysed among the scientific community. This saves valuable resources and can even help to find new insights in silico. To fully use these opportunities, detailed and correct metadata are imperative. This includes not only the availability of metadata but also their correctness. Metadata integrity serves as a fundamental determinant of research credibility, supporting the reliability and reproducibility of data-driven findings. Ensuring metadata availability, curation, and accuracy are therefore essential for bioinformatic research. Not only must metadata be readily available, but they must also be meticulously curated and ideally error-free. Motivated by an accidental discovery of a critical metadata error in patient data published in two high-impact journals, we aim to raise awareness for the need of correct, complete, and curated metadata. We describe how the metadata error was found, addressed, and present examples for metadata-related challenges in omics research, along with supporting measures, including tools for checking metadata and software to facilitate various steps from data analysis to published research. Highlights • Data awareness and data integrity underpins the trustworthiness of results and subsequent further analysis. • Big data and bioinformatics enable efficient resource use by repurposing publicly available RNA-Sequencing data. • Manual checks of data quality and integrity are insufficient due to the overwhelming volume and rapidly growing data. • Automation and artificial intelligence provide cost-effective and efficient solutions for data integrity and quality checks. • FAIR data management, various software solutions and analysis tools assist metadata maintenance.},
}

@article{CaliskanCaliskanRasbachetal.2023,
  author   = {Caliskan, Aylin and Caliskan, Deniz and Rasbach, Lauritz and Yu, Weimeng and Dandekar, Thomas and Breitenbach, Tim},
  title    = {Optimized cell type signatures revealed from single-cell data by combining principal feature analysis, mutual information, and machine learning},
  journal  = {Computational and Structural Biotechnology Journal},
  volume   = {21},
  pages    = {3293--3314},
  year     = {2023},
  issn     = {2001-0370},
  doi      = {10.1016/j.csbj.2023.06.002},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-349989},
  language = {en},
  abstract = {Machine learning techniques are excellent to analyze expression data from single cells. These techniques impact all fields ranging from cell annotation and clustering to signature identification. The presented framework evaluates gene selection sets how far they optimally separate defined phenotypes or cell groups. This innovation overcomes the present limitation to objectively and correctly identify a small gene set of high information content regarding separating phenotypes for which corresponding code scripts are provided. The small but meaningful subset of the original genes (or feature space) facilitates human interpretability of the differences of the phenotypes including those found by machine learning results and may even turn correlations between genes and phenotypes into a causal explanation. For the feature selection task, the principal feature analysis is utilized which reduces redundant information while selecting genes that carry the information for separating the phenotypes. In this context, the presented framework shows explainability of unsupervised learning as it reveals cell-type specific signatures. Apart from a Seurat preprocessing tool and the PFA script, the pipeline uses mutual information to balance accuracy and size of the gene set if desired. A validation part to evaluate the gene selection for their information content regarding the separation of the phenotypes is provided as well, binary and multiclass classification of 3 or 4 groups are studied. Results from different single-cell data are presented. In each, only about ten out of more than 30000 genes are identified as carrying the relevant information. The code is provided in a GitHub repository at https://github.com/AC-PHD/Seurat\_PFA\_pipeline.},
}

@article{SalihogluSrivastavaLiangetal.2023,
  author   = {Salihoglu, Rana and Srivastava, Mugdha and Liang, Chunguang and Schilling, Klaus and Szalay, Aladar and Bencurova, Elena and Dandekar, Thomas},
  title    = {{PRO-Simat}: Protein network simulation and design tool},
  journal  = {Computational and Structural Biotechnology Journal},
  volume   = {21},
  pages    = {2767--2779},
  year     = {2023},
  issn     = {2001-0370},
  doi      = {10.1016/j.csbj.2023.04.023},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-350034},
  language = {en},
  abstract = {PRO-Simat is a simulation tool for analysing protein interaction networks, their dynamic change and pathway engineering. It provides GO enrichment, KEGG pathway analyses, and network visualisation from an integrated database of more than 8 million protein-protein interactions across 32 model organisms and the human proteome. We integrated dynamical network simulation using the Jimena framework, which quickly and efficiently simulates Boolean genetic regulatory networks. It enables simulation outputs with in-depth analysis of the type, strength, duration and pathway of the protein interactions on the website. Furthermore, the user can efficiently edit and analyse the effect of network modifications and engineering experiments. In case studies, applications of PRO-Simat are demonstrated: (i) understanding mutually exclusive differentiation pathways in Bacillus subtilis, (ii) making Vaccinia virus oncolytic by switching on its viral replication mainly in cancer cells and triggering cancer cell apoptosis and (iii) optogenetic control of nucleotide processing protein networks to operate DNA storage. Multilevel communication between components is critical for efficient network switching, as demonstrated by a general census on prokaryotic and eukaryotic networks and comparing design with synthetic networks using PRO-Simat. The tool is available at https://prosimat.heinzelab.de/ as a web-based query server.},
}

@article{RackeveiBorgesEngstleretal.2022,
  author   = {Rackevei, Antonia S. and Borges, Alyssa and Engstler, Markus and Dandekar, Thomas and Wolf, Matthias},
  title    = {About the analysis of {18S} {rDNA} sequence data from trypanosomes in barcoding and phylogenetics: tracing a continuation error occurring in the literature},
  journal  = {Biology},
  volume   = {11},
  number   = {11},
  year     = {2022},
  issn     = {2079-7737},
  doi      = {10.3390/biology11111612},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-297562},
  language = {en},
  abstract = {The variable regions (V1-V9) of the 18S rDNA are routinely used in barcoding and phylogenetics. In handling these data for trypanosomes, we have noticed a misunderstanding that has apparently taken a life of its own in the literature over the years. In particular, in recent years, when studying the phylogenetic relationship of trypanosomes, the use of V7/V8 was systematically established. However, considering the current numbering system for all other organisms (including other Euglenozoa), V7/V8 was never used. In Maia da Silva et al. [Parasitology 2004, 129, 549-561], V7/V8 was promoted for the first time for trypanosome phylogenetics, and since then, more than 70 publications have replicated this nomenclature and even discussed the benefits of the use of this region in comparison to V4. However, the primers used to amplify the variable region of trypanosomes have actually amplified V4 (concerning the current 18S rDNA numbering system).},
}

@article{HanRenMamtiminetal.2023,
  author   = {Han, Chao and Ren, Pengxuan and Mamtimin, Medina and Kruk, Linus and Sarukhanyan, Edita and Li, Chenyu and Anders, Hans-Joachim and Dandekar, Thomas and Krueger, Irena and Elvers, Margitta and Goebel, Silvia and Adler, Kristin and M{\"u}nch, G{\"o}tz and Gudermann, Thomas and Braun, Attila and Mammadova-Bach, Elmina},
  title    = {Minimal collagen-binding epitope of glycoprotein {VI} in human and mouse platelets},
  journal  = {Biomedicines},
  volume   = {11},
  number   = {2},
  year     = {2023},
  issn     = {2227-9059},
  doi      = {10.3390/biomedicines11020423},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-304148},
  language = {en},
  abstract = {Glycoprotein VI (GPVI) is a platelet-specific receptor for collagen and fibrin, regulating important platelet functions such as platelet adhesion and thrombus growth. Although the blockade of GPVI function is widely recognized as a potent anti-thrombotic approach, there are limited studies focused on site-specific targeting of GPVI. Using computational modeling and bioinformatics, we analyzed collagen- and CRP-binding surfaces of GPVI monomers and dimers, and compared the interacting surfaces with other mammalian GPVI isoforms. We could predict a minimal collagen-binding epitope of GPVI dimer and designed an EA-20 antibody that recognizes a linear epitope of this surface. Using platelets and whole blood samples donated from wild-type and humanized GPVI transgenic mice and also humans, our experimental results show that the EA-20 antibody inhibits platelet adhesion and aggregation in response to collagen and CRP, but not to fibrin. The EA-20 antibody also prevents thrombus formation in whole blood, on the collagen-coated surface, in arterial flow conditions. We also show that EA-20 does not influence GPVI clustering or receptor shedding. Therefore, we propose that blockade of this minimal collagen-binding epitope of GPVI with the EA-20 antibody could represent a new anti-thrombotic approach by inhibiting specific interactions between GPVI and the collagen matrix.},
}

@article{GuptaSrivastavaMinochaetal.2021,
  author   = {Gupta, Shishir K. and Srivastava, Mugdha and Minocha, Rashmi and Akash, Aman and Dangwal, Seema and Dandekar, Thomas},
  title    = {Alveolar regeneration in {COVID-19} patients: a network perspective},
  journal  = {International Journal of Molecular Sciences},
  volume   = {22},
  number   = {20},
  year     = {2021},
  issn     = {1422-0067},
  doi      = {10.3390/ijms222011279},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-284307},
  language = {en},
  abstract = {A viral infection involves entry and replication of viral nucleic acid in a host organism, subsequently leading to biochemical and structural alterations in the host cell. In the case of SARS-CoV-2 viral infection, over-activation of the host immune system may lead to lung damage. Albeit the regeneration and fibrotic repair processes being the two protective host responses, prolonged injury may lead to excessive fibrosis, a pathological state that can result in lung collapse. In this review, we discuss regeneration and fibrosis processes in response to SARS-CoV-2 and provide our viewpoint on the triggering of alveolar regeneration in coronavirus disease 2019 (COVID-19) patients.},
}

% NOTE(review): the citation key of the next entry contains a non-ASCII letter.
% It is kept unchanged so existing citations keep resolving; the ids field
% offers an ASCII alias under biblatex/Biber (classic BibTeX ignores it).
@article{NaseemOsmanoğluKaltdorfetal.2020,
  ids      = {NaseemOsmanogluKaltdorfetal.2020},
  author   = {Naseem, Muhammad and Osmano{\u{g}}lu, {\"O}zge and Kaltdorf, Martin and Alblooshi, Afnan Ali M. A. and Iqbal, Jibran and Howari, Fares M. and Srivastava, Mugdha and Dandekar, Thomas},
  title    = {Integrated framework of the immune-defense transcriptional signatures in the {Arabidopsis} shoot apical meristem},
  journal  = {International Journal of Molecular Sciences},
  volume   = {21},
  number   = {16},
  year     = {2020},
  issn     = {1422-0067},
  doi      = {10.3390/ijms21165745},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-285730},
  language = {en},
  abstract = {The growing tips of plants grow sterile; therefore, disease-free plants can be generated from them. How plants safeguard growing apices from pathogen infection is still a mystery. The shoot apical meristem (SAM) is one of the three stem cells niches that give rise to the above ground plant organs. This is very well explored; however, how signaling networks orchestrate immune responses against pathogen infections in the SAM remains unclear. To reconstruct a transcriptional framework of the differentially expressed genes (DEGs) pertaining to various SAM cellular populations, we acquired large-scale transcriptome datasets from the public repository Gene Expression Omnibus (GEO). We identify here distinct sets of genes for various SAM cellular populations that are enriched in immune functions, such as immune defense, pathogen infection, biotic stress, and response to salicylic acid and jasmonic acid and their biosynthetic pathways in the SAM. We further linked those immune genes to their respective proteins and identify interactions among them by mapping a transcriptome-guided SAM-interactome. Furthermore, we compared stem-cells regulated transcriptome with innate immune responses in plants showing transcriptional separation among their DEGs in Arabidopsis. Besides unleashing a repertoire of immune-related genes in the SAM, our analysis provides a SAM-interactome that will help the community in designing functional experiments to study the specific defense dynamics of the SAM-cellular populations. Moreover, our study promotes the essence of large-scale omics data re-analysis, allowing a fresh look at the SAM-cellular transcriptome repurposing data-sets for new questions.},
}

@article{BreitenbachLorenzDandekar2019,
  author   = {Breitenbach, Tim and Lorenz, Kristina and Dandekar, Thomas},
  title    = {How to steer and control {ERK} and the {ERK} signaling cascade exemplified by looking at cardiac insufficiency},
  journal  = {International Journal of Molecular Sciences},
  volume   = {20},
  number   = {9},
  year     = {2019},
  issn     = {1422-0067},
  doi      = {10.3390/ijms20092179},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-285164},
  language = {en},
  abstract = {Mathematical optimization framework allows the identification of certain nodes within a signaling network. In this work, we analyzed the complex extracellular-signal-regulated kinase 1 and 2 (ERK1/2) cascade in cardiomyocytes using the framework to find efficient adjustment screws for this cascade that is important for cardiomyocyte survival and maladaptive heart muscle growth. We modeled optimal pharmacological intervention points that are beneficial for the heart, but avoid the occurrence of a maladaptive ERK1/2 modification, the autophosphorylation of ERK at threonine 188 (ERK\(^{Thr188}\) phosphorylation), which causes cardiac hypertrophy. For this purpose, a network of a cardiomyocyte that was fitted to experimental data was equipped with external stimuli that model the pharmacological intervention points. Specifically, two situations were considered. In the first one, the cardiomyocyte was driven to a desired expression level with different treatment strategies. These strategies were quantified with respect to beneficial effects and maleficent side effects and then which one is the best treatment strategy was evaluated. In the second situation, it was shown how to model constitutively activated pathways and how to identify drug targets to obtain a desired activity level that is associated with a healthy state and in contrast to the maleficent expression pattern caused by the constitutively activated pathway. An implementation of the algorithms used for the calculations is also presented in this paper, which simplifies the application of the presented framework for drug targeting, optimal drug combinations and the systematic and automatic search for pharmacological intervention points. The codes were designed such that they can be combined with any mathematical model given by ordinary differential equations.},
}

@article{KaltdorfSchulzeHelmprobstetal.2017,
  author   = {Kaltdorf, Kristin Verena and Schulze, Katja and Helmprobst, Frederik and Kollmannsberger, Philip and Dandekar, Thomas and Stigloher, Christian},
  title    = {Fiji macro {3D} {ART} {VeSElecT}: {3D} automated reconstruction tool for vesicle structures of electron tomograms},
  journal  = {PLoS Computational Biology},
  volume   = {13},
  number   = {1},
  year     = {2017},
  doi      = {10.1371/journal.pcbi.1005317},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-172112},
  language = {en},
  abstract = {Automatic image reconstruction is critical to cope with steadily increasing data from advanced microscopy. We describe here the Fiji macro 3D ART VeSElecT which we developed to study synaptic vesicles in electron tomograms. We apply this tool to quantify vesicle properties (i) in embryonic Danio rerio 4 and 8 days past fertilization (dpf) and (ii) to compare Caenorhabditis elegans N2 neuromuscular junctions (NMJ) wild-type and its septin mutant (unc-59(e261)). We demonstrate development-specific and mutant-specific changes in synaptic vesicle pools in both models. We confirm the functionality of our macro by applying our 3D ART VeSElecT on zebrafish NMJ showing smaller vesicles in 8 dpf embryos then 4 dpf, which was validated by manual reconstruction of the vesicle pool. Furthermore, we analyze the impact of C. elegans septin mutant unc-59(e261) on vesicle pool formation and vesicle size. Automated vesicle registration and characterization was implemented in Fiji as two macros (registration and measurement). This flexible arrangement allows in particular reducing false positives by an optional manual revision step. Preprocessing and contrast enhancement work on image-stacks of 1nm/pixel in x and y direction. Semi-automated cell selection was integrated. 3D ART VeSElecT removes interfering components, detects vesicles by 3D segmentation and calculates vesicle volume and diameter (spherical approximation, inner/outer diameter). Results are collected in color using the RoiManager plugin including the possibility of manual removal of non-matching confounder vesicles. Detailed evaluation considered performance (detected vesicles) and specificity (true vesicles) as well as precision and recall. We furthermore show gain in segmentation and morphological filtering compared to learning based methods and a large time gain compared to manual segmentation. 3D ART VeSElecT shows small error rates and its speed gain can be up to 68 times faster in comparison to manual annotation. Both automatic and semi-automatic modes are explained including a tutorial.},
}

@article{LiangRiosMiguelJaricketal.2021,
  author   = {Liang, Chunguang and Rios-Miguel, Ana B. and Jarick, Marcel and Neurgaonkar, Priya and Girard, Myriam and Fran{\c{c}}ois, Patrice and Schrenzel, Jacques and Ibrahim, Eslam S. and Ohlsen, Knut and Dandekar, Thomas},
  title    = {Staphylococcus aureus transcriptome data and metabolic modelling investigate the interplay of {Ser/Thr} kinase {PknB}, its phosphatase {Stp}, the {glmR/yvcK} regulon and the {cdaA} operon for metabolic adaptation},
  journal  = {Microorganisms},
  volume   = {9},
  number   = {10},
  year     = {2021},
  issn     = {2076-2607},
  doi      = {10.3390/microorganisms9102148},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-248459},
  language = {en},
  abstract = {Serine/threonine kinase PknB and its corresponding phosphatase Stp are important regulators of many cell functions in the pathogen S. aureus. Genome-scale gene expression data of S. aureus strain NewHG (sigB\(^+\)) elucidated their effect on physiological functions. Moreover, metabolic modelling from these data inferred metabolic adaptations. We compared wild-type to deletion strains lacking pknB, stp or both. Ser/Thr phosphorylation of target proteins by PknB switched amino acid catabolism off and gluconeogenesis on to provide the cell with sufficient components. We revealed a significant impact of PknB and Stp on peptidoglycan, nucleotide and aromatic amino acid synthesis, as well as catabolism involving aspartate transaminase. Moreover, pyrimidine synthesis was dramatically impaired by stp deletion but only slightly by functional loss of PknB. In double knockouts, higher activity concerned genes involved in peptidoglycan, purine and aromatic amino acid synthesis from glucose but lower activity of pyrimidine synthesis from glucose compared to the wild type. A second transcriptome dataset from S. aureus NCTC 8325 (sigB\(^-\)) validated the predictions. For this metabolic adaptation, PknB was found to interact with CdaA and the yvcK/glmR regulon. The involved GlmR structure and the GlmS riboswitch were modelled. Furthermore, PknB phosphorylation lowered the expression of many virulence factors, and the study shed light on S. aureus infection processes.},
}