@comment{
  Bibliographic records, one field per line, abstract last.
  Review notes:
  - The key Dandekar2023 was shared by two different unpublished records.
    The first record keeps that key; the second one is now Dandekar2023b.
  - note = Preprint was added to the unpublished records because that entry
    type requires a note field -- NOTE(review): confirm the wording.
  - The key of the Naseem et al. 2020 record still contains a non-ASCII
    letter; it is left unchanged so existing citations keep resolving --
    NOTE(review): consider migrating to an ASCII-only key.
}

@article{CaliskanDangwalDandekar2023,
  author   = {Caliskan, Aylin and Dangwal, Seema and Dandekar, Thomas},
  title    = {Metadata integrity in bioinformatics: bridging the gap between data and knowledge},
  journal  = {Computational and Structural Biotechnology Journal},
  volume   = {21},
  pages    = {4895--4913},
  year     = {2023},
  issn     = {2001-0370},
  doi      = {10.1016/j.csbj.2023.10.006},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-349990},
  language = {en},
  abstract = {In the fast-evolving landscape of biomedical research, the emergence of big data has presented researchers with extraordinary opportunities to explore biological complexities. In biomedical research, big data imply also a big responsibility. This is not only due to genomics data being sensitive information but also due to genomics data being shared and re-analysed among the scientific community. This saves valuable resources and can even help to find new insights in silico. To fully use these opportunities, detailed and correct metadata are imperative. This includes not only the availability of metadata but also their correctness. Metadata integrity serves as a fundamental determinant of research credibility, supporting the reliability and reproducibility of data-driven findings. Ensuring metadata availability, curation, and accuracy are therefore essential for bioinformatic research. Not only must metadata be readily available, but they must also be meticulously curated and ideally error-free. Motivated by an accidental discovery of a critical metadata error in patient data published in two high-impact journals, we aim to raise awareness for the need of correct, complete, and curated metadata.
    We describe how the metadata error was found, addressed, and present examples for metadata-related challenges in omics research, along with supporting measures, including tools for checking metadata and software to facilitate various steps from data analysis to published research. Highlights • Data awareness and data integrity underpins the trustworthiness of results and subsequent further analysis. • Big data and bioinformatics enable efficient resource use by repurposing publicly available RNA-Sequencing data. • Manual checks of data quality and integrity are insufficient due to the overwhelming volume and rapidly growing data. • Automation and artificial intelligence provide cost-effective and efficient solutions for data integrity and quality checks. • FAIR data management, various software solutions and analysis tools assist metadata maintenance.},
}

@article{CaliskanCaliskanRasbachetal.2023,
  author   = {Caliskan, Aylin and Caliskan, Deniz and Rasbach, Lauritz and Yu, Weimeng and Dandekar, Thomas and Breitenbach, Tim},
  title    = {Optimized cell type signatures revealed from single-cell data by combining principal feature analysis, mutual information, and machine learning},
  journal  = {Computational and Structural Biotechnology Journal},
  volume   = {21},
  pages    = {3293--3314},
  year     = {2023},
  issn     = {2001-0370},
  doi      = {10.1016/j.csbj.2023.06.002},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-349989},
  language = {en},
  abstract = {Machine learning techniques are excellent to analyze expression data from single cells. These techniques impact all fields ranging from cell annotation and clustering to signature identification. The presented framework evaluates gene selection sets how far they optimally separate defined phenotypes or cell groups.
    This innovation overcomes the present limitation to objectively and correctly identify a small gene set of high information content regarding separating phenotypes for which corresponding code scripts are provided. The small but meaningful subset of the original genes (or feature space) facilitates human interpretability of the differences of the phenotypes including those found by machine learning results and may even turn correlations between genes and phenotypes into a causal explanation. For the feature selection task, the principal feature analysis is utilized which reduces redundant information while selecting genes that carry the information for separating the phenotypes. In this context, the presented framework shows explainability of unsupervised learning as it reveals cell-type specific signatures. Apart from a Seurat preprocessing tool and the PFA script, the pipeline uses mutual information to balance accuracy and size of the gene set if desired. A validation part to evaluate the gene selection for their information content regarding the separation of the phenotypes is provided as well, binary and multiclass classification of 3 or 4 groups are studied. Results from different single-cell data are presented. In each, only about ten out of more than 30000 genes are identified as carrying the relevant information.
    The code is provided in a GitHub repository at https://github.com/AC-PHD/Seurat_PFA_pipeline.},
}

@article{SalihogluSrivastavaLiangetal.2023,
  author   = {Salihoglu, Rana and Srivastava, Mugdha and Liang, Chunguang and Schilling, Klaus and Szalay, Aladar and Bencurova, Elena and Dandekar, Thomas},
  title    = {{PRO-Simat}: Protein network simulation and design tool},
  journal  = {Computational and Structural Biotechnology Journal},
  volume   = {21},
  pages    = {2767--2779},
  year     = {2023},
  issn     = {2001-0370},
  doi      = {10.1016/j.csbj.2023.04.023},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-350034},
  language = {en},
  abstract = {PRO-Simat is a simulation tool for analysing protein interaction networks, their dynamic change and pathway engineering. It provides GO enrichment, KEGG pathway analyses, and network visualisation from an integrated database of more than 8 million protein-protein interactions across 32 model organisms and the human proteome. We integrated dynamical network simulation using the Jimena framework, which quickly and efficiently simulates Boolean genetic regulatory networks. It enables simulation outputs with in-depth analysis of the type, strength, duration and pathway of the protein interactions on the website. Furthermore, the user can efficiently edit and analyse the effect of network modifications and engineering experiments. In case studies, applications of PRO-Simat are demonstrated: (i) understanding mutually exclusive differentiation pathways in Bacillus subtilis, (ii) making Vaccinia virus oncolytic by switching on its viral replication mainly in cancer cells and triggering cancer cell apoptosis and (iii) optogenetic control of nucleotide processing protein networks to operate DNA storage.
    Multilevel communication between components is critical for efficient network switching, as demonstrated by a general census on prokaryotic and eukaryotic networks and comparing design with synthetic networks using PRO-Simat. The tool is available at https://prosimat.heinzelab.de/ as a web-based query server.},
}

@unpublished{Dandekar2023,
  author   = {Dandekar, Thomas},
  title    = {Protein folding and crystallization applied to qubit interactions and fundamental physics yields a modified inflation model for cosmology},
  note     = {Preprint},
  pages    = {42},
  year     = {2023},
  doi      = {10.25972/OPUS-34615},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-346156},
  language = {en},
  abstract = {Protein folding achieves a clear solution structure in a huge parameter space (the so-called protein folding problem). Proteins fold in water, and get by this a highly ordered structure. Finally, inside a protein crystal for structure resolution, you have everywhere the same symmetries as there is everywhere the same unit cell. We apply this to qubit interactions to do fundamental physics: in a modified cosmology, we replace the big bang by a condensation event in an eternal all-encompassing ocean of free qubits. Interactions of qubits in the qubit ocean are quite rare but provide a nucleus or seed for a new universe (domain) as the qubits become decoherent and freeze-out into defined bit ensembles. Second, we replace inflation by a crystallization event triggered by the nucleus of interacting qubits to which rapidly more and more qubits attach (like in everyday crystal growth). The crystal unit cell guarantees same symmetries everywhere inside the crystal. The textbook inflation scenario to explain the same laws of nature in our domain is replaced by the unit cell of the crystal formed. Interacting qubits solidify, quantum entropy decreases (but increases in the ocean around).
    In a modified inflation scenario, the interacting qubits form a rapidly growing domain where the n**m states become separated ensemble states, rising long-range forces stop ultimately further growth. Then standard cosmology with the hot fireball model takes over. Our theory agrees well with lack of inflation traces in cosmic background measurements. We explain by cosmological crystallization instead of inflation: early creation of large-scale structure of voids and filaments, supercluster formation, galaxy formation, and the dominance of matter: the unit cell of our crystal universe has a matter handedness avoiding anti-matter. We prove initiation of qubit interactions can only be 1,2,4 or 8-dimensional (agrees with E8 symmetry of our universe). Repulsive forces at ultrashort distances result from quantization, long-range forces limit crystal growth. Crystals come and go in the qubit ocean. This selects for the ability to lay seeds for new crystals, for self-organization and life-friendliness. The phase space of the crystal agrees with the standard model of the basic four forces for n quanta. It includes all possible ensemble combinations of their quantum states m, a total of n**m states. Neighbor states reach according to transition possibilities (S-matrix) with emergent time from entropic ensemble gradients. However, in our four dimensions there is only one bit overlap to neighbor states left (almost solid, only below Planck quantum there is liquidity left). The E8 symmetry of heterotic string theory has six curled-up, small dimensions which help to keep the qubit crystal together and will never expand. Mathematics focusses on the Hurwitz proof applied to qubit interaction, a toy model of qubit interaction and repulsive forces of qubits. Vacuum energy gets appropriate low inside the crystal. We give first energy estimates for free qubits vs bound qubits, misplacements in the qubit crystal and entropy increase during qubit decoherence / crystal formation.
    Scalar fields for color interaction/confinement and gravity are derived from the qubit-interaction field.},
}

@article{RackeveiBorgesEngstleretal.2022,
  author   = {Rackevei, Antonia S. and Borges, Alyssa and Engstler, Markus and Dandekar, Thomas and Wolf, Matthias},
  title    = {About the analysis of {18S} {rDNA} sequence data from trypanosomes in barcoding and phylogenetics: tracing a continuation error occurring in the literature},
  journal  = {Biology},
  volume   = {11},
  number   = {11},
  year     = {2022},
  issn     = {2079-7737},
  doi      = {10.3390/biology11111612},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-297562},
  language = {en},
  abstract = {The variable regions (V1-V9) of the 18S rDNA are routinely used in barcoding and phylogenetics. In handling these data for trypanosomes, we have noticed a misunderstanding that has apparently taken a life of its own in the literature over the years. In particular, in recent years, when studying the phylogenetic relationship of trypanosomes, the use of V7/V8 was systematically established. However, considering the current numbering system for all other organisms (including other Euglenozoa), V7/V8 was never used. In Maia da Silva et al. [Parasitology 2004, 129, 549-561], V7/V8 was promoted for the first time for trypanosome phylogenetics, and since then, more than 70 publications have replicated this nomenclature and even discussed the benefits of the use of this region in comparison to V4.
    However, the primers used to amplify the variable region of trypanosomes have actually amplified V4 (concerning the current 18S rDNA numbering system).},
}

@article{HanRenMamtiminetal.2023,
  author   = {Han, Chao and Ren, Pengxuan and Mamtimin, Medina and Kruk, Linus and Sarukhanyan, Edita and Li, Chenyu and Anders, Hans-Joachim and Dandekar, Thomas and Krueger, Irena and Elvers, Margitta and Goebel, Silvia and Adler, Kristin and M{\"u}nch, G{\"o}tz and Gudermann, Thomas and Braun, Attila and Mammadova-Bach, Elmina},
  title    = {Minimal collagen-binding epitope of glycoprotein {VI} in human and mouse platelets},
  journal  = {Biomedicines},
  volume   = {11},
  number   = {2},
  year     = {2023},
  issn     = {2227-9059},
  doi      = {10.3390/biomedicines11020423},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-304148},
  language = {en},
  abstract = {Glycoprotein VI (GPVI) is a platelet-specific receptor for collagen and fibrin, regulating important platelet functions such as platelet adhesion and thrombus growth. Although the blockade of GPVI function is widely recognized as a potent anti-thrombotic approach, there are limited studies focused on site-specific targeting of GPVI. Using computational modeling and bioinformatics, we analyzed collagen- and CRP-binding surfaces of GPVI monomers and dimers, and compared the interacting surfaces with other mammalian GPVI isoforms. We could predict a minimal collagen-binding epitope of GPVI dimer and designed an EA-20 antibody that recognizes a linear epitope of this surface. Using platelets and whole blood samples donated from wild-type and humanized GPVI transgenic mice and also humans, our experimental results show that the EA-20 antibody inhibits platelet adhesion and aggregation in response to collagen and CRP, but not to fibrin. The EA-20 antibody also prevents thrombus formation in whole blood, on the collagen-coated surface, in arterial flow conditions.
    We also show that EA-20 does not influence GPVI clustering or receptor shedding. Therefore, we propose that blockade of this minimal collagen-binding epitope of GPVI with the EA-20 antibody could represent a new anti-thrombotic approach by inhibiting specific interactions between GPVI and the collagen matrix.},
}

@unpublished{Dandekar2023b,
  author   = {Dandekar, Thomas},
  title    = {A modified inflation cosmology relying on qubit-crystallization: rare qubit interactions trigger qubit ensemble growth and crystallization into ``real'' bit-ensembles and emergent time},
  note     = {Preprint},
  pages    = {42},
  year     = {2023},
  doi      = {10.25972/OPUS-32177},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-321777},
  language = {en},
  abstract = {In a modified inflation scenario we replace the "big bang" by a condensation event in an eternal all-compassing big ocean of free qubits in our modified cosmology. Interactions of qubits in the qubit ocean are rare. If they happen, they provide a nucleus for a new universe as the qubits become decoherent and freeze-out into defined bit ensembles. Second, we replace inflation by a crystallization event triggered by the nucleus of interacting qubits to which rapidly more and more qubits attach (like in everyday crystal growth) - the crystal unit cell guarantees same symmetries everywhere. Hence, the textbook inflation scenario to explain the same laws of nature in our domain is replaced by the crystal unit cell of the crystal formed. We give here only the perspective or outline of this modified inflation theory, as the detailed mathematical physics behind this has still to be formulated and described. Interacting qubits solidify, quantum entropy decreases (but increases in the ocean around). The interacting qubits form a rapidly growing domain where the n**m states become separated ensemble states, rising long-range forces stop ultimately further growth. After that very early events, standard cosmology with the hot fireball model takes over.
    Our theory agrees well with lack of inflation traces in cosmic background measurements, but more importantly can explain well by such a type of cosmological crystallization instead of inflation the early creation of large-scale structure of voids and filaments, supercluster formation, galaxy formation, and the dominance of matter: no annihilation of antimatter necessary, rather the unit cell of our crystal universe has a matter handedness avoiding anti-matter. We prove a triggering of qubit interactions can only be 1,2,4 or 8-dimensional (agrees with E8 symmetry of our universe). Repulsive forces at ultrashort distances result from quantization, long-range forces limit crystal growth. Crystals come and go in the qubit ocean. This selects for the ability to lay seeds for new crystals, for self-organization and life-friendliness. The phase space of the crystal agrees with the standard model of the basic four forces for n quanta. It includes all possible ensemble combinations of their quantum states m, a total of n**m states. Neighbor states reach according to transition possibilities (S-matrix) with emergent time from entropic ensemble gradients. However, this means that in our four dimensions there is only one bit overlap to neighbor states left (almost solid, only below h dash liquidity left). However, the E8 symmetry of heterotic string theory has six rolled-up, small dimensions which help to keep the qubit crystal together and will never expand. Finally, we give first energy estimates for free qubits vs bound qubits, misplacements in the qubit crystal and entropy increase during qubit decoherence / crystal formation. Scalar fields for color interaction and gravity derive from the permeating qubit-interaction field in the crystal. Hence, vacuum energy gets low inside the qubit crystal.
    Condensed mathematics may advantageously help to model free (many states denote the same qubit) and bound qubits in phase space.},
}

@article{GuptaSrivastavaMinochaetal.2021,
  author   = {Gupta, Shishir K. and Srivastava, Mugdha and Minocha, Rashmi and Akash, Aman and Dangwal, Seema and Dandekar, Thomas},
  title    = {Alveolar regeneration in {COVID-19} patients: a network perspective},
  journal  = {International Journal of Molecular Sciences},
  volume   = {22},
  number   = {20},
  year     = {2021},
  issn     = {1422-0067},
  doi      = {10.3390/ijms222011279},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-284307},
  language = {en},
  abstract = {A viral infection involves entry and replication of viral nucleic acid in a host organism, subsequently leading to biochemical and structural alterations in the host cell. In the case of SARS-CoV-2 viral infection, over-activation of the host immune system may lead to lung damage. Albeit the regeneration and fibrotic repair processes being the two protective host responses, prolonged injury may lead to excessive fibrosis, a pathological state that can result in lung collapse. In this review, we discuss regeneration and fibrosis processes in response to SARS-CoV-2 and provide our viewpoint on the triggering of alveolar regeneration in coronavirus disease 2019 (COVID-19) patients.},
}

@article{NaseemOsmanoğluKaltdorfetal.2020,
  author   = {Naseem, Muhammad and Osmano{\u{g}}lu, {\"O}zge and Kaltdorf, Martin and Alblooshi, Afnan Ali M. A. and Iqbal, Jibran and Howari, Fares M. and Srivastava, Mugdha and Dandekar, Thomas},
  title    = {Integrated framework of the immune-defense transcriptional signatures in the {Arabidopsis} shoot apical meristem},
  journal  = {International Journal of Molecular Sciences},
  volume   = {21},
  number   = {16},
  year     = {2020},
  issn     = {1422-0067},
  doi      = {10.3390/ijms21165745},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-285730},
  language = {en},
  abstract = {The growing tips of plants grow sterile; therefore, disease-free plants can be generated from them. How plants safeguard growing apices from pathogen infection is still a mystery. The shoot apical meristem (SAM) is one of the three stem cells niches that give rise to the above ground plant organs. This is very well explored; however, how signaling networks orchestrate immune responses against pathogen infections in the SAM remains unclear. To reconstruct a transcriptional framework of the differentially expressed genes (DEGs) pertaining to various SAM cellular populations, we acquired large-scale transcriptome datasets from the public repository Gene Expression Omnibus (GEO). We identify here distinct sets of genes for various SAM cellular populations that are enriched in immune functions, such as immune defense, pathogen infection, biotic stress, and response to salicylic acid and jasmonic acid and their biosynthetic pathways in the SAM. We further linked those immune genes to their respective proteins and identify interactions among them by mapping a transcriptome-guided SAM-interactome. Furthermore, we compared stem-cells regulated transcriptome with innate immune responses in plants showing transcriptional separation among their DEGs in Arabidopsis. Besides unleashing a repertoire of immune-related genes in the SAM, our analysis provides a SAM-interactome that will help the community in designing functional experiments to study the specific defense dynamics of the SAM-cellular populations.
    Moreover, our study promotes the essence of large-scale omics data re-analysis, allowing a fresh look at the SAM-cellular transcriptome repurposing data-sets for new questions.},
}

@article{BreitenbachLorenzDandekar2019,
  author   = {Breitenbach, Tim and Lorenz, Kristina and Dandekar, Thomas},
  title    = {How to steer and control {ERK} and the {ERK} signaling cascade exemplified by looking at cardiac insufficiency},
  journal  = {International Journal of Molecular Sciences},
  volume   = {20},
  number   = {9},
  year     = {2019},
  issn     = {1422-0067},
  doi      = {10.3390/ijms20092179},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-285164},
  language = {en},
  abstract = {Mathematical optimization framework allows the identification of certain nodes within a signaling network. In this work, we analyzed the complex extracellular-signal-regulated kinase 1 and 2 (ERK1/2) cascade in cardiomyocytes using the framework to find efficient adjustment screws for this cascade that is important for cardiomyocyte survival and maladaptive heart muscle growth. We modeled optimal pharmacological intervention points that are beneficial for the heart, but avoid the occurrence of a maladaptive ERK1/2 modification, the autophosphorylation of ERK at threonine 188 (ERK\(^{Thr188}\) phosphorylation), which causes cardiac hypertrophy. For this purpose, a network of a cardiomyocyte that was fitted to experimental data was equipped with external stimuli that model the pharmacological intervention points. Specifically, two situations were considered. In the first one, the cardiomyocyte was driven to a desired expression level with different treatment strategies. These strategies were quantified with respect to beneficial effects and maleficent side effects and then which one is the best treatment strategy was evaluated.
    In the second situation, it was shown how to model constitutively activated pathways and how to identify drug targets to obtain a desired activity level that is associated with a healthy state and in contrast to the maleficent expression pattern caused by the constitutively activated pathway. An implementation of the algorithms used for the calculations is also presented in this paper, which simplifies the application of the presented framework for drug targeting, optimal drug combinations and the systematic and automatic search for pharmacological intervention points. The codes were designed such that they can be combined with any mathematical model given by ordinary differential equations.},
}