% Journal article. The page range is written with a double hyphen, and the
% former "series" field (a verbatim copy of "journal") has been dropped so
% that styles which print "series" do not render the journal name twice.
@article{CaliskanCaliskanRasbachetal.2023,
  author   = {Caliskan, Aylin and Caliskan, Deniz and Rasbach, Lauritz and Yu, Weimeng and Dandekar, Thomas and Breitenbach, Tim},
  title    = {Optimized cell type signatures revealed from single-cell data by combining principal feature analysis, mutual information, and machine learning},
  journal  = {Computational and Structural Biotechnology Journal},
  volume   = {21},
  pages    = {3293--3314},
  year     = {2023},
  issn     = {2001-0370},
  doi      = {10.1016/j.csbj.2023.06.002},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-349989},
  language = {en},
  abstract = {Machine learning techniques are excellent to analyze expression data from single cells.
              These techniques impact all fields ranging from cell annotation and clustering to signature identification.
              The presented framework evaluates gene selection sets how far they optimally separate defined phenotypes or cell groups.
              This innovation overcomes the present limitation to objectively and correctly identify a small gene set of high information content regarding separating phenotypes for which corresponding code scripts are provided.
              The small but meaningful subset of the original genes (or feature space) facilitates human interpretability of the differences of the phenotypes including those found by machine learning results and may even turn correlations between genes and phenotypes into a causal explanation.
              For the feature selection task, the principal feature analysis is utilized which reduces redundant information while selecting genes that carry the information for separating the phenotypes.
              In this context, the presented framework shows explainability of unsupervised learning as it reveals cell-type specific signatures.
              Apart from a Seurat preprocessing tool and the PFA script, the pipeline uses mutual information to balance accuracy and size of the gene set if desired.
              A validation part to evaluate the gene selection for their information content regarding the separation of the phenotypes is provided as well, binary and multiclass classification of 3 or 4 groups are studied.
              Results from different single-cell data are presented.
              In each, only about ten out of more than 30000 genes are identified as carrying the relevant information.
              The code is provided in a GitHub repository at https://github.com/AC-PHD/Seurat_PFA_pipeline.},
}

% Journal article. No "pages" field is given for this entry. The former
% "series" field (a verbatim copy of "journal") has been dropped for the
% same reason as above.
@article{MaerzKurlbaumRocheLancasteretal.2021,
  author   = {M{\"a}rz, Juliane and Kurlbaum, Max and Roche-Lancaster, Oisin and Deutschbein, Timo and Peitzsch, Mirko and Prehn, Cornelia and Weismann, Dirk and Robledo, Mercedes and Adamski, Jerzy and Fassnacht, Martin and Kunz, Meik and Kroiss, Matthias},
  title    = {Plasma Metabolome Profiling for the Diagnosis of Catecholamine Producing Tumors},
  journal  = {Frontiers in Endocrinology},
  volume   = {12},
  year     = {2021},
  issn     = {1664-2392},
  doi      = {10.3389/fendo.2021.722656},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-245710},
  language = {en},
  abstract = {Context Pheochromocytomas and paragangliomas (PPGL) cause catecholamine excess leading to a characteristic clinical phenotype.
              Intra-individual changes at metabolome level have been described after surgical PPGL removal.
              The value of metabolomics for the diagnosis of PPGL has not been studied yet.
              Objective Evaluation of quantitative metabolomics as a diagnostic tool for PPGL.
              Design Targeted metabolomics by liquid chromatography-tandem mass spectrometry of plasma specimens and statistical modeling using ML-based feature selection approaches in a clinically well characterized cohort study.
              Patients Prospectively enrolled patients (n=36, 17 female) from the Prospective Monoamine-producing Tumor Study (PMT) with hormonally active PPGL and 36 matched controls in whom PPGL was rigorously excluded.
              Results Among 188 measured metabolites, only without considering false discovery rate, 4 exhibited statistically significant differences between patients with PPGL and controls (histidine p=0.004, threonine p=0.008, lyso PC a C28:0 p=0.044, sum of hexoses p=0.018).
              Weak, but significant correlations for histidine, threonine and lyso PC a C28:0 with total urine catecholamine levels were identified.
              Only the sum of hexoses (reflecting glucose) showed significant correlations with plasma metanephrines.
              By using ML-based feature selection approaches, we identified diagnostic signatures which all exhibited low accuracy and sensitivity.
              The best predictive value (sensitivity 87.5\%, accuracy 67.3\%) was obtained by using Gradient Boosting Machine Modelling.
              Conclusions The diabetogenic effect of catecholamine excess dominates the plasma metabolome in PPGL patients.
              While curative surgery for PPGL led to normalization of catecholamine-induced alterations of metabolomics in individual patients, plasma metabolomics are not useful for diagnostic purposes, most likely due to inter-individual variability.},
}