% Bibliography database: seven entries (six journal articles, one PhD thesis),
% all of which list "Sharan, Malvika" among the authors.
%
% Conventions used in this file:
%   - One field per line, in the order: author, title, venue, volume, number,
%     pages, year, doi, url, language, abstract.
%   - Accented letters are written as ASCII LaTeX escapes wrapped in braces,
%     e.g. {\'e} or {\"o}, so that classic BibTeX treats each as one letter.
%   - "number" holds the journal issue; an article identifier (e.g. e96) is
%     stored in "pages".
%   - Words in titles that must keep their capitalisation (acronyms, genus
%     names, software names) are protected with braces.
%   - Each "url" is an nbn-resolving.de URN resolver link; "doi" is the bare DOI.

% morFeus: remote ortholog detection (BMC Bioinformatics 15, article 263).
@article{WagnerVolkmerSharanetal.2014,
  author   = {Wagner, Ines and Volkmer, Michael and Sharan, Malvika and Villaveces, Jose M. and Oswald, Felix and Surendranath, Vineeth and Habermann, Bianca H.},
  title    = {{morFeus}: a web-based program to detect remotely conserved orthologs using symmetrical best hits and orthology network scoring},
  journal  = {BMC Bioinformatics},
  volume   = {15},
  pages    = {263},
  year     = {2014},
  doi      = {10.1186/1471-2105-15-263},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-115590},
  language = {en},
  abstract = {Background: Searching the orthologs of a given protein or DNA sequence is one of the most important and most commonly used Bioinformatics methods in Biology. Programs like BLAST or the orthology search engine Inparanoid can be used to find orthologs when the similarity between two sequences is sufficiently high. They however fail when the level of conservation is low. The detection of remotely conserved proteins oftentimes involves sophisticated manual intervention that is difficult to automate. Results: Here, we introduce morFeus, a search program to find remotely conserved orthologs. Based on relaxed sequence similarity searches, morFeus selects sequences based on the similarity of their alignments to the query, tests for orthology by iterative reciprocal BLAST searches and calculates a network score for the resulting network of orthologs that is a measure of orthology independent of the E-value. Detecting remotely conserved orthologs of a protein using morFeus thus requires no manual intervention. We demonstrate the performance of morFeus by comparing it to state-of-the-art orthology resources and methods. We provide an example of remotely conserved orthologs, which were experimentally shown to be functionally equivalent in the respective organisms and therefore meet the criteria of the orthology-function conjecture. Conclusions: Based on our results, we conclude that morFeus is a powerful and specific search method for detecting remotely conserved orthologs.},
}

% Cell-fate decision in Staphylococcus aureus infections (eLife 6, article e28023).
@article{GarciaBetancurGoniMorenoHorgeretal.2017,
  author   = {Garc{\'i}a-Betancur, Juan-Carlos and Go{\~n}i-Moreno, Angel and Horger, Thomas and Schott, Melanie and Sharan, Malvika and Eikmeier, Julian and Wohlmuth, Barbara and Zernecke, Alma and Ohlsen, Knut and Kuttler, Christina and Lopez, Daniel},
  title    = {Cell differentiation defines acute and chronic infection cell types in {Staphylococcus} aureus},
  journal  = {eLife},
  volume   = {6},
  pages    = {e28023},
  year     = {2017},
  doi      = {10.7554/eLife.28023},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-170346},
  language = {en},
  abstract = {A central question to biology is how pathogenic bacteria initiate acute or chronic infections. Here we describe a genetic program for cell-fate decision in the opportunistic human pathogen Staphylococcus aureus, which generates the phenotypic bifurcation of the cells into two genetically identical but different cell types during the course of an infection. Whereas one cell type promotes the formation of biofilms that contribute to chronic infections, the second type is planktonic and produces the toxins that contribute to acute bacteremia. We identified a bimodal switch in the agr quorum sensing system that antagonistically regulates the differentiation of these two physiologically distinct cell types. We found that extracellular signals affect the behavior of the agr bimodal switch and modify the size of the specialized subpopulations in specific colonization niches. For instance, magnesium-enriched colonization niches causes magnesium binding to S. aureus teichoic acids and increases bacterial cell wall rigidity. This signal triggers a genetic program that ultimately downregulates the agr bimodal switch. Colonization niches with different magnesium concentrations influence the bimodal system activity, which defines a distinct ratio between these subpopulations; this in turn leads to distinct infection outcomes in vitro and in an in vivo murine infection model. Cell differentiation generates physiological heterogeneity in clonal bacterial infections and helps to determine the distinct infection types.},
}

% PhD thesis introducing the APRICOT pipeline (Universitaet Wuerzburg, 2017).
@phdthesis{Sharan2017,
  author   = {Sharan, Malvika},
  title    = {Bio-computational identification and characterization of {RNA}-binding proteins in bacteria},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2017},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-153573},
  language = {en},
  abstract = {RNA-binding proteins (RBPs) have been extensively studied in eukaryotes, where they post-transcriptionally regulate many cellular events including RNA transport, translation, and stability. Experimental techniques, such as cross-linking and co-purification followed by either mass spectrometry or RNA sequencing has enabled the identification and characterization of RBPs, their conserved RNA-binding domains (RBDs), and the regulatory roles of these proteins on a genome-wide scale. These developments in quantitative, high-resolution, and high-throughput screening techniques have greatly expanded our understanding of RBPs in human and yeast cells. In contrast, our knowledge of number and potential diversity of RBPs in bacteria is comparatively poor, in part due to the technical challenges associated with existing global screening approaches developed in eukaryotes. Genome- and proteome-wide screening approaches performed in silico may circumvent these technical issues to obtain a broad picture of the RNA interactome of bacteria and identify strong RBP candidates for more detailed experimental study. Here, I report APRICOT ("Analyzing Protein RNA Interaction by Combined Output Technique"), a computational pipeline for the sequence-based identification and characterization of candidate RNA-binding proteins encoded in the genomes of all domains of life using RBDs known from experimental studies. The pipeline identifies functional motifs in protein sequences of an input proteome using position-specific scoring matrices and hidden Markov models of all conserved domains available in the databases and then statistically score them based on a series of sequence-based features. Subsequently, APRICOT identifies putative RBPs and characterizes them according to functionally relevant structural properties. APRICOT performed better than other existing tools for the sequence-based prediction on the known RBP data sets. The applications and adaptability of the software was demonstrated on several large bacterial RBP data sets including the complete proteome of Salmonella Typhimurium strain SL1344. APRICOT reported 1068 Salmonella proteins as RBP candidates, which were subsequently categorized using the RBDs that have been reported in both eukaryotic and bacterial proteins. A set of 131 strong RBP candidates was selected for experimental confirmation and characterization of RNA-binding activity using RNA co-immunoprecipitation followed by high-throughput sequencing (RIP-Seq) experiments. Based on the relative abundance of transcripts across the RIP-Seq libraries, a catalogue of enriched genes was established for each candidate, which shows the RNA-binding potential of 90\% of these proteins. Furthermore, the direct targets of few of these putative RBPs were validated by means of cross-linking and co-immunoprecipitation (CLIP) experiments. This thesis presents the computational pipeline APRICOT for the global screening of protein primary sequences for potential RBPs in bacteria using RBD information from all kingdoms of life. Furthermore, it provides the first bio-computational resource of putative RBPs in Salmonella, which could now be further studied for their biological and regulatory roles. The command line tool and its documentation are available at https://malvikasharan.github.io/APRICOT/.},
}

% APRICOT pipeline paper (Nucleic Acids Research 45, issue 11, article e96).
@article{SharanFoerstnerEulalioetal.2017,
  author   = {Sharan, Malvika and F{\"o}rstner, Konrad U. and Eulalio, Ana and Vogel, J{\"o}rg},
  title    = {{APRICOT}: an integrated computational pipeline for the sequence-based identification and characterization of {RNA}-binding proteins},
  journal  = {Nucleic Acids Research},
  volume   = {45},
  number   = {11},
  pages    = {e96},
  year     = {2017},
  doi      = {10.1093/nar/gkx137},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-157963},
  language = {en},
  abstract = {RNA-binding proteins (RBPs) have been established as core components of several post-transcriptional gene regulation mechanisms. Experimental techniques such as cross-linking and co-immunoprecipitation have enabled the identification of RBPs, RNA-binding domains (RBDs) and their regulatory roles in the eukaryotic species such as human and yeast in large-scale. In contrast, our knowledge of the number and potential diversity of RBPs in bacteria is poorer due to the technical challenges associated with the existing global screening approaches. We introduce APRICOT, a computational pipeline for the sequence-based identification and characterization of proteins using RBDs known from experimental studies. The pipeline identifies functional motifs in protein sequences using position-specific scoring matrices and Hidden Markov Models of the functional domains and statistically scores them based on a series of sequence-based features. Subsequently, APRICOT identifies putative RBPs and characterizes them by several biological properties. Here we demonstrate the application and adaptability of the pipeline on large-scale protein sets, including the bacterial proteome of Escherichia coli. APRICOT showed better performance on various datasets compared to other existing tools for the sequence-based prediction of RBPs by achieving an average sensitivity and specificity of 0.90 and 0.91 respectively. The command-line tool and its documentation are available at https://pypi.python.org/pypi/bio-apricot.},
}

% Host microRNA role in Shigella infection (PLoS Pathogens 13, issue 4, article e1006327).
@article{SunkavalliAguilarSilvaetal.2017,
  author   = {Sunkavalli, Ushasree and Aguilar, Carmen and Silva, Ricardo Jorge and Sharan, Malvika and Cruz, Ana Rita and Tawk, Caroline and Maudet, Claire and Mano, Miguel and Eulalio, Ana},
  title    = {Analysis of host {microRNA} function uncovers a role for {miR-29b-2-5p} in {Shigella} capture by filopodia},
  journal  = {PLoS Pathogens},
  volume   = {13},
  number   = {4},
  pages    = {e1006327},
  year     = {2017},
  doi      = {10.1371/journal.ppat.1006327},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-158204},
  language = {en},
  abstract = {MicroRNAs play an important role in the interplay between bacterial pathogens and host cells, participating as host defense mechanisms, as well as exploited by bacteria to subvert host cellular functions. Here, we show that microRNAs modulate infection by Shigella flexneri, a major causative agent of bacillary dysentery in humans. Specifically, we characterize the dual regulatory role of miR-29b-2-5p during infection, showing that this microRNA strongly favors Shigella infection by promoting both bacterial binding to host cells and intracellular replication. Using a combination of transcriptome analysis and targeted high-content RNAi screening, we identify UNC5C as a direct target of miR-29b-2-5p and show its pivotal role in the modulation of Shigella binding to host cells. MiR-29b-2-5p, through repression of UNC5C, strongly enhances filopodia formation thus increasing Shigella capture and promoting bacterial invasion. The increase of filopodia formation mediated by miR-29b-2-5p is dependent on RhoF and Cdc42 Rho-GTPases. Interestingly, the levels of miR-29b-2-5p, but not of other mature microRNAs from the same precursor, are decreased upon Shigella replication at late times post-infection, through degradation of the mature microRNA by the exonuclease PNPT1. While the relatively high basal levels of miR-29b-2-5p at the start of infection ensure efficient Shigella capture by host cell filopodia, dampening of miR-29b-2-5p levels later during infection may constitute a bacterial strategy to favor a balanced intracellular replication to avoid premature cell death and favor dissemination to neighboring cells, or alternatively, part of the host response to counteract Shigella infection. Overall, these findings reveal a previously unappreciated role of microRNAs, and in particular miR-29b-2-5p, in the interaction of Shigella with host cells.},
}

% CAFA2 protein function prediction assessment (Genome Biology 17, article 184).
@article{JiangOronClarketal.2016,
  author   = {Jiang, Yuxiang and Oron, Tal Ronnen and Clark, Wyatt T. and Bankapur, Asma R.
              and D'Andrea, Daniel and Lepore, Rosalba and Funk, Christopher S. and Kahanda, Indika
              and Verspoor, Karin M. and Ben-Hur, Asa and Koo, Da Chen Emily and Penfold-Brown, Duncan
              and Shasha, Dennis and Youngs, Noah and Bonneau, Richard and Lin, Alexandra
              and Sahraeian, Sayed M. E. and Martelli, Pier Luigi and Profiti, Giuseppe and Casadio, Rita
              and Cao, Renzhi and Zhong, Zhaolong and Cheng, Jianlin and Altenhoff, Adrian
              and Skunca, Nives and Dessimoz, Christophe and Dogan, Tunca and Hakala, Kai
              and Kaewphan, Suwisa and Mehryary, Farrokh and Salakoski, Tapio and Ginter, Filip
              and Fang, Hai and Smithers, Ben and Oates, Matt and Gough, Julian
              and T{\"o}r{\"o}nen, Petri and Koskinen, Patrik and Holm, Liisa and Chen, Ching-Tai
              and Hsu, Wen-Lian and Bryson, Kevin and Cozzetto, Domenico and Minneci, Federico
              and Jones, David T. and Chapman, Samuel and BKC, Dukka and Khan, Ishita K.
              and Kihara, Daisuke and Ofer, Dan and Rappoport, Nadav and Stern, Amos
              and Cibrian-Uhalte, Elena and Denny, Paul and Foulger, Rebecca E. and Hieta, Reija
              and Legge, Duncan and Lovering, Ruth C. and Magrane, Michele and Melidoni, Anna N.
              and Mutowo-Meullenet, Prudence and Pichler, Klemens and Shypitsyna, Aleksandra and Li, Biao
              and Zakeri, Pooya and ElShal, Sarah and Tranchevent, L{\'e}on-Charles and Das, Sayoni
              and Dawson, Natalie L. and Lee, David and Lees, Jonathan G. and Sillitoe, Ian
              and Bhat, Prajwal and Nepusz, Tam{\'a}s and Romero, Alfonso E. and Sasidharan, Rajkumar
              and Yang, Haixuan and Paccanaro, Alberto and Gillis, Jesse and Sede{\~n}o-Cort{\'e}s, Adriana E.
              and Pavlidis, Paul and Feng, Shou and Cejuela, Juan M. and Goldberg, Tatyana
              and Hamp, Tobias and Richter, Lothar and Salamov, Asaf and Gabaldon, Toni
              and Marcet-Houben, Marina and Supek, Fran and Gong, Qingtian and Ning, Wei
              and Zhou, Yuanpeng and Tian, Weidong and Falda, Marco and Fontana, Paolo
              and Lavezzo, Enrico and Toppo, Stefano and Ferrari, Carlo and Giollo, Manuel
              and Piovesan, Damiano and Tosatto, Silvio C. E. and del Pozo, Angela and Fern{\'a}ndez, Jos{\'e} M.
              and Maietta, Paolo and Valencia, Alfonso and Tress, Michael L. and Benso, Alfredo
              and Di Carlo, Stefano and Politano, Gianfranco and Savino, Alessandro and Rehman, Hafeez Ur
              and Re, Matteo and Mesiti, Marco and Valentini, Giorgio and Bargsten, Joachim W.
              and van Dijk, Aalt D. J. and Gemovic, Branislava and Glisic, Sanja and Perovic, Vladmir
              and Veljkovic, Veljko and Almeida-e-Silva, Danillo C. and Vencio, Ricardo Z. N. and Sharan, Malvika
              and Vogel, J{\"o}rg and Kansakar, Lakesh and Zhang, Shanshan and Vucetic, Slobodan
              and Wang, Zheng and Sternberg, Michael J. E. and Wass, Mark N. and Huntley, Rachael P.
              and Martin, Maria J. and O'Donovan, Claire and Robinson, Peter N. and Moreau, Yves
              and Tramontano, Anna and Babbitt, Patricia C. and Brenner, Steven E. and Linial, Michal
              and Orengo, Christine A. and Rost, Burkhard and Greene, Casey S. and Mooney, Sean D.
              and Friedberg, Iddo and Radivojac, Predrag and Veljkovic, Nevena},
  title    = {An expanded evaluation of protein function prediction methods shows an improvement in accuracy},
  journal  = {Genome Biology},
  volume   = {17},
  pages    = {184},
  year     = {2016},
  doi      = {10.1186/s13059-016-1037-6},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-166293},
  language = {en},
  abstract = {Background A major bottleneck in our understanding of the molecular underpinnings of life is the assignment of function to proteins. While molecular experiments provide the most reliable annotation of proteins, their relatively low throughput and restricted purview have led to an increasing role for computational function prediction. However, assessing methods for protein function prediction and tracking progress in the field remain challenging. Results We conducted the second critical assessment of functional annotation (CAFA), a timed challenge to assess computational methods that automatically assign protein function. We evaluated 126 methods from 56 research groups for their ability to predict biological functions using Gene Ontology and gene-disease associations using Human Phenotype Ontology on a set of 3681 proteins from 18 species. CAFA2 featured expanded analysis compared with CAFA1, with regards to data set size, variety, and assessment metrics. To review progress in the field, the analysis compared the best methods from CAFA1 to those of CAFA2. Conclusions The top-performing methods in CAFA2 outperformed those from CAFA1. This increased accuracy can be attributed to a combination of the growing number of experimental annotations and improved methods for function prediction. The assessment also revealed that the definition of top-performing algorithms is ontology specific, that different performance metrics can be used to probe the nature of accurate predictions, and the relative diversity of predictions in the biological process and human phenotype ontologies. While there was methodological improvement between CAFA1 and CAFA2, the interpretation of results and usefulness of individual methods remain context-dependent.},
}

% RNA-targeting potential of secreted bacterial effectors (Scientific Reports 7, article 9328).
@article{TawkSharanEulalioetal.2017,
  author   = {Tawk, Caroline and Sharan, Malvika and Eulalio, Ana and Vogel, J{\"o}rg},
  title    = {A systematic analysis of the {RNA}-targeting potential of secreted bacterial effector proteins},
  journal  = {Scientific Reports},
  volume   = {7},
  pages    = {9328},
  year     = {2017},
  doi      = {10.1038/s41598-017-09527-0},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-158815},
  language = {en},
  abstract = {Many pathogenic bacteria utilize specialized secretion systems to deliver proteins called effectors into eukaryotic cells for manipulation of host pathways. The vast majority of known effector targets are host proteins, whereas a potential targeting of host nucleic acids remains little explored. There is only one family of effectors known to target DNA directly, and effectors binding host RNA are unknown. Here, we take a two-pronged approach to search for RNA-binding effectors, combining biocomputational prediction of RNA-binding domains (RBDs) in a newly assembled comprehensive dataset of bacterial secreted proteins, and experimental screening for RNA binding in mammalian cells. Only a small subset of effectors were predicted to carry an RBD, indicating that if RNA targeting was common, it would likely involve new types of RBDs. Our experimental evaluation of effectors with predicted RBDs further argues for a general paucity of RNA binding activities amongst bacterial effectors. We obtained evidence that PipB2 and Lpg2844, effector proteins of Salmonella and Legionella species, respectively, may harbor novel biochemical activities. Our study presenting the first systematic evaluation of the RNA-targeting potential of bacterial effectors offers a basis for discussion of whether or not host RNA is a prominent target of secreted bacterial proteins.},
}