% Sharan et al. 2017: APRICOT pipeline paper, Nucleic Acids Research 45(11), article e96.
% The redundant "series" field (a copy of "journal") was dropped; acronyms in the
% title are brace-protected so sentence-casing styles keep their capitalisation.
@article{SharanFoerstnerEulalioetal.2017,
  author   = {Sharan, Malvika and F{\"o}rstner, Konrad U. and Eulalio, Ana and Vogel, J{\"o}rg},
  title    = {{APRICOT}: an integrated computational pipeline for the sequence-based identification and characterization of {RNA}-binding proteins},
  journal  = {Nucleic Acids Research},
  volume   = {45},
  number   = {11},
  pages    = {e96},
  year     = {2017},
  doi      = {10.1093/nar/gkx137},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-157963},
  language = {en},
  abstract = {RNA-binding proteins (RBPs) have been established as core components of several post-transcriptional gene regulation mechanisms. Experimental techniques such as cross-linking and co-immunoprecipitation have enabled the identification of RBPs, RNA-binding domains (RBDs) and their regulatory roles in the eukaryotic species such as human and yeast in large-scale. In contrast, our knowledge of the number and potential diversity of RBPs in bacteria is poorer due to the technical challenges associated with the existing global screening approaches. We introduce APRICOT, a computational pipeline for the sequence-based identification and characterization of proteins using RBDs known from experimental studies. The pipeline identifies functional motifs in protein sequences using position-specific scoring matrices and Hidden Markov Models of the functional domains and statistically scores them based on a series of sequence-based features. Subsequently, APRICOT identifies putative RBPs and characterizes them by several biological properties. Here we demonstrate the application and adaptability of the pipeline on large-scale protein sets, including the bacterial proteome of Escherichia coli. APRICOT showed better performance on various datasets compared to other existing tools for the sequence-based prediction of RBPs by achieving an average sensitivity and specificity of 0.90 and 0.91 respectively. The command-line tool and its documentation are available at https://pypi.python.org/pypi/bio-apricot.},
}

% Wagner et al. 2014: morFeus ortholog search paper, BMC Bioinformatics 15.
% NOTE(review): 263 was exported as "number"; the DOI suffix (15-263) indicates it is
% the article number rather than an issue, so it is stored in "pages" -- confirm
% against the publisher record. The redundant "series" field was dropped.
@article{WagnerVolkmerSharanetal.2014,
  author   = {Wagner, Ines and Volkmer, Michael and Sharan, Malvika and Villaveces, Jose M. and Oswald, Felix and Surendranath, Vineeth and Habermann, Bianca H.},
  title    = {{morFeus}: a web-based program to detect remotely conserved orthologs using symmetrical best hits and orthology network scoring},
  journal  = {BMC Bioinformatics},
  volume   = {15},
  pages    = {263},
  year     = {2014},
  doi      = {10.1186/1471-2105-15-263},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-115590},
  language = {en},
  abstract = {Background: Searching the orthologs of a given protein or DNA sequence is one of the most important and most commonly used Bioinformatics methods in Biology. Programs like BLAST or the orthology search engine Inparanoid can be used to find orthologs when the similarity between two sequences is sufficiently high. They however fail when the level of conservation is low. The detection of remotely conserved proteins oftentimes involves sophisticated manual intervention that is difficult to automate. Results: Here, we introduce morFeus, a search program to find remotely conserved orthologs. Based on relaxed sequence similarity searches, morFeus selects sequences based on the similarity of their alignments to the query, tests for orthology by iterative reciprocal BLAST searches and calculates a network score for the resulting network of orthologs that is a measure of orthology independent of the E-value. Detecting remotely conserved orthologs of a protein using morFeus thus requires no manual intervention. We demonstrate the performance of morFeus by comparing it to state-of-the-art orthology resources and methods. We provide an example of remotely conserved orthologs, which were experimentally shown to be functionally equivalent in the respective organisms and therefore meet the criteria of the orthology-function conjecture. Conclusions: Based on our results, we conclude that morFeus is a powerful and specific search method for detecting remotely conserved orthologs.},
}