% PhD thesis record. The url field holds a URN resolver link (nbn-resolving.de).
% Field values are kept verbatim; the long abstract is placed last for readability.
@phdthesis{Nilla2012,
  author   = {Nilla, Jaya Santosh Chakravarthy},
  title    = {An Integrated Knowledgebase and Network Analysis Applied on Platelets and Other Cell Types},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2012},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-85730},
  language = {en},
  subject  = {Systembiologie},
  abstract = {Systems biology looks for emergent system effects from large scale assemblies of molecules and data, for instance in the human platelets. However, the computational efforts in all steps before such insights are possible can hardly be under estimated. In practice this involves numerous programming tasks, the establishment of new database systems but as well their maintenance, curation and data validation. Furthermore, network insights are only possible if strong algorithms decipher the interactions, decoding the hidden system effects. This thesis and my work are all about these challenges. To answer this requirement, an integrated platelet network, PlateletWeb, was assembled from different sources and further analyzed for signaling in a systems biological manner including multilevel data integration and visualization. PlateletWeb is an integrated network database and was established by combining the data from recent platelet proteome and transcriptome (SAGE) studies. The information on protein-protein interactions and kinase-substrate relationships extracted from bioinformatical databases as well as published literature were added to this resource. Moreover, the mass spectrometry-based platelet phosphoproteome was combined with site-specific phosphorylation/ dephosphorylation information and then enhanced with data from Phosphosite and complemented by bioinformatical sequence analysis for site-specific kinase predictions. The number of catalogued platelet proteins was increased by over 80\% as compared to the previous version. 
The integration of annotations on kinases, protein domains, transmembrane regions, Gene Ontology, disease associations and drug targets provides ample functional tools for platelet signaling analysis. The PlateletWeb resource provides a novel systems biological workbench for the analysis of platelet signaling in the functional context of protein networks. By comprehensive exploration, over 15000 phosphorylation sites were found, out of which 2500 have the corresponding kinase associations. The network motifs were also investigated in this anucleate cell and characterize signaling modules based on integrated information on phosphorylation and protein-protein interactions. Furthermore, many algorithmic approaches have been introduced, including an exact approach (heinz) based on integer linear programming. At the same time, the concept of semantic similarities between two genes using Gene Ontology (GO) annotations has become an important basis for many analytical approaches in bioinformatics. Assuming that a higher number of semantically similar gene functional annotations reflect biologically more relevant interactions, an edge score was devised for functional network analysis. Bringing these two approaches together, the edge score, based on the GO similarity, and the node score, based on the expression of the proteins in the analyzed cell type (e.g. data from proteomic studies), the functional module as a maximum-scoring sub network in large protein-protein interaction networks was identified. This method was applied to various proteome datasets (different types of blood cells, embryonic stem cells) to identify protein modules that functionally characterize the respective cell type. This scalable method allows a smooth integration of data from various sources and retrieves biologically relevant signaling modules.},
}