@comment{
  Bibliography of theses submitted to Universitaet Wuerzburg plus one journal
  article. Every entry carries a persistent URN-based url (urn:nbn:de:bvb:20-opus-...).
  Conventions used in this file:
    - one field per line, brace-delimited values;
    - non-ASCII characters are written as LaTeX escapes so that classic
      8-bit BibTeX sorts and labels correctly;
    - acronyms and proper names in titles are brace-protected against
      style-driven lowercasing;
    - the language field describes the language of the work itself; some
      abstracts are given in German although the work is in English.
}

@phdthesis{ZeeshangebMajeed2014,
  author   = {Zeeshan [geb. Majeed], Saman},
  title    = {Implementation of Bioinformatics Methods for {miRNA} and Metabolic Modelling},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-102900},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2014},
  abstract = {Dynamic interactions and their changes are at the forefront of current research in bioinformatics and systems biology. This thesis focusses on two particular dynamic aspects of cellular adaptation: miRNA and metabolites. miRNAs have an established role in hematopoiesis and megakaryocytopoiesis, and platelet miRNAs have potential as tools for understanding basic mechanisms of platelet function. The thesis highlights the possible role of miRNAs in regulating protein translation in platelet lifespan with relevance to platelet apoptosis and identifying involved pathways and potential key regulatory molecules. Furthermore, corresponding miRNA/target mRNAs in murine platelets are identified. Moreover, key miRNAs involved in aortic aneurysm are predicted by similar techniques. The clinical relevance of miRNAs as biomarkers, targets, resulting later translational therapeutics, and tissue specific restrictors of genes expression in cardiovascular diseases is also discussed. In a second part of thesis we highlight the importance of scientific software solution development in metabolic modelling and how it can be helpful in bioinformatics tool development along with software feature analysis such as performed on metabolic flux analysis applications. We proposed the "Butterfly" approach to implement efficiently scientific software programming. Using this approach, software applications were developed for quantitative Metabolic Flux Analysis and efficient Mass Isotopomer Distribution Analysis (MIDA) in metabolic modelling as well as for data management.
    "LS-MIDA" allows easy and efficient MIDA analysis and, with a more powerful algorithm and database, the software "Isotopo" allows efficient analysis of metabolic flows, for instance in pathogenic bacteria (Salmonella, Listeria). All three approaches have been published (see Appendices).},
  subject  = {miRNS},
  language = {en}
}

@phdthesis{Yu2019,
  author   = {Yu, Sung-Huan},
  title    = {Development and application of computational tools for {RNA-Seq} based transcriptome annotations},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-176468},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2019},
  abstract = {In order to understand the regulation of gene expression in organisms, precise genome annotation is essential. In recent years, RNA-Seq has become a potent method for generating and improving genome annotations. However, this approach is time consuming and often inconsistently performed when done manually. In particular, the discovery of non-coding RNAs benefits strongly from the application of RNA-Seq data but requires significant amounts of expert knowledge and is labor-intensive. As a part of my doctoral study, I developed a modular tool called ANNOgesic that can detect numerous transcribed genomic features, including non-coding RNAs, based on RNA-Seq data in a precise and automatic fashion with a focus on bacterial and archaeal species. The software performs numerous analyses and generates several visualizations. It can generate annotations of high-resolution that are hard to produce using traditional annotation tools that are based only on genome sequences. ANNOgesic can detect numerous novel genomic features like UTR-derived small non-coding RNAs for which no other tool has been developed before. ANNOgesic is available under an open source license (ISCL) at https://github.com/Sung-Huan/ANNOgesic.
    My doctoral work not only includes the development of ANNOgesic but also its application to annotate the transcriptome of Staphylococcus aureus HG003 - a strain which has been an insightful model in infection biology. Despite its potential as a model, a complete genome sequence and annotations have been lacking for HG003. In order to fill this gap, the annotations of this strain, including sRNAs and their functions, were generated using ANNOgesic by analyzing differential RNA-Seq data from 14 different samples (two media conditions with seven time points), as well as RNA-Seq data generated after transcript fragmentation. ANNOgesic was also applied to annotate several bacterial and archaeal genomes, and as part of this its high performance was demonstrated. In summary, ANNOgesic is a powerful computational tool for RNA-Seq based annotations and has been successfully applied to several species.},
  subject  = {Genom},
  language = {en}
}

@phdthesis{Wolter2014,
  author   = {Wolter, Steve},
  title    = {Single-molecule localization algorithms in super-resolution microscopy},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-109370},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2014},
  abstract = {Lokalisationsmikroskopie ist eine Methodenklasse der superaufl{\"o}senden Fluoreszenzmikroskopie, deren Methoden sich durch stochastische zeitliche Isolation der Fluoreszenzemission auszeichnen. Das Blinkverhalten von Fluorophoren wird so ver{\"a}ndert, dass gleichzeitige Aktivierung von einander nahen Fluorophoren unwahrscheinlich ist. Bekannte lokalisationsmikroskopische Methoden umfassen dSTORM, STORM, PALM, FPALM, oder GSDIM. Lokalisationsmikroskopie ist von hohem biologischem Interesse, weil sie die Aufl{\"o}sung des Fluoreszenzmikroskops bei minimalem technischem Aufwand um eine Gr{\"o}{\ss}enordnung verbessert. Der verbundene Rechenaufwand ist allerdings erheblich, da Millionen von Fluoreszenzemissionen einzeln mit Nanometergenauigkeit lokalisiert werden m{\"u}ssen.
    Der Rechen- und Implementationsaufwand dieser Auswertung hat die Verbreitung der superaufl{\"o}senden Mikroskopie lange verz{\"o}gert. Diese Arbeit beschreibt meine algorithmische Grundstruktur f{\"u}r die Auswertung lokalisationsmikroskopischer Daten. Die Echtzeitf{\"a}higkeit, d.h. eine Auswertegeschwindigkeit oberhalb der Datenaufnahmegeschwindigkeit an normalen Messaufbauten, meines neuartigen und quelloffenen Programms wird demonstriert. Die Geschwindigkeit wird auf verbrauchermarktg{\"a}ngigen Prozessoren erreicht und dadurch spezialisierte Rechenzentren oder der Einsatz von Grafikkarten vermieden. Die Berechnung wird mit dem allgemein anerkannten Gaussschen Punktantwortmodell und einem Rauschmodell auf Basis der gr{\"o}{\ss}ten Poissonschen Wahrscheinlichkeit durchgef{\"u}hrt. Die algorithmische Grundstruktur wird erweitert, um robuste und optimale Zweifarbenauswertung zu realisieren und damit korrelative Mikroskopie zwischen verschiedenen Proteinen und Strukturen zu erm{\"o}glichen. Durch den Einsatz von kubischen Basissplines wird die Auswertung von dreidimensionalen Proben vereinfacht und stabilisiert, um pr{\"a}zisem Abbilden von mikrometerdicken Proben n{\"a}her zu kommen. Das Grenzverhalten von Lokalisationsalgorithmen bei hohen Emissionsdichten wird untersucht. Abschlie{\ss}end werden Algorithmen f{\"u}r die Anwendung der Lokalisationsmikroskopie auf verbreitete Probleme der Biologie aufgezeigt. Zellul{\"a}re Bewegung und Motilit{\"a}t werden anhand der in vitro Bewegung von Myosin-Aktin-Filamenten studiert. Lebendzellbildgebung mit hellen und stabilen organischen Fluorophoren wird mittels SNAP-tag-Fusionsproteinen realisiert.
    Die Analyse des Aufbaus von Proteinklumpen zeigt, wie Lokalisationsmikroskopie neue quantitative Ans{\"a}tze jenseits reiner Bildgebung bietet.},
  subject  = {Fluoreszenzmikroskopie},
  language = {en}
}

@phdthesis{Wolf2017,
  author   = {Wolf, Beat},
  title    = {Reducing the complexity of {OMICS} data analysis},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-153687},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2017},
  abstract = {The field of genetics faces a lot of challenges and opportunities in both research and diagnostics due to the rise of next generation sequencing (NGS), a technology that allows to sequence DNA increasingly fast and cheap. NGS is not only used to analyze DNA, but also RNA, which is a very similar molecule also present in the cell, in both cases producing large amounts of data. The big amount of data raises both infrastructure and usability problems, as powerful computing infrastructures are required and there are many manual steps in the data analysis which are complicated to execute. Both of those problems limit the use of NGS in the clinic and research, by producing a bottleneck both computationally and in terms of manpower, as for many analyses geneticists lack the required computing skills. Over the course of this thesis we investigated how computer science can help to improve this situation to reduce the complexity of this type of analysis. We looked at how to make the analysis more accessible to increase the number of people that can perform OMICS data analysis (OMICS groups various genomics data-sources). To approach this problem, we developed a graphical NGS data analysis pipeline aimed at a diagnostics environment while still being useful in research in close collaboration with the Human Genetics Department at the University of W{\"u}rzburg. The pipeline has been used in various research papers on covering subjects, including works with direct author participation in genomics, transcriptomics as well as epigenomics.
    To further validate the graphical pipeline, a user survey was carried out which confirmed that it lowers the complexity of OMICS data analysis. We also studied how the data analysis can be improved in terms of computing infrastructure by improving the performance of certain analysis steps. We did this both in terms of speed improvements on a single computer (with notably variant calling being faster by up to 18 times), as well as with distributed computing to better use an existing infrastructure. The improvements were integrated into the previously described graphical pipeline, which itself also was focused on low resource usage. As a major contribution and to help with future development of parallel and distributed applications, for the usage in genetics or otherwise, we also looked at how to make it easier to develop such applications. Based on the parallel object programming model (POP), we created a Java language extension called POP-Java, which allows for easy and transparent distribution of objects. Through this development, we brought the POP model to the cloud, Hadoop clusters and present a new collaborative distributed computing model called FriendComputing. The advances made in the different domains of this thesis have been published in various works specified in this document.},
  subject  = {Bioinformatik},
  language = {en}
}

@phdthesis{Vainshtein2010,
  author   = {Vainshtein, Yevhen},
  title    = {Applying microarray-based techniques to study gene expression patterns: a bio-computational approach},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-51967},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2010},
  abstract = {The regulation and maintenance of iron homeostasis is critical to human health. As a constituent of hemoglobin, iron is essential for oxygen transport and significant iron deficiency leads to anemia. Eukaryotic cells require iron for survival and proliferation.
    Iron is part of hemoproteins, iron-sulfur (Fe-S) proteins, and other proteins with functional groups that require iron as a cofactor. At the cellular level, iron uptake, utilization, storage, and export are regulated at different molecular levels (transcriptional, mRNA stability, translational, and posttranslational). Iron regulatory proteins (IRPs) 1 and 2 post-transcriptionally control mammalian iron homeostasis by binding to iron-responsive elements (IREs), conserved RNA stem-loop structures located in the 5'- or 3'- untranslated regions of genes involved in iron metabolism (e.g. FTH1, FTL, and TFRC). To identify novel IRE-containing mRNAs, we integrated biochemical, biocomputational, and microarray-based experimental approaches. Gene expression studies greatly contribute to our understanding of complex relationships in gene regulatory networks. However, the complexity of array design, production and manipulations are limiting factors, affecting data quality. The use of customized DNA microarrays improves overall data quality in many situations, however, only if for these specifically designed microarrays analysis tools are available. Methods In this project response to the iron treatment was examined under different conditions using bioinformatical methods. This would improve our understanding of an iron regulatory network. For these purposes we used microarray gene expression data. To identify novel IRE-containing mRNAs biochemical, biocomputational, and microarray-based experimental approaches were integrated. IRP/IRE messenger ribonucleoproteins were immunoselected and their mRNA composition was analysed using an IronChip microarray enriched for genes predicted computationally to contain IRE-like motifs. Analysis of IronChip microarray data requires specialized tool which can use all advantages of a customized microarray platform. Novel decision-tree based algorithm was implemented using Perl in IronChip Evaluation Package (ICEP).
    Results IRE-like motifs were identified from genomic nucleic acid databases by an algorithm combining primary nucleic acid sequence and RNA structural criteria. Depending on the choice of constraining criteria, such computational screens tend to generate a large number of false positives. To refine the search and reduce the number of false positive hits, additional constraints were introduced. The refined screen yielded 15 IRE-like motifs. A second approach made use of a reported list of 230 IRE-like sequences obtained from screening UTR databases. We selected 6 out of these 230 entries based on the ability of the lower IRE stem to form at least 6 out of 7 bp. Corresponding ESTs were spotted onto the human or mouse versions of the IronChip and the results were analysed using ICEP. Our data show that the immunoselection/microarray strategy is a feasible approach for screening bioinformatically predicted IRE genes and the detection of novel IRE-containing mRNAs. In addition, we identified a novel IRE-containing gene CDC14A (Sanchez M, et al. 2006). The IronChip Evaluation Package (ICEP) is a collection of Perl utilities and an easy to use data evaluation pipeline for the analysis of microarray data with a focus on data quality of custom-designed microarrays. The package has been developed for the statistical and bioinformatical analysis of the custom cDNA microarray IronChip, but can be easily adapted for other cDNA or oligonucleotide-based designed microarray platforms.
    ICEP uses decision tree-based algorithms to assign quality flags and performs robust analysis based on chip design properties regarding multiple repetitions, ratio cut-off, background and negative controls (Vainshtein Y, et al., 2010).},
  subject  = {Microarray},
  language = {en}
}

@phdthesis{Thakar2006,
  author   = {Thakar, Juilee},
  title    = {Computational models for the study of responses to infections},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-17266},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2006},
  abstract = {In diesem Jahrhundert haben neue experimentelle Techniken und Computer-Verfahren enorme Mengen an Information erzeugt, die bereits viele biologische R{\"a}tsel enth{\"u}llt haben. Doch die Komplexit{\"a}t biologischer Systeme wirft immer weitere neue Fragen auf. Um ein System zu verstehen, bestand der Hauptansatz bis jetzt darin, es in Komponenten zu zerlegen, die untersucht werden k{\"o}nnen. Ein neues Paradigma verkn{\"u}pft die einzelnen Informationsteile, um sie auf globaler Ebene verstehen zu k{\"o}nnen. In der vorgelegten Doktorarbeit habe ich deshalb versucht, infekti{\"o}se Krankheiten mit globalen Methoden (\quotedblbase{}Systembiologie\textquotedblleft{}) bioinformatisch zu untersuchen. Im ersten Teil wird der Apoptose-Signalweg analysiert. Apoptose (Programmierter Zelltod) wird bei verschiedenen Infektionen, zum Beispiel bei Viruserkrankungen, als Abwehrma{\ss}nahme eingesetzt. Die Interaktionen zwischen Proteinen, die \quotesinglbase{}death\textquoteleft{} Dom{\"a}nen beinhalten, wurden untersucht, um folgende Fragen zu kl{\"a}ren: i) wie wird die Spezifit{\"a}t der Interaktionen erzielt? -sie wird durch Adapter erreicht, ii) wie werden Proliferation/ {\"U}berlebenssignale w{\"a}hrend der Aktivierung der Apoptose eingeleitet? - wir fanden Hinweise f{\"u}r eine entscheidende Rolle des RIP Proteins (Rezeptor-Interagierende Serine/Threonine-Proteinkinase 1). Das Modell erlaubte uns, die Interaktions-Oberfl{\"a}chen von RIP vorherzusagen.
    Der Signalweg wurde anschlie{\ss}end auf globaler Ebene mit Simulationen f{\"u}r verschiedene Zeitpunkte analysiert, um die Evolution der Aktivatoren und Inhibitoren des Signalwegs und seine Struktur besser zu verstehen. Weiterhin wird die Signalverarbeitung f{\"u}r Apoptosis-Signalwege in der Maus detailliert modelliert, um den Konzentrationsverlauf der Effektor-Kaspasen vorherzusagen. Weitere experimentelle Messungen von Kaspase-3 und die {\"U}berlebenskurven von Zellen best{\"a}tigen das Modell. Der zweite Teil der Resultate konzentriert sich auf das Phagosom, eine Organelle, die eine entscheidende Rolle bei der Eliminierung von Krankheitserregern spielt. Dies wird am Beispiel von M. tuberculosis veranschaulicht. Die Fragestellung wird wiederum in zwei Aspekten behandelt: i) Um die Prozesse, die durch M. tuberculosis inhibiert werden zu verstehen, haben wir uns auf das Phospholipid-Netzwerk konzentriert, das bei der Unterdr{\"u}ckung oder Aktivierung der Aktin-Polymerisation eine gro{\ss}e Rolle spielt. Wir haben f{\"u}r diese Netzwerkanalyse eine Simulation f{\"u}r verschiedene Zeitpunkte {\"a}hnlich wie in Teil eins angewandt. ii) Es wird vermutet, dass Aktin-Polymere bei der Fusion des Phagosoms mit dem Lysosom eine Rolle spielen. Um diese Hypothese zu untersuchen, wurde ein in silico Modell von uns entwickelt. Wir fanden heraus, dass in der Anwesenheit von Aktin-Polymeren die Suchzeit f{\"u}r das Lysosom um das F{\"u}nffache reduziert wurde. Weiterhin wurden die Effekte der L{\"a}nge der Aktin-Polymere, die Gr{\"o}{\ss}e der Lysosomen sowie der Phagosomen und etliche andere Modellparameter analysiert. Nach der Untersuchung eines Signalwegs und einer Organelle f{\"u}hrte der n{\"a}chste Schritt zur Untersuchung eines komplexen biologischen Systems der Infektabwehr. Dies wurde am Beispiel der Wirt-Pathogen Interaktion bei Bordetella pertussis und Bordetella bronchiseptica dargestellt.
    Die geringe Menge verf{\"u}gbarer quantitativer Daten war der ausschlaggebende Faktor bei unserer Modellwahl. F{\"u}r die dynamische Simulation wurde ein selbst entwickeltes Bool'sches Modell verwendet. Die Ergebnisse sagen wichtige Faktoren bei der Pathologie von Bordetellen hervor, besonders die Bedeutung der Th1 assoziierten Antworten und dagegen nicht der Th2 assoziierten Antworten f{\"u}r die Eliminierung des Pathogens. Einige der quantitativen Vorhersagen wurden durch Experimente wie die Untersuchung des Verlaufs einer Infektion in verschiedenen Mutanten und Wildtyp-M{\"a}usen {\"u}berpr{\"u}ft. Die begrenzte Verf{\"u}gbarkeit kinetischer Daten war der kritische Faktor bei der Auswahl der computer-gest{\"u}tzten Modelle. Der Erfolg unserer Modelle konnte durch den Vergleich mit experimentellen Beobachtungen belegt werden. Die vergleichenden Modelle in Kapitel 6 und 9 k{\"o}nnen zur Untersuchung neuer Wirt-Pathogen Interaktionen verwendet werden. Beispielsweise f{\"u}hrt in Kapitel 6 die Analyse von Inhibitoren und inhibitorischer Signalwege aus drei Organismen zur Identifikation wichtiger regulatorischer Zentren in komplexen Organismen und in Kapitel 9 erm{\"o}glicht die Identifikation von drei Phasen in B. bronchiseptica und der Inhibition von IFN-$\gamma$ durch den Faktor TTSS die Untersuchung {\"a}hnlicher Phasen und die Inhibition von IFN-$\gamma$ in B. pertussis. Eine weitere wichtige Bedeutung bekommen diese Modelle durch die m{\"o}gliche Identifikation neuer, essentieller Komponenten in Wirt-Pathogen Interaktionen.
    In silico Modelle der Effekte von Deletionen zeigen solche Komponenten auf, die anschlie{\ss}end durch experimentelle Mutationen weiter untersucht werden k{\"o}nnen.},
  subject  = {Bordetella pertussis},
  language = {en}
}

@mastersthesis{Selig2007,
  type     = {Master Thesis},
  author   = {Selig, Christian},
  title    = {The {ITS2 Database} - Application and Extension},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-23895},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2007},
  abstract = {Der internal transcribed spacer 2 (ITS2) des ribosomalen Genrepeats ist ein zunehmend wichtiger phylogenetischer Marker, dessen RNA-Sekund{\"a}rstruktur innerhalb vieler eukaryontischer Organismen konserviert ist. Die ITS2-Datenbank hat zum Ziel, eine umfangreiche Ressource f{\"u}r ITS2-Sequenzen und -Sekund{\"a}rstrukturen auf Basis direkter thermodynamischer als auch homologiemodellierter RNA-Faltung zu sein. Ergebnisse: (a) Eine komplette Neufassung der urspr{\"u}nglichen die ITS2-Datenbank generierenden Skripte, angewandt auf einen aktuellen NCBI-Datensatz, deckte mehr als 65.000 ITS2-Strukturen auf. Dies verdoppelt den Inhalt der urspr{\"u}nglichen Datenbank und verdreifacht ihn, wenn partielle Strukturen mit einbezogen werden. (b) Die Endbenutzer-Schnittstelle wurde neu geschrieben, erweitert und ist jetzt in der Lage, benutzerdefinierte Homologiemodellierungen durchzuf{\"u}hren. (c) Andere m{\"o}glichen RNA-Strukturaufkl{\"a}rungsmethoden (suboptimales und formenbasiertes Falten) sind hilfreich, k{\"o}nnen aber Homologiemodellierung nicht ersetzen. (d) Ein Anwendungsfall der ITS2-Datenbank in Zusammenhang mit anderen am Lehrstuhl entwickelten Werkzeugen gab Einblick in die Verwendung von ITS2 f{\"u}r molekulare Phylogenie.},
  subject  = {Phylogenie},
  language = {en}
}

@article{SchulzeTillichDandekaretal.2013,
  author   = {Schulze, Katja and Tillich, Ulrich M. and Dandekar, Thomas and Frohme, Marcus},
  title    = {{PlanktoVision} - an automated analysis system for the identification of phytoplankton},
  journal  = {BMC Bioinformatics},
  doi      = {10.1186/1471-2105-14-115},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-96395},
  year     = {2013},
  abstract = {Background Phytoplankton communities are often used as a marker for the determination of fresh water quality. The routine analysis, however, is very time consuming and expensive as it is carried out manually by trained personnel. The goal of this work is to develop a system for an automated analysis. Results A novel open source system for the automated recognition of phytoplankton by the use of microscopy and image analysis was developed. It integrates the segmentation of the organisms from the background, the calculation of a large range of features, and a neural network for the classification of imaged organisms into different groups of plankton taxa. The analysis of samples containing 10 different taxa showed an average recognition rate of 94.7\% and an average error rate of 5.5\%. The presented system has a flexible framework which easily allows expanding it to include additional taxa in the future. Conclusions The implemented automated microscopy and the new open source image analysis system - PlanktoVision - showed classification results that were comparable or better than existing systems and the exclusion of non-plankton particles could be greatly improved.
    The software package is published as free software and is available to anyone to help make the analysis of water quality more reproducible and cost effective.},
  language = {en}
}

@phdthesis{PradaSalcedo2018,
  author   = {Prada Salcedo, Juan Pablo},
  title    = {Image Processing and other bioinformatic tools for Neurobiology},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-157721},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2018},
  abstract = {Neurobiology is widely supported by bioinformatics. Due to the big amount of data generated from the biological side a computational approach is required. This thesis presents four different cases of bioinformatic tools applied to the service of Neurobiology. The first two tools presented belong to the field of image processing. In the first case, we make use of an algorithm based on the wavelet transformation to assess calcium activity events in cultured neurons. We designed an open source tool to assist neurobiology researchers in the analysis of calcium imaging videos. Such analysis is usually done manually which is time consuming and highly subjective. Our tool speeds up the work and offers the possibility of an unbiased detection of the calcium events. Even more important is that our algorithm not only detects the neuron spiking activity but also local spontaneous activity which is normally discarded because it is considered irrelevant. We showed that this activity is determinant in the calcium dynamics in neurons and it is involved in important functions like signal modulation and memory and learning. The second project is a segmentation task. In our case we are interested in segmenting the neuron nuclei in electron microscopy images of c.elegans. Marking these structures is necessary in order to reconstruct the connectome of the organism. C.elegans is a great study case due to the simplicity of its nervous system (only 502 neurons). This worm, despite its simplicity has taught us a lot about neuronal mechanisms.
    There is still a lot of information we can extract from the c.elegans, therein lies the importance of reconstructing its connectome. There is a current version of the c.elegans connectome but it was done by hand and on a single subject which leaves a big room for errors. By automatizing the segmentation of the electron microscopy images we guarantee an unbiased approach and we will be able to verify the connectome on several subjects. For the third project we moved from image processing applications to biological modeling. Because of the high complexity of even small biological systems it is necessary to analyze them with the help of computational tools. The term in silico was coined to refer to such computational models of biological systems. We designed an in silico model of the TNF (Tumor necrosis factor) ligand and its two principal receptors. This biological system is of high relevance because it is involved in the inflammation process. Inflammation is of most importance as protection mechanism but it can also lead to complicated diseases (e.g. cancer). Chronic inflammation processes can be particularly dangerous in the brain. In order to better understand the dynamics that govern the TNF system we created a model using the BioNetGen language. This is a rule based language that allows one to simulate systems where multiple agents are governed by a single rule. Using our model we characterized the TNF system and hypothesized about the relation of the ligand with each of the two receptors. Our hypotheses can be later used to define drug targets in the system or possible treatments for chronic inflammation or lack of the inflammatory response. The final project deals with the protein folding problem. In our organism proteins are folded all the time, because only in their folded conformation are proteins capable of doing their job (with some very few exceptions).
    This folding process presents a great challenge for science because it has been shown to be an NP problem. NP means non deterministic Polynomial time problem. This basically means that this kind of problems cannot be efficiently solved. Nevertheless, somehow the body is capable of folding a protein in just milliseconds. This phenomenon puzzles not only biologists but also mathematicians. In mathematics NP problems have been studied for a long time and it is known that given the solution to one NP problem we could solve many of them (i.e. NP-complete problems). If we manage to understand how nature solves the protein folding problem then we might be able to apply this solution to many other problems. Our research intends to contribute to this discussion. Unfortunately, not to explain how nature solves the protein folding problem, but to explain that it does not solve the problem at all. This seems contradictory since I just mentioned that the body folds proteins all the time, but our hypothesis is that the organisms have learned to solve a simplified version of the NP problem. Nature does not solve the protein folding problem in its full complexity. It simply solves a small instance of the problem. An instance which is as simple as a convex optimization problem. We formulate the protein folding problem as an optimization problem to illustrate our claim and present some toy examples to illustrate the formulation. If our hypothesis is true, it means that protein folding is a simple problem. So we just need to understand and model the conditions of the vicinity inside the cell at the moment the folding process occurs. Once we understand this starting conformation and its influence in the folding process we will be able to design treatments for amyloid diseases such as Alzheimer's and Parkinson's. In summary this thesis project contributes to the neurobiology research field from four different fronts.
    Two are practical contributions with immediate benefits, such as the calcium imaging video analysis tool and the TNF in silico model. The neuron nuclei segmentation is a contribution for the near future. A step towards the full annotation of the c.elegans connectome and later for the reconstruction of the connectome of other species. And finally, the protein folding project is a first impulse to change the way we conceive the protein folding process in nature. We try to point future research in a novel direction, where the amino code is not the most relevant characteristic of the process but the conditions within the cell.},
  subject  = {Bildverarbeitung},
  language = {en}
}

@phdthesis{Pils2005,
  author   = {Pils, Birgit},
  title    = {Insights into the evolution of protein domains give rise to improvements of function prediction},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-16805},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2005},
  abstract = {The growing number of uncharacterised sequences in public databases has turned the prediction of protein function into a challenging research field. Traditional annotation methods are often error-prone due to the small subset of proteins with experimentally verified function. Goal of this thesis was to analyse the function and evolution of protein domains in order to understand molecular processes in the cell. The focus was on signalling domains of little understood function, as well as on functional sites of protein domains in general. Glucosaminidases (GlcNAcases) represent key enzymes in signal transduction pathways. Together with glucosamine transferases, they serve as molecular switches, similar to kinases and phosphatases. Little was known about the molecular function and structure of the GlcNAcases. In this thesis, the GlcNAcases were identified as remote homologues of N-acetyltransferases.
    By comparing the homologous sequences, I was able to predict functional sites of the GlcNAcase family and to identify the GlcNAcases as the first family member of the acetyltransferase superfamily with a distinct catalytic mechanism, which is not involved in the transfer of acetyl groups. In a similar approach, the sensor domain of a plant hormone receptor was studied. I was able to predict putative ligand-binding sites by comparing evolutionary constraints in functionally diverged subfamilies. Most of the putative ligand-binding sites have been experimentally confirmed in the meantime. Due to the importance of enzymes involved in cellular signalling, it seems impossible to find substitutions of catalytic amino acids that turn them catalytically inactive. Nevertheless, by scanning catalytic positions of the protein tyrosine phosphatase families, I found many inactive domains among single domain and tandem domain phosphatases in metazoan proteomes. In addition, I found that inactive phosphatases are conserved throughout evolution, which led to the question about the function of these catalytically inactive phosphatase domains. An analysis of evolutionary site rates of amino acid substitutions revealed a cluster of conserved residues in the apparently redundant domain of tandem phosphatases. This putative regulatory center might be responsible for the experimentally verified dimerization of the active and inactive domain in order to control the catalytic activity of the active phosphatase domain. Moreover, I detected a subgroup of inactive phosphatases, which presumably functions in substrate recognition, based on different evolutionary site rates within the phosphatase family. The characterization of these new regulatory modules in the phosphatase family raised the question whether inactivation of enzymes is a more general evolutionary mechanism to enlarge signalling pathways and whether inactive domains are also found in other enzyme families.
    A large-scale analysis of substitutions at catalytic positions of enzymatic domains was performed in this work. I identified many domains with inactivating substitutions in various enzyme families. Signalling domains harbour a particular high occurrence of catalytically inactive domains indicating that these domains have evolved to modulate existing regulatory pathways. Furthermore, it was shown that inactivation of enzymes by single substitutions happened multiple times independently in evolution. The surprising variability of amino acids at catalytic positions was decisive for a subsequent analysis of the diversity of functional sites in general. Using functional residues extracted from structural complexes I could show that functional sites of protein domains do not only vary in their type of amino acid but also in their structural location within the domain. In the process of evolution, protein domains have arisen from duplication events and subsequently adapted to new binding partners and developed new functions, which is reflected in the high variability of functional sites. However, great differences exist between domain families. The analysis demonstrated that functional sites of nuclear domains are more conserved than functional sites of extracellular domains. Furthermore, the type of ligand influences the degree of conservation, for example ion binding sites are more conserved than peptide binding sites. The work presented in this thesis has led to the detection of functional sites in various protein domains involved in signalling pathways and it has resulted in insights into the molecular function of those domains. In addition, properties of functional sites of protein domains were revealed. This knowledge can be used in the future to improve the prediction of protein function and to identify functional sites of proteins.},
  subject  = {Dom{\"a}ne},
  language = {en}
}