% Doctoral theses. Each entry carries both a DOI and a URN resolver link (url).
% Field values are stored without presentation markup; the bibliography style
% decides the rendering.

% Niebler 2019: thesis on extracting semantic relatedness from tagging and
% navigation data. The abstract is kept verbatim as exported.
@phdthesis{Niebler2019,
  author   = {Niebler, Thomas},
  title    = {Extracting and Learning Semantics from Social Web Data},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2019},
  doi      = {10.25972/OPUS-17866},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-178666},
  abstract = {Making machines understand natural language is a dream of mankind that existed since a very long time. Early attempts at programming machines to converse with humans in a supposedly intelligent way with humans relied on phrase lists and simple keyword matching. However, such approaches cannot provide semantically adequate answers, as they do not consider the specific meaning of the conversation. Thus, if we want to enable machines to actually understand language, we need to be able to access semantically relevant background knowledge. For this, it is possible to query so-called ontologies, which are large networks containing knowledge about real-world entities and their semantic relations. However, creating such ontologies is a tedious task, as often extensive expert knowledge is required. Thus, we need to find ways to automatically construct and update ontologies that fit human intuition of semantics and semantic relations. More specifically, we need to determine semantic entities and find relations between them. While this is usually done on large corpora of unstructured text, previous work has shown that we can at least facilitate the first issue of extracting entities by considering special data such as tagging data or human navigational paths. Here, we do not need to detect the actual semantic entities, as they are already provided because of the way those data are collected. Thus we can mainly focus on the problem of assessing the degree of semantic relatedness between tags or web pages. However, there exist several issues which need to be overcome, if we want to approximate human intuition of semantic relatedness.
For this, it is necessary to represent words and concepts in a way that allows easy and highly precise semantic characterization. This also largely depends on the quality of data from which these representations are constructed. In this thesis, we extract semantic information from both tagging data created by users of social tagging systems and human navigation data in different semantic-driven social web systems. Our main goal is to construct high quality and robust vector representations of words which can the be used to measure the relatedness of semantic concepts. First, we show that navigation in the social media systems Wikipedia and BibSonomy is driven by a semantic component. After this, we discuss and extend methods to model the semantic information in tagging data as low-dimensional vectors. Furthermore, we show that tagging pragmatics influences different facets of tagging semantics. We then investigate the usefulness of human navigational paths in several different settings on Wikipedia and BibSonomy for measuring semantic relatedness. Finally, we propose a metric-learning based algorithm in adapt pre-trained word embeddings to datasets containing human judgment of semantic relatedness. This work contributes to the field of studying semantic relatedness between words by proposing methods to extract semantic relatedness from web navigation, learn highquality and low-dimensional word representations from tagging data, and to learn semantic relatedness from any kind of vector representation by exploiting human feedback. Applications first and foremest lie in ontology learning for the Semantic Web, but also semantic search or query expansion.},
  subject  = {Semantik},
  language = {en},
}

% Budig 2018: thesis that also appeared as a book from a university press.
% - school names the degree-granting university, consistent with the other
%   theses in this file; the press belongs in publisher only.
%   NOTE(review): confirm the granting institution against the title page.
% - edition is a bare number so the style supplies the ordinal and the word
%   for "edition" in the document language (no hard-coded German wording).
% - pages holds a page count (front matter, body), not a page range.
%   NOTE(review): consider the pagetotal field if this file targets biblatex.
% - isbn and the ISBN embedded in the doi differ; presumably print versus
%   online edition. NOTE(review): verify.
@phdthesis{Budig2018,
  author    = {Budig, Benedikt},
  title     = {Extracting Spatial Information from Historical Maps: Algorithms and Interaction},
  school    = {Universit{\"a}t W{\"u}rzburg},
  edition   = {1},
  publisher = {W{\"u}rzburg University Press},
  address   = {W{\"u}rzburg},
  pages     = {viii, 160},
  year      = {2018},
  isbn      = {978-3-95826-092-4},
  doi       = {10.25972/WUP-978-3-95826-093-1},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-160955},
  abstract  = {Historical maps are fascinating documents and a valuable source of information for scientists of various disciplines. Many of these maps are available as scanned bitmap images, but in order to make them searchable in useful ways, a structured representation of the contained information is desirable. This book deals with the extraction of spatial information from historical maps. This cannot be expected to be solved fully automatically (since it involves difficult semantics), but is also too tedious to be done manually at scale. The methodology used in this book combines the strengths of both computers and humans: it describes efficient algorithms to largely automate information extraction tasks and pairs these algorithms with smart user interactions to handle what is not understood by the algorithm. The effectiveness of this approach is shown for various kinds of spatial documents from the 16th to the early 20th century.},
  subject   = {Karte},
  language  = {en},
}

% Nogatz 2023: thesis on domain-specific languages in Prolog.
% The language name in the title is brace-protected so that sentence-casing
% styles do not lowercase it.
@phdthesis{Nogatz2023,
  author   = {Nogatz, Falco},
  title    = {Defining and Implementing Domain-Specific Languages with {Prolog}},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2023},
  doi      = {10.25972/OPUS-30187},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-301872},
  abstract = {The landscape of today's programming languages is manifold. With the diversity of applications, the difficulty of adequately addressing and specifying the used programs increases. This often leads to newly designed and implemented domain-specific languages. They enable domain experts to express knowledge in their preferred format, resulting in more readable and concise programs.
Due to its flexible and declarative syntax without reserved keywords, the logic programming language Prolog is particularly suitable for defining and embedding domain-specific languages. This thesis addresses the questions and challenges that arise when integrating domain-specific languages into Prolog. We compare the two approaches to define them either externally or internally, and provide assisting tools for each. The grammar of a formal language is usually defined in the extended Backus-Naur form. In this work, we handle this formalism as a domain-specific language in Prolog, and define term expansions that allow to translate it into equivalent definite clause grammars. We present the package library(dcg4pt) for SWI-Prolog, which enriches them by an additional argument to automatically process the term's corresponding parse tree. To simplify the work with definite clause grammars, we visualise their application by a web-based tracer. The external integration of domain-specific languages requires the programmer to keep the grammar, parser, and interpreter in sync. In many cases, domain-specific languages can instead be directly embedded into Prolog by providing appropriate operator definitions. In addition, we propose syntactic extensions for Prolog to expand its expressiveness, for instance to state logic formulas with their connectives verbatim. This allows to use all tools that were originally written for Prolog, for instance code linters and editors with syntax highlighting. We present the package library(plammar), a standard-compliant parser for Prolog source code, written in Prolog. It is able to automatically infer from example sentences the required operator definitions with their classes and precedences as well as the required Prolog language extensions. As a result, we can automatically answer the question: Is it possible to model these example sentences as valid Prolog clauses, and how?
We discuss and apply the two approaches to internal and external integrations for several domain-specific languages, namely the extended Backus-Naur form, GraphQL, XPath, and a controlled natural language to represent expert rules in if-then form. The created toolchain with library(dcg4pt) and library(plammar) yields new application opportunities for static Prolog source code analysis, which we also present.},
  subject  = {PROLOG},
  language = {en},
}