% Bachelor thesis record; uses the standard "mastersthesis" entry type, with the "type" field overriding the printed thesis label.
@mastersthesis{Hofmann2020,
  author   = {Hofmann, Jan},
  title    = {Deep Reinforcement Learning for Configuration of Time-Sensitive-Networking},
  type     = {Bachelor Thesis},
  school   = {Universit{\"a}t W{\"u}rzburg},
  year     = {2020},
  doi      = {10.25972/OPUS-21595},
  url      = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-215953},
  language = {en},
  subject  = {Reinforcement Learning},
  abstract = {Reliable, deterministic real-time communication is fundamental to most industrial systems today. In many other domains Ethernet has become the most common platform for communication networks, but has been unsuitable to satisfy the requirements of industrial networks for a long time. This has changed with the introduction of Time-Sensitive-Networking (TSN), a set of standards utilizing Ethernet to implement deterministic real-time networks. This makes Ethernet a viable alternative to the expensive fieldbus systems commonly used in industrial environments. However, TSN is not a silver bullet. Industrial networks are a complex and highly dynamic environment and the configuration of TSN, especially with respect to latency, is a challenging but crucial task. Various approaches have been pursued for the configuration of TSN in dynamic industrial environments. Optimization techniques like Linear Programming (LP) are able to determine an optimal configuration for a given network, but the time consumption exponentially increases with the complexity of the environment. Machine Learning (ML) has become widely popular in the last years and is able to approximate a near-optimal TSN configuration for networks of different complexity. Yet, ML models are usually trained in a supervised manner which requires large amounts of data that have to be generated for the specific environment. Therefore, supervised methods are not scalable and do not adapt to changing dynamics of the network environment. To address these issues, this work proposes a Deep Reinforcement Learning (DRL) approach to the configuration of TSN in industrial networks.
DRL combines two different disciplines, Deep Learning (DL) and Reinforcement Learning (RL), and has gained considerable traction in the last years due to breakthroughs in various domains. RL is supposed to autonomously learn a challenging task like the configuration of TSN without requiring any training data. The addition of DL allows to apply well-studied RL methods to a complex environment such as dynamic industrial networks. There are two major contributions made in this work. In the first step, an interactive environment is proposed which allows for the simulation and configuration of industrial networks using basic TSN mechanisms. The environment provides an interface that allows to apply various DRL methods to the problem of TSN configuration. The second contribution of this work is an in-depth study on the application of two fundamentally different DRL methods to the proposed environment. Both methods are evaluated on networks of different complexity and the results are compared to the ground truth and to the results of two supervised ML approaches. Ultimately, this work investigates if DRL can adapt to changing dynamics of the environment in a more scalable manner than supervised methods.},
}