@phdthesis{Dorin2022,
  author    = {Dorin, Michael},
  title     = {The Relationship Between Software Complicacy and Software Reliability},
  doi       = {10.25972/OPUS-28308},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-283085},
  school      = {Universit{\"a}t W{\"u}rzburg},
  year      = {2022},
  abstract  = {An enduring engineering problem is the creation of unreliable software leading to unreliable systems. One reason for this is source code is written in a complicated manner making it too hard for humans to review and understand. Complicated code leads to other issues beyond dependability, such as expanded development efforts and ongoing difficulties with maintenance, ultimately costing developers and users more money. There are many ideas regarding where blame lies in the reation of buggy and unreliable systems. One prevalent idea is the selected life cycle model is to blame. The oft-maligned "waterfall" life cycle model is a particularly popular recipient of blame. In response, many organizations changed their life cycle model in hopes of addressing these issues. Agile life cycle models have become very popular, and they promote communication between team members and end users. In theory, this communication leads to fewer misunderstandings and should lead to less complicated and more reliable code. Changing the life cycle model can indeed address communications ssues, which can resolve many problems with understanding requirements. However, most life cycle models do not specifically address coding practices or software architecture. Since lifecycle models do not address the structure of the code, they are often ineffective at addressing problems related to code complicacy. This dissertation answers several research questions concerning software complicacy, beginning with an investigation of traditional metrics and static analysis to evaluate their usefulness as measurement tools. This dissertation also establishes a new concept in applied linguistics by creating a measurement of software complicacy based on linguistic economy. Linguistic economy describes the efficiencies of speech, and this thesis shows the applicability of linguistic economy to software. Embedded in each topic is a discussion of the ramifications of overly complicated software, including the relationship of complicacy to software faults. Image recognition using machine learning is also investigated as a potential method of identifying problematic source code. The central part of the work focuses on analyzing the source code of hundreds of different projects from different areas. A static analysis was performed on the source code of each project, and traditional software metrics were calculated. Programs were also analyzed using techniques developed by linguists to measure expression and statement complicacy and identifier complicacy. Professional software engineers were also directly surveyed to understand mainstream perspectives. This work shows it is possible to use traditional metrics as indicators of potential project bugginess. This work also discovered it is possible to use image recognition to identify problematic pieces of source code. Finally, this work discovered it is possible to use linguistic methods to determine which statements and expressions are least desirable and more complicated for programmers. This work's principle conclusion is that there are multiple ways to discover traits indicating a project or a piece of source code has characteristics of being buggy. Traditional metrics and static analysis can be used to gain some understanding of software complicacy and bugginess potential. Linguistic economy demonstrates a new tool for measuring software complicacy, and machine learning can predict where bugs may lie in source code. The significant implication of this work is developers can recognize when a project is becoming buggy and take practical steps to avoid creating buggy projects.},
  subject      = {Softwareentwicklung},
  language  = {en}
}