% Journal article: Natemeyer and Wachsmuth, Computational Optimization and Applications 80(2), 2021.
% The url field holds an nbn-resolving.de URN link; the doi field identifies the published article.
@article{NatemeyerWachsmuth2021,
  author    = {Natemeyer, Carolin and Wachsmuth, Daniel},
  title     = {A proximal gradient method for control problems with non-smooth and non-convex control cost},
  journal   = {Computational Optimization and Applications},
  volume    = {80},
  number    = {2},
  pages     = {639--677},
  year      = {2021},
  issn      = {1573-2894},
  doi       = {10.1007/s10589-021-00308-0},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-269069},
  abstract  = {We investigate the convergence of the proximal gradient method applied to control problems with non-smooth and non-convex control cost. Here, we focus on control cost functionals that promote sparsity, which includes functionals of L\(^{p}\)-type for \(p\in [0,1)\). We prove stationarity properties of weak limit points of the method. These properties are weaker than those provided by Pontryagin's maximum principle and weaker than L-stationarity.},
  language  = {en}
}
% Journal article: Kanzow and Mehlitz, Journal of Optimization Theory and Applications 195(2), 2022.
% The url field holds an nbn-resolving.de URN link; the doi field identifies the published article.
@article{KanzowMehlitz2022,
  author    = {Kanzow, Christian and Mehlitz, Patrick},
  title     = {Convergence properties of monotone and nonmonotone proximal gradient methods revisited},
  journal   = {Journal of Optimization Theory and Applications},
  volume    = {195},
  number    = {2},
  pages     = {624--646},
  year      = {2022},
  issn      = {0022-3239},
  doi       = {10.1007/s10957-022-02101-3},
  url       = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-324351},
  abstract  = {Composite optimization problems, where the sum of a smooth and a merely lower semicontinuous function has to be minimized, are often tackled numerically by means of proximal gradient methods as soon as the lower semicontinuous part of the objective function is of simple enough structure. The available convergence theory associated with these methods (mostly) requires the derivative of the smooth part of the objective function to be (globally) Lipschitz continuous, and this might be a restrictive assumption in some practically relevant scenarios. In this paper, we readdress this classical topic and provide convergence results for the classical (monotone) proximal gradient method and one of its nonmonotone extensions which are applicable in the absence of (strong) Lipschitz assumptions. This is possible since, for the price of forgoing convergence rates, we omit the use of descent-type lemmas in our analysis.},
  language  = {en}
}